Merge branch 'master' into zk_inject_timeout

Raúl Marín 2023-03-27 19:46:10 +02:00 committed by GitHub
commit ff209db129
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
333 changed files with 6890 additions and 2644 deletions

View File

@ -180,7 +180,6 @@ if (NOT CMAKE_BUILD_TYPE_UC STREQUAL "RELEASE")
# Can be lld or ld-lld or lld-13 or /path/to/lld.
if (LINKER_NAME MATCHES "lld" AND OS_LINUX)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--gdb-index")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,--gdb-index")
message (STATUS "Adding .gdb-index via --gdb-index linker option.")
endif ()
endif()

View File

@ -19,8 +19,8 @@ endif()
if (NOT "$ENV{CFLAGS}" STREQUAL ""
OR NOT "$ENV{CXXFLAGS}" STREQUAL ""
OR NOT "$ENV{LDFLAGS}" STREQUAL ""
OR CMAKE_C_FLAGS OR CMAKE_CXX_FLAGS OR CMAKE_EXE_LINKER_FLAGS OR CMAKE_SHARED_LINKER_FLAGS OR CMAKE_MODULE_LINKER_FLAGS
OR CMAKE_C_FLAGS_INIT OR CMAKE_CXX_FLAGS_INIT OR CMAKE_EXE_LINKER_FLAGS_INIT OR CMAKE_SHARED_LINKER_FLAGS_INIT OR CMAKE_MODULE_LINKER_FLAGS_INIT)
OR CMAKE_C_FLAGS OR CMAKE_CXX_FLAGS OR CMAKE_EXE_LINKER_FLAGS OR CMAKE_MODULE_LINKER_FLAGS
OR CMAKE_C_FLAGS_INIT OR CMAKE_CXX_FLAGS_INIT OR CMAKE_EXE_LINKER_FLAGS_INIT OR CMAKE_MODULE_LINKER_FLAGS_INIT)
# if $ENV
message("CFLAGS: $ENV{CFLAGS}")
@ -36,7 +36,6 @@ if (NOT "$ENV{CFLAGS}" STREQUAL ""
message("CMAKE_C_FLAGS_INIT: ${CMAKE_C_FLAGS_INIT}")
message("CMAKE_CXX_FLAGS_INIT: ${CMAKE_CXX_FLAGS_INIT}")
message("CMAKE_EXE_LINKER_FLAGS_INIT: ${CMAKE_EXE_LINKER_FLAGS_INIT}")
message("CMAKE_SHARED_LINKER_FLAGS_INIT: ${CMAKE_SHARED_LINKER_FLAGS_INIT}")
message("CMAKE_MODULE_LINKER_FLAGS_INIT: ${CMAKE_MODULE_LINKER_FLAGS_INIT}")
message(FATAL_ERROR "

View File

@ -732,9 +732,10 @@ public:
if (std::numeric_limits<T>::is_signed && (is_negative(lhs) != is_negative(rhs)))
return is_negative(rhs);
integer<Bits, Signed> t = rhs;
for (unsigned i = 0; i < item_count; ++i)
{
base_type rhs_item = get_item(rhs, big(i));
base_type rhs_item = get_item(t, big(i));
if (lhs.items[big(i)] != rhs_item)
return lhs.items[big(i)] > rhs_item;
@ -757,9 +758,10 @@ public:
if (std::numeric_limits<T>::is_signed && (is_negative(lhs) != is_negative(rhs)))
return is_negative(lhs);
integer<Bits, Signed> t = rhs;
for (unsigned i = 0; i < item_count; ++i)
{
base_type rhs_item = get_item(rhs, big(i));
base_type rhs_item = get_item(t, big(i));
if (lhs.items[big(i)] != rhs_item)
return lhs.items[big(i)] < rhs_item;
@ -779,9 +781,10 @@ public:
{
if constexpr (should_keep_size<T>())
{
integer<Bits, Signed> t = rhs;
for (unsigned i = 0; i < item_count; ++i)
{
base_type rhs_item = get_item(rhs, any(i));
base_type rhs_item = get_item(t, any(i));
if (lhs.items[any(i)] != rhs_item)
return false;

View File

@ -22,7 +22,6 @@ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=bfd")
set (CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=bfd")
# Currently, lld does not work with the error:
# ld.lld: error: section size decrease is too large

View File

@ -30,7 +30,6 @@ set (CMAKE_SYSROOT "${TOOLCHAIN_PATH}/x86_64-linux-gnu/libc")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")
set (CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} --gcc-toolchain=${TOOLCHAIN_PATH}")

View File

@ -95,10 +95,8 @@ if (LINKER_NAME)
configure_file ("${CMAKE_CURRENT_SOURCE_DIR}/cmake/ld.lld.in" "${LLD_WRAPPER}" @ONLY)
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LLD_WRAPPER}")
else ()
set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")
set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")
endif ()
endif ()

View File

@ -3,7 +3,9 @@ set -ex
set -o pipefail
trap "exit" INT TERM
trap 'kill $(jobs -pr) ||:' EXIT
S3_URL=${S3_URL:="https://clickhouse-builds.s3.amazonaws.com"}
BUILD_NAME=${BUILD_NAME:-package_release}
export S3_URL BUILD_NAME
mkdir db0 ||:
mkdir left ||:
@ -28,8 +30,9 @@ function download
# Historically there were various paths for the performance test package.
# Test all of them.
declare -a urls_to_try=(
"https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst"
"https://s3.amazonaws.com/clickhouse-builds/$left_pr/$left_sha/$BUILD_NAME/performance.tgz"
"$S3_URL/PRs/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst"
"$S3_URL/$left_pr/$left_sha/$BUILD_NAME/performance.tar.zst"
"$S3_URL/$left_pr/$left_sha/$BUILD_NAME/performance.tgz"
)
for path in "${urls_to_try[@]}"

View File

@ -6,11 +6,7 @@ export CHPC_CHECK_START_TIMESTAMP
S3_URL=${S3_URL:="https://clickhouse-builds.s3.amazonaws.com"}
BUILD_NAME=${BUILD_NAME:-package_release}
COMMON_BUILD_PREFIX="/clickhouse_build_check"
if [[ $S3_URL == *"s3.amazonaws.com"* ]]; then
COMMON_BUILD_PREFIX=""
fi
export S3_URL BUILD_NAME
# Sometimes AWS responds with a DNS error and it's impossible to retry it with
# current curl version options.
@ -66,8 +62,9 @@ function find_reference_sha
# test all of them.
unset found
declare -a urls_to_try=(
"https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/$BUILD_NAME/performance.tar.zst"
"https://s3.amazonaws.com/clickhouse-builds/0/$REF_SHA/$BUILD_NAME/performance.tgz"
"$S3_URL/PRs/0/$REF_SHA/$BUILD_NAME/performance.tar.zst"
"$S3_URL/0/$REF_SHA/$BUILD_NAME/performance.tar.zst"
"$S3_URL/0/$REF_SHA/$BUILD_NAME/performance.tgz"
)
for path in "${urls_to_try[@]}"
do
@ -92,10 +89,15 @@ chmod 777 workspace output
cd workspace
# Download the package for the version we are going to test.
if curl_with_retry "$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/$BUILD_NAME/performance.tar.zst"
then
right_path="$S3_URL/$PR_TO_TEST/$SHA_TO_TEST$COMMON_BUILD_PREFIX/$BUILD_NAME/performance.tar.zst"
fi
# A temporary solution for migrating into the PRs directory
for prefix in "$S3_URL/PRs" "$S3_URL";
do
if curl_with_retry "$prefix/$PR_TO_TEST/$SHA_TO_TEST/$BUILD_NAME/performance.tar.zst"
then
right_path="$prefix/$PR_TO_TEST/$SHA_TO_TEST/$BUILD_NAME/performance.tar.zst"
break
fi
done
mkdir right
wget -nv -nd -c "$right_path" -O- | tar -C right --no-same-owner --strip-components=1 --zstd --extract --verbose

View File

@ -26,6 +26,7 @@ logging.basicConfig(
total_start_seconds = time.perf_counter()
stage_start_seconds = total_start_seconds
# Thread executor that does not hide exceptions that happen during function
# execution, and rethrows it after join()
class SafeThread(Thread):
@ -158,6 +159,7 @@ for e in subst_elems:
available_parameters[name] = values
# Takes parallel lists of templates, substitutes them with all combos of
# parameters. The set of parameters is determined based on the first list.
# Note: keep the order of queries -- sometimes we have DROP IF EXISTS

View File

@ -670,7 +670,6 @@ if args.report == "main":
)
elif args.report == "all-queries":
print((header_template.format()))
add_tested_commits()

View File

@ -10,31 +10,38 @@ import requests
import tempfile
DEFAULT_URL = 'https://clickhouse-datasets.s3.amazonaws.com'
DEFAULT_URL = "https://clickhouse-datasets.s3.amazonaws.com"
AVAILABLE_DATASETS = {
'hits': 'hits_v1.tar',
'visits': 'visits_v1.tar',
"hits": "hits_v1.tar",
"visits": "visits_v1.tar",
}
RETRIES_COUNT = 5
def _get_temp_file_name():
return os.path.join(tempfile._get_default_tempdir(), next(tempfile._get_candidate_names()))
return os.path.join(
tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())
)
def build_url(base_url, dataset):
return os.path.join(base_url, dataset, 'partitions', AVAILABLE_DATASETS[dataset])
return os.path.join(base_url, dataset, "partitions", AVAILABLE_DATASETS[dataset])
def dowload_with_progress(url, path):
logging.info("Downloading from %s to temp path %s", url, path)
for i in range(RETRIES_COUNT):
try:
with open(path, 'wb') as f:
with open(path, "wb") as f:
response = requests.get(url, stream=True)
response.raise_for_status()
total_length = response.headers.get('content-length')
total_length = response.headers.get("content-length")
if total_length is None or int(total_length) == 0:
logging.info("No content-length, will download file without progress")
logging.info(
"No content-length, will download file without progress"
)
f.write(response.content)
else:
dl = 0
@ -46,7 +53,11 @@ def dowload_with_progress(url, path):
if sys.stdout.isatty():
done = int(50 * dl / total_length)
percent = int(100 * float(dl) / total_length)
sys.stdout.write("\r[{}{}] {}%".format('=' * done, ' ' * (50-done), percent))
sys.stdout.write(
"\r[{}{}] {}%".format(
"=" * done, " " * (50 - done), percent
)
)
sys.stdout.flush()
break
except Exception as ex:
@ -56,14 +67,21 @@ def dowload_with_progress(url, path):
if os.path.exists(path):
os.remove(path)
else:
raise Exception("Cannot download dataset from {}, all retries exceeded".format(url))
raise Exception(
"Cannot download dataset from {}, all retries exceeded".format(url)
)
sys.stdout.write("\n")
logging.info("Downloading finished")
def unpack_to_clickhouse_directory(tar_path, clickhouse_path):
logging.info("Will unpack data from temp path %s to clickhouse db %s", tar_path, clickhouse_path)
with tarfile.open(tar_path, 'r') as comp_file:
logging.info(
"Will unpack data from temp path %s to clickhouse db %s",
tar_path,
clickhouse_path,
)
with tarfile.open(tar_path, "r") as comp_file:
comp_file.extractall(path=clickhouse_path)
logging.info("Unpack finished")
@ -72,15 +90,21 @@ if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
parser = argparse.ArgumentParser(
description="Simple tool for dowloading datasets for clickhouse from S3")
description="Simple tool for dowloading datasets for clickhouse from S3"
)
parser.add_argument('--dataset-names', required=True, nargs='+', choices=list(AVAILABLE_DATASETS.keys()))
parser.add_argument('--url-prefix', default=DEFAULT_URL)
parser.add_argument('--clickhouse-data-path', default='/var/lib/clickhouse/')
parser.add_argument(
"--dataset-names",
required=True,
nargs="+",
choices=list(AVAILABLE_DATASETS.keys()),
)
parser.add_argument("--url-prefix", default=DEFAULT_URL)
parser.add_argument("--clickhouse-data-path", default="/var/lib/clickhouse/")
args = parser.parse_args()
datasets = args.dataset_names
logging.info("Will fetch following datasets: %s", ', '.join(datasets))
logging.info("Will fetch following datasets: %s", ", ".join(datasets))
for dataset in datasets:
logging.info("Processing %s", dataset)
temp_archive_path = _get_temp_file_name()
@ -92,10 +116,11 @@ if __name__ == "__main__":
logging.info("Some exception occured %s", str(ex))
raise
finally:
logging.info("Will remove downloaded file %s from filesystem if it exists", temp_archive_path)
logging.info(
"Will remove downloaded file %s from filesystem if it exists",
temp_archive_path,
)
if os.path.exists(temp_archive_path):
os.remove(temp_archive_path)
logging.info("Processing of %s finished", dataset)
logging.info("Fetch finished, enjoy your tables!")

View File

@ -11,13 +11,14 @@ RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
aspell \
curl \
git \
file \
libxml2-utils \
moreutils \
python3-fuzzywuzzy \
python3-pip \
shellcheck \
yamllint \
&& pip3 install black==22.8.0 boto3 codespell==2.2.1 dohq-artifactory mypy PyGithub unidiff pylint==2.6.2 \
&& pip3 install black==23.1.0 boto3 codespell==2.2.1 dohq-artifactory mypy PyGithub unidiff pylint==2.6.2 \
&& apt-get clean \
&& rm -rf /root/.cache/pip

View File

@ -0,0 +1,265 @@
---
slug: /en/getting-started/example-datasets/covid19
sidebar_label: COVID-19 Open-Data
---
# COVID-19 Open-Data
COVID-19 Open-Data attempts to assemble the largest Covid-19 epidemiological database, in addition to a powerful set of expansive covariates. It includes open, publicly sourced, licensed data relating to demographics, economy, epidemiology, geography, health, hospitalizations, mobility, government response, weather, and more.
The details are in GitHub [here](https://github.com/GoogleCloudPlatform/covid-19-open-data).
It's easy to insert this data into ClickHouse...
:::note
The following commands were executed on a **Production** instance of [ClickHouse Cloud](https://clickhouse.cloud). You can easily run them on a local install as well.
:::
1. Let's see what the data looks like:
```sql
DESCRIBE url(
'https://storage.googleapis.com/covid19-open-data/v3/epidemiology.csv',
'CSVWithNames'
);
```
The CSV file has 10 columns:
```response
┌─name─────────────────┬─type─────────────┐
│ date │ Nullable(String) │
│ location_key │ Nullable(String) │
│ new_confirmed │ Nullable(Int64) │
│ new_deceased │ Nullable(Int64) │
│ new_recovered │ Nullable(Int64) │
│ new_tested │ Nullable(Int64) │
│ cumulative_confirmed │ Nullable(Int64) │
│ cumulative_deceased │ Nullable(Int64) │
│ cumulative_recovered │ Nullable(Int64) │
│ cumulative_tested │ Nullable(Int64) │
└──────────────────────┴──────────────────┘
10 rows in set. Elapsed: 0.745 sec.
```
2. Now let's view some of the rows:
```sql
SELECT *
FROM url('https://storage.googleapis.com/covid19-open-data/v3/epidemiology.csv')
LIMIT 100;
```
Notice the `url` function easily reads data from a CSV file:
```response
┌─c1─────────┬─c2───────────┬─c3────────────┬─c4───────────┬─c5────────────┬─c6─────────┬─c7───────────────────┬─c8──────────────────┬─c9───────────────────┬─c10───────────────┐
│ date │ location_key │ new_confirmed │ new_deceased │ new_recovered │ new_tested │ cumulative_confirmed │ cumulative_deceased │ cumulative_recovered │ cumulative_tested │
│ 2020-04-03 │ AD │ 24 │ 1 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 466 │ 17 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
│ 2020-04-04 │ AD │ 57 │ 0 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 523 │ 17 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
│ 2020-04-05 │ AD │ 17 │ 4 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 540 │ 21 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
│ 2020-04-06 │ AD │ 11 │ 1 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 551 │ 22 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
│ 2020-04-07 │ AD │ 15 │ 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 566 │ 24 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
│ 2020-04-08 │ AD │ 23 │ 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ 589 │ 26 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │
└────────────┴──────────────┴───────────────┴──────────────┴───────────────┴────────────┴──────────────────────┴─────────────────────┴──────────────────────┴───────────────────┘
```
3. We will create a table now that we know what the data looks like:
```sql
CREATE TABLE covid19 (
date Date,
location_key LowCardinality(String),
new_confirmed Int32,
new_deceased Int32,
new_recovered Int32,
new_tested Int32,
cumulative_confirmed Int32,
cumulative_deceased Int32,
cumulative_recovered Int32,
cumulative_tested Int32
)
ENGINE = MergeTree
ORDER BY (location_key, date);
```
4. The following command inserts the entire dataset into the `covid19` table:
```sql
INSERT INTO covid19
SELECT *
FROM
url(
'https://storage.googleapis.com/covid19-open-data/v3/epidemiology.csv',
CSVWithNames,
'date Date,
location_key LowCardinality(String),
new_confirmed Int32,
new_deceased Int32,
new_recovered Int32,
new_tested Int32,
cumulative_confirmed Int32,
cumulative_deceased Int32,
cumulative_recovered Int32,
cumulative_tested Int32'
);
```
5. It goes pretty quickly - let's see how many rows were inserted:
```sql
SELECT formatReadableQuantity(count())
FROM covid19;
```
```response
┌─formatReadableQuantity(count())─┐
│ 12.53 million │
└─────────────────────────────────┘
```
6. Let's see how many total cases of Covid-19 were recorded:
```sql
SELECT formatReadableQuantity(sum(new_confirmed))
FROM covid19;
```
```response
┌─formatReadableQuantity(sum(new_confirmed))─┐
│ 1.39 billion │
└────────────────────────────────────────────┘
```
7. You will notice the data has a lot of 0 values for some dates - either weekends or days when numbers were not reported. We can use a window function to smooth out the daily averages of new cases:
```sql
SELECT
AVG(new_confirmed) OVER (PARTITION BY location_key ORDER BY date ROWS BETWEEN 2 PRECEDING AND 2 FOLLOWING) AS cases_smoothed,
new_confirmed,
location_key,
date
FROM covid19;
```
8. This query determines the latest values for each location. We can't use `max(date)` because not all countries reported every day, so we grab the last row using `ROW_NUMBER`:
```sql
WITH latest_deaths_data AS
( SELECT location_key,
date,
new_deceased,
new_confirmed,
ROW_NUMBER() OVER (PARTITION BY location_key ORDER BY date DESC) as rn
FROM covid19)
SELECT location_key,
date,
new_deceased,
new_confirmed,
rn
FROM latest_deaths_data
WHERE rn=1;
```
9. We can use `lagInFrame` to determine the `LAG` of new cases each day. In this query we filter by the `US_DC` location:
```sql
SELECT
new_confirmed - lagInFrame(new_confirmed,1) OVER (PARTITION BY location_key ORDER BY date) AS confirmed_cases_delta,
new_confirmed,
location_key,
date
FROM covid19
WHERE location_key = 'US_DC';
```
The response looks like:
```response
┌─confirmed_cases_delta─┬─new_confirmed─┬─location_key─┬───────date─┐
│ 0 │ 0 │ US_DC │ 2020-03-08 │
│ 2 │ 2 │ US_DC │ 2020-03-09 │
│ -2 │ 0 │ US_DC │ 2020-03-10 │
│ 6 │ 6 │ US_DC │ 2020-03-11 │
│ -6 │ 0 │ US_DC │ 2020-03-12 │
│ 0 │ 0 │ US_DC │ 2020-03-13 │
│ 6 │ 6 │ US_DC │ 2020-03-14 │
│ -5 │ 1 │ US_DC │ 2020-03-15 │
│ 4 │ 5 │ US_DC │ 2020-03-16 │
│ 4 │ 9 │ US_DC │ 2020-03-17 │
│ -1 │ 8 │ US_DC │ 2020-03-18 │
│ 24 │ 32 │ US_DC │ 2020-03-19 │
│ -26 │ 6 │ US_DC │ 2020-03-20 │
│ 15 │ 21 │ US_DC │ 2020-03-21 │
│ -3 │ 18 │ US_DC │ 2020-03-22 │
│ 3 │ 21 │ US_DC │ 2020-03-23 │
```
10. This query calculates the percentage of change in new cases each day, and includes a simple `increase` or `decrease` column in the result set:
```sql
WITH confirmed_lag AS (
SELECT
*,
lagInFrame(new_confirmed) OVER(
PARTITION BY location_key
ORDER BY date
) AS confirmed_previous_day
FROM covid19
),
confirmed_percent_change AS (
SELECT
*,
COALESCE(ROUND((new_confirmed - confirmed_previous_day) / confirmed_previous_day * 100), 0) AS percent_change
FROM confirmed_lag
)
SELECT
date,
new_confirmed,
percent_change,
CASE
WHEN percent_change > 0 THEN 'increase'
WHEN percent_change = 0 THEN 'no change'
ELSE 'decrease'
END AS trend
FROM confirmed_percent_change
WHERE location_key = 'US_DC';
```
The results look like:
```response
┌───────date─┬─new_confirmed─┬─percent_change─┬─trend─────┐
│ 2020-03-08 │ 0 │ nan │ decrease │
│ 2020-03-09 │ 2 │ inf │ increase │
│ 2020-03-10 │ 0 │ -100 │ decrease │
│ 2020-03-11 │ 6 │ inf │ increase │
│ 2020-03-12 │ 0 │ -100 │ decrease │
│ 2020-03-13 │ 0 │ nan │ decrease │
│ 2020-03-14 │ 6 │ inf │ increase │
│ 2020-03-15 │ 1 │ -83 │ decrease │
│ 2020-03-16 │ 5 │ 400 │ increase │
│ 2020-03-17 │ 9 │ 80 │ increase │
│ 2020-03-18 │ 8 │ -11 │ decrease │
│ 2020-03-19 │ 32 │ 300 │ increase │
│ 2020-03-20 │ 6 │ -81 │ decrease │
│ 2020-03-21 │ 21 │ 250 │ increase │
│ 2020-03-22 │ 18 │ -14 │ decrease │
│ 2020-03-23 │ 21 │ 17 │ increase │
│ 2020-03-24 │ 46 │ 119 │ increase │
│ 2020-03-25 │ 48 │ 4 │ increase │
│ 2020-03-26 │ 36 │ -25 │ decrease │
│ 2020-03-27 │ 37 │ 3 │ increase │
│ 2020-03-28 │ 38 │ 3 │ increase │
│ 2020-03-29 │ 59 │ 55 │ increase │
│ 2020-03-30 │ 94 │ 59 │ increase │
│ 2020-03-31 │ 91 │ -3 │ decrease │
│ 2020-04-01 │ 67 │ -26 │ decrease │
│ 2020-04-02 │ 104 │ 55 │ increase │
│ 2020-04-03 │ 145 │ 39 │ increase │
```
:::note
As mentioned in the [GitHub repo](https://github.com/GoogleCloudPlatform/covid-19-open-data), the dataset is no longer updated as of September 15, 2022.
:::

View File

@ -0,0 +1,219 @@
---
slug: /en/getting-started/example-datasets/youtube-dislikes
sidebar_label: YouTube Dislikes
description: A collection of dislikes of YouTube videos.
---
# YouTube dataset of dislikes
In November of 2021, YouTube removed the public ***dislike*** count from all of its videos. While creators can still see the number of dislikes, viewers can only see how many ***likes*** a video has received.
:::important
The dataset has over 4.55 billion records, so be careful just copying-and-pasting the commands below unless your resources can handle that type of volume. The commands below were executed on a **Production** instance of [ClickHouse Cloud](https://clickhouse.cloud).
:::
The data is in a JSON format and can be downloaded from [archive.org](https://archive.org/download/dislikes_youtube_2021_12_video_json_files). We have made this same data available in S3 so that it can be downloaded more efficiently into a ClickHouse Cloud instance.
Here are the steps to create a table in ClickHouse Cloud and insert the data.
:::note
The steps below will easily work on a local install of ClickHouse too. The only change would be to use the `s3` function instead of `s3cluster` (unless you have a cluster configured - in which case change `default` to the name of your cluster).
:::
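For comparison, here is a minimal sketch of the same schema inspection using the single-node `s3` function (assuming no cluster is configured):
```sql
DESCRIBE s3(
    'https://clickhouse-public-datasets.s3.amazonaws.com/youtube/original/files/*.zst',
    'JSONLines'
);
```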
## Step-by-step instructions
1. Let's see what the data looks like. The `s3cluster` table function returns a table, so we can `DESCRIBE` the result:
```sql
DESCRIBE s3Cluster(
'default',
'https://clickhouse-public-datasets.s3.amazonaws.com/youtube/original/files/*.zst',
'JSONLines'
);
```
ClickHouse infers the following schema from the JSON file:
```response
┌─name────────────────┬─type─────────────────────────────────┐
│ id │ Nullable(String) │
│ fetch_date │ Nullable(Int64) │
│ upload_date │ Nullable(String) │
│ title │ Nullable(String) │
│ uploader_id │ Nullable(String) │
│ uploader │ Nullable(String) │
│ uploader_sub_count │ Nullable(Int64) │
│ is_age_limit │ Nullable(Bool) │
│ view_count │ Nullable(Int64) │
│ like_count │ Nullable(Int64) │
│ dislike_count │ Nullable(Int64) │
│ is_crawlable │ Nullable(Bool) │
│ is_live_content │ Nullable(Bool) │
│ has_subtitles │ Nullable(Bool) │
│ is_ads_enabled │ Nullable(Bool) │
│ is_comments_enabled │ Nullable(Bool) │
│ description │ Nullable(String) │
│ rich_metadata │ Array(Map(String, Nullable(String))) │
│ super_titles │ Array(Map(String, Nullable(String))) │
│ uploader_badges │ Nullable(String) │
│ video_badges │ Nullable(String) │
└─────────────────────┴──────────────────────────────────────┘
```
2. Based on the inferred schema, we cleaned up the data types and added a primary key. Define the following table:
```sql
CREATE TABLE youtube
(
`id` String,
`fetch_date` DateTime,
`upload_date` String,
`title` String,
`uploader_id` String,
`uploader` String,
`uploader_sub_count` Int64,
`is_age_limit` Bool,
`view_count` Int64,
`like_count` Int64,
`dislike_count` Int64,
`is_crawlable` Bool,
`has_subtitles` Bool,
`is_ads_enabled` Bool,
`is_comments_enabled` Bool,
`description` String,
`rich_metadata` Array(Map(String, String)),
`super_titles` Array(Map(String, String)),
`uploader_badges` String,
`video_badges` String
)
ENGINE = MergeTree
ORDER BY (upload_date, uploader);
```
3. The following command streams the records from the S3 files into the `youtube` table.
:::important
This inserts a lot of data - 4.65 billion rows. If you do not want the entire dataset, simply add a `LIMIT` clause with the desired number of rows.
:::
```sql
INSERT INTO youtube
SETTINGS input_format_null_as_default = 1
SELECT
id,
parseDateTimeBestEffortUS(toString(fetch_date)) AS fetch_date,
upload_date,
ifNull(title, '') AS title,
uploader_id,
ifNull(uploader, '') AS uploader,
uploader_sub_count,
is_age_limit,
view_count,
like_count,
dislike_count,
is_crawlable,
has_subtitles,
is_ads_enabled,
is_comments_enabled,
ifNull(description, '') AS description,
rich_metadata,
super_titles,
ifNull(uploader_badges, '') AS uploader_badges,
ifNull(video_badges, '') AS video_badges
FROM s3Cluster(
'default',
'https://clickhouse-public-datasets.s3.amazonaws.com/youtube/original/files/*.zst',
'JSONLines'
);
```
4. Open a new tab in the SQL Console of ClickHouse Cloud (or a new `clickhouse-client` window) and watch the count increase. It will take a while to insert 4.56B rows, depending on your server resources. (Without any tweaking of settings, it takes about 4.5 hours.)
```sql
SELECT formatReadableQuantity(count())
FROM youtube
```
```response
┌─formatReadableQuantity(count())─┐
│ 4.56 billion │
└─────────────────────────────────┘
```
5. Once the data is inserted, go ahead and count the number of dislikes of your favorite videos or channels. Let's see how many videos were uploaded by ClickHouse:
```sql
SELECT count()
FROM youtube
WHERE uploader = 'ClickHouse';
```
```response
┌─count()─┐
│ 84 │
└─────────┘
1 row in set. Elapsed: 0.570 sec. Processed 237.57 thousand rows, 5.77 MB (416.54 thousand rows/s., 10.12 MB/s.)
```
:::note
The query above runs so quickly because we chose `uploader` as the first column of the primary key - so it only had to process 237k rows.
:::
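To verify this yourself, a hedged sketch: `EXPLAIN indexes = 1` shows how the primary key prunes granules before any data is read (the exact output depends on your part layout):
```sql
EXPLAIN indexes = 1
SELECT count()
FROM youtube
WHERE uploader = 'ClickHouse';
```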
6. Let's look at likes and dislikes of ClickHouse videos:
```sql
SELECT
title,
like_count,
dislike_count
FROM youtube
WHERE uploader = 'ClickHouse'
ORDER BY dislike_count DESC;
```
The response looks like:
```response
┌─title────────────────────────────────────────────────────────────────────────────────────────────────┬─like_count─┬─dislike_count─┐
│ ClickHouse v21.11 Release Webinar │ 52 │ 3 │
│ ClickHouse Introduction │ 97 │ 3 │
│ Casa Modelo Algarve │ 180 │ 3 │
│ Профайлер запросов: трудный путь │ 33 │ 3 │
│ ClickHouse в Курсометре │ 4 │ 2 │
│ 10 Good Reasons to Use ClickHouse │ 27 │ 2 │
...
84 rows in set. Elapsed: 0.013 sec. Processed 155.65 thousand rows, 16.94 MB (11.96 million rows/s., 1.30 GB/s.)
```
7. Here is a search for videos with **ClickHouse** in the `title` or `description` fields:
```sql
SELECT
view_count,
like_count,
dislike_count,
concat('https://youtu.be/', id) AS url,
title
FROM youtube
WHERE (title ILIKE '%ClickHouse%') OR (description ILIKE '%ClickHouse%')
ORDER BY
like_count DESC,
view_count DESC
```
This query has to process every row, and also parse through two columns of strings. Even then, we get decent performance at 4.15M rows/second:
```response
1174 rows in set. Elapsed: 1099.368 sec. Processed 4.56 billion rows, 1.98 TB (4.15 million rows/s., 1.80 GB/s.)
```
The results look like:
```response
┌─view_count─┬─like_count─┬─dislike_count─┬─url──────────────────────────┬─title──────────────────────────────────────────────────────────────────────────────────────────────────┐
│ 1919 │ 63 │ 1 │ https://youtu.be/b9MeoOtAivQ │ ClickHouse v21.10 Release Webinar │
│ 8710 │ 62 │ 4 │ https://youtu.be/PeV1mC2z--M │ What is JDBC DriverManager? | JDBC │
│ 3534 │ 62 │ 1 │ https://youtu.be/8nWRhK9gw10 │ CLICKHOUSE - Arquitetura Modular │
```

View File

@ -120,7 +120,7 @@ Query:
CREATE USER my_user IDENTIFIED WITH ldap SERVER 'my_ldap_server';
```
## LDAP Exernal User Directory {#ldap-external-user-directory}
## LDAP External User Directory {#ldap-external-user-directory}
In addition to the locally defined users, a remote LDAP server can be used as a source of user definitions. To achieve this, specify the previously defined LDAP server name (see [LDAP Server Definition](#ldap-server-definition)) in the `ldap` section inside the `users_directories` section of the `config.xml` file.

View File

@ -85,8 +85,8 @@ make the matching more natural, all query-level settings related to the query ca
If the query was aborted due to an exception or user cancellation, no entry is written into the query cache.
The size of the query cache, the maximum number of cache entries and the maximum size of cache entries (in bytes and in records) can
be configured using different [server configuration options](server-configuration-parameters/settings.md#server_configuration_parameters_query-cache).
The size of the query cache in bytes, the maximum number of cache entries and the maximum size of individual cache entries (in bytes and in
records) can be configured using different [server configuration options](server-configuration-parameters/settings.md#server_configuration_parameters_query-cache).
To define how long a query must run at least so that its result can be cached, you can use the setting
[query_cache_min_query_duration](settings/settings.md#query-cache-min-query-duration). For example, the result of query
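(a hypothetical sketch - `some_expensive_calculation`, `column_1` and `column_2` stand in for your own expressions)
```sql
SELECT some_expensive_calculation(column_1, column_2)
FROM table
SETTINGS use_query_cache = true, query_cache_min_query_duration = 5000;
```
is only stored in the cache if the query runs longer than 5 seconds.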

View File

@ -1361,7 +1361,7 @@ If the table does not exist, ClickHouse will create it. If the structure of the
The following settings are available:
- `size`: The maximum cache size in bytes. 0 means the query cache is disabled. Default value: `1073741824` (1 GiB).
- `max_size`: The maximum cache size in bytes. 0 means the query cache is disabled. Default value: `1073741824` (1 GiB).
- `max_entries`: The maximum number of `SELECT` query results stored in the cache. Default value: `1024`.
- `max_entry_size`: The maximum size in bytes `SELECT` query results may have to be saved in the cache. Default value: `1048576` (1 MiB).
- `max_entry_rows`: The maximum number of rows `SELECT` query results may have to be saved in the cache. Default value: `30000000` (30 mil).
@ -1369,7 +1369,7 @@ The following settings are available:
Changed settings take effect immediately.
:::warning
Data for the query cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `size` or disable the query cache altogether.
Data for the query cache is allocated in DRAM. If memory is scarce, make sure to set a small value for `max_size` or disable the query cache altogether.
:::
**Example**
@ -1882,6 +1882,16 @@ The update is performed asynchronously, in a separate system thread.
Manage executing [distributed ddl queries](../../sql-reference/distributed-ddl.md) (CREATE, DROP, ALTER, RENAME) on cluster.
Works only if [ZooKeeper](#server-settings_zookeeper) is enabled.
The configurable settings within `<distributed_ddl>` include:
- **path**: the path in Keeper for the `task_queue` for DDL queries
- **profile**: the profile used to execute the DDL queries
- **pool_size**: how many `ON CLUSTER` queries can be run simultaneously
- **max_tasks_in_queue**: the maximum number of tasks that can be in the queue. Default is 1,000
- **task_max_lifetime**: delete node if its age is greater than this value. Default is `7 * 24 * 60 * 60` (a week in seconds)
- **cleanup_delay_period**: cleanup starts after a new node event is received, if the last cleanup was performed more than `cleanup_delay_period` seconds ago. Default is 60 seconds
**Example**
```xml

View File

@ -0,0 +1,37 @@
---
slug: /en/operations/system-tables/dropped_tables
---
# dropped_tables
Contains information about tables for which `DROP TABLE` has been executed but data cleanup has not actually been performed yet.
Columns:
- `index` ([UInt32](../../sql-reference/data-types/int-uint.md)) — Index in marked_dropped_tables queue.
- `database` ([String](../../sql-reference/data-types/string.md)) — Database.
- `table` ([String](../../sql-reference/data-types/string.md)) — Table name.
- `uuid` ([UUID](../../sql-reference/data-types/uuid.md)) — Table uuid.
- `engine` ([String](../../sql-reference/data-types/string.md)) — Table engine name.
- `metadata_dropped_path` ([String](../../sql-reference/data-types/string.md)) — Path of table's metadata file in the metadata_dropped directory.
- `table_dropped_time` ([DateTime](../../sql-reference/data-types/datetime.md)) — The time when the next attempt to remove the table's data is scheduled. Usually it's the time when the table was dropped plus `database_atomic_delay_before_drop_table_sec`
**Example**
The following example shows how to get information about dropped_tables.
``` sql
SELECT *
FROM system.dropped_tables\G
```
``` text
Row 1:
──────
index: 0
database: default
table: test
uuid: 03141bb2-e97a-4d7c-a172-95cc066bb3bd
engine: MergeTree
metadata_dropped_path: /data/ClickHouse/build/programs/data/metadata_dropped/default.test.03141bb2-e97a-4d7c-a172-95cc066bb3bd.sql
table_dropped_time: 2023-03-16 23:43:31
```

View File

@ -1238,7 +1238,7 @@ Formats a Time according to the given Format string. Format is a constant expres
formatDateTime uses MySQL datetime format style, refer to https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format.
The opposite operation of this function is [formatDateTime](/docs/en/sql-reference/functions/type-conversion-functions.md#formatdatetime).
The opposite operation of this function is [parseDateTime](/docs/en/sql-reference/functions/type-conversion-functions.md#type_conversion_functions-parseDateTime).
Alias: `DATE_FORMAT`.
@ -1334,7 +1334,7 @@ Result:
Similar to formatDateTime, except that it formats datetime in Joda style instead of MySQL style. Refer to https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html.
The opposite operation of this function is [formatDateTimeInJodaSyntax](/docs/en/sql-reference/functions/type-conversion-functions.md#formatdatetimeinjodasyntax).
The opposite operation of this function is [parseDateTimeInJodaSyntax](/docs/en/sql-reference/functions/type-conversion-functions.md#type_conversion_functions-parseDateTimeInJodaSyntax).
**Replacement fields**

View File

@ -1148,9 +1148,10 @@ Result:
└───────────────────────────┴──────────────────────────────┘
```
## parseDateTime
## parseDateTime {#type_conversion_functions-parseDateTime}
Converts a [String](/docs/en/sql-reference/data-types/string.md) to [DateTime](/docs/en/sql-reference/data-types/datetime.md) according to a [MySQL format string](https://dev.mysql.com/doc/refman/8.0/en/date-and-time-functions.html#function_date-format).
This function is the opposite operation of function [formatDateTime](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTime).
**Syntax**
@ -1163,6 +1164,7 @@ parseDateTime(str, format[, timezone])
- `str` — the String to be parsed
- `format` — the format string
- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). Optional.
**Returned value(s)**
@ -1186,9 +1188,10 @@ SELECT parseDateTime('2021-01-04+23:00:00', '%Y-%m-%d+%H:%i:%s')
Alias: `TO_TIMESTAMP`.
## parseDateTimeInJodaSyntax
## parseDateTimeInJodaSyntax {#type_conversion_functions-parseDateTimeInJodaSyntax}
Similar to [parseDateTime](#parsedatetime), except that the format string is in [Joda](https://joda-time.sourceforge.net/apidocs/org/joda/time/format/DateTimeFormat.html) instead of MySQL syntax.
This function is the opposite operation of function [formatDateTimeInJodaSyntax](/docs/en/sql-reference/functions/date-time-functions.md#date_time_functions-formatDateTimeInJodaSyntax).
**Syntax**
@ -1201,6 +1204,7 @@ parseDateTimeInJodaSyntax(str, format[, timezone])
- `str` — the String to be parsed
- `format` — the format string
- `timezone` — [Timezone](/docs/en/operations/server-configuration-parameters/settings.md/#server_configuration_parameters-timezone). Optional.
**Returned value(s)**

View File

@ -128,7 +128,7 @@ SELECT
user_agent,
sum(pages_visited)
FROM visits
GROUP BY user_id
GROUP BY user_agent
```
As mentioned before, we could review the `system.query_log` table. In the `projections` field we have the name of the projection used, or an empty value if none was used:
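A minimal sketch of such a check (assuming the query above was the most recent one to finish):
```sql
SELECT query, projections
FROM system.query_log
WHERE type = 'QueryFinish' AND query LIKE '%user_agent%'
ORDER BY event_time DESC
LIMIT 1;
```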

View File

@ -36,7 +36,61 @@ There are multiple ways of user identification:
- `IDENTIFIED WITH kerberos` or `IDENTIFIED WITH kerberos REALM 'realm'`
- `IDENTIFIED WITH ssl_certificate CN 'mysite.com:user'`
For identification with sha256_hash using `SALT` - hash must be calculated from concatination of 'password' and 'salt'.
## Examples
1. The following username is `name1` and does not require a password - which obviously doesn't provide much security:
```sql
CREATE USER name1 NOT IDENTIFIED
```
2. To specify a plaintext password:
```sql
CREATE USER name2 IDENTIFIED WITH plaintext_password BY 'my_password'
```
:::warning
The password is stored in a SQL text file in `/var/lib/clickhouse/access`, so it's not a good idea to use `plaintext_password`. Try `sha256_password` instead, as demonstrated next...
:::
3. The best option is to use a password that is hashed using SHA-256. ClickHouse will hash the password for you when you specify `IDENTIFIED WITH sha256_password`. For example:
```sql
CREATE USER name3 IDENTIFIED WITH sha256_password BY 'my_password'
```
Notice ClickHouse generates and runs the following command for you:
```response
CREATE USER name3
IDENTIFIED WITH sha256_hash
BY '8B3404953FCAA509540617F082DB13B3E0734F90FF6365C19300CC6A6EA818D6'
SALT 'D6489D8B5692D82FF944EA6415785A8A8A1AF33825456AFC554487725A74A609'
```
The `name3` user can now log in using `my_password`, but the password is stored as the hashed value above. The following SQL file was created in `/var/lib/clickhouse/access` and gets executed at server startup:
```bash
/var/lib/clickhouse/access $ cat 3843f510-6ebd-a52d-72ac-e021686d8a93.sql
ATTACH USER name3 IDENTIFIED WITH sha256_hash BY '0C268556C1680BEF0640AAC1E7187566704208398DA31F03D18C74F5C5BE5053' SALT '4FB16307F5E10048196966DD7E6876AE53DE6A1D1F625488482C75F14A5097C7';
```
:::note
If you have already created a hash value and corresponding salt value for a username, then you can use `IDENTIFIED WITH sha256_hash BY 'hash'` or `IDENTIFIED WITH sha256_hash BY 'hash' SALT 'salt'`. For identification with `sha256_hash` using `SALT` - hash must be calculated from concatenation of 'password' and 'salt'.
:::
4. The `double_sha1_password` is not typically needed, but comes in handy when working with clients that require it (like the MySQL interface):
```sql
CREATE USER name4 IDENTIFIED WITH double_sha1_password BY 'my_password'
```
ClickHouse generates and runs the following query:
```response
CREATE USER name4 IDENTIFIED WITH double_sha1_hash BY 'CCD3A959D6A004B9C3807B728BC2E55B67E10518'
```
## User Host

View File

@ -21,13 +21,13 @@ Note that if you use this statement to get `CREATE` query of system tables, you
Prints a list of all databases.
```sql
SHOW DATABASES [LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE filename] [FORMAT format]
SHOW DATABASES [[NOT] LIKE | ILIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE filename] [FORMAT format]
```
This statement is identical to the query:
```sql
SELECT name FROM system.databases [WHERE name LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE filename] [FORMAT format]
SELECT name FROM system.databases [WHERE name [NOT] LIKE | ILIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE filename] [FORMAT format]
```
### Examples
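A hedged illustration of the pattern matching (the databases returned depend on your server):
```sql
SHOW DATABASES LIKE 'de%'
```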
@ -117,7 +117,7 @@ $ watch -n1 "clickhouse-client --query='SHOW PROCESSLIST'"
Displays a list of tables.
```sql
SHOW [TEMPORARY] TABLES [{FROM | IN} <db>] [LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
SHOW [FULL] [TEMPORARY] TABLES [{FROM | IN} <db>] [[NOT] LIKE | ILIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
```
If the `FROM` clause is not specified, the query returns the list of tables from the current database.
@ -125,7 +125,7 @@ If the `FROM` clause is not specified, the query returns the list of tables from
This statement is identical to the query:
```sql
SELECT name FROM system.tables [WHERE name LIKE | ILIKE | NOT LIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
SELECT name FROM system.tables [WHERE name [NOT] LIKE | ILIKE '<pattern>'] [LIMIT <N>] [INTO OUTFILE <filename>] [FORMAT <format>]
```
### Examples
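A hedged illustration (assuming the standard `system` database is present):
```sql
SHOW TABLES FROM system LIKE '%query%' LIMIT 2
```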
@ -370,7 +370,7 @@ Returns a list of clusters. All available clusters are listed in the [system.clu
``` sql
SHOW CLUSTER '<name>'
SHOW CLUSTERS [LIKE|NOT LIKE '<pattern>'] [LIMIT <N>]
SHOW CLUSTERS [[NOT] LIKE|ILIKE '<pattern>'] [LIMIT <N>]
```
### Examples
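A hedged illustration (cluster names depend on your configuration):
```sql
SHOW CLUSTERS LIKE 'test%' LIMIT 1
```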

View File

@ -7,20 +7,20 @@ sidebar_position: 101
# What is a columnar database? {#what-is-a-columnar-database}
A columnar database stores the data of each column independently. This allows reading from disk only the data of the columns used in any given query. The cost is that operations affecting whole rows become proportionally more expensive. The synonym for a columnar database is a column-oriented database management system. ClickHouse is a typical example.
The main advantages of columnar databases are:
- Queries that use only a few of the many columns.
Aggregating queries against large volumes of data.
Compression by column.
- Aggregating queries against large volumes of data.
- Compression by column.
Here is the difference between a traditional row-oriented system and a columnar database when building reports:
**Traditional row-oriented storage**
!(Traditional row-oriented storage)(https://clickhouse.com/docs/en/images/row-oriented.gif)
![Traditional row-oriented storage](https://clickhouse.com/docs/assets/images/row-oriented-3e6fd5aa48e3075202d242b4799da8fa.gif)
**Columnar storage**
!(Columnar storage)(https://clickhouse.com/docs/en/images/column-oriented.gif)
![Columnar storage](https://clickhouse.com/docs/assets/images/column-oriented-d082e49b7743d4ded32c7952bfdb028f.gif)
Columnar databases are the preferred choice for analytical applications because they allow having many columns in a table just in case, without paying the cost for unused columns at read query execution time. Column-oriented databases are designed for big-data processing because, like data warehouses, they often use distributed clusters of low-cost hardware to increase throughput. ClickHouse combines [distributed](../../engines/table-engines/special/distributed.md) and [replicated](../../engines/table-engines/mergetree-family/replication.md) tables.

View File

@ -1517,7 +1517,7 @@
<!-- Configuration for the query cache -->
<!-- <query_cache> -->
<!-- <size>1073741824</size> -->
<!-- <max_size>1073741824</max_size> -->
<!-- <max_entries>1024</max_entries> -->
<!-- <max_entry_size>1048576</max_entry_size> -->
<!-- <max_entry_rows>30000000</max_entry_rows> -->

View File

@ -674,18 +674,16 @@ void ReplicatedAccessStorage::backup(BackupEntriesCollector & backup_entries_col
backup_entries_collector.getContext()->getAccessControl());
auto backup_coordination = backup_entries_collector.getBackupCoordination();
String current_host_id = backup_entries_collector.getBackupSettings().host_id;
backup_coordination->addReplicatedAccessFilePath(zookeeper_path, type, current_host_id, backup_entry_with_path.first);
backup_coordination->addReplicatedAccessFilePath(zookeeper_path, type, backup_entry_with_path.first);
backup_entries_collector.addPostTask(
[backup_entry = backup_entry_with_path.second,
zookeeper_path = zookeeper_path,
type,
current_host_id,
&backup_entries_collector,
backup_coordination]
{
for (const String & path : backup_coordination->getReplicatedAccessFilePaths(zookeeper_path, type, current_host_id))
for (const String & path : backup_coordination->getReplicatedAccessFilePaths(zookeeper_path, type))
backup_entries_collector.addBackupEntry(path, backup_entry);
});
}

View File

@ -49,7 +49,7 @@ QueryTreeNodePtr ArrayJoinNode::cloneImpl() const
return std::make_shared<ArrayJoinNode>(getTableExpression(), getJoinExpressionsNode(), is_left);
}
ASTPtr ArrayJoinNode::toASTImpl() const
ASTPtr ArrayJoinNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto array_join_ast = std::make_shared<ASTArrayJoin>();
array_join_ast->kind = is_left ? ASTArrayJoin::Kind::Left : ASTArrayJoin::Kind::Inner;
@ -63,9 +63,9 @@ ASTPtr ArrayJoinNode::toASTImpl() const
auto * column_node = array_join_expression->as<ColumnNode>();
if (column_node && column_node->getExpression())
array_join_expression_ast = column_node->getExpression()->toAST();
array_join_expression_ast = column_node->getExpression()->toAST(options);
else
array_join_expression_ast = array_join_expression->toAST();
array_join_expression_ast = array_join_expression->toAST(options);
array_join_expression_ast->setAlias(array_join_expression->getAlias());
array_join_expressions_ast->children.push_back(std::move(array_join_expression_ast));
@ -75,7 +75,7 @@ ASTPtr ArrayJoinNode::toASTImpl() const
array_join_ast->expression_list = array_join_ast->children.back();
ASTPtr tables_in_select_query_ast = std::make_shared<ASTTablesInSelectQuery>();
addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[table_expression_child_index]);
addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[table_expression_child_index], options);
auto array_join_query_element_ast = std::make_shared<ASTTablesInSelectQueryElement>();
array_join_query_element_ast->children.push_back(std::move(array_join_ast));

View File

@ -99,7 +99,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
bool is_left = false;

View File

@ -91,12 +91,12 @@ QueryTreeNodePtr ColumnNode::cloneImpl() const
return std::make_shared<ColumnNode>(column, getSourceWeakPointer());
}
ASTPtr ColumnNode::toASTImpl() const
ASTPtr ColumnNode::toASTImpl(const ConvertToASTOptions & options) const
{
std::vector<std::string> column_identifier_parts;
auto column_source = getColumnSourceOrNull();
if (column_source)
if (column_source && options.fully_qualified_identifiers)
{
auto node_type = column_source->getNodeType();
if (node_type == QueryTreeNodeType::TABLE ||

View File

@ -132,7 +132,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
const QueryTreeNodeWeakPtr & getSourceWeakPointer() const

View File

@ -91,7 +91,7 @@ QueryTreeNodePtr ApplyColumnTransformerNode::cloneImpl() const
return std::make_shared<ApplyColumnTransformerNode>(getExpressionNode());
}
ASTPtr ApplyColumnTransformerNode::toASTImpl() const
ASTPtr ApplyColumnTransformerNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto ast_apply_transformer = std::make_shared<ASTColumnsApplyTransformer>();
const auto & expression_node = getExpressionNode();
@ -100,14 +100,14 @@ ASTPtr ApplyColumnTransformerNode::toASTImpl() const
{
auto & function_expression = expression_node->as<FunctionNode &>();
ast_apply_transformer->func_name = function_expression.getFunctionName();
ast_apply_transformer->parameters = function_expression.getParametersNode()->toAST();
ast_apply_transformer->parameters = function_expression.getParametersNode()->toAST(options);
}
else
{
auto & lambda_expression = expression_node->as<LambdaNode &>();
if (!lambda_expression.getArgumentNames().empty())
ast_apply_transformer->lambda_arg = lambda_expression.getArgumentNames()[0];
ast_apply_transformer->lambda = lambda_expression.toAST();
ast_apply_transformer->lambda = lambda_expression.toAST(options);
}
return ast_apply_transformer;
@ -227,7 +227,7 @@ QueryTreeNodePtr ExceptColumnTransformerNode::cloneImpl() const
return std::make_shared<ExceptColumnTransformerNode>(except_column_names, is_strict);
}
ASTPtr ExceptColumnTransformerNode::toASTImpl() const
ASTPtr ExceptColumnTransformerNode::toASTImpl(const ConvertToASTOptions & /* options */) const
{
auto ast_except_transformer = std::make_shared<ASTColumnsExceptTransformer>();
@ -334,7 +334,7 @@ QueryTreeNodePtr ReplaceColumnTransformerNode::cloneImpl() const
return result_replace_transformer;
}
ASTPtr ReplaceColumnTransformerNode::toASTImpl() const
ASTPtr ReplaceColumnTransformerNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto ast_replace_transformer = std::make_shared<ASTColumnsReplaceTransformer>();
@ -347,7 +347,7 @@ ASTPtr ReplaceColumnTransformerNode::toASTImpl() const
{
auto replacement_ast = std::make_shared<ASTColumnsReplaceTransformer::Replacement>();
replacement_ast->name = replacements_names[i];
replacement_ast->children.push_back(replacement_expressions_nodes[i]->toAST());
replacement_ast->children.push_back(replacement_expressions_nodes[i]->toAST(options));
ast_replace_transformer->children.push_back(std::move(replacement_ast));
}

View File

@ -141,7 +141,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
ApplyColumnTransformerType apply_transformer_type = ApplyColumnTransformerType::LAMBDA;
@ -220,7 +220,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
ExceptColumnTransformerType except_transformer_type;
@ -298,7 +298,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
ListNode & getReplacements()

View File

@ -75,11 +75,14 @@ QueryTreeNodePtr ConstantNode::cloneImpl() const
return std::make_shared<ConstantNode>(constant_value, source_expression);
}
ASTPtr ConstantNode::toASTImpl() const
ASTPtr ConstantNode::toASTImpl(const ConvertToASTOptions & options) const
{
const auto & constant_value_literal = constant_value->getValue();
auto constant_value_ast = std::make_shared<ASTLiteral>(constant_value_literal);
if (!options.add_cast_for_constants)
return constant_value_ast;
bool need_to_add_cast_function = false;
auto constant_value_literal_type = constant_value_literal.getType();
WhichDataType constant_value_type(constant_value->getType());

View File

@ -83,7 +83,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
ConstantValuePtr constant_value;

View File

@ -197,7 +197,7 @@ QueryTreeNodePtr FunctionNode::cloneImpl() const
return result_function;
}
ASTPtr FunctionNode::toASTImpl() const
ASTPtr FunctionNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto function_ast = std::make_shared<ASTFunction>();
@ -212,12 +212,12 @@ ASTPtr FunctionNode::toASTImpl() const
const auto & parameters = getParameters();
if (!parameters.getNodes().empty())
{
function_ast->children.push_back(parameters.toAST());
function_ast->children.push_back(parameters.toAST(options));
function_ast->parameters = function_ast->children.back();
}
const auto & arguments = getArguments();
function_ast->children.push_back(arguments.toAST());
function_ast->children.push_back(arguments.toAST(options));
function_ast->arguments = function_ast->children.back();
auto window_node = getWindowNode();
@ -226,7 +226,7 @@ ASTPtr FunctionNode::toASTImpl() const
if (auto * identifier_node = window_node->as<IdentifierNode>())
function_ast->window_name = identifier_node->getIdentifier().getFullName();
else
function_ast->window_definition = window_node->toAST();
function_ast->window_definition = window_node->toAST(options);
}
return function_ast;

View File

@ -209,7 +209,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
String function_name;

View File

@ -331,9 +331,9 @@ QueryTreeNodePtr IQueryTreeNode::cloneAndReplace(const QueryTreeNodePtr & node_t
return cloneAndReplace(replacement_map);
}
ASTPtr IQueryTreeNode::toAST() const
ASTPtr IQueryTreeNode::toAST(const ConvertToASTOptions & options) const
{
auto converted_node = toASTImpl();
auto converted_node = toASTImpl(options);
if (auto * ast_with_alias = dynamic_cast<ASTWithAlias *>(converted_node.get()))
converted_node->setAlias(alias);

View File

@ -181,8 +181,17 @@ public:
*/
String formatOriginalASTForErrorMessage() const;
struct ConvertToASTOptions
{
/// Add _CAST if constant literal type is different from column type
bool add_cast_for_constants = true;
/// Identifiers are fully qualified (`database.table.column`), otherwise names are just column names (`column`)
bool fully_qualified_identifiers = true;
};
/// Convert query tree to AST
ASTPtr toAST() const;
ASTPtr toAST(const ConvertToASTOptions & options = { .add_cast_for_constants = true, .fully_qualified_identifiers = true }) const;
/// Convert query tree to AST and then format it for error message.
String formatConvertedASTForErrorMessage() const;
@ -258,7 +267,7 @@ protected:
virtual QueryTreeNodePtr cloneImpl() const = 0;
/// Subclass must convert its internal state and its children to AST
virtual ASTPtr toASTImpl() const = 0;
virtual ASTPtr toASTImpl(const ConvertToASTOptions & options) const = 0;
QueryTreeNodes children;
QueryTreeWeakNodes weak_pointers;

View File

@ -58,7 +58,7 @@ QueryTreeNodePtr IdentifierNode::cloneImpl() const
return std::make_shared<IdentifierNode>(identifier);
}
ASTPtr IdentifierNode::toASTImpl() const
ASTPtr IdentifierNode::toASTImpl(const ConvertToASTOptions & /* options */) const
{
auto identifier_parts = identifier.getParts();
return std::make_shared<ASTIdentifier>(std::move(identifier_parts));

View File

@ -59,7 +59,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
Identifier identifier;

View File

@ -44,11 +44,11 @@ QueryTreeNodePtr InterpolateNode::cloneImpl() const
return std::make_shared<InterpolateNode>(nullptr /*expression*/, nullptr /*interpolate_expression*/);
}
ASTPtr InterpolateNode::toASTImpl() const
ASTPtr InterpolateNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto result = std::make_shared<ASTInterpolateElement>();
result->column = getExpression()->toAST()->getColumnName();
result->children.push_back(getInterpolateExpression()->toAST());
result->column = getExpression()->toAST(options)->getColumnName();
result->children.push_back(getInterpolateExpression()->toAST(options));
result->expr = result->children.back();
return result;

View File

@ -59,7 +59,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
static constexpr size_t expression_child_index = 0;

View File

@ -99,17 +99,17 @@ QueryTreeNodePtr JoinNode::cloneImpl() const
return std::make_shared<JoinNode>(getLeftTableExpression(), getRightTableExpression(), getJoinExpression(), locality, strictness, kind);
}
ASTPtr JoinNode::toASTImpl() const
ASTPtr JoinNode::toASTImpl(const ConvertToASTOptions & options) const
{
ASTPtr tables_in_select_query_ast = std::make_shared<ASTTablesInSelectQuery>();
addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[left_table_expression_child_index]);
addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[left_table_expression_child_index], options);
size_t join_table_index = tables_in_select_query_ast->children.size();
auto join_ast = toASTTableJoin();
addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[right_table_expression_child_index]);
addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, children[right_table_expression_child_index], options);
auto & table_element = tables_in_select_query_ast->children.at(join_table_index)->as<ASTTablesInSelectQueryElement &>();
table_element.children.push_back(std::move(join_ast));

View File

@ -148,7 +148,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
JoinLocality locality = JoinLocality::Unspecified;

View File

@ -65,17 +65,17 @@ QueryTreeNodePtr LambdaNode::cloneImpl() const
return std::make_shared<LambdaNode>(argument_names, getExpression());
}
ASTPtr LambdaNode::toASTImpl() const
ASTPtr LambdaNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto lambda_function_arguments_ast = std::make_shared<ASTExpressionList>();
auto tuple_function = std::make_shared<ASTFunction>();
tuple_function->name = "tuple";
tuple_function->children.push_back(children[arguments_child_index]->toAST());
tuple_function->children.push_back(children[arguments_child_index]->toAST(options));
tuple_function->arguments = tuple_function->children.back();
lambda_function_arguments_ast->children.push_back(std::move(tuple_function));
lambda_function_arguments_ast->children.push_back(children[expression_child_index]->toAST());
lambda_function_arguments_ast->children.push_back(children[expression_child_index]->toAST(options));
auto lambda_function_ast = std::make_shared<ASTFunction>();
lambda_function_ast->name = "lambda";

View File

@ -98,7 +98,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
Names argument_names;

View File

@ -54,7 +54,7 @@ QueryTreeNodePtr ListNode::cloneImpl() const
return std::make_shared<ListNode>();
}
ASTPtr ListNode::toASTImpl() const
ASTPtr ListNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto expression_list_ast = std::make_shared<ASTExpressionList>();
@ -62,7 +62,7 @@ ASTPtr ListNode::toASTImpl() const
expression_list_ast->children.resize(children_size);
for (size_t i = 0; i < children_size; ++i)
expression_list_ast->children[i] = children[i]->toAST();
expression_list_ast->children[i] = children[i]->toAST(options);
return expression_list_ast;
}

View File

@ -57,7 +57,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
};
}

View File

@ -204,7 +204,7 @@ QueryTreeNodePtr MatcherNode::cloneImpl() const
return matcher_node;
}
ASTPtr MatcherNode::toASTImpl() const
ASTPtr MatcherNode::toASTImpl(const ConvertToASTOptions & options) const
{
ASTPtr result;
ASTPtr transformers;
@ -216,7 +216,7 @@ ASTPtr MatcherNode::toASTImpl() const
transformers = std::make_shared<ASTColumnsTransformerList>();
for (const auto & column_transformer : column_transformers)
transformers->children.push_back(column_transformer->toAST());
transformers->children.push_back(column_transformer->toAST(options));
}
if (matcher_type == MatcherNodeType::ASTERISK)

View File

@ -148,7 +148,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
explicit MatcherNode(MatcherNodeType matcher_type_,

View File

@ -259,7 +259,7 @@ QueryTreeNodePtr QueryNode::cloneImpl() const
return result_query_node;
}
ASTPtr QueryNode::toASTImpl() const
ASTPtr QueryNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto select_query = std::make_shared<ASTSelectQuery>();
select_query->distinct = is_distinct;
@ -271,9 +271,9 @@ ASTPtr QueryNode::toASTImpl() const
select_query->group_by_all = is_group_by_all;
if (hasWith())
select_query->setExpression(ASTSelectQuery::Expression::WITH, getWith().toAST());
select_query->setExpression(ASTSelectQuery::Expression::WITH, getWith().toAST(options));
auto projection_ast = getProjection().toAST();
auto projection_ast = getProjection().toAST(options);
auto & projection_expression_list_ast = projection_ast->as<ASTExpressionList &>();
size_t projection_expression_list_ast_children_size = projection_expression_list_ast.children.size();
if (projection_expression_list_ast_children_size != getProjection().getNodes().size())
@ -293,44 +293,44 @@ ASTPtr QueryNode::toASTImpl() const
select_query->setExpression(ASTSelectQuery::Expression::SELECT, std::move(projection_ast));
ASTPtr tables_in_select_query_ast = std::make_shared<ASTTablesInSelectQuery>();
addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, getJoinTree());
addTableExpressionOrJoinIntoTablesInSelectQuery(tables_in_select_query_ast, getJoinTree(), options);
select_query->setExpression(ASTSelectQuery::Expression::TABLES, std::move(tables_in_select_query_ast));
if (getPrewhere())
select_query->setExpression(ASTSelectQuery::Expression::PREWHERE, getPrewhere()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::PREWHERE, getPrewhere()->toAST(options));
if (getWhere())
select_query->setExpression(ASTSelectQuery::Expression::WHERE, getWhere()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::WHERE, getWhere()->toAST(options));
if (!is_group_by_all && hasGroupBy())
select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, getGroupBy().toAST());
select_query->setExpression(ASTSelectQuery::Expression::GROUP_BY, getGroupBy().toAST(options));
if (hasHaving())
select_query->setExpression(ASTSelectQuery::Expression::HAVING, getHaving()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::HAVING, getHaving()->toAST(options));
if (hasWindow())
select_query->setExpression(ASTSelectQuery::Expression::WINDOW, getWindow().toAST());
select_query->setExpression(ASTSelectQuery::Expression::WINDOW, getWindow().toAST(options));
if (hasOrderBy())
select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, getOrderBy().toAST());
select_query->setExpression(ASTSelectQuery::Expression::ORDER_BY, getOrderBy().toAST(options));
if (hasInterpolate())
select_query->setExpression(ASTSelectQuery::Expression::INTERPOLATE, getInterpolate()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::INTERPOLATE, getInterpolate()->toAST(options));
if (hasLimitByLimit())
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_BY_LENGTH, getLimitByLimit()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_BY_LENGTH, getLimitByLimit()->toAST(options));
if (hasLimitByOffset())
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_BY_OFFSET, getLimitByOffset()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_BY_OFFSET, getLimitByOffset()->toAST(options));
if (hasLimitBy())
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_BY, getLimitBy().toAST());
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_BY, getLimitBy().toAST(options));
if (hasLimit())
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, getLimit()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, getLimit()->toAST(options));
if (hasOffset())
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_OFFSET, getOffset()->toAST());
select_query->setExpression(ASTSelectQuery::Expression::LIMIT_OFFSET, getOffset()->toAST(options));
if (hasSettingsChanges())
{

View File

@ -575,7 +575,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
bool is_subquery = false;

View File

@ -838,8 +838,14 @@ QueryTreeNodePtr QueryTreeBuilder::buildJoinTree(const ASTPtr & tables_in_select
const auto & function_arguments_list = table_function_expression.arguments->as<ASTExpressionList &>().children;
for (const auto & argument : function_arguments_list)
{
if (!node->getSettingsChanges().empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Table function '{}' has arguments after SETTINGS",
table_function_expression.formatForErrorMessage());
if (argument->as<ASTSelectQuery>() || argument->as<ASTSelectWithUnionQuery>() || argument->as<ASTSelectIntersectExceptQuery>())
node->getArguments().getNodes().push_back(buildSelectOrUnionExpression(argument, false /*is_subquery*/, {} /*cte_name*/, context));
else if (const auto * ast_set = argument->as<ASTSetQuery>())
node->setSettingsChanges(ast_set->changes);
else
node->getArguments().getNodes().push_back(buildExpression(argument, context));
}
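The loop above splits a table function's argument list: ordinary arguments go to getArguments(), a trailing ASTSetQuery is captured into settings_changes, and because getSettingsChanges() is checked at the top of every iteration, any argument appearing after SETTINGS raises LOGICAL_ERROR. A reduced, self-contained model of that dispatch (simplified stand-in types, not the real AST classes):

#include <stdexcept>
#include <string>
#include <vector>

struct Arg { std::string text; bool is_settings = false; };

struct TableFunction
{
    std::vector<std::string> arguments;
    std::vector<std::string> settings_changes;
};

TableFunction buildTableFunction(const std::vector<Arg> & args)
{
    TableFunction node;
    for (const auto & arg : args)
    {
        if (!node.settings_changes.empty())
            throw std::logic_error("Table function has arguments after SETTINGS");
        if (arg.is_settings)
            node.settings_changes.push_back(arg.text);  // capture SETTINGS k = v, ...
        else
            node.arguments.push_back(arg.text);         // ordinary argument
    }
    return node;
}

// buildTableFunction({{"'127.0.0.1'"}, {"system.one"}, {"max_threads = 1", /*is_settings=*/true}})
// keeps the first two as arguments and captures the last as a settings change;
// a fourth element after the SETTINGS one would throw.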

View File

@ -109,7 +109,7 @@ QueryTreeNodePtr SortNode::cloneImpl() const
return std::make_shared<SortNode>(nullptr /*expression*/, sort_direction, nulls_sort_direction, collator, with_fill);
}
ASTPtr SortNode::toASTImpl() const
ASTPtr SortNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto result = std::make_shared<ASTOrderByElement>();
result->direction = sort_direction == SortDirection::ASCENDING ? 1 : -1;
@ -120,10 +120,10 @@ ASTPtr SortNode::toASTImpl() const
result->nulls_direction_was_explicitly_specified = nulls_sort_direction.has_value();
result->with_fill = with_fill;
result->fill_from = hasFillFrom() ? getFillFrom()->toAST() : nullptr;
result->fill_to = hasFillTo() ? getFillTo()->toAST() : nullptr;
result->fill_step = hasFillStep() ? getFillStep()->toAST() : nullptr;
result->children.push_back(getExpression()->toAST());
result->fill_from = hasFillFrom() ? getFillFrom()->toAST(options) : nullptr;
result->fill_to = hasFillTo() ? getFillTo()->toAST(options) : nullptr;
result->fill_step = hasFillStep() ? getFillStep()->toAST(options) : nullptr;
result->children.push_back(getExpression()->toAST(options));
if (collator)
{

View File

@ -137,7 +137,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
static constexpr size_t sort_expression_child_index = 0;

View File

@ -7,6 +7,7 @@
#include <Storages/IStorage.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSetQuery.h>
#include <Interpreters/Context.h>
@ -71,6 +72,13 @@ void TableFunctionNode::dumpTreeImpl(WriteBuffer & buffer, FormatState & format_
buffer << '\n' << std::string(indent + 2, ' ') << "ARGUMENTS\n";
arguments.dumpTreeImpl(buffer, format_state, indent + 4);
}
if (!settings_changes.empty())
{
buffer << '\n' << std::string(indent + 2, ' ') << "SETTINGS";
for (const auto & change : settings_changes)
buffer << fmt::format(" {}={}", change.name, toString(change.value));
}
}
bool TableFunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const
@ -82,6 +90,9 @@ bool TableFunctionNode::isEqualImpl(const IQueryTreeNode & rhs) const
if (storage && rhs_typed.storage)
return storage_id == rhs_typed.storage_id;
if (settings_changes != rhs_typed.settings_changes)
return false;
return table_expression_modifiers == rhs_typed.table_expression_modifiers;
}
@ -99,6 +110,17 @@ void TableFunctionNode::updateTreeHashImpl(HashState & state) const
if (table_expression_modifiers)
table_expression_modifiers->updateTreeHash(state);
state.update(settings_changes.size());
for (const auto & change : settings_changes)
{
state.update(change.name.size());
state.update(change.name);
const auto & value_dump = change.value.dump();
state.update(value_dump.size());
state.update(value_dump);
}
}
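updateTreeHashImpl feeds each settings change into the hash as size-then-bytes, for the name and for the serialized value. The length prefix matters because the hash state only sees a byte stream: without it, the pairs ("ab", "c") and ("a", "bc") would contribute identical bytes. A minimal sketch of the idea, assuming the same update(size)/update(string) interface the diff uses:

#include <string>

template <typename HashState>
void updateStringHash(HashState & state, const std::string & s)
{
    state.update(s.size());   // length prefix disambiguates adjacent strings in the stream
    state.update(s);
}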
QueryTreeNodePtr TableFunctionNode::cloneImpl() const
@ -109,20 +131,29 @@ QueryTreeNodePtr TableFunctionNode::cloneImpl() const
result->storage_id = storage_id;
result->storage_snapshot = storage_snapshot;
result->table_expression_modifiers = table_expression_modifiers;
result->settings_changes = settings_changes;
return result;
}
ASTPtr TableFunctionNode::toASTImpl() const
ASTPtr TableFunctionNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto table_function_ast = std::make_shared<ASTFunction>();
table_function_ast->name = table_function_name;
const auto & arguments = getArguments();
table_function_ast->children.push_back(arguments.toAST());
table_function_ast->children.push_back(arguments.toAST(options));
table_function_ast->arguments = table_function_ast->children.back();
if (!settings_changes.empty())
{
auto settings_ast = std::make_shared<ASTSetQuery>();
settings_ast->changes = settings_changes;
settings_ast->is_standalone = false;
table_function_ast->arguments->children.push_back(std::move(settings_ast));
}
return table_function_ast;
}
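This completes the round trip with the buildJoinTree change above: settings captured during parsing are re-emitted as the last child of the arguments list, and is_standalone = false marks the ASTSetQuery as an inline SETTINGS clause rather than a standalone SET statement. A worked example of the intended identity (query text and intermediate notation are illustrative, not from this patch):

// "s3('url', 'CSV', SETTINGS s3_truncate_on_insert = 1)"
//   --buildJoinTree--> arguments = ['url', 'CSV'], settings_changes = {s3_truncate_on_insert = 1}
//   --toASTImpl------> ASTFunction{arguments = ['url', 'CSV', ASTSetQuery(is_standalone = false)]}
//   --format---------> "s3('url', 'CSV', SETTINGS s3_truncate_on_insert = 1)"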

View File

@ -1,5 +1,7 @@
#pragma once
#include <Common/SettingsChanges.h>
#include <Storages/IStorage_fwd.h>
#include <Storages/TableLockHolder.h>
#include <Storages/StorageSnapshot.h>
@ -122,6 +124,18 @@ public:
return table_expression_modifiers;
}
/// Get settings changes passed to table function
const SettingsChanges & getSettingsChanges() const
{
return settings_changes;
}
/// Set settings changes passed as last argument to table function
void setSettingsChanges(SettingsChanges settings_changes_)
{
settings_changes = std::move(settings_changes_);
}
/// Set table expression modifiers
void setTableExpressionModifiers(TableExpressionModifiers table_expression_modifiers_value)
{
@ -142,7 +156,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
String table_function_name;
@ -151,6 +165,7 @@ private:
StorageID storage_id;
StorageSnapshotPtr storage_snapshot;
std::optional<TableExpressionModifiers> table_expression_modifiers;
SettingsChanges settings_changes;
static constexpr size_t arguments_child_index = 0;
static constexpr size_t children_size = arguments_child_index + 1;

View File

@ -86,7 +86,7 @@ QueryTreeNodePtr TableNode::cloneImpl() const
return result_table_node;
}
ASTPtr TableNode::toASTImpl() const
ASTPtr TableNode::toASTImpl(const ConvertToASTOptions & /* options */) const
{
if (!temporary_table_name.empty())
return std::make_shared<ASTTableIdentifier>(temporary_table_name);

View File

@ -106,7 +106,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
StoragePtr storage;

View File

@ -140,12 +140,12 @@ QueryTreeNodePtr UnionNode::cloneImpl() const
return result_union_node;
}
ASTPtr UnionNode::toASTImpl() const
ASTPtr UnionNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto select_with_union_query = std::make_shared<ASTSelectWithUnionQuery>();
select_with_union_query->union_mode = union_mode;
select_with_union_query->is_normalized = true;
select_with_union_query->children.push_back(getQueriesNode()->toAST());
select_with_union_query->children.push_back(getQueriesNode()->toAST(options));
select_with_union_query->list_of_selects = select_with_union_query->children.back();
if (is_subquery)

View File

@ -143,7 +143,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
bool is_subquery = false;

View File

@ -268,7 +268,7 @@ static ASTPtr convertIntoTableExpressionAST(const QueryTreeNodePtr & table_expre
return result_table_expression;
}
void addTableExpressionOrJoinIntoTablesInSelectQuery(ASTPtr & tables_in_select_query_ast, const QueryTreeNodePtr & table_expression)
void addTableExpressionOrJoinIntoTablesInSelectQuery(ASTPtr & tables_in_select_query_ast, const QueryTreeNodePtr & table_expression, const IQueryTreeNode::ConvertToASTOptions & convert_to_ast_options)
{
auto table_expression_node_type = table_expression->getNodeType();
@ -297,7 +297,7 @@ void addTableExpressionOrJoinIntoTablesInSelectQuery(ASTPtr & tables_in_select_q
[[fallthrough]];
case QueryTreeNodeType::JOIN:
{
auto table_expression_tables_in_select_query_ast = table_expression->toAST();
auto table_expression_tables_in_select_query_ast = table_expression->toAST(convert_to_ast_options);
tables_in_select_query_ast->children.reserve(table_expression_tables_in_select_query_ast->children.size());
for (auto && table_element_ast : table_expression_tables_in_select_query_ast->children)
tables_in_select_query_ast->children.push_back(std::move(table_element_ast));

View File

@ -40,7 +40,7 @@ std::optional<bool> tryExtractConstantFromConditionNode(const QueryTreeNodePtr &
/** Adds a table expression to the children of the tables-in-select-query AST.
* Throws a logical error exception if the table expression node is not an identifier, table, query, table function, join, or array join node.
*/
void addTableExpressionOrJoinIntoTablesInSelectQuery(ASTPtr & tables_in_select_query_ast, const QueryTreeNodePtr & table_expression);
void addTableExpressionOrJoinIntoTablesInSelectQuery(ASTPtr & tables_in_select_query_ast, const QueryTreeNodePtr & table_expression, const IQueryTreeNode::ConvertToASTOptions & convert_to_ast_options);
/// Extract table, table function, query, union from join tree
QueryTreeNodes extractTableExpressions(const QueryTreeNodePtr & join_tree_node);

View File

@ -107,7 +107,7 @@ QueryTreeNodePtr WindowNode::cloneImpl() const
return window_node;
}
ASTPtr WindowNode::toASTImpl() const
ASTPtr WindowNode::toASTImpl(const ConvertToASTOptions & options) const
{
auto window_definition = std::make_shared<ASTWindowDefinition>();
@ -115,13 +115,13 @@ ASTPtr WindowNode::toASTImpl() const
if (hasPartitionBy())
{
window_definition->children.push_back(getPartitionByNode()->toAST());
window_definition->children.push_back(getPartitionByNode()->toAST(options));
window_definition->partition_by = window_definition->children.back();
}
if (hasOrderBy())
{
window_definition->children.push_back(getOrderByNode()->toAST());
window_definition->children.push_back(getOrderByNode()->toAST(options));
window_definition->order_by = window_definition->children.back();
}
@ -132,7 +132,7 @@ ASTPtr WindowNode::toASTImpl() const
if (hasFrameBeginOffset())
{
window_definition->children.push_back(getFrameBeginOffsetNode()->toAST());
window_definition->children.push_back(getFrameBeginOffsetNode()->toAST(options));
window_definition->frame_begin_offset = window_definition->children.back();
}
@ -140,7 +140,7 @@ ASTPtr WindowNode::toASTImpl() const
window_definition->frame_end_preceding = window_frame.end_preceding;
if (hasFrameEndOffset())
{
window_definition->children.push_back(getFrameEndOffsetNode()->toAST());
window_definition->children.push_back(getFrameEndOffsetNode()->toAST(options));
window_definition->frame_end_offset = window_definition->children.back();
}

View File

@ -175,7 +175,7 @@ protected:
QueryTreeNodePtr cloneImpl() const override;
ASTPtr toASTImpl() const override;
ASTPtr toASTImpl(const ConvertToASTOptions & options) const override;
private:
static constexpr size_t order_by_child_index = 0;

View File

@ -36,7 +36,7 @@ public:
return std::make_shared<SourceNode>();
}
ASTPtr toASTImpl() const override
ASTPtr toASTImpl(const ConvertToASTOptions & /* options */) const override
{
return nullptr;
}

View File

@ -13,20 +13,20 @@ using FileInfo = IBackupCoordination::FileInfo;
BackupCoordinationLocal::BackupCoordinationLocal() = default;
BackupCoordinationLocal::~BackupCoordinationLocal() = default;
void BackupCoordinationLocal::setStage(const String &, const String &, const String &)
void BackupCoordinationLocal::setStage(const String &, const String &)
{
}
void BackupCoordinationLocal::setError(const String &, const Exception &)
void BackupCoordinationLocal::setError(const Exception &)
{
}
Strings BackupCoordinationLocal::waitForStage(const Strings &, const String &)
Strings BackupCoordinationLocal::waitForStage(const String &)
{
return {};
}
Strings BackupCoordinationLocal::waitForStage(const Strings &, const String &, std::chrono::milliseconds)
Strings BackupCoordinationLocal::waitForStage(const String &, std::chrono::milliseconds)
{
return {};
}
@ -70,29 +70,29 @@ Strings BackupCoordinationLocal::getReplicatedDataPaths(const String & table_sha
}
void BackupCoordinationLocal::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path)
void BackupCoordinationLocal::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path)
{
std::lock_guard lock{mutex};
replicated_access.addFilePath(access_zk_path, access_entity_type, host_id, file_path);
replicated_access.addFilePath(access_zk_path, access_entity_type, "", file_path);
}
Strings BackupCoordinationLocal::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const
Strings BackupCoordinationLocal::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const
{
std::lock_guard lock{mutex};
return replicated_access.getFilePaths(access_zk_path, access_entity_type, host_id);
return replicated_access.getFilePaths(access_zk_path, access_entity_type, "");
}
void BackupCoordinationLocal::addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id, const String & dir_path)
void BackupCoordinationLocal::addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path)
{
std::lock_guard lock{mutex};
replicated_sql_objects.addDirectory(loader_zk_path, object_type, host_id, dir_path);
replicated_sql_objects.addDirectory(loader_zk_path, object_type, "", dir_path);
}
Strings BackupCoordinationLocal::getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id) const
Strings BackupCoordinationLocal::getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const
{
std::lock_guard lock{mutex};
return replicated_sql_objects.getDirectories(loader_zk_path, object_type, host_id);
return replicated_sql_objects.getDirectories(loader_zk_path, object_type, "");
}

View File

@ -21,10 +21,10 @@ public:
BackupCoordinationLocal();
~BackupCoordinationLocal() override;
void setStage(const String & current_host, const String & new_stage, const String & message) override;
void setError(const String & current_host, const Exception & exception) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) override;
void setStage(const String & new_stage, const String & message) override;
void setError(const Exception & exception) override;
Strings waitForStage(const String & stage_to_wait) override;
Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override;
void addReplicatedPartNames(const String & table_shared_id, const String & table_name_for_logs, const String & replica_name,
const std::vector<PartNameAndChecksum> & part_names_and_checksums) override;
@ -37,11 +37,11 @@ public:
void addReplicatedDataPath(const String & table_shared_id, const String & data_path) override;
Strings getReplicatedDataPaths(const String & table_shared_id) const override;
void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path) override;
Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const override;
void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path) override;
Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const override;
void addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id, const String & dir_path) override;
Strings getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id) const override;
void addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path) override;
Strings getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const override;
void addFileInfo(const FileInfo & file_info, bool & is_data_file_required) override;
void updateFileInfo(const FileInfo & file_info) override;

View File

@ -166,17 +166,30 @@ namespace
}
}
size_t BackupCoordinationRemote::findCurrentHostIndex(const Strings & all_hosts, const String & current_host)
{
auto it = std::find(all_hosts.begin(), all_hosts.end(), current_host);
if (it == all_hosts.end())
return 0;
return it - all_hosts.begin();
}
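findCurrentHostIndex is new here, and it has one subtlety worth noting: a current_host that is absent from all_hosts maps to index 0, which is indistinguishable from a genuine match on the first host (this covers the local case where all_hosts is empty). A stand-alone copy with assertions to illustrate the behavior (simplified types; mirrors the function above):

#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

size_t findCurrentHostIndex(const std::vector<std::string> & all_hosts, const std::string & current_host)
{
    auto it = std::find(all_hosts.begin(), all_hosts.end(), current_host);
    if (it == all_hosts.end())
        return 0;                                        // unknown host falls back to index 0
    return static_cast<size_t>(it - all_hosts.begin());
}

int main()
{
    std::vector<std::string> all_hosts{"shard1|replica1", "shard1|replica2"};
    assert(findCurrentHostIndex(all_hosts, "shard1|replica2") == 1);
    assert(findCurrentHostIndex(all_hosts, "missing") == 0);   // same result as the first host
}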
BackupCoordinationRemote::BackupCoordinationRemote(
const BackupKeeperSettings & keeper_settings_,
const String & root_zookeeper_path_,
const String & backup_uuid_,
zkutil::GetZooKeeper get_zookeeper_,
const String & root_zookeeper_path_,
const BackupKeeperSettings & keeper_settings_,
const String & backup_uuid_,
const Strings & all_hosts_,
const String & current_host_,
bool is_internal_)
: keeper_settings(keeper_settings_)
: get_zookeeper(get_zookeeper_)
, root_zookeeper_path(root_zookeeper_path_)
, zookeeper_path(root_zookeeper_path_ + "/backup-" + backup_uuid_)
, keeper_settings(keeper_settings_)
, backup_uuid(backup_uuid_)
, get_zookeeper(get_zookeeper_)
, all_hosts(all_hosts_)
, current_host(current_host_)
, current_host_index(findCurrentHostIndex(all_hosts, current_host))
, is_internal(is_internal_)
{
zookeeper_retries_info = ZooKeeperRetriesInfo(
@ -251,22 +264,22 @@ void BackupCoordinationRemote::removeAllNodes()
}
void BackupCoordinationRemote::setStage(const String & current_host, const String & new_stage, const String & message)
void BackupCoordinationRemote::setStage(const String & new_stage, const String & message)
{
stage_sync->set(current_host, new_stage, message);
}
void BackupCoordinationRemote::setError(const String & current_host, const Exception & exception)
void BackupCoordinationRemote::setError(const Exception & exception)
{
stage_sync->setError(current_host, exception);
}
Strings BackupCoordinationRemote::waitForStage(const Strings & all_hosts, const String & stage_to_wait)
Strings BackupCoordinationRemote::waitForStage(const String & stage_to_wait)
{
return stage_sync->wait(all_hosts, stage_to_wait);
}
Strings BackupCoordinationRemote::waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout)
Strings BackupCoordinationRemote::waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout)
{
return stage_sync->waitFor(all_hosts, stage_to_wait, timeout);
}
@ -403,7 +416,7 @@ void BackupCoordinationRemote::prepareReplicatedTables() const
}
void BackupCoordinationRemote::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path)
void BackupCoordinationRemote::addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path)
{
{
std::lock_guard lock{mutex};
@ -416,15 +429,15 @@ void BackupCoordinationRemote::addReplicatedAccessFilePath(const String & access
zk->createIfNotExists(path, "");
path += "/" + AccessEntityTypeInfo::get(access_entity_type).name;
zk->createIfNotExists(path, "");
path += "/" + host_id;
path += "/" + current_host;
zk->createIfNotExists(path, file_path);
}
Strings BackupCoordinationRemote::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const
Strings BackupCoordinationRemote::getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const
{
std::lock_guard lock{mutex};
prepareReplicatedAccess();
return replicated_access->getFilePaths(access_zk_path, access_entity_type, host_id);
return replicated_access->getFilePaths(access_zk_path, access_entity_type, current_host);
}
void BackupCoordinationRemote::prepareReplicatedAccess() const
@ -453,7 +466,7 @@ void BackupCoordinationRemote::prepareReplicatedAccess() const
}
}
void BackupCoordinationRemote::addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id, const String & dir_path)
void BackupCoordinationRemote::addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path)
{
{
std::lock_guard lock{mutex};
@ -474,15 +487,15 @@ void BackupCoordinationRemote::addReplicatedSQLObjectsDir(const String & loader_
}
zk->createIfNotExists(path, "");
path += "/" + host_id;
path += "/" + current_host;
zk->createIfNotExists(path, dir_path);
}
Strings BackupCoordinationRemote::getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id) const
Strings BackupCoordinationRemote::getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const
{
std::lock_guard lock{mutex};
prepareReplicatedSQLObjects();
return replicated_sql_objects->getDirectories(loader_zk_path, object_type, host_id);
return replicated_sql_objects->getDirectories(loader_zk_path, object_type, current_host);
}
void BackupCoordinationRemote::prepareReplicatedSQLObjects() const
@ -827,5 +840,4 @@ bool BackupCoordinationRemote::hasConcurrentBackups(const std::atomic<size_t> &)
return false;
}
}

View File

@ -27,17 +27,20 @@ public:
};
BackupCoordinationRemote(
const BackupKeeperSettings & keeper_settings_,
const String & root_zookeeper_path_,
const String & backup_uuid_,
zkutil::GetZooKeeper get_zookeeper_,
const String & root_zookeeper_path_,
const BackupKeeperSettings & keeper_settings_,
const String & backup_uuid_,
const Strings & all_hosts_,
const String & current_host_,
bool is_internal_);
~BackupCoordinationRemote() override;
void setStage(const String & current_host, const String & new_stage, const String & message) override;
void setError(const String & current_host, const Exception & exception) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) override;
void setStage(const String & new_stage, const String & message) override;
void setError(const Exception & exception) override;
Strings waitForStage(const String & stage_to_wait) override;
Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override;
void addReplicatedPartNames(
const String & table_shared_id,
@ -58,11 +61,11 @@ public:
void addReplicatedDataPath(const String & table_shared_id, const String & data_path) override;
Strings getReplicatedDataPaths(const String & table_shared_id) const override;
void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path) override;
Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const override;
void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path) override;
Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const override;
void addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id, const String & dir_path) override;
Strings getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id) const override;
void addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path) override;
Strings getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const override;
void addFileInfo(const FileInfo & file_info, bool & is_data_file_required) override;
void updateFileInfo(const FileInfo & file_info) override;
@ -78,6 +81,8 @@ public:
bool hasConcurrentBackups(const std::atomic<size_t> & num_active_backups) const override;
static size_t findCurrentHostIndex(const Strings & all_hosts, const String & current_host);
private:
zkutil::ZooKeeperPtr getZooKeeper() const;
zkutil::ZooKeeperPtr getZooKeeperNoLock() const;
@ -91,11 +96,14 @@ private:
void prepareReplicatedAccess() const;
void prepareReplicatedSQLObjects() const;
const BackupKeeperSettings keeper_settings;
const zkutil::GetZooKeeper get_zookeeper;
const String root_zookeeper_path;
const String zookeeper_path;
const BackupKeeperSettings keeper_settings;
const String backup_uuid;
const zkutil::GetZooKeeper get_zookeeper;
const Strings all_hosts;
const String current_host;
const size_t current_host_index;
const bool is_internal;
mutable ZooKeeperRetriesInfo zookeeper_retries_info;
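Note that the data members were reordered together with the constructor's initializer list: C++ initializes members in declaration order, not in initializer-list order, so current_host_index(findCurrentHostIndex(all_hosts, current_host)) is only safe because all_hosts and current_host are declared first. A minimal sketch of the constraint, with illustrative types:

#include <string>
#include <vector>

struct Coordination
{
    // Declaration order is initialization order:
    const std::vector<std::string> all_hosts;    // initialized 1st
    const std::string current_host;              // initialized 2nd
    const size_t current_host_index;             // initialized 3rd: may read the two above

    Coordination(const std::vector<std::string> & all_hosts_, const std::string & current_host_)
        : all_hosts(all_hosts_)
        , current_host(current_host_)
        , current_host_index(findIndex(all_hosts, current_host))   // members above are ready
    {
    }

    static size_t findIndex(const std::vector<std::string> & hosts, const std::string & host)
    {
        for (size_t i = 0; i != hosts.size(); ++i)
            if (hosts[i] == host)
                return i;
        return 0;
    }
};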

View File

@ -133,22 +133,22 @@ Strings BackupEntriesCollector::setStage(const String & new_stage, const String
LOG_TRACE(log, fmt::runtime(toUpperFirst(new_stage)));
current_stage = new_stage;
backup_coordination->setStage(backup_settings.host_id, new_stage, message);
backup_coordination->setStage(new_stage, message);
if (new_stage == Stage::formatGatheringMetadata(1))
{
return backup_coordination->waitForStage(all_hosts, new_stage, on_cluster_first_sync_timeout);
return backup_coordination->waitForStage(new_stage, on_cluster_first_sync_timeout);
}
else if (new_stage.starts_with(Stage::GATHERING_METADATA))
{
auto current_time = std::chrono::steady_clock::now();
auto end_of_timeout = std::max(current_time, consistent_metadata_snapshot_end_time);
return backup_coordination->waitForStage(
all_hosts, new_stage, std::chrono::duration_cast<std::chrono::milliseconds>(end_of_timeout - current_time));
new_stage, std::chrono::duration_cast<std::chrono::milliseconds>(end_of_timeout - current_time));
}
else
{
return backup_coordination->waitForStage(all_hosts, new_stage);
return backup_coordination->waitForStage(new_stage);
}
}
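setStage now distinguishes three waits without callers passing host lists: the first metadata-gathering stage is bounded by on_cluster_first_sync_timeout, later gathering stages wait only until consistent_metadata_snapshot_end_time, and every other stage waits indefinitely. The std::max against the current time clamps the second case so the computed wait can never go negative once the deadline has passed. A minimal sketch of that clamp, assuming steady_clock deadlines:

#include <algorithm>
#include <chrono>

// Remaining wait until a deadline, clamped to zero if it has already passed.
std::chrono::milliseconds remainingTimeout(std::chrono::steady_clock::time_point deadline)
{
    const auto now = std::chrono::steady_clock::now();
    const auto end = std::max(now, deadline);   // past deadline -> zero wait, never negative
    return std::chrono::duration_cast<std::chrono::milliseconds>(end - now);
}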

View File

@ -1,10 +1,7 @@
#include <Backups/BackupUtils.h>
#include <Backups/IBackup.h>
#include <Backups/RestoreSettings.h>
#include <Access/Common/AccessRightsElement.h>
#include <Databases/DDLRenamingVisitor.h>
#include <Interpreters/DatabaseCatalog.h>
#include <Common/scope_guard_safe.h>
#include <Common/setThreadName.h>
@ -60,140 +57,6 @@ DDLRenamingMap makeRenamingMapFromBackupQuery(const ASTBackupQuery::Elements & e
}
void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, ThreadPool & thread_pool)
{
size_t num_active_jobs = 0;
std::mutex mutex;
std::condition_variable event;
std::exception_ptr exception;
bool always_single_threaded = !backup->supportsWritingInMultipleThreads();
auto thread_group = CurrentThread::getGroup();
for (auto & name_and_entry : backup_entries)
{
auto & name = name_and_entry.first;
auto & entry = name_and_entry.second;
{
std::unique_lock lock{mutex};
if (exception)
break;
++num_active_jobs;
}
auto job = [&](bool async)
{
SCOPE_EXIT_SAFE(
std::lock_guard lock{mutex};
if (!--num_active_jobs)
event.notify_all();
if (async)
CurrentThread::detachFromGroupIfNotDetached();
);
try
{
if (async && thread_group)
CurrentThread::attachToGroup(thread_group);
if (async)
setThreadName("BackupWorker");
{
std::lock_guard lock{mutex};
if (exception)
return;
}
backup->writeFile(name, std::move(entry));
}
catch (...)
{
std::lock_guard lock{mutex};
if (!exception)
exception = std::current_exception();
}
};
if (always_single_threaded || !thread_pool.trySchedule([job] { job(true); }))
job(false);
}
{
std::unique_lock lock{mutex};
event.wait(lock, [&] { return !num_active_jobs; });
if (exception)
std::rethrow_exception(exception);
}
}
void restoreTablesData(DataRestoreTasks && tasks, ThreadPool & thread_pool)
{
size_t num_active_jobs = 0;
std::mutex mutex;
std::condition_variable event;
std::exception_ptr exception;
auto thread_group = CurrentThread::getGroup();
for (auto & task : tasks)
{
{
std::unique_lock lock{mutex};
if (exception)
break;
++num_active_jobs;
}
auto job = [&](bool async)
{
SCOPE_EXIT_SAFE(
std::lock_guard lock{mutex};
if (!--num_active_jobs)
event.notify_all();
if (async)
CurrentThread::detachFromGroupIfNotDetached();
);
try
{
if (async && thread_group)
CurrentThread::attachToGroup(thread_group);
if (async)
setThreadName("RestoreWorker");
{
std::lock_guard lock{mutex};
if (exception)
return;
}
std::move(task)();
}
catch (...)
{
std::lock_guard lock{mutex};
if (!exception)
exception = std::current_exception();
}
};
if (!thread_pool.trySchedule([job] { job(true); }))
job(false);
}
{
std::unique_lock lock{mutex};
event.wait(lock, [&] { return !num_active_jobs; });
if (exception)
std::rethrow_exception(exception);
}
}
/// Returns access required to execute BACKUP query.
AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements & elements)
{

View File

@ -7,21 +7,12 @@
namespace DB
{
class IBackup;
using BackupMutablePtr = std::shared_ptr<IBackup>;
class IBackupEntry;
using BackupEntries = std::vector<std::pair<String, std::shared_ptr<const IBackupEntry>>>;
using DataRestoreTasks = std::vector<std::function<void()>>;
class AccessRightsElements;
class DDLRenamingMap;
/// Initializes a DDLRenamingMap from a BACKUP or RESTORE query.
DDLRenamingMap makeRenamingMapFromBackupQuery(const ASTBackupQuery::Elements & elements);
/// Write backup entries to an opened backup.
void writeBackupEntries(BackupMutablePtr backup, BackupEntries && backup_entries, ThreadPool & thread_pool);
/// Run data restoring tasks which insert data to tables.
void restoreTablesData(DataRestoreTasks && tasks, ThreadPool & thread_pool);
/// Returns access required to execute BACKUP query.
AccessRightsElements getRequiredAccessToBackup(const ASTBackupQuery::Elements & elements);

View File

@ -21,6 +21,7 @@
#include <Common/Macros.h>
#include <Common/logger_useful.h>
#include <Common/setThreadName.h>
#include <Common/scope_guard_safe.h>
namespace DB
@ -38,14 +39,33 @@ namespace Stage = BackupCoordinationStage;
namespace
{
std::shared_ptr<IBackupCoordination> makeBackupCoordination(std::optional<BackupCoordinationRemote::BackupKeeperSettings> keeper_settings, String & root_zk_path, const String & backup_uuid, const ContextPtr & context, bool is_internal_backup)
std::shared_ptr<IBackupCoordination> makeBackupCoordination(const ContextPtr & context, const BackupSettings & backup_settings, bool remote)
{
if (!root_zk_path.empty())
if (remote)
{
if (!keeper_settings.has_value())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Parameter keeper_settings is empty while root_zk_path is not. This is bug");
String root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
auto get_zookeeper = [global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); };
return std::make_shared<BackupCoordinationRemote>(*keeper_settings, root_zk_path, backup_uuid, get_zookeeper, is_internal_backup);
BackupCoordinationRemote::BackupKeeperSettings keeper_settings
{
.keeper_max_retries = context->getSettingsRef().backup_keeper_max_retries,
.keeper_retry_initial_backoff_ms = context->getSettingsRef().backup_keeper_retry_initial_backoff_ms,
.keeper_retry_max_backoff_ms = context->getSettingsRef().backup_keeper_retry_max_backoff_ms,
.batch_size_for_keeper_multiread = context->getSettingsRef().backup_batch_size_for_keeper_multiread,
};
auto all_hosts = BackupSettings::Util::filterHostIDs(
backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num);
return std::make_shared<BackupCoordinationRemote>(
get_zookeeper,
root_zk_path,
keeper_settings,
toString(*backup_settings.backup_uuid),
all_hosts,
backup_settings.host_id,
backup_settings.internal);
}
else
{
@ -53,12 +73,19 @@ namespace
}
}
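makeBackupCoordination now owns construction of the keeper settings instead of each caller repeating the block. The .field = value brace form is a C++20 designated initializer, and the fields must be listed in declaration order. A self-contained sketch (the field names follow the diff; the struct shape and the values are assumptions, not real defaults):

#include <cstdint>

struct BackupKeeperSettings                     // shape assumed from the diff's usage
{
    uint64_t keeper_max_retries;
    uint64_t keeper_retry_initial_backoff_ms;
    uint64_t keeper_retry_max_backoff_ms;
    uint64_t batch_size_for_keeper_multiread;
};

// C++20 designated initializers: fields must appear in declaration order.
BackupKeeperSettings keeper_settings
{
    .keeper_max_retries = 20,                   // illustrative values only
    .keeper_retry_initial_backoff_ms = 100,
    .keeper_retry_max_backoff_ms = 5000,
    .batch_size_for_keeper_multiread = 10000,
};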
std::shared_ptr<IRestoreCoordination> makeRestoreCoordination(const String & root_zk_path, const String & restore_uuid, const ContextPtr & context, bool is_internal_backup)
std::shared_ptr<IRestoreCoordination>
makeRestoreCoordination(const ContextPtr & context, const RestoreSettings & restore_settings, bool remote)
{
if (!root_zk_path.empty())
if (remote)
{
String root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
auto get_zookeeper = [global_context = context->getGlobalContext()] { return global_context->getZooKeeper(); };
return std::make_shared<RestoreCoordinationRemote>(root_zk_path, restore_uuid, get_zookeeper, is_internal_backup);
auto all_hosts = BackupSettings::Util::filterHostIDs(
restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num);
return std::make_shared<RestoreCoordinationRemote>(get_zookeeper, root_zk_path, toString(*restore_settings.restore_uuid), all_hosts, restore_settings.host_id, restore_settings.internal);
}
else
{
@ -68,12 +95,12 @@ namespace
/// Sends information about an exception to IBackupCoordination or IRestoreCoordination.
template <typename CoordinationType>
void sendExceptionToCoordination(std::shared_ptr<CoordinationType> coordination, const String & current_host, const Exception & exception)
void sendExceptionToCoordination(std::shared_ptr<CoordinationType> coordination, const Exception & exception)
{
try
{
if (coordination)
coordination->setError(current_host, exception);
coordination->setError(exception);
}
catch (...)
{
@ -82,7 +109,7 @@ namespace
/// Sends information about the current exception to IBackupCoordination or IRestoreCoordination.
template <typename CoordinationType>
void sendCurrentExceptionToCoordination(std::shared_ptr<CoordinationType> coordination, const String & current_host)
void sendCurrentExceptionToCoordination(std::shared_ptr<CoordinationType> coordination)
{
try
{
@ -90,12 +117,12 @@ namespace
}
catch (const Exception & e)
{
sendExceptionToCoordination(coordination, current_host, e);
sendExceptionToCoordination(coordination, e);
}
catch (...)
{
if (coordination)
coordination->setError(current_host, Exception(getCurrentExceptionMessageAndPattern(true, true), getCurrentExceptionCode()));
coordination->setError(Exception(getCurrentExceptionMessageAndPattern(true, true), getCurrentExceptionCode()));
}
}
@ -162,24 +189,13 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
else
backup_id = toString(*backup_settings.backup_uuid);
String root_zk_path;
std::shared_ptr<IBackupCoordination> backup_coordination;
if (backup_settings.internal)
{
/// The following call of makeBackupCoordination() is not essential because doBackup() will later create a backup coordination
/// if it's not created here. However, it's better to create the coordination here to handle errors: this way,
/// if an exception is thrown in startMakingBackup(), the other hosts will know about it.
root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
BackupCoordinationRemote::BackupKeeperSettings keeper_settings
{
.keeper_max_retries = context->getSettingsRef().backup_keeper_max_retries,
.keeper_retry_initial_backoff_ms = context->getSettingsRef().backup_keeper_retry_initial_backoff_ms,
.keeper_retry_max_backoff_ms = context->getSettingsRef().backup_keeper_retry_max_backoff_ms,
.batch_size_for_keeper_multiread = context->getSettingsRef().backup_batch_size_for_keeper_multiread,
};
backup_coordination = makeBackupCoordination(keeper_settings, root_zk_path, toString(*backup_settings.backup_uuid), context, backup_settings.internal);
backup_coordination = makeBackupCoordination(context, backup_settings, /* remote= */ true);
}
auto backup_info = BackupInfo::fromAST(*backup_query->backup_name);
@ -238,7 +254,7 @@ OperationID BackupsWorker::startMakingBackup(const ASTPtr & query, const Context
tryLogCurrentException(log, fmt::format("Failed to start {} {}", (backup_settings.internal ? "internal backup" : "backup"), backup_name_for_logging));
/// Something bad happened, the backup has not been built.
setStatusSafe(backup_id, BackupStatus::BACKUP_FAILED);
sendCurrentExceptionToCoordination(backup_coordination, backup_settings.host_id);
sendCurrentExceptionToCoordination(backup_coordination);
throw;
}
}
@ -274,19 +290,9 @@ void BackupsWorker::doBackup(
if (!on_cluster)
context->checkAccess(required_access);
String root_zk_path;
std::optional<BackupCoordinationRemote::BackupKeeperSettings> keeper_settings;
ClusterPtr cluster;
if (on_cluster)
{
keeper_settings = BackupCoordinationRemote::BackupKeeperSettings
{
.keeper_max_retries = context->getSettingsRef().backup_keeper_max_retries,
.keeper_retry_initial_backoff_ms = context->getSettingsRef().backup_keeper_retry_initial_backoff_ms,
.keeper_retry_max_backoff_ms = context->getSettingsRef().backup_keeper_retry_max_backoff_ms,
.batch_size_for_keeper_multiread = context->getSettingsRef().backup_batch_size_for_keeper_multiread,
};
root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
backup_query->cluster = context->getMacros()->expand(backup_query->cluster);
cluster = context->getCluster(backup_query->cluster);
backup_settings.cluster_host_ids = cluster->getHostIDs();
@ -294,7 +300,7 @@ void BackupsWorker::doBackup(
/// Make a backup coordination.
if (!backup_coordination)
backup_coordination = makeBackupCoordination(keeper_settings, root_zk_path, toString(*backup_settings.backup_uuid), context, backup_settings.internal);
backup_coordination = makeBackupCoordination(context, backup_settings, /* remote= */ on_cluster);
if (!allow_concurrent_backups && backup_coordination->hasConcurrentBackups(std::ref(num_active_backups)))
throw Exception(ErrorCodes::CONCURRENT_ACCESS_NOT_SUPPORTED, "Concurrent backups not supported, turn on setting 'allow_concurrent_backups'");
@ -330,9 +336,7 @@ void BackupsWorker::doBackup(
executeDDLQueryOnCluster(backup_query, mutable_context, params);
/// Wait until all the hosts have written their backup entries.
auto all_hosts = BackupSettings::Util::filterHostIDs(
backup_settings.cluster_host_ids, backup_settings.shard_num, backup_settings.replica_num);
backup_coordination->waitForStage(all_hosts, Stage::COMPLETED);
backup_coordination->waitForStage(Stage::COMPLETED);
}
else
{
@ -346,10 +350,10 @@ void BackupsWorker::doBackup(
}
/// Write the backup entries to the backup.
writeBackupEntries(backup, std::move(backup_entries), backups_thread_pool);
writeBackupEntries(backup_id, backup, std::move(backup_entries), backups_thread_pool, backup_settings.internal);
/// We have written our backup entries, we need to tell other hosts (they could be waiting for it).
backup_coordination->setStage(backup_settings.host_id, Stage::COMPLETED, "");
backup_coordination->setStage(Stage::COMPLETED, "");
}
size_t num_files = 0;
@ -374,6 +378,7 @@ void BackupsWorker::doBackup(
LOG_INFO(log, "{} {} was created successfully", (backup_settings.internal ? "Internal backup" : "Backup"), backup_name_for_logging);
setStatus(backup_id, BackupStatus::BACKUP_CREATED);
/// NOTE: we need to update metadata again after backup->finalizeWriting(), because backup metadata is written there.
setNumFilesAndSize(backup_id, num_files, total_size, num_entries, uncompressed_size, compressed_size, 0, 0);
}
catch (...)
@ -383,7 +388,7 @@ void BackupsWorker::doBackup(
{
tryLogCurrentException(log, fmt::format("Failed to make {} {}", (backup_settings.internal ? "internal backup" : "backup"), backup_name_for_logging));
setStatusSafe(backup_id, BackupStatus::BACKUP_FAILED);
sendCurrentExceptionToCoordination(backup_coordination, backup_settings.host_id);
sendCurrentExceptionToCoordination(backup_coordination);
}
else
{
@ -394,6 +399,88 @@ void BackupsWorker::doBackup(
}
void BackupsWorker::writeBackupEntries(const OperationID & backup_id, BackupMutablePtr backup, BackupEntries && backup_entries, ThreadPool & thread_pool, bool internal)
{
size_t num_active_jobs = 0;
std::mutex mutex;
std::condition_variable event;
std::exception_ptr exception;
bool always_single_threaded = !backup->supportsWritingInMultipleThreads();
auto thread_group = CurrentThread::getGroup();
for (auto & name_and_entry : backup_entries)
{
auto & name = name_and_entry.first;
auto & entry = name_and_entry.second;
{
std::unique_lock lock{mutex};
if (exception)
break;
++num_active_jobs;
}
auto job = [&](bool async)
{
SCOPE_EXIT_SAFE(
std::lock_guard lock{mutex};
if (!--num_active_jobs)
event.notify_all();
if (async)
CurrentThread::detachFromGroupIfNotDetached();
);
try
{
if (async && thread_group)
CurrentThread::attachToGroup(thread_group);
if (async)
setThreadName("BackupWorker");
{
std::lock_guard lock{mutex};
if (exception)
return;
}
backup->writeFile(name, std::move(entry));
// Update metadata
if (!internal)
{
setNumFilesAndSize(
backup_id,
backup->getNumFiles(),
backup->getTotalSize(),
backup->getNumEntries(),
backup->getUncompressedSize(),
backup->getCompressedSize(),
0, 0);
}
}
catch (...)
{
std::lock_guard lock{mutex};
if (!exception)
exception = std::current_exception();
}
};
if (always_single_threaded || !thread_pool.trySchedule([job] { job(true); }))
job(false);
}
{
std::unique_lock lock{mutex};
event.wait(lock, [&] { return !num_active_jobs; });
if (exception)
std::rethrow_exception(exception);
}
}
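writeBackupEntries, moved here from BackupUtils so it can update per-backup metadata as it goes, repeats the same fan-out/fan-in scaffolding as restoreTablesData below: count active jobs under a mutex, try to schedule each job on the pool with an inline fallback, record the first exception, and rethrow it after all jobs drain. A self-contained sketch of the pattern with the ClickHouse-specific pieces (thread groups, thread names, SCOPE_EXIT_SAFE, metadata updates) stripped out:

#include <condition_variable>
#include <exception>
#include <functional>
#include <mutex>
#include <vector>

void runAll(std::vector<std::function<void()>> tasks,
            const std::function<bool(std::function<void()>)> & try_schedule)
{
    size_t num_active_jobs = 0;
    std::mutex mutex;
    std::condition_variable event;
    std::exception_ptr exception;

    for (auto & task : tasks)
    {
        {
            std::unique_lock lock{mutex};
            if (exception)
                break;                      // stop scheduling after the first failure
            ++num_active_jobs;
        }

        auto job = [&, task]
        {
            try
            {
                task();                     // the actual work: write one entry / restore one table
            }
            catch (...)
            {
                std::lock_guard lock{mutex};
                if (!exception)
                    exception = std::current_exception();
            }
            std::lock_guard lock{mutex};
            if (!--num_active_jobs)
                event.notify_all();         // last job wakes the waiter below
        };

        if (!try_schedule(job))
            job();                          // pool is full: run inline on the caller's thread
    }

    std::unique_lock lock{mutex};
    event.wait(lock, [&] { return !num_active_jobs; });
    if (exception)
        std::rethrow_exception(exception);
}

The inline fallback (job(false) in the diff, job() in the sketch) keeps progress even when the pool is saturated, at the cost of blocking the scheduling loop while that one job runs.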
OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePtr context)
{
auto restore_query = std::static_pointer_cast<ASTBackupQuery>(query->clone());
@ -417,8 +504,7 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
/// The following call of makeRestoreCoordination() is not essential because doRestore() will later create a restore coordination
/// if it's not created here. However, it's better to create the coordination here to handle errors: this way,
/// if an exception is thrown in startRestoring(), the other hosts will know about it.
auto root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
restore_coordination = makeRestoreCoordination(root_zk_path, toString(*restore_settings.restore_uuid), context, restore_settings.internal);
restore_coordination = makeRestoreCoordination(context, restore_settings, /* remote= */ true);
}
try
@ -474,7 +560,7 @@ OperationID BackupsWorker::startRestoring(const ASTPtr & query, ContextMutablePt
{
/// Something bad happened, the restore has not been completed.
setStatusSafe(restore_id, BackupStatus::RESTORE_FAILED);
sendCurrentExceptionToCoordination(restore_coordination, restore_settings.host_id);
sendCurrentExceptionToCoordination(restore_coordination);
throw;
}
}
@ -509,14 +595,12 @@ void BackupsWorker::doRestore(
BackupPtr backup = BackupFactory::instance().createBackup(backup_open_params);
String current_database = context->getCurrentDatabase();
String root_zk_path;
/// Checks access rights if this is ON CLUSTER query.
/// (If this isn't ON CLUSTER query RestorerFromBackup will check access rights later.)
ClusterPtr cluster;
bool on_cluster = !restore_query->cluster.empty();
if (on_cluster)
{
root_zk_path = context->getConfigRef().getString("backups.zookeeper_path", "/clickhouse/backups");
restore_query->cluster = context->getMacros()->expand(restore_query->cluster);
cluster = context->getCluster(restore_query->cluster);
restore_settings.cluster_host_ids = cluster->getHostIDs();
@ -539,7 +623,7 @@ void BackupsWorker::doRestore(
/// Make a restore coordination.
if (!restore_coordination)
restore_coordination = makeRestoreCoordination(root_zk_path, toString(*restore_settings.restore_uuid), context, restore_settings.internal);
restore_coordination = makeRestoreCoordination(context, restore_settings, /* remote= */ on_cluster);
if (!allow_concurrent_restores && restore_coordination->hasConcurrentRestores(std::ref(num_active_restores)))
throw Exception(ErrorCodes::CONCURRENT_ACCESS_NOT_SUPPORTED, "Concurrent restores not supported, turn on setting 'allow_concurrent_restores'");
@ -561,9 +645,7 @@ void BackupsWorker::doRestore(
executeDDLQueryOnCluster(restore_query, context, params);
/// Wait until all the hosts have finished restoring their data.
auto all_hosts = BackupSettings::Util::filterHostIDs(
restore_settings.cluster_host_ids, restore_settings.shard_num, restore_settings.replica_num);
restore_coordination->waitForStage(all_hosts, Stage::COMPLETED);
restore_coordination->waitForStage(Stage::COMPLETED);
}
else
{
@ -578,23 +660,14 @@ void BackupsWorker::doRestore(
}
/// Execute the data restoring tasks.
restoreTablesData(std::move(data_restore_tasks), restores_thread_pool);
restoreTablesData(restore_id, backup, std::move(data_restore_tasks), restores_thread_pool);
/// We have restored everything, we need to tell other hosts (they could be waiting for it).
restore_coordination->setStage(restore_settings.host_id, Stage::COMPLETED, "");
restore_coordination->setStage(Stage::COMPLETED, "");
}
LOG_INFO(log, "Restored from {} {} successfully", (restore_settings.internal ? "internal backup" : "backup"), backup_name_for_logging);
setStatus(restore_id, BackupStatus::RESTORED);
setNumFilesAndSize(
restore_id,
backup->getNumFiles(),
backup->getTotalSize(),
backup->getNumEntries(),
backup->getUncompressedSize(),
backup->getCompressedSize(),
backup->getNumReadFiles(),
backup->getNumReadBytes());
}
catch (...)
{
@ -603,7 +676,7 @@ void BackupsWorker::doRestore(
{
tryLogCurrentException(log, fmt::format("Failed to restore from {} {}", (restore_settings.internal ? "internal backup" : "backup"), backup_name_for_logging));
setStatusSafe(restore_id, BackupStatus::RESTORE_FAILED);
sendCurrentExceptionToCoordination(restore_coordination, restore_settings.host_id);
sendCurrentExceptionToCoordination(restore_coordination);
}
else
{
@ -614,6 +687,80 @@ void BackupsWorker::doRestore(
}
void BackupsWorker::restoreTablesData(const OperationID & restore_id, BackupPtr backup, DataRestoreTasks && tasks, ThreadPool & thread_pool)
{
size_t num_active_jobs = 0;
std::mutex mutex;
std::condition_variable event;
std::exception_ptr exception;
auto thread_group = CurrentThread::getGroup();
for (auto & task : tasks)
{
{
std::unique_lock lock{mutex};
if (exception)
break;
++num_active_jobs;
}
auto job = [&](bool async)
{
SCOPE_EXIT_SAFE(
std::lock_guard lock{mutex};
if (!--num_active_jobs)
event.notify_all();
if (async)
CurrentThread::detachFromGroupIfNotDetached();
);
try
{
if (async && thread_group)
CurrentThread::attachToGroup(thread_group);
if (async)
setThreadName("RestoreWorker");
{
std::lock_guard lock{mutex};
if (exception)
return;
}
std::move(task)();
setNumFilesAndSize(
restore_id,
backup->getNumFiles(),
backup->getTotalSize(),
backup->getNumEntries(),
backup->getUncompressedSize(),
backup->getCompressedSize(),
backup->getNumReadFiles(),
backup->getNumReadBytes());
}
catch (...)
{
std::lock_guard lock{mutex};
if (!exception)
exception = std::current_exception();
}
};
if (!thread_pool.trySchedule([job] { job(true); }))
job(false);
}
{
std::unique_lock lock{mutex};
event.wait(lock, [&] { return !num_active_jobs; });
if (exception)
std::rethrow_exception(exception);
}
}
void BackupsWorker::addInfo(const OperationID & id, const String & name, bool internal, BackupStatus status)
{
Info info;

View File

@ -17,6 +17,12 @@ struct RestoreSettings;
struct BackupInfo;
class IBackupCoordination;
class IRestoreCoordination;
class IBackup;
using BackupMutablePtr = std::shared_ptr<IBackup>;
using BackupPtr = std::shared_ptr<const IBackup>;
class IBackupEntry;
using BackupEntries = std::vector<std::pair<String, std::shared_ptr<const IBackupEntry>>>;
using DataRestoreTasks = std::vector<std::function<void()>>;
/// Manager of backups and restores: executes backups and restores' threads in the background.
/// Keeps information about backups and restores started in this session.
@ -99,6 +105,9 @@ private:
ContextMutablePtr mutable_context,
bool called_async);
/// Write backup entries to an opened backup.
void writeBackupEntries(const OperationID & backup_id, BackupMutablePtr backup, BackupEntries && backup_entries, ThreadPool & thread_pool, bool internal);
OperationID startRestoring(const ASTPtr & query, ContextMutablePtr context);
void doRestore(
@ -111,6 +120,9 @@ private:
ContextMutablePtr context,
bool called_async);
/// Run data-restoring tasks which insert data into tables.
void restoreTablesData(const OperationID & restore_id, BackupPtr backup, DataRestoreTasks && tasks, ThreadPool & thread_pool);
void addInfo(const OperationID & id, const String & name, bool internal, BackupStatus status);
void setStatus(const OperationID & id, BackupStatus status, bool throw_if_error = true);
void setStatusSafe(const String & id, BackupStatus status) { setStatus(id, status, false); }

View File

@ -22,10 +22,10 @@ public:
virtual ~IBackupCoordination() = default;
/// Sets the current stage and waits for other hosts to come to this stage too.
virtual void setStage(const String & current_host, const String & new_stage, const String & message) = 0;
virtual void setError(const String & current_host, const Exception & exception) = 0;
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) = 0;
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) = 0;
virtual void setStage(const String & new_stage, const String & message) = 0;
virtual void setError(const Exception & exception) = 0;
virtual Strings waitForStage(const String & stage_to_wait) = 0;
virtual Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) = 0;
struct PartNameAndChecksum
{
@ -66,12 +66,12 @@ public:
virtual Strings getReplicatedDataPaths(const String & table_shared_id) const = 0;
/// Adds a path to access.txt file keeping access entities of a ReplicatedAccessStorage.
virtual void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id, const String & file_path) = 0;
virtual Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type, const String & host_id) const = 0;
virtual void addReplicatedAccessFilePath(const String & access_zk_path, AccessEntityType access_entity_type, const String & file_path) = 0;
virtual Strings getReplicatedAccessFilePaths(const String & access_zk_path, AccessEntityType access_entity_type) const = 0;
/// Adds a path to a directory with user-defined SQL objects inside the backup.
virtual void addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id, const String & dir_path) = 0;
virtual Strings getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & host_id) const = 0;
virtual void addReplicatedSQLObjectsDir(const String & loader_zk_path, UserDefinedSQLObjectType object_type, const String & dir_path) = 0;
virtual Strings getReplicatedSQLObjectsDirs(const String & loader_zk_path, UserDefinedSQLObjectType object_type) const = 0;
struct FileInfo
{

View File

@ -18,10 +18,10 @@ public:
virtual ~IRestoreCoordination() = default;
/// Sets the current stage and waits for other hosts to come to this stage too.
virtual void setStage(const String & current_host, const String & new_stage, const String & message) = 0;
virtual void setError(const String & current_host, const Exception & exception) = 0;
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) = 0;
virtual Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) = 0;
virtual void setStage(const String & new_stage, const String & message) = 0;
virtual void setError(const Exception & exception) = 0;
virtual Strings waitForStage(const String & stage_to_wait) = 0;
virtual Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) = 0;
static constexpr const char * kErrorStatus = "error";

View File

@ -7,20 +7,20 @@ namespace DB
RestoreCoordinationLocal::RestoreCoordinationLocal() = default;
RestoreCoordinationLocal::~RestoreCoordinationLocal() = default;
void RestoreCoordinationLocal::setStage(const String &, const String &, const String &)
void RestoreCoordinationLocal::setStage(const String &, const String &)
{
}
void RestoreCoordinationLocal::setError(const String &, const Exception &)
void RestoreCoordinationLocal::setError(const Exception &)
{
}
Strings RestoreCoordinationLocal::waitForStage(const Strings &, const String &)
Strings RestoreCoordinationLocal::waitForStage(const String &)
{
return {};
}
Strings RestoreCoordinationLocal::waitForStage(const Strings &, const String &, std::chrono::milliseconds)
Strings RestoreCoordinationLocal::waitForStage(const String &, std::chrono::milliseconds)
{
return {};
}

View File

@ -19,10 +19,10 @@ public:
~RestoreCoordinationLocal() override;
/// Sets the current stage and waits for other hosts to come to this stage too.
void setStage(const String & current_host, const String & new_stage, const String & message) override;
void setError(const String & current_host, const Exception & exception) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) override;
void setStage(const String & new_stage, const String & message) override;
void setError(const Exception & exception) override;
Strings waitForStage(const String & stage_to_wait) override;
Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override;
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) override;

View File

@ -11,11 +11,19 @@ namespace DB
namespace Stage = BackupCoordinationStage;
RestoreCoordinationRemote::RestoreCoordinationRemote(
const String & root_zookeeper_path_, const String & restore_uuid_, zkutil::GetZooKeeper get_zookeeper_, bool is_internal_)
: root_zookeeper_path(root_zookeeper_path_)
, zookeeper_path(root_zookeeper_path_ + "/restore-" + restore_uuid_)
zkutil::GetZooKeeper get_zookeeper_,
const String & root_zookeeper_path_,
const String & restore_uuid_,
const Strings & all_hosts_,
const String & current_host_,
bool is_internal_)
: get_zookeeper(get_zookeeper_)
, root_zookeeper_path(root_zookeeper_path_)
, restore_uuid(restore_uuid_)
, get_zookeeper(get_zookeeper_)
, zookeeper_path(root_zookeeper_path_ + "/restore-" + restore_uuid_)
, all_hosts(all_hosts_)
, current_host(current_host_)
, current_host_index(BackupCoordinationRemote::findCurrentHostIndex(all_hosts, current_host))
, is_internal(is_internal_)
{
createRootNodes();
@ -63,22 +71,22 @@ void RestoreCoordinationRemote::createRootNodes()
}
void RestoreCoordinationRemote::setStage(const String & current_host, const String & new_stage, const String & message)
void RestoreCoordinationRemote::setStage(const String & new_stage, const String & message)
{
stage_sync->set(current_host, new_stage, message);
}
void RestoreCoordinationRemote::setError(const String & current_host, const Exception & exception)
void RestoreCoordinationRemote::setError(const Exception & exception)
{
stage_sync->setError(current_host, exception);
}
Strings RestoreCoordinationRemote::waitForStage(const Strings & all_hosts, const String & stage_to_wait)
Strings RestoreCoordinationRemote::waitForStage(const String & stage_to_wait)
{
return stage_sync->wait(all_hosts, stage_to_wait);
}
Strings RestoreCoordinationRemote::waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout)
Strings RestoreCoordinationRemote::waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout)
{
return stage_sync->waitFor(all_hosts, stage_to_wait, timeout);
}

View File

@ -11,14 +11,21 @@ namespace DB
class RestoreCoordinationRemote : public IRestoreCoordination
{
public:
RestoreCoordinationRemote(const String & root_zookeeper_path_, const String & restore_uuid_, zkutil::GetZooKeeper get_zookeeper_, bool is_internal_);
RestoreCoordinationRemote(
zkutil::GetZooKeeper get_zookeeper_,
const String & root_zookeeper_path_,
const String & restore_uuid_,
const Strings & all_hosts_,
const String & current_host_,
bool is_internal_);
~RestoreCoordinationRemote() override;
/// Sets the current stage and waits for other hosts to come to this stage too.
void setStage(const String & current_host, const String & new_stage, const String & message) override;
void setError(const String & current_host, const Exception & exception) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait) override;
Strings waitForStage(const Strings & all_hosts, const String & stage_to_wait, std::chrono::milliseconds timeout) override;
void setStage(const String & new_stage, const String & message) override;
void setError(const Exception & exception) override;
Strings waitForStage(const String & stage_to_wait) override;
Strings waitForStage(const String & stage_to_wait, std::chrono::milliseconds timeout) override;
/// Starts creating a table in a replicated database. Returns false if there is another host which is already creating this table.
bool acquireCreatingTableInReplicatedDatabase(const String & database_zk_path, const String & table_name) override;
@ -44,10 +51,13 @@ private:
class ReplicatedDatabasesMetadataSync;
const String root_zookeeper_path;
const String zookeeper_path;
const String restore_uuid;
const zkutil::GetZooKeeper get_zookeeper;
const String root_zookeeper_path;
const String restore_uuid;
const String zookeeper_path;
const Strings all_hosts;
const String current_host;
const size_t current_host_index;
const bool is_internal;
std::optional<BackupCoordinationStageSync> stage_sync;

View File

@ -150,11 +150,11 @@ void RestorerFromBackup::setStage(const String & new_stage, const String & messa
if (restore_coordination)
{
restore_coordination->setStage(restore_settings.host_id, new_stage, message);
restore_coordination->setStage(new_stage, message);
if (new_stage == Stage::FINDING_TABLES_IN_BACKUP)
restore_coordination->waitForStage(all_hosts, new_stage, on_cluster_first_sync_timeout);
restore_coordination->waitForStage(new_stage, on_cluster_first_sync_timeout);
else
restore_coordination->waitForStage(all_hosts, new_stage);
restore_coordination->waitForStage(new_stage);
}
}

View File

@ -4,21 +4,22 @@
#include <Poco/Net/NetException.h>
#include <Poco/Util/HelpFormatter.h>
#include <base/range.h>
#include <Common/StringUtils/StringUtils.h>
#include <Common/SensitiveDataMasker.h>
#include "config.h"
#include <Common/StringUtils/StringUtils.h>
#include <Common/logger_useful.h>
#include <base/errnoToString.h>
#include <IO/ReadHelpers.h>
#include <Formats/registerFormats.h>
#include <Server/HTTP/HTTPServer.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteBufferFromFile.h>
#include <IO/WriteHelpers.h>
#include <Server/HTTP/HTTPServer.h>
#include <base/errnoToString.h>
#include <base/range.h>
#include <sys/time.h>
#include <sys/resource.h>
#include "config.h"
#if USE_ODBC
# include <Poco/Data/ODBC/Connector.h>
#endif
@ -89,7 +90,7 @@ void IBridge::defineOptions(Poco::Util::OptionSet & options)
Poco::Util::Option("listen-host", "", "hostname or address to listen, default 127.0.0.1").argument("listen-host").binding("listen-host"));
options.addOption(
Poco::Util::Option("http-timeout", "", "http timeout for socket, default 1800").argument("http-timeout").binding("http-timeout"));
Poco::Util::Option("http-timeout", "", "http timeout for socket, default 180").argument("http-timeout").binding("http-timeout"));
options.addOption(
Poco::Util::Option("max-server-connections", "", "max connections to server, default 1024").argument("max-server-connections").binding("max-server-connections"));
@ -97,6 +98,9 @@ void IBridge::defineOptions(Poco::Util::OptionSet & options)
options.addOption(
Poco::Util::Option("keep-alive-timeout", "", "keepalive timeout, default 10").argument("keep-alive-timeout").binding("keep-alive-timeout"));
options.addOption(
Poco::Util::Option("http-max-field-value-size", "", "max http field value size, default 1048576").argument("http-max-field-value-size").binding("http-max-field-value-size"));
options.addOption(
Poco::Util::Option("log-level", "", "sets log level, default info") .argument("log-level").binding("logger.level"));
@ -165,6 +169,7 @@ void IBridge::initialize(Application & self)
http_timeout = config().getUInt64("http-timeout", DEFAULT_HTTP_READ_BUFFER_TIMEOUT);
max_server_connections = config().getUInt("max-server-connections", 1024);
keep_alive_timeout = config().getUInt64("keep-alive-timeout", 10);
http_max_field_value_size = config().getUInt64("http-max-field-value-size", 1048576);
struct rlimit limit;
const UInt64 gb = 1024 * 1024 * 1024;
@ -226,6 +231,10 @@ int IBridge::main(const std::vector<std::string> & /*args*/)
auto context = Context::createGlobal(shared_context.get());
context->makeGlobalContext();
auto settings = context->getSettings();
settings.set("http_max_field_value_size", http_max_field_value_size);
context->setSettings(settings);
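/// Presumably the HTTP layer created from this global context reads the limit
/// from these settings; the consumer side is not shown in this diff, so this is
/// an assumption.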
if (config().has("query_masking_rules"))
SensitiveDataMasker::setInstance(std::make_unique<SensitiveDataMasker>(config(), "query_masking_rules"));

View File

@ -45,6 +45,7 @@ private:
std::string log_level;
unsigned max_server_connections;
size_t http_timeout;
size_t http_max_field_value_size;
Poco::Logger * log;
};

View File

@ -67,6 +67,8 @@ std::unique_ptr<ShellCommand> IBridgeHelper::startBridgeCommand()
cmd_args.push_back(config.getString(configPrefix() + ".listen_host", DEFAULT_HOST));
cmd_args.push_back("--http-timeout");
cmd_args.push_back(std::to_string(getHTTPTimeout().totalMicroseconds()));
cmd_args.push_back("--http-max-field-value-size");
cmd_args.push_back("99999999999999999"); // something "big" to accept large datasets (issue 47616)
if (config.has("logger." + configPrefix() + "_log"))
{
cmd_args.push_back("--log-path");

View File

@ -1131,6 +1131,8 @@ void ClientBase::onProfileEvents(Block & block)
{
if (profile_events.watch.elapsedMilliseconds() >= profile_events.delay_ms)
{
/// We need to restart the watch each time we flush these events
profile_events.watch.restart();
initLogsOutputStream();
if (need_render_progress && tty_buf)
progress_indication.clearProgressOutput(*tty_buf);
@ -1144,7 +1146,6 @@ void ClientBase::onProfileEvents(Block & block)
incrementProfileEventsBlock(profile_events.last_block, block);
}
}
profile_events.watch.restart();
}
}

View File

@ -24,7 +24,6 @@
#include <IO/Operators.h>
#include <IO/UseSSL.h>
#include <IO/WriteBufferFromOStream.h>
#include <Parsers/ASTExplainQuery.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTIdentifier.h>
@ -684,43 +683,76 @@ void QueryFuzzer::fuzzTableName(ASTTableExpression & table)
void QueryFuzzer::fuzzExplainQuery(ASTExplainQuery & explain)
{
/// Fuzz ExplainKind
explain.setExplainKind(fuzzExplainKind(explain.getKind()));
bool settings_have_fuzzed = false;
for (auto & child : explain.children)
{
if (auto * settings_ast = typeid_cast<ASTSetQuery *>(child.get()))
{
fuzzExplainSettings(*settings_ast, explain.getKind());
settings_have_fuzzed = true;
}
/// Fuzz other children, such as the explained query
else
{
fuzz(child);
}
}
if (!settings_have_fuzzed)
{
auto settings_ast = std::make_shared<ASTSetQuery>();
settings_ast->is_standalone = false;
fuzzExplainSettings(*settings_ast, explain.getKind());
explain.setSettings(settings_ast);
}
}
ASTExplainQuery::ExplainKind QueryFuzzer::fuzzExplainKind(ASTExplainQuery::ExplainKind kind)
{
if (fuzz_rand() % 20 == 0)
{
/// Do not modify ExplainKind
return kind;
}
else if (fuzz_rand() % 11 == 0)
{
explain.setExplainKind(ASTExplainQuery::ExplainKind::ParsedAST);
return ASTExplainQuery::ExplainKind::ParsedAST;
}
else if (fuzz_rand() % 11 == 0)
{
explain.setExplainKind(ASTExplainQuery::ExplainKind::AnalyzedSyntax);
return ASTExplainQuery::ExplainKind::AnalyzedSyntax;
}
else if (fuzz_rand() % 11 == 0)
{
explain.setExplainKind(ASTExplainQuery::ExplainKind::QueryTree);
return ASTExplainQuery::ExplainKind::QueryTree;
}
else if (fuzz_rand() % 11 == 0)
{
explain.setExplainKind(ASTExplainQuery::ExplainKind::QueryPlan);
return ASTExplainQuery::ExplainKind::QueryPlan;
}
else if (fuzz_rand() % 11 == 0)
{
explain.setExplainKind(ASTExplainQuery::ExplainKind::QueryPipeline);
return ASTExplainQuery::ExplainKind::QueryPipeline;
}
else if (fuzz_rand() % 11 == 0)
{
explain.setExplainKind(ASTExplainQuery::ExplainKind::QueryEstimates);
return ASTExplainQuery::ExplainKind::QueryEstimates;
}
else if (fuzz_rand() % 11 == 0)
{
explain.setExplainKind(ASTExplainQuery::ExplainKind::TableOverride);
return ASTExplainQuery::ExplainKind::TableOverride;
}
else if (fuzz_rand() % 11 == 0)
{
explain.setExplainKind(ASTExplainQuery::ExplainKind::CurrentTransaction);
return ASTExplainQuery::ExplainKind::CurrentTransaction;
}
return kind;
}
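/// A rough note on the distribution above: with probability 1/20 the kind is kept
/// unchanged; otherwise each branch fires with probability ~1/11 of the remaining
/// mass, so kinds tested later are slightly less likely. If no branch fires, the
/// kind is also kept.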
void QueryFuzzer::fuzzExplainSettings(ASTSetQuery & settings_ast, ASTExplainQuery::ExplainKind kind)
{
auto & changes = settings_ast.changes;
static const std::unordered_map<ASTExplainQuery::ExplainKind, std::vector<String>> settings_by_kind
= {{ASTExplainQuery::ExplainKind::ParsedAST, {"graph", "optimize"}},
@ -732,44 +764,17 @@ void QueryFuzzer::fuzzExplainQuery(ASTExplainQuery & explain)
{ASTExplainQuery::ExplainKind::TableOverride, {}},
{ASTExplainQuery::ExplainKind::CurrentTransaction, {}}};
const auto & settings = settings_by_kind.at(explain.getKind());
bool settings_have_fuzzed = false;
for (auto & child : explain.children)
{
if (auto * settings_ast = typeid_cast<ASTSetQuery *>(child.get()))
{
fuzzExplainSettings(*settings_ast, settings);
settings_have_fuzzed = true;
}
/// Fuzz other child like Explain Query
else
{
fuzz(child);
}
}
if (!settings_have_fuzzed && !settings.empty())
{
auto settings_ast = std::make_shared<ASTSetQuery>();
fuzzExplainSettings(*settings_ast, settings);
explain.setSettings(settings_ast);
}
}
void QueryFuzzer::fuzzExplainSettings(ASTSetQuery & settings, const std::vector<String> & names)
{
auto & changes = settings.changes;
const auto & settings = settings_by_kind.at(kind);
if (fuzz_rand() % 50 == 0 && !changes.empty())
{
changes.erase(changes.begin() + fuzz_rand() % changes.size());
}
for (const auto & name : names)
for (const auto & setting : settings)
{
if (fuzz_rand() % 5 == 0)
{
changes.emplace_back(name, true);
changes.emplace_back(setting, true);
}
}
}
@ -910,6 +915,20 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
if (auto * with_union = typeid_cast<ASTSelectWithUnionQuery *>(ast.get()))
{
fuzz(with_union->list_of_selects);
/// Randomly fuzz a SELECT query into an EXPLAIN query.
/// Only the root query is turned into an EXPLAIN query; subqueries are not fuzzed this way.
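/// Illustratively, a top-level "SELECT 1" might become something like
/// "EXPLAIN AST optimize = 1 SELECT 1" (hypothetical example; the kind and
/// settings are picked at random below).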
if (fuzz_rand() % 20 == 0 && current_ast_depth <= 1)
{
auto explain = std::make_shared<ASTExplainQuery>(fuzzExplainKind());
auto settings_ast = std::make_shared<ASTSetQuery>();
settings_ast->is_standalone = false;
fuzzExplainSettings(*settings_ast, explain->getKind());
explain->setSettings(settings_ast);
explain->setExplainedQuery(ast);
ast = explain;
}
}
else if (auto * with_intersect_except = typeid_cast<ASTSelectIntersectExceptQuery *>(ast.get()))
{
@ -1086,7 +1105,17 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
}
else if (auto * explain_query = typeid_cast<ASTExplainQuery *>(ast.get()))
{
fuzzExplainQuery(*explain_query);
/// Randomly fuzz an EXPLAIN query back into a SELECT query
if (fuzz_rand() % 20 == 0 && explain_query->getExplainedQuery()->getQueryKind() == IAST::QueryKind::Select)
{
auto select_query = explain_query->getExplainedQuery()->clone();
fuzz(select_query);
ast = select_query;
}
else
{
fuzzExplainQuery(*explain_query);
}
}
else
{

View File

@ -7,10 +7,11 @@
#include <pcg-random/pcg_random.hpp>
#include <Core/Field.h>
#include <Parsers/ASTExplainQuery.h>
#include <Parsers/IAST.h>
#include <Common/randomSeed.h>
#include "Parsers/IAST_fwd.h"
#include <Core/Field.h>
#include <Parsers/IAST.h>
namespace DB
@ -22,7 +23,6 @@ class ASTCreateQuery;
class ASTInsertQuery;
class ASTColumnDeclaration;
class ASTDropQuery;
class ASTExplainQuery;
class ASTSetQuery;
struct ASTTableExpression;
struct ASTWindowDefinition;
@ -89,7 +89,8 @@ struct QueryFuzzer
void fuzzWindowFrame(ASTWindowDefinition & def);
void fuzzCreateQuery(ASTCreateQuery & create);
void fuzzExplainQuery(ASTExplainQuery & explain);
void fuzzExplainSettings(ASTSetQuery & settings, const std::vector<String> & names);
ASTExplainQuery::ExplainKind fuzzExplainKind(ASTExplainQuery::ExplainKind kind = ASTExplainQuery::ExplainKind::QueryPipeline);
void fuzzExplainSettings(ASTSetQuery & settings_ast, ASTExplainQuery::ExplainKind kind);
void fuzzColumnDeclaration(ASTColumnDeclaration & column);
void fuzzTableName(ASTTableExpression & table);
void fuzz(ASTs & asts);

View File

@ -1,3 +1,4 @@
#include <limits>
#include <Common/Exception.h>
#include <Common/PODArray.h>
#include <Common/OptimizedRegularExpression.h>
@ -14,13 +15,40 @@ namespace DB
}
}
namespace
{
template <bool thread_safe>
void OptimizedRegularExpressionImpl<thread_safe>::analyze(
struct Literal
{
std::string literal;
bool prefix; /// Whether this literal is a prefix of the whole string.
bool suffix; /// Whether this literal is a suffix of the whole string.
void clear()
{
literal.clear();
prefix = false;
suffix = false;
}
};
using Literals = std::vector<Literal>;
size_t shortest_literal_length(const Literals & literals)
{
if (literals.empty()) return 0;
size_t shortest = std::numeric_limits<size_t>::max();
for (const auto & lit : literals)
if (shortest > lit.literal.size())
shortest = lit.literal.size();
return shortest;
}
const char * analyzeImpl(
std::string_view regexp,
std::string & required_substring,
const char * pos,
Literal & required_substring,
bool & is_trivial,
bool & required_substring_is_prefix)
Literals & global_alternatives)
{
/** The expression is trivial if all the metacharacters in it are escaped.
* The non-alternative string is
@ -30,12 +58,11 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
* and also avoid substrings of the form `http://` or `www` and some other
* (this is the hack for typical use case in web analytics applications).
*/
const char * begin = regexp.data();
const char * pos = begin;
const char * begin = pos;
const char * end = regexp.data() + regexp.size();
bool is_first_call = begin == regexp.data();
int depth = 0;
is_trivial = true;
required_substring_is_prefix = false;
required_substring.clear();
bool has_alternative_on_depth_0 = false;
bool has_case_insensitive_flag = false;
@ -47,6 +74,80 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
Substrings trivial_substrings(1);
Substring * last_substring = &trivial_substrings.back();
Literals cur_alternatives;
auto finish_cur_alternatives = [&]()
{
if (cur_alternatives.empty())
return;
if (global_alternatives.empty())
{
global_alternatives = cur_alternatives;
cur_alternatives.clear();
return;
}
/// The current alternatives have better quality; prefer them.
if (shortest_literal_length(global_alternatives) < shortest_literal_length(cur_alternatives))
{
global_alternatives.clear();
global_alternatives = cur_alternatives;
}
cur_alternatives.clear();
};
auto finish_non_trivial_char = [&](bool create_new_substr = true)
{
if (depth != 0)
return;
for (auto & alter : cur_alternatives)
{
if (alter.suffix)
{
alter.literal += last_substring->first;
}
}
finish_cur_alternatives();
if (!last_substring->first.empty() && create_new_substr)
{
trivial_substrings.resize(trivial_substrings.size() + 1);
last_substring = &trivial_substrings.back();
}
};
/// Resolve the literal or alternatives in a group (xxxxx)
auto finish_group = [&](Literal & group_required_string, Literals & group_alternatives)
{
for (auto & alter : group_alternatives)
{
if (alter.prefix)
{
alter.literal = last_substring->first + alter.literal;
}
}
if (group_required_string.prefix)
last_substring->first += group_required_string.literal;
else
{
finish_non_trivial_char();
last_substring->first = group_required_string.literal;
}
/// If we can still append, there is no need to finish it, e.g. abc(de)fg should capture abcdefg
if (!last_substring->first.empty() && !group_required_string.suffix)
{
trivial_substrings.resize(trivial_substrings.size() + 1);
last_substring = &trivial_substrings.back();
}
/// Assign the group's alternatives to the current alternatives.
finish_cur_alternatives();
cur_alternatives = std::move(group_alternatives);
};
bool in_curly_braces = false;
bool in_square_braces = false;
@ -73,25 +174,19 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
case '$':
case '.':
case '[':
case ']':
case '?':
case '*':
case '+':
case '-':
case '{':
if (depth == 0 && !in_curly_braces && !in_square_braces)
{
if (last_substring->first.empty())
last_substring->second = pos - begin;
last_substring->first.push_back(*pos);
}
break;
case '}':
case '/':
goto ordinary;
default:
/// all other escape sequences are not supported
is_trivial = false;
if (!last_substring->first.empty())
{
trivial_substrings.resize(trivial_substrings.size() + 1);
last_substring = &trivial_substrings.back();
}
finish_non_trivial_char();
break;
}
@ -100,28 +195,19 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
}
case '|':
if (depth == 0)
has_alternative_on_depth_0 = true;
is_trivial = false;
if (!in_square_braces && !last_substring->first.empty())
{
trivial_substrings.resize(trivial_substrings.size() + 1);
last_substring = &trivial_substrings.back();
}
++pos;
if (depth == 0)
{
has_alternative_on_depth_0 = true;
goto finish;
}
break;
case '(':
is_trivial = false;
if (!in_square_braces)
{
++depth;
is_trivial = false;
if (!last_substring->first.empty())
{
trivial_substrings.resize(trivial_substrings.size() + 1);
last_substring = &trivial_substrings.back();
}
/// Check for case-insensitive flag.
if (pos + 1 < end && pos[1] == '?')
{
@ -143,6 +229,28 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
break;
}
}
if (pos + 2 < end && pos[1] == '?' && pos[2] == ':')
{
pos += 2;
}
Literal group_required_substr;
bool group_is_trivial = true;
Literals group_alters;
pos = analyzeImpl(regexp, pos + 1, group_required_substr, group_is_trivial, group_alters);
/// pos should now point at ')'; if not, the regular expression is invalid
if (pos == end)
return pos;
/// For ()? or ()* or (){0,1}, we can just ignore the whole group.
if ((pos + 1 < end && (pos[1] == '?' || pos[1] == '*')) ||
(pos + 2 < end && pos[1] == '{' && pos[2] == '0'))
{
finish_non_trivial_char();
}
else
{
finish_group(group_required_substr, group_alters);
}
}
++pos;
break;
@ -151,11 +259,7 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
in_square_braces = true;
++depth;
is_trivial = false;
if (!last_substring->first.empty())
{
trivial_substrings.resize(trivial_substrings.size() + 1);
last_substring = &trivial_substrings.back();
}
finish_non_trivial_char();
++pos;
break;
@ -163,38 +267,25 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
if (!in_square_braces)
goto ordinary;
in_square_braces = false;
--depth;
if (depth == 0)
in_square_braces = false;
is_trivial = false;
if (!last_substring->first.empty())
{
trivial_substrings.resize(trivial_substrings.size() + 1);
last_substring = &trivial_substrings.back();
}
finish_non_trivial_char();
++pos;
break;
case ')':
if (!in_square_braces)
{
--depth;
is_trivial = false;
if (!last_substring->first.empty())
{
trivial_substrings.resize(trivial_substrings.size() + 1);
last_substring = &trivial_substrings.back();
}
goto finish;
}
++pos;
break;
case '^': case '$': case '.': case '+':
is_trivial = false;
if (!last_substring->first.empty() && !in_square_braces)
{
trivial_substrings.resize(trivial_substrings.size() + 1);
last_substring = &trivial_substrings.back();
}
finish_non_trivial_char();
++pos;
break;
@ -206,12 +297,11 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
[[fallthrough]];
case '*':
is_trivial = false;
if (!last_substring->first.empty() && !in_square_braces)
if (depth == 0 && !last_substring->first.empty() && !in_square_braces)
{
last_substring->first.resize(last_substring->first.size() - 1);
trivial_substrings.resize(trivial_substrings.size() + 1);
last_substring = &trivial_substrings.back();
}
finish_non_trivial_char();
++pos;
break;
@ -236,13 +326,15 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
break;
}
}
finish:
if (last_substring && last_substring->first.empty())
trivial_substrings.pop_back();
finish_non_trivial_char(false);
if (!is_trivial)
{
if (!has_alternative_on_depth_0 && !has_case_insensitive_flag)
/// We calculate the required substring even when has_alternative_on_depth_0 is set;
/// it will be cleared after being added to the alternatives.
if (!has_case_insensitive_flag)
{
/// We choose the non-alternative substring of the maximum length for first search.
@ -262,19 +354,45 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
}
}
if (max_length >= MIN_LENGTH_FOR_STRSTR)
if (max_length >= MIN_LENGTH_FOR_STRSTR || (!is_first_call && max_length > 0))
{
required_substring = candidate_it->first;
required_substring_is_prefix = candidate_it->second == 0;
required_substring.literal = candidate_it->first;
required_substring.prefix = candidate_it->second == 0;
required_substring.suffix = candidate_it + 1 == trivial_substrings.end();
}
}
}
else if (!trivial_substrings.empty())
{
required_substring = trivial_substrings.front().first;
required_substring_is_prefix = trivial_substrings.front().second == 0;
required_substring.literal = trivial_substrings.front().first;
required_substring.prefix = trivial_substrings.front().second == 0;
required_substring.suffix = true;
}
/// If the pattern is xxx|xxx|xxx, analyze the remaining xxx|xxx recursively and collect the result.
if (has_alternative_on_depth_0)
{
/// Compare the quality of the required substring and the alternatives and choose the better one.
if (shortest_literal_length(global_alternatives) < required_substring.literal.size())
global_alternatives = {required_substring};
Literals next_alternatives;
/// These two values are unused: xxx|xxx can be neither trivial nor a prefix.
bool next_is_trivial = true;
pos = analyzeImpl(regexp, pos, required_substring, next_is_trivial, next_alternatives);
/// For xxx|xxx|xxx, we only combine the alternatives and return an empty required_substring.
if (next_alternatives.empty() || shortest_literal_length(next_alternatives) < required_substring.literal.size())
{
global_alternatives.push_back(required_substring);
}
else
{
global_alternatives.insert(global_alternatives.end(), next_alternatives.begin(), next_alternatives.end());
}
required_substring.clear();
}
return pos;
/* std::cerr
<< "regexp: " << regexp
<< ", is_trivial: " << is_trivial
@ -282,12 +400,31 @@ void OptimizedRegularExpressionImpl<thread_safe>::analyze(
<< ", required_substring_is_prefix: " << required_substring_is_prefix
<< std::endl;*/
}
}
template <bool thread_safe>
void OptimizedRegularExpressionImpl<thread_safe>::analyze(
std::string_view regexp_,
std::string & required_substring,
bool & is_trivial,
bool & required_substring_is_prefix,
std::vector<std::string> & alternatives)
{
Literals alternative_literals;
Literal required_literal;
analyzeImpl(regexp_, regexp_.data(), required_literal, is_trivial, alternative_literals);
required_substring = std::move(required_literal.literal);
required_substring_is_prefix = required_literal.prefix;
for (auto & lit : alternative_literals)
alternatives.push_back(std::move(lit.literal));
}
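/// For example (mirroring the unit test below): analyzing "abc(de|xyz)fg" yields
/// required_substring = "abc" and alternatives {"abcdefg", "abcxyzfg"}, while a
/// plain "abc" is trivial with required_substring = "abc".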
template <bool thread_safe>
OptimizedRegularExpressionImpl<thread_safe>::OptimizedRegularExpressionImpl(const std::string & regexp_, int options)
{
analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix);
std::vector<std::string> alternativesDummy; /// This vector collects the patterns a,b,c extracted from a pattern like (a|b|c); for now it is unused.
analyze(regexp_, required_substring, is_trivial, required_substring_is_prefix, alternativesDummy);
/// Just three following options are supported
if (options & (~(RE_CASELESS | RE_NO_CAPTURE | RE_DOT_NL)))

View File

@ -95,6 +95,15 @@ public:
out_required_substring_is_prefix = required_substring_is_prefix;
}
/// The analyze function extracts the longest string literal, or multiple alternative string literals, from the regexp,
/// to pre-check whether a string contains the literal(s). If it does not, the string can never match the regexp.
static void analyze(
std::string_view regexp_,
std::string & required_substring,
bool & is_trivial,
bool & required_substring_is_prefix,
std::vector<std::string> & alternatives);
private:
bool is_trivial;
bool required_substring_is_prefix;
@ -104,8 +113,6 @@ private:
std::optional<DB::ASCIICaseInsensitiveStringSearcher> case_insensitive_substring_searcher;
std::unique_ptr<RegexType> re2;
unsigned number_of_subpatterns;
static void analyze(std::string_view regexp_, std::string & required_substring, bool & is_trivial, bool & required_substring_is_prefix);
};
using OptimizedRegularExpression = OptimizedRegularExpressionImpl<true>;

View File

@ -10,9 +10,11 @@
namespace mysqlxx
{
std::string errorMessage(MYSQL * driver)
std::string errorMessage(MYSQL * driver, const std::string & query)
{
return fmt::format("{} ({}:{})", mysql_error(driver), driver->host ? driver->host : "(nullptr)", driver->port);
return fmt::format("{}{} ({}:{})", mysql_error(driver),
query.empty() ? "" : " while executing query: '" + query + "'",
driver->host ? driver->host : "(nullptr)", driver->port);
}
void checkError(MYSQL * driver)

View File

@ -64,7 +64,7 @@ void Query::executeImpl()
case CR_SERVER_LOST:
throw ConnectionLost(errorMessage(mysql_driver), err_no);
default:
throw BadQuery(errorMessage(mysql_driver), err_no);
throw BadQuery(errorMessage(mysql_driver, query), err_no);
}
}
}

View File

@ -160,14 +160,16 @@ void Value::throwException(const char * text) const
if (!isNull())
{
info.append(": ");
info.append(": '");
info.append(m_data, m_length);
info.append("'");
}
if (res && res->getQuery())
{
info.append(", query: ");
info.append(", query: '");
info.append(res->getQuery()->str().substr(0, preview_length));
info.append("'");
}
throw CannotParseValue(info);

View File

@ -53,7 +53,7 @@ struct CannotParseValue : public Exception
};
std::string errorMessage(MYSQL * driver);
std::string errorMessage(MYSQL * driver, const std::string & query = "");
/// For internal need of library.
void checkError(MYSQL * driver);

View File

@ -1,8 +1,13 @@
#pragma once
#include <Functions/registerFunctions.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <Formats/registerFormats.h>
inline void tryRegisterAggregateFunctions()
{
static struct Register { Register() { DB::registerAggregateFunctions(); } } registered;
}
inline void tryRegisterFunctions()
{

View File

@ -0,0 +1,46 @@
#include <gtest/gtest.h>
#include <Common/OptimizedRegularExpression.h>
TEST(OptimizeRE, analyze)
{
auto test_f = [](const std::string & regexp, const std::string & answer, std::vector<std::string> expect_alternatives = {}, bool trivial_expected = false)
{
std::string required;
bool is_trivial;
bool is_prefix;
std::vector<std::string> alternatives;
OptimizedRegularExpression::analyze(regexp, required, is_trivial, is_prefix, alternatives);
std::cerr << regexp << std::endl;
EXPECT_EQ(required, answer);
EXPECT_EQ(alternatives, expect_alternatives);
EXPECT_EQ(is_trivial, trivial_expected);
};
test_f("abc", "abc", {}, true);
test_f("c([^k]*)de", "");
test_f("abc(de)fg", "abcdefg");
test_f("abc(de|xyz)fg", "abc", {"abcdefg", "abcxyzfg"});
test_f("abc(de?f|xyz)fg", "abc", {"abcd", "abcxyzfg"});
test_f("abc|fgk|xyz", "", {"abc","fgk", "xyz"});
test_f("(abc)", "abc");
test_f("(abc|fgk)", "", {"abc","fgk"});
test_f("(abc|fgk)(e|f|zkh|)", "", {"abc","fgk"});
test_f("abc(abc|fg)xyzz", "xyzz", {"abcabcxyzz","abcfgxyzz"});
test_f("abc[k]xyzz", "xyzz");
test_f("(abc[k]xyzz)", "xyzz");
test_f("abc((de)fg(hi))jk", "abcdefghijk");
test_f("abc((?:de)fg(?:hi))jk", "abcdefghijk");
test_f("abc((de)fghi+zzz)jk", "abcdefghi");
test_f("abc((de)fg(hi))?jk", "abc");
test_f("abc((de)fghi?zzz)jk", "abcdefgh");
test_f("abc(*cd)jk", "cdjk");
test_f(R"(abc(de|xyz|(\{xx\}))fg)", "abc", {"abcdefg", "abcxyzfg", "abc{xx}fg"});
test_f("abc(abc|fg)?xyzz", "xyzz");
test_f("abc(abc|fg){0,1}xyzz", "xyzz");
test_f("abc(abc|fg)xyzz|bcdd?k|bc(f|g|h?)z", "", {"abcabcxyzz", "abcfgxyzz", "bcd", "bc"});
test_f("abc(abc|fg)xyzz|bc(dd?x|kk?y|(f))k|bc(f|g|h?)z", "", {"abcabcxyzz", "abcfgxyzz", "bcd", "bck", "bcfk", "bc"});
test_f("((?:abc|efg|xyz)/[a-zA-Z0-9]{1-50})(/?[^ ]*|)", "", {"abc/", "efg/", "xyz/"});
test_f(R"([Bb]ai[Dd]u[Ss]pider(?:-[A-Za-z]{1,30})(?:-[A-Za-z]{1,30}|)|bingbot|\bYeti(?:-[a-z]{1,30}|)|Catchpoint(?: bot|)|[Cc]harlotte|Daumoa(?:-feedfetcher|)|(?:[a-zA-Z]{1,30}-|)Googlebot(?:-[a-zA-Z]{1,30}|))", "", {"pider-", "bingbot", "Yeti-", "Yeti", "Catchpoint bot", "Catchpoint", "harlotte", "Daumoa-feedfetcher", "Daumoa", "-Googlebot", "Googlebot"});
test_f("abc|(:?xx|yy|zz|x?)def", "", {"abc", "def"});
test_f("abc|(:?xx|yy|zz|x?){1,2}def", "", {"abc", "def"});
}

View File

@ -140,6 +140,8 @@ void KeeperConfigurationAndSettings::dump(WriteBufferFromOwnString & buf) const
writeText("max_requests_batch_size=", buf);
write_int(coordination_settings->max_requests_batch_size);
writeText("max_requests_batch_bytes_size=", buf);
write_int(coordination_settings->max_requests_batch_bytes_size);
writeText("max_request_queue_size=", buf);
write_int(coordination_settings->max_request_queue_size);
writeText("max_requests_quick_batch_size=", buf);

View File

@ -39,7 +39,8 @@ struct Settings;
M(UInt64, fresh_log_gap, 200, "When node became fresh", 0) \
M(UInt64, max_request_queue_size, 100000, "Maximum number of requests that can be in the queue for processing", 0) \
M(UInt64, max_requests_batch_size, 100, "Max size of batch of requests that can be sent to RAFT", 0) \
M(UInt64, max_requests_quick_batch_size, 10, "Max size of batch of requests to try to get before proceeding with RAFT. Keeper will not wait for requests but take only requests that are already in queue" , 0) \
M(UInt64, max_requests_batch_bytes_size, 100*1024, "Max size in bytes of batch of requests that can be sent to RAFT", 0) \
M(UInt64, max_requests_quick_batch_size, 100, "Max size of batch of requests to try to get before proceeding with RAFT. Keeper will not wait for requests but take only requests that are already in queue" , 0) \
M(Bool, quorum_reads, false, "Execute read requests as writes through the whole RAFT consensus with similar speed", 0) \
M(Bool, force_sync, true, "Call fsync on each change in RAFT changelog", 0) \
M(Bool, compress_logs, true, "Write compressed coordination logs in ZSTD format", 0) \

View File

@ -73,6 +73,7 @@ void KeeperDispatcher::requestThread()
auto coordination_settings = configuration_and_settings->coordination_settings;
uint64_t max_wait = coordination_settings->operation_timeout_ms.totalMilliseconds();
uint64_t max_batch_size = coordination_settings->max_requests_batch_size;
uint64_t max_batch_bytes_size = coordination_settings->max_requests_batch_bytes_size;
/// The code below does a very simple thing: batch all write (quorum) requests into a vector while
/// the previous write batch is not yet finished, or until the max batch size is reached. The main complexity goes from
@ -89,6 +90,7 @@ void KeeperDispatcher::requestThread()
break;
KeeperStorage::RequestsForSessions current_batch;
size_t current_batch_bytes_size = 0;
bool has_read_request = false;
@ -96,6 +98,7 @@ void KeeperDispatcher::requestThread()
/// Otherwise we will process it locally.
if (coordination_settings->quorum_reads || !request.request->isReadRequest())
{
current_batch_bytes_size += request.request->bytesSize();
current_batch.emplace_back(request);
const auto try_get_request = [&]
@ -108,7 +111,10 @@ void KeeperDispatcher::requestThread()
if (!coordination_settings->quorum_reads && request.request->isReadRequest())
has_read_request = true;
else
{
current_batch_bytes_size += request.request->bytesSize();
current_batch.emplace_back(request);
}
return true;
}
@ -116,9 +122,11 @@ void KeeperDispatcher::requestThread()
return false;
};
/// If we have enough requests in the queue, we will try to batch at least max_quick_batch_size of them.
/// TODO: Deprecate max_requests_quick_batch_size and use only max_requests_batch_size and max_requests_batch_bytes_size
size_t max_quick_batch_size = coordination_settings->max_requests_quick_batch_size;
while (!shutdown_called && !has_read_request && current_batch.size() < max_quick_batch_size && try_get_request())
while (!shutdown_called && !has_read_request &&
current_batch.size() < max_quick_batch_size && current_batch_bytes_size < max_batch_bytes_size &&
try_get_request())
;
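/// (Empty loop body: try_get_request() already appends the request to
/// current_batch and accounts for its size in current_batch_bytes_size.)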
const auto prev_result_done = [&]
@ -129,7 +137,8 @@ void KeeperDispatcher::requestThread()
};
/// Wait until the previous append succeeds, or the batch becomes big enough
while (!shutdown_called && !has_read_request && !prev_result_done() && current_batch.size() <= max_batch_size)
while (!shutdown_called && !has_read_request && !prev_result_done() &&
current_batch.size() <= max_batch_size && current_batch_bytes_size < max_batch_bytes_size)
{
try_get_request();
}
@ -147,6 +156,8 @@ void KeeperDispatcher::requestThread()
/// Process collected write requests batch
if (!current_batch.empty())
{
LOG_TRACE(log, "Processing requests batch, size: {}, bytes: {}", current_batch.size(), current_batch_bytes_size);
auto result = server->putRequestBatch(current_batch);
if (result)
@ -158,6 +169,7 @@ void KeeperDispatcher::requestThread()
{
addErrorResponses(current_batch, Coordination::Error::ZCONNECTIONLOSS);
current_batch.clear();
current_batch_bytes_size = 0;
}
prev_batch = std::move(current_batch);

View File

@ -611,6 +611,7 @@ class IColumn;
M(Bool, query_plan_aggregation_in_order, true, "Use query plan for aggregation-in-order optimisation", 0) \
M(Bool, query_plan_remove_redundant_sorting, true, "Remove redundant sorting in query plan. For example, sorting steps related to ORDER BY clauses in subqueries", 0) \
M(Bool, query_plan_remove_redundant_distinct, true, "Remove redundant Distinct step in query plan", 0) \
M(Bool, query_plan_optimize_projection, true, "Use query plan for projection optimisation", 0) \
M(UInt64, regexp_max_matches_per_row, 1000, "Max matches of any single regexp per row, used to safeguard 'extractAllGroupsHorizontal' against consuming too much memory with greedy RE.", 0) \
\
M(UInt64, limit, 0, "Limit on read rows from the most 'end' result for select query, default 0 means no limit length", 0) \
@ -933,7 +934,7 @@ class IColumn;
M(Bool, input_format_bson_skip_fields_with_unsupported_types_in_schema_inference, false, "Skip fields with unsupported types while schema inference for format BSON.", 0) \
\
M(Bool, regexp_dict_allow_other_sources, false, "Allow regexp_tree dictionary to use sources other than yaml source.", 0) \
M(Bool, regexp_dict_allow_hyperscan, false, "Allow regexp_tree dictionary to use the Hyperscan library.", 0) \
M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary to use the Hyperscan library.", 0) \
// End of FORMAT_FACTORY_SETTINGS
// Please add settings non-related to formats into the COMMON_SETTINGS above.

Some files were not shown because too many files have changed in this diff.