Merge branch 'master' of https://github.com/ClickHouse/ClickHouse into cast-internal

kssenii 2021-08-08 21:30:17 +00:00
commit 7991bb6e83
230 changed files with 2326 additions and 1172 deletions

3
.gitmodules vendored

@ -243,3 +243,6 @@
[submodule "contrib/s2geometry"]
path = contrib/s2geometry
url = https://github.com/ClickHouse-Extras/s2geometry.git
[submodule "contrib/bzip2"]
path = contrib/bzip2
url = https://github.com/ClickHouse-Extras/bzip2.git


@ -543,6 +543,7 @@ include (cmake/find/nuraft.cmake)
include (cmake/find/yaml-cpp.cmake)
include (cmake/find/s2geometry.cmake)
include (cmake/find/nlp.cmake)
include (cmake/find/bzip2.cmake)
if(NOT USE_INTERNAL_PARQUET_LIBRARY)
set (ENABLE_ORC OFF CACHE INTERNAL "")


@ -1,57 +0,0 @@
#pragma once
#include <new>
#include "defines.h"
#if USE_JEMALLOC
# include <jemalloc/jemalloc.h>
#endif
#if !USE_JEMALLOC || JEMALLOC_VERSION_MAJOR < 4
# include <cstdlib>
#endif
namespace Memory
{
inline ALWAYS_INLINE void * newImpl(std::size_t size)
{
auto * ptr = malloc(size);
if (likely(ptr != nullptr))
return ptr;
/// @note no std::get_new_handler logic implemented
throw std::bad_alloc{};
}
inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept
{
return malloc(size);
}
inline ALWAYS_INLINE void deleteImpl(void * ptr) noexcept
{
free(ptr);
}
#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 4
inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size) noexcept
{
if (unlikely(ptr == nullptr))
return;
sdallocx(ptr, size, 0);
}
#else
inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unused]]) noexcept
{
free(ptr);
}
#endif
}

19
cmake/find/bzip2.cmake Normal file

@ -0,0 +1,19 @@
option(ENABLE_BZIP2 "Enable bzip2 compression support" ${ENABLE_LIBRARIES})
if (NOT ENABLE_BZIP2)
message (STATUS "bzip2 compression disabled")
return()
endif()
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/bzip2/bzlib.h")
message (WARNING "submodule contrib/bzip2 is missing. to fix try run: \n git submodule update --init --recursive")
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal bzip2 library")
set (USE_BZIP2 0)
return()
endif ()
set (USE_BZIP2 1)
set (BZIP2_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/bzip2")
set (BZIP2_LIBRARY bzip2)
message (STATUS "Using bzip2=${USE_BZIP2}: ${BZIP2_INCLUDE_DIR} : ${BZIP2_LIBRARY}")


@ -334,6 +334,10 @@ if (USE_NLP)
add_subdirectory(lemmagen-c-cmake)
endif()
if (USE_BZIP2)
add_subdirectory(bzip2-cmake)
endif()
if (USE_SQLITE)
add_subdirectory(sqlite-cmake)
endif()

2
contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit 0ce9490093021c63564cca159571a8b27772ad48
Subproject commit 7ecb16844af6a9c283ad432d85ecc2e7d1544676

1
contrib/bzip2 vendored Submodule

@ -0,0 +1 @@
Subproject commit bf905ea2251191ff9911ae7ec0cfc35d41f9f7f6


@ -0,0 +1,23 @@
set(BZIP2_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/bzip2")
set(BZIP2_BINARY_DIR "${ClickHouse_BINARY_DIR}/contrib/bzip2")
set(SRCS
"${BZIP2_SOURCE_DIR}/blocksort.c"
"${BZIP2_SOURCE_DIR}/huffman.c"
"${BZIP2_SOURCE_DIR}/crctable.c"
"${BZIP2_SOURCE_DIR}/randtable.c"
"${BZIP2_SOURCE_DIR}/compress.c"
"${BZIP2_SOURCE_DIR}/decompress.c"
"${BZIP2_SOURCE_DIR}/bzlib.c"
)
# From bzip2/CMakeLists.txt
set(BZ_VERSION "1.0.7")
configure_file (
"${BZIP2_SOURCE_DIR}/bz_version.h.in"
"${BZIP2_BINARY_DIR}/bz_version.h"
)
add_library(bzip2 ${SRCS})
target_include_directories(bzip2 PUBLIC "${BZIP2_SOURCE_DIR}" "${BZIP2_BINARY_DIR}")


@ -24,3 +24,19 @@ add_library(roaring ${SRCS})
target_include_directories(roaring PRIVATE "${LIBRARY_DIR}/include/roaring")
target_include_directories(roaring SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/include")
target_include_directories(roaring SYSTEM BEFORE PUBLIC "${LIBRARY_DIR}/cpp")
# We redirect the malloc/free family of functions to functions that track memory in ClickHouse.
# This makes the library depend on 'clickhouse_common_io', which is not linked explicitly via 'target_link_libraries'.
# Since we check that all library dependencies are satisfied and all symbols are resolved when building with shared libraries,
# we enable this redirection only in static builds.
# Also note that we rely on implicit function declarations.
if (USE_STATIC_LIBRARIES)
target_compile_definitions(roaring PRIVATE
-Dmalloc=clickhouse_malloc
-Dcalloc=clickhouse_calloc
-Drealloc=clickhouse_realloc
-Dreallocarray=clickhouse_reallocarray
-Dfree=clickhouse_free
-Dposix_memalign=clickhouse_posix_memalign)
endif ()

2
contrib/zlib-ng vendored

@ -1 +1 @@
Subproject commit db232d30b4c72fd58e6d7eae2d12cebf9c3d90db
Subproject commit 6a5e93b9007782115f7f7e5235dedc81c4f1facb


@ -312,6 +312,7 @@ function run_tests
01798_uniq_theta_sketch
01799_long_uniq_theta_sketch
01890_stem # depends on libstemmer_c
02003_compress_bz2 # depends on bzip2
collate
collation
_orc_


@ -15,6 +15,7 @@ The list of documented datasets:
- [Recipes](../../getting-started/example-datasets/recipes.md)
- [OnTime](../../getting-started/example-datasets/ontime.md)
- [New York Taxi Data](../../getting-started/example-datasets/nyc-taxi.md)
- [UK Property Price Paid](../../getting-started/example-datasets/uk-price-paid.md)
- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md)
- [WikiStat](../../getting-started/example-datasets/wikistat.md)
- [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md)


@ -0,0 +1,325 @@
---
toc_priority: 20
toc_title: UK Property Price Paid
---
# UK Property Price Paid
The dataset contains data about prices paid for real-estate property in England and Wales. The data has been available since 1995.
The size of the dataset in uncompressed form is about 4 GiB, and it takes only about 226 MiB in ClickHouse.
Source: https://www.gov.uk/government/statistical-data-sets/price-paid-data-downloads
Description of the fields: https://www.gov.uk/guidance/about-the-price-paid-data
Contains HM Land Registry data © Crown copyright and database right 2021. This data is licensed under the Open Government Licence v3.0.
## Download the Dataset
```
wget http://prod.publicdata.landregistry.gov.uk.s3-website-eu-west-1.amazonaws.com/pp-complete.csv
```
The download takes about 2 minutes with a good internet connection.
## Create the Table
```
CREATE TABLE uk_price_paid
(
price UInt32,
date Date,
postcode1 LowCardinality(String),
postcode2 LowCardinality(String),
type Enum8('terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4, 'other' = 0),
is_new UInt8,
duration Enum8('freehold' = 1, 'leasehold' = 2, 'unknown' = 0),
addr1 String,
addr2 String,
street LowCardinality(String),
locality LowCardinality(String),
town LowCardinality(String),
district LowCardinality(String),
county LowCardinality(String),
category UInt8
) ENGINE = MergeTree ORDER BY (postcode1, postcode2, addr1, addr2);
```
## Preprocess and Import Data
We will use the `clickhouse-local` tool for data preprocessing and `clickhouse-client` to upload it.
In this example, we define the structure of the source data from the CSV file and specify a query to preprocess the data with `clickhouse-local`.
The preprocessing steps are:
- splitting the postcode into two different columns, `postcode1` and `postcode2`, which is better for storage and queries;
- converting the `time` field to `Date`, because it only contains the 00:00 time;
- ignoring the `uuid` field because we don't need it for analysis;
- transforming the `type` and `duration` fields into more readable `Enum` fields with the `transform` function;
- transforming the `is_new` and `category` fields from single-character strings (`Y`/`N` and `A`/`B`) to `UInt8` fields with 0 and 1.
The preprocessed data is piped directly to `clickhouse-client` and inserted into the ClickHouse table in a streaming fashion.
```
clickhouse-local --input-format CSV --structure '
uuid String,
price UInt32,
time DateTime,
postcode String,
a String,
b String,
c String,
addr1 String,
addr2 String,
street String,
locality String,
town String,
district String,
county String,
d String,
e String
' --query "
WITH splitByChar(' ', postcode) AS p
SELECT
price,
toDate(time) AS date,
p[1] AS postcode1,
p[2] AS postcode2,
transform(a, ['T', 'S', 'D', 'F', 'O'], ['terraced', 'semi-detached', 'detached', 'flat', 'other']) AS type,
b = 'Y' AS is_new,
transform(c, ['F', 'L', 'U'], ['freehold', 'leasehold', 'unknown']) AS duration,
addr1,
addr2,
street,
locality,
town,
district,
county,
d = 'B' AS category
FROM table" --date_time_input_format best_effort < pp-complete.csv | clickhouse-client --query "INSERT INTO uk_price_paid FORMAT TSV"
```
It will take about 40 seconds.
## Validate the Data
```
SELECT count() FROM uk_price_paid
26248711
```
The size of the dataset in ClickHouse is just 226 MiB:
```
SELECT formatReadableSize(total_bytes) FROM system.tables WHERE name = 'uk_price_paid'
226.40 MiB
```
## Run Some Queries
### Average price per year:
```
SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 1000000, 80) FROM uk_price_paid GROUP BY year ORDER BY year
┌─year─┬──price─┬─bar(round(avg(price)), 0, 1000000, 80)─┐
│ 1995 │ 67932 │ █████▍ │
│ 1996 │ 71505 │ █████▋ │
│ 1997 │ 78532 │ ██████▎ │
│ 1998 │ 85435 │ ██████▋ │
│ 1999 │ 96036 │ ███████▋ │
│ 2000 │ 107478 │ ████████▌ │
│ 2001 │ 118886 │ █████████▌ │
│ 2002 │ 137940 │ ███████████ │
│ 2003 │ 155888 │ ████████████▍ │
│ 2004 │ 178885 │ ██████████████▎ │
│ 2005 │ 189350 │ ███████████████▏ │
│ 2006 │ 203528 │ ████████████████▎ │
│ 2007 │ 219377 │ █████████████████▌ │
│ 2008 │ 217056 │ █████████████████▎ │
│ 2009 │ 213419 │ █████████████████ │
│ 2010 │ 236110 │ ██████████████████▊ │
│ 2011 │ 232804 │ ██████████████████▌ │
│ 2012 │ 238366 │ ███████████████████ │
│ 2013 │ 256931 │ ████████████████████▌ │
│ 2014 │ 279917 │ ██████████████████████▍ │
│ 2015 │ 297264 │ ███████████████████████▋ │
│ 2016 │ 313197 │ █████████████████████████ │
│ 2017 │ 346070 │ ███████████████████████████▋ │
│ 2018 │ 350117 │ ████████████████████████████ │
│ 2019 │ 351010 │ ████████████████████████████ │
│ 2020 │ 368974 │ █████████████████████████████▌ │
│ 2021 │ 384351 │ ██████████████████████████████▋ │
└──────┴────────┴────────────────────────────────────────┘
27 rows in set. Elapsed: 0.027 sec. Processed 26.25 million rows, 157.49 MB (955.96 million rows/s., 5.74 GB/s.)
```
### Average price per year in London:
```
SELECT toYear(date) AS year, round(avg(price)) AS price, bar(price, 0, 2000000, 100) FROM uk_price_paid WHERE town = 'LONDON' GROUP BY year ORDER BY year
┌─year─┬───price─┬─bar(round(avg(price)), 0, 2000000, 100)───────────────┐
│ 1995 │ 109112 │ █████▍ │
│ 1996 │ 118667 │ █████▊ │
│ 1997 │ 136518 │ ██████▋ │
│ 1998 │ 152983 │ ███████▋ │
│ 1999 │ 180633 │ █████████ │
│ 2000 │ 215830 │ ██████████▋ │
│ 2001 │ 232996 │ ███████████▋ │
│ 2002 │ 263672 │ █████████████▏ │
│ 2003 │ 278394 │ █████████████▊ │
│ 2004 │ 304665 │ ███████████████▏ │
│ 2005 │ 322875 │ ████████████████▏ │
│ 2006 │ 356192 │ █████████████████▋ │
│ 2007 │ 404055 │ ████████████████████▏ │
│ 2008 │ 420741 │ █████████████████████ │
│ 2009 │ 427754 │ █████████████████████▍ │
│ 2010 │ 480306 │ ████████████████████████ │
│ 2011 │ 496274 │ ████████████████████████▋ │
│ 2012 │ 519441 │ █████████████████████████▊ │
│ 2013 │ 616209 │ ██████████████████████████████▋ │
│ 2014 │ 724144 │ ████████████████████████████████████▏ │
│ 2015 │ 792112 │ ███████████████████████████████████████▌ │
│ 2016 │ 843568 │ ██████████████████████████████████████████▏ │
│ 2017 │ 982566 │ █████████████████████████████████████████████████▏ │
│ 2018 │ 1016845 │ ██████████████████████████████████████████████████▋ │
│ 2019 │ 1043277 │ ████████████████████████████████████████████████████▏ │
│ 2020 │ 1003963 │ ██████████████████████████████████████████████████▏ │
│ 2021 │ 940794 │ ███████████████████████████████████████████████ │
└──────┴─────────┴───────────────────────────────────────────────────────┘
27 rows in set. Elapsed: 0.024 sec. Processed 26.25 million rows, 76.88 MB (1.08 billion rows/s., 3.15 GB/s.)
```
Something happened in 2013. I don't have a clue. Maybe you have a clue what happened in 2020?
### The most expensive neighborhoods:
```
SELECT
town,
district,
count() AS c,
round(avg(price)) AS price,
bar(price, 0, 5000000, 100)
FROM uk_price_paid
WHERE date >= '2020-01-01'
GROUP BY
town,
district
HAVING c >= 100
ORDER BY price DESC
LIMIT 100
Query id: df8c0a98-4713-4f0e-9690-5f73b52f7206
┌─town─────────────────┬─district───────────────┬────c─┬───price─┬─bar(round(avg(price)), 0, 5000000, 100)────────────────────────────┐
│ LONDON │ CITY OF WESTMINSTER │ 3372 │ 3305225 │ ██████████████████████████████████████████████████████████████████ │
│ LONDON │ CITY OF LONDON │ 257 │ 3294478 │ █████████████████████████████████████████████████████████████████▊ │
│ LONDON │ KENSINGTON AND CHELSEA │ 2367 │ 2342422 │ ██████████████████████████████████████████████▋ │
│ LEATHERHEAD │ ELMBRIDGE │ 108 │ 1927143 │ ██████████████████████████████████████▌ │
│ VIRGINIA WATER │ RUNNYMEDE │ 142 │ 1868819 │ █████████████████████████████████████▍ │
│ LONDON │ CAMDEN │ 2815 │ 1736788 │ ██████████████████████████████████▋ │
│ THORNTON HEATH │ CROYDON │ 521 │ 1733051 │ ██████████████████████████████████▋ │
│ WINDLESHAM │ SURREY HEATH │ 103 │ 1717255 │ ██████████████████████████████████▎ │
│ BARNET │ ENFIELD │ 115 │ 1503458 │ ██████████████████████████████ │
│ OXFORD │ SOUTH OXFORDSHIRE │ 298 │ 1275200 │ █████████████████████████▌ │
│ LONDON │ ISLINGTON │ 2458 │ 1274308 │ █████████████████████████▍ │
│ COBHAM │ ELMBRIDGE │ 364 │ 1260005 │ █████████████████████████▏ │
│ LONDON │ HOUNSLOW │ 618 │ 1215682 │ ████████████████████████▎ │
│ ASCOT │ WINDSOR AND MAIDENHEAD │ 379 │ 1215146 │ ████████████████████████▎ │
│ LONDON │ RICHMOND UPON THAMES │ 654 │ 1207551 │ ████████████████████████▏ │
│ BEACONSFIELD │ BUCKINGHAMSHIRE │ 307 │ 1186220 │ ███████████████████████▋ │
│ RICHMOND │ RICHMOND UPON THAMES │ 805 │ 1100420 │ ██████████████████████ │
│ LONDON │ HAMMERSMITH AND FULHAM │ 2888 │ 1062959 │ █████████████████████▎ │
│ WEYBRIDGE │ ELMBRIDGE │ 607 │ 1027161 │ ████████████████████▌ │
│ RADLETT │ HERTSMERE │ 265 │ 1015896 │ ████████████████████▎ │
│ SALCOMBE │ SOUTH HAMS │ 124 │ 1014393 │ ████████████████████▎ │
│ BURFORD │ WEST OXFORDSHIRE │ 102 │ 993100 │ ███████████████████▋ │
│ ESHER │ ELMBRIDGE │ 454 │ 969770 │ ███████████████████▍ │
│ HINDHEAD │ WAVERLEY │ 128 │ 967786 │ ███████████████████▎ │
│ BROCKENHURST │ NEW FOREST │ 121 │ 967046 │ ███████████████████▎ │
│ LEATHERHEAD │ GUILDFORD │ 191 │ 964489 │ ███████████████████▎ │
│ GERRARDS CROSS │ BUCKINGHAMSHIRE │ 376 │ 958555 │ ███████████████████▏ │
│ EAST MOLESEY │ ELMBRIDGE │ 181 │ 943457 │ ██████████████████▋ │
│ OLNEY │ MILTON KEYNES │ 220 │ 942892 │ ██████████████████▋ │
│ CHALFONT ST GILES │ BUCKINGHAMSHIRE │ 135 │ 926950 │ ██████████████████▌ │
│ HENLEY-ON-THAMES │ SOUTH OXFORDSHIRE │ 509 │ 905732 │ ██████████████████ │
│ KINGSTON UPON THAMES │ KINGSTON UPON THAMES │ 889 │ 899689 │ █████████████████▊ │
│ BELVEDERE │ BEXLEY │ 313 │ 895336 │ █████████████████▊ │
│ CRANBROOK │ TUNBRIDGE WELLS │ 404 │ 888190 │ █████████████████▋ │
│ LONDON │ EALING │ 2460 │ 865893 │ █████████████████▎ │
│ MAIDENHEAD │ BUCKINGHAMSHIRE │ 114 │ 863814 │ █████████████████▎ │
│ LONDON │ MERTON │ 1958 │ 857192 │ █████████████████▏ │
│ GUILDFORD │ WAVERLEY │ 131 │ 854447 │ █████████████████ │
│ LONDON │ HACKNEY │ 3088 │ 846571 │ ████████████████▊ │
│ LYMM │ WARRINGTON │ 285 │ 839920 │ ████████████████▋ │
│ HARPENDEN │ ST ALBANS │ 606 │ 836994 │ ████████████████▋ │
│ LONDON │ WANDSWORTH │ 6113 │ 832292 │ ████████████████▋ │
│ LONDON │ SOUTHWARK │ 3612 │ 831319 │ ████████████████▋ │
│ BERKHAMSTED │ DACORUM │ 502 │ 830356 │ ████████████████▌ │
│ KINGS LANGLEY │ DACORUM │ 137 │ 821358 │ ████████████████▍ │
│ TONBRIDGE │ TUNBRIDGE WELLS │ 339 │ 806736 │ ████████████████▏ │
│ EPSOM │ REIGATE AND BANSTEAD │ 157 │ 805903 │ ████████████████ │
│ WOKING │ GUILDFORD │ 161 │ 803283 │ ████████████████ │
│ STOCKBRIDGE │ TEST VALLEY │ 168 │ 801973 │ ████████████████ │
│ TEDDINGTON │ RICHMOND UPON THAMES │ 539 │ 798591 │ ███████████████▊ │
│ OXFORD │ VALE OF WHITE HORSE │ 329 │ 792907 │ ███████████████▋ │
│ LONDON │ BARNET │ 3624 │ 789583 │ ███████████████▋ │
│ TWICKENHAM │ RICHMOND UPON THAMES │ 1090 │ 787760 │ ███████████████▋ │
│ LUTON │ CENTRAL BEDFORDSHIRE │ 196 │ 786051 │ ███████████████▋ │
│ TONBRIDGE │ MAIDSTONE │ 277 │ 785746 │ ███████████████▋ │
│ TOWCESTER │ WEST NORTHAMPTONSHIRE │ 186 │ 783532 │ ███████████████▋ │
│ LONDON │ LAMBETH │ 4832 │ 783422 │ ███████████████▋ │
│ LUTTERWORTH │ HARBOROUGH │ 515 │ 781775 │ ███████████████▋ │
│ WOODSTOCK │ WEST OXFORDSHIRE │ 135 │ 777499 │ ███████████████▌ │
│ ALRESFORD │ WINCHESTER │ 196 │ 775577 │ ███████████████▌ │
│ LONDON │ NEWHAM │ 2942 │ 768551 │ ███████████████▎ │
│ ALDERLEY EDGE │ CHESHIRE EAST │ 168 │ 768280 │ ███████████████▎ │
│ MARLOW │ BUCKINGHAMSHIRE │ 301 │ 762784 │ ███████████████▎ │
│ BILLINGSHURST │ CHICHESTER │ 134 │ 760920 │ ███████████████▏ │
│ LONDON │ TOWER HAMLETS │ 4183 │ 759635 │ ███████████████▏ │
│ MIDHURST │ CHICHESTER │ 245 │ 759101 │ ███████████████▏ │
│ THAMES DITTON │ ELMBRIDGE │ 227 │ 753347 │ ███████████████ │
│ POTTERS BAR │ WELWYN HATFIELD │ 163 │ 752926 │ ███████████████ │
│ REIGATE │ REIGATE AND BANSTEAD │ 555 │ 740961 │ ██████████████▋ │
│ TADWORTH │ REIGATE AND BANSTEAD │ 477 │ 738997 │ ██████████████▋ │
│ SEVENOAKS │ SEVENOAKS │ 1074 │ 734658 │ ██████████████▋ │
│ PETWORTH │ CHICHESTER │ 138 │ 732432 │ ██████████████▋ │
│ BOURNE END │ BUCKINGHAMSHIRE │ 127 │ 730742 │ ██████████████▌ │
│ PURLEY │ CROYDON │ 540 │ 727721 │ ██████████████▌ │
│ OXTED │ TANDRIDGE │ 320 │ 726078 │ ██████████████▌ │
│ LONDON │ HARINGEY │ 2988 │ 724573 │ ██████████████▍ │
│ BANSTEAD │ REIGATE AND BANSTEAD │ 373 │ 713834 │ ██████████████▎ │
│ PINNER │ HARROW │ 480 │ 712166 │ ██████████████▏ │
│ MALMESBURY │ WILTSHIRE │ 293 │ 707747 │ ██████████████▏ │
│ RICKMANSWORTH │ THREE RIVERS │ 732 │ 705400 │ ██████████████ │
│ SLOUGH │ BUCKINGHAMSHIRE │ 359 │ 705002 │ ██████████████ │
│ GREAT MISSENDEN │ BUCKINGHAMSHIRE │ 214 │ 704904 │ ██████████████ │
│ READING │ SOUTH OXFORDSHIRE │ 295 │ 701697 │ ██████████████ │
│ HYTHE │ FOLKESTONE AND HYTHE │ 457 │ 700334 │ ██████████████ │
│ WELWYN │ WELWYN HATFIELD │ 217 │ 699649 │ █████████████▊ │
│ CHIGWELL │ EPPING FOREST │ 242 │ 697869 │ █████████████▊ │
│ BARNET │ BARNET │ 906 │ 695680 │ █████████████▊ │
│ HASLEMERE │ CHICHESTER │ 120 │ 694028 │ █████████████▊ │
│ LEATHERHEAD │ MOLE VALLEY │ 748 │ 692026 │ █████████████▋ │
│ LONDON │ BRENT │ 1945 │ 690799 │ █████████████▋ │
│ HASLEMERE │ WAVERLEY │ 258 │ 690765 │ █████████████▋ │
│ NORTHWOOD │ HILLINGDON │ 252 │ 690753 │ █████████████▋ │
│ WALTON-ON-THAMES │ ELMBRIDGE │ 871 │ 689431 │ █████████████▋ │
│ INGATESTONE │ BRENTWOOD │ 150 │ 688345 │ █████████████▋ │
│ OXFORD │ OXFORD │ 1761 │ 686114 │ █████████████▋ │
│ CHISLEHURST │ BROMLEY │ 410 │ 682892 │ █████████████▋ │
│ KINGS LANGLEY │ THREE RIVERS │ 109 │ 682320 │ █████████████▋ │
│ ASHTEAD │ MOLE VALLEY │ 280 │ 680483 │ █████████████▌ │
│ WOKING │ SURREY HEATH │ 269 │ 679035 │ █████████████▌ │
│ ASCOT │ BRACKNELL FOREST │ 160 │ 678632 │ █████████████▌ │
└──────────────────────┴────────────────────────┴──────┴─────────┴────────────────────────────────────────────────────────────────────┘
100 rows in set. Elapsed: 0.039 sec. Processed 26.25 million rows, 278.03 MB (674.32 million rows/s., 7.14 GB/s.)
```
### Test it in Playground
The data is uploaded to ClickHouse Playground, [example](https://gh-api.clickhouse.tech/play?user=play#U0VMRUNUIHRvd24sIGRpc3RyaWN0LCBjb3VudCgpIEFTIGMsIHJvdW5kKGF2ZyhwcmljZSkpIEFTIHByaWNlLCBiYXIocHJpY2UsIDAsIDUwMDAwMDAsIDEwMCkgRlJPTSB1a19wcmljZV9wYWlkIFdIRVJFIGRhdGUgPj0gJzIwMjAtMDEtMDEnIEdST1VQIEJZIHRvd24sIGRpc3RyaWN0IEhBVklORyBjID49IDEwMCBPUkRFUiBCWSBwcmljZSBERVNDIExJTUlUIDEwMA==).


@ -41,6 +41,13 @@ SELECT greatCircleDistance(55.755831, 37.617673, -55.755831, -37.617673)
└───────────────────────────────────────────────────────────────────┘
```
## geoDistance
Similar to `greatCircleDistance`, but calculates the distance on the WGS-84 ellipsoid instead of a sphere. This is a more precise approximation of the Earth's geoid.
The performance is the same as for `greatCircleDistance` (no performance drawback). It is recommended to use `geoDistance` to calculate distances on Earth.
Technical note: for close enough points, we calculate the distance using a planar approximation with the metric on the tangent plane at the midpoint of the coordinates.
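A minimal usage sketch mirroring the `greatCircleDistance` call shown above (output omitted; the result is the distance in meters):
```sql
-- Same arguments as the greatCircleDistance example above.
SELECT geoDistance(55.755831, 37.617673, -55.755831, -37.617673) AS distance_m
```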
## greatCircleAngle {#greatcircleangle}
Calculates the central angle between two points on the Earth's surface using [the great-circle formula](https://en.wikipedia.org/wiki/Great-circle_distance).


@ -0,0 +1,23 @@
---
toc_priority: 49
toc_title: PROJECTION
---
# Manipulating Projections {#manipulations-with-projections}
The following operations are available:
- `ALTER TABLE [db].name ADD PROJECTION name AS SELECT <COLUMN LIST EXPR> [GROUP BY] [ORDER BY]` - Adds a projection description to the table's metadata.
- `ALTER TABLE [db].name DROP PROJECTION name` - Removes the projection description from the table's metadata and deletes the projection files from disk.
- `ALTER TABLE [db.]table MATERIALIZE PROJECTION name IN PARTITION partition_name` - The query rebuilds the projection `name` in the partition `partition_name`. Implemented as a [mutation](../../../sql-reference/statements/alter/index.md#mutations).
- `ALTER TABLE [db.]table CLEAR PROJECTION name IN PARTITION partition_name` - Deletes the projection files from disk without removing the description.
The commands `ADD`, `DROP` and `CLEAR` are lightweight in the sense that they only change metadata or remove files.
Also, they are replicated, syncing the projection metadata via ZooKeeper.
!!! note "Note"
Projection manipulation is supported only for tables with [`*MergeTree`](../../../engines/table-engines/mergetree-family/mergetree.md) engine (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) variants).
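A short illustrative sketch of these commands, using a hypothetical table `visits`, projection `p_agg` and partition `202108` (placeholder names, not taken from this commit):
```sql
-- 'visits', 'p_agg' and 202108 are hypothetical placeholders.
ALTER TABLE visits MATERIALIZE PROJECTION p_agg IN PARTITION 202108;
ALTER TABLE visits CLEAR PROJECTION p_agg IN PARTITION 202108;
ALTER TABLE visits DROP PROJECTION p_agg;
```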


@ -140,7 +140,7 @@ ClickHouse использует для сборки некоторое коли
However, our continuous integration environment checks about a dozen build variants, including gcc, but a build made with gcc is unsuitable for production use.
On Ubuntu/Debian you can use the automatic installation script (check [official webpage](https://apt.llvm.org/))
On Ubuntu and Debian you can use the automatic installation script (see the [official website](https://apt.llvm.org/))
```bash
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"


@ -2090,9 +2090,9 @@ SELECT tcpPort();
## currentProfiles {#current-profiles}
Returns a list of [settings profiles](../../operations/access-rights.md#settings-profiles-management) for the current user.
Returns a list of [settings profiles](../../operations/access-rights.md#settings-profiles-management) for the current user.
The [SET PROFILE](../../sql-reference/statements/set.md#set-statement#query-set) command can be used to change the current settings profile. If the `SET PROFILE` command has not been used, the function returns the profiles specified when the current user was defined (see [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement)).
The SET PROFILE command can be used to change the current settings profile. If the `SET PROFILE` command has not been used, the function returns the profiles specified when the current user was defined (see [CREATE USER](../../sql-reference/statements/create/user.md#create-user-statement)).
**Syntax**
@ -2102,7 +2102,7 @@ currentProfiles()
**Returned value**
- List of settings profiles for the current user.
- List of settings profiles for the current user.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
@ -2118,7 +2118,7 @@ enabledProfiles()
**Returned value**
- List of available settings profiles for the current user.
- List of available settings profiles for the current user.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
@ -2134,6 +2134,6 @@ defaultProfiles()
**Returned value**
- List of default settings profiles.
- List of default settings profiles.
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).
Type: [Array](../../sql-reference/data-types/array.md)([String](../../sql-reference/data-types/string.md)).


@ -44,7 +44,7 @@ private:
void toLarge()
{
rb = std::make_unique<RoaringBitmap>();
rb = std::make_shared<RoaringBitmap>();
for (const auto & x : small)
rb->add(static_cast<Value>(x.getValue()));
small.clear();
@ -114,7 +114,7 @@ public:
readVarUInt(size, in);
std::unique_ptr<char[]> buf(new char[size]);
in.readStrict(buf.get(), size);
rb = std::make_unique<RoaringBitmap>(RoaringBitmap::read(buf.get()));
rb = std::make_shared<RoaringBitmap>(RoaringBitmap::read(buf.get()));
}
}
@ -141,7 +141,7 @@ public:
*/
std::shared_ptr<RoaringBitmap> getNewRoaringBitmapFromSmall() const
{
std::shared_ptr<RoaringBitmap> ret = std::make_unique<RoaringBitmap>();
std::shared_ptr<RoaringBitmap> ret = std::make_shared<RoaringBitmap>();
for (const auto & x : small)
ret->add(static_cast<Value>(x.getValue()));
return ret;


@ -158,6 +158,8 @@ else()
target_link_libraries (clickhouse_new_delete PRIVATE clickhouse_common_io jemalloc)
endif()
target_link_libraries (clickhouse_common_io PRIVATE jemalloc)
add_subdirectory(Common/ZooKeeper)
add_subdirectory(Common/Config)
@ -479,6 +481,11 @@ if (USE_NLP)
dbms_target_link_libraries (PUBLIC lemmagen)
endif()
if (USE_BZIP2)
target_link_libraries (clickhouse_common_io PRIVATE ${BZIP2_LIBRARY})
target_include_directories (clickhouse_common_io SYSTEM BEFORE PRIVATE ${BZIP2_INCLUDE_DIR})
endif()
include ("${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake")
if (ENABLE_TESTS AND USE_GTEST)


@ -3,6 +3,7 @@
#include <Common/CurrentMemoryTracker.h>
namespace
{
@ -36,6 +37,7 @@ namespace
if (current_thread)
{
current_thread->untracked_memory += size;
if (current_thread->untracked_memory > current_thread->untracked_memory_limit)
{
/// Zero untracked before track. If tracker throws out-of-limit we would be able to alloc up to untracked_memory_limit bytes
@ -54,6 +56,12 @@ namespace
}
}
void check()
{
if (auto * memory_tracker = getMemoryTracker())
memory_tracker->allocImpl(0, true);
}
void alloc(Int64 size)
{
bool throw_if_memory_exceeded = true;


@ -9,4 +9,5 @@ namespace CurrentMemoryTracker
void allocNoThrow(Int64 size);
void realloc(Int64 old_size, Int64 new_size);
void free(Int64 size);
void check();
}


@ -561,6 +561,8 @@
M(591, SQLITE_ENGINE_ERROR) \
M(592, DATA_ENCRYPTION_ERROR) \
M(593, ZERO_COPY_REPLICATION_ERROR) \
M(594, BZIP2_STREAM_DECODER_FAILED) \
M(595, BZIP2_STREAM_ENCODER_FAILED) \
\
M(998, POSTGRESQL_CONNECTION_FAILURE) \
M(999, KEEPER_EXCEPTION) \


@ -0,0 +1,55 @@
#include <Common/memory.h>
#include <cstdlib>
/** These functions can be substituted for the regular ones when memory tracking is needed.
*/
extern "C" void * clickhouse_malloc(size_t size)
{
void * res = malloc(size);
if (res)
Memory::trackMemory(size);
return res;
}
extern "C" void * clickhouse_calloc(size_t number_of_members, size_t size)
{
void * res = calloc(number_of_members, size);
if (res)
Memory::trackMemory(number_of_members * size);
return res;
}
extern "C" void * clickhouse_realloc(void * ptr, size_t size)
{
if (ptr)
Memory::untrackMemory(ptr);
void * res = realloc(ptr, size);
if (res)
Memory::trackMemory(size);
return res;
}
extern "C" void * clickhouse_reallocarray(void * ptr, size_t number_of_members, size_t size)
{
size_t real_size = 0;
if (__builtin_mul_overflow(number_of_members, size, &real_size))
return nullptr;
return clickhouse_realloc(ptr, real_size);
}
extern "C" void clickhouse_free(void * ptr)
{
Memory::untrackMemory(ptr);
free(ptr);
}
extern "C" int clickhouse_posix_memalign(void ** memptr, size_t alignment, size_t size)
{
int res = posix_memalign(memptr, alignment, size);
if (res == 0)
Memory::trackMemory(size);
return res;
}


@ -19,3 +19,4 @@
#cmakedefine01 USE_DATASKETCHES
#cmakedefine01 USE_YAML_CPP
#cmakedefine01 CLICKHOUSE_SPLIT_BINARY
#cmakedefine01 USE_BZIP2

25
src/Common/memory.cpp Normal file

@ -0,0 +1,25 @@
#if defined(OS_DARWIN) && defined(BUNDLED_STATIC_JEMALLOC)
extern "C"
{
extern void zone_register();
}
struct InitializeJemallocZoneAllocatorForOSX
{
InitializeJemallocZoneAllocatorForOSX()
{
/// On OSX, jemalloc registers itself as the default zone allocator.
///
/// But when you link statically, zone_register() will not be called
/// and may even be optimized out:
///
/// It is ok to call it twice (i.e. in case of shared libraries)
/// Since zone_register() is a no-op if the default zone is already replaced with something.
///
/// https://github.com/jemalloc/jemalloc/issues/708
zone_register();
}
} initializeJemallocZoneAllocatorForOSX;
#endif

108
src/Common/memory.h Normal file

@ -0,0 +1,108 @@
#pragma once
#include <new>
#include <common/defines.h>
#include <Common/CurrentMemoryTracker.h>
#if USE_JEMALLOC
# include <jemalloc/jemalloc.h>
#endif
#if !USE_JEMALLOC || JEMALLOC_VERSION_MAJOR < 4
# include <cstdlib>
#endif
namespace Memory
{
inline ALWAYS_INLINE void * newImpl(std::size_t size)
{
auto * ptr = malloc(size);
if (likely(ptr != nullptr))
return ptr;
/// @note no std::get_new_handler logic implemented
throw std::bad_alloc{};
}
inline ALWAYS_INLINE void * newNoExept(std::size_t size) noexcept
{
return malloc(size);
}
inline ALWAYS_INLINE void deleteImpl(void * ptr) noexcept
{
free(ptr);
}
#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 4
inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size) noexcept
{
if (unlikely(ptr == nullptr))
return;
sdallocx(ptr, size, 0);
}
#else
inline ALWAYS_INLINE void deleteSized(void * ptr, std::size_t size [[maybe_unused]]) noexcept
{
free(ptr);
}
#endif
#if defined(OS_LINUX)
# include <malloc.h>
#elif defined(OS_DARWIN)
# include <malloc/malloc.h>
#endif
inline ALWAYS_INLINE size_t getActualAllocationSize(size_t size)
{
size_t actual_size = size;
#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 5
/// The nallocx() function allocates no memory, but it performs the same size computation as the mallocx() function
/// @note je_mallocx() != je_malloc(). It's expected they don't differ much in allocation logic.
if (likely(size != 0))
actual_size = nallocx(size, 0);
#endif
return actual_size;
}
inline ALWAYS_INLINE void trackMemory(std::size_t size)
{
std::size_t actual_size = getActualAllocationSize(size);
CurrentMemoryTracker::allocNoThrow(actual_size);
}
inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0) noexcept
{
try
{
#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 5
/// @note It's also possible to use je_malloc_usable_size() here.
if (likely(ptr != nullptr))
CurrentMemoryTracker::free(sallocx(ptr, 0));
#else
if (size)
CurrentMemoryTracker::free(size);
# if defined(_GNU_SOURCE)
/// It's an inaccurate resource free for sanitizers: the malloc_usable_size() result is greater than or equal to the allocated size.
else
CurrentMemoryTracker::free(malloc_usable_size(ptr));
# endif
#endif
}
catch (...)
{}
}
}


@ -1,117 +1,34 @@
#include <common/memory.h>
#include <Common/CurrentMemoryTracker.h>
#include <iostream>
#include <Common/memory.h>
#include <new>
#if defined(OS_LINUX)
# include <malloc.h>
#elif defined(OS_DARWIN)
# include <malloc/malloc.h>
#endif
#if defined(OS_DARWIN) && defined(BUNDLED_STATIC_JEMALLOC)
extern "C"
{
extern void zone_register();
}
struct InitializeJemallocZoneAllocatorForOSX
{
InitializeJemallocZoneAllocatorForOSX()
{
/// In case of OSX jemalloc register itself as a default zone allocator.
///
/// But when you link statically then zone_register() will not be called,
/// and even will be optimized out:
///
/// It is ok to call it twice (i.e. in case of shared libraries)
/// Since zone_register() is a no-op if the default zone is already replaced with something.
///
/// https://github.com/jemalloc/jemalloc/issues/708
zone_register();
}
} initializeJemallocZoneAllocatorForOSX;
#endif
/// Replace default new/delete with memory tracking versions.
/// @sa https://en.cppreference.com/w/cpp/memory/new/operator_new
/// https://en.cppreference.com/w/cpp/memory/new/operator_delete
namespace Memory
{
inline ALWAYS_INLINE size_t getActualAllocationSize(size_t size)
{
size_t actual_size = size;
#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 5
/// The nallocx() function allocates no memory, but it performs the same size computation as the mallocx() function
/// @note je_mallocx() != je_malloc(). It's expected they don't differ much in allocation logic.
if (likely(size != 0))
actual_size = nallocx(size, 0);
#endif
return actual_size;
}
inline ALWAYS_INLINE void trackMemory(std::size_t size)
{
std::size_t actual_size = getActualAllocationSize(size);
CurrentMemoryTracker::allocNoThrow(actual_size);
}
inline ALWAYS_INLINE void untrackMemory(void * ptr [[maybe_unused]], std::size_t size [[maybe_unused]] = 0) noexcept
{
try
{
#if USE_JEMALLOC && JEMALLOC_VERSION_MAJOR >= 5
/// @note It's also possible to use je_malloc_usable_size() here.
if (likely(ptr != nullptr))
CurrentMemoryTracker::free(sallocx(ptr, 0));
#else
if (size)
CurrentMemoryTracker::free(size);
# if defined(_GNU_SOURCE)
/// It's innaccurate resource free for sanitizers. malloc_usable_size() result is greater or equal to allocated size.
else
CurrentMemoryTracker::free(malloc_usable_size(ptr));
# endif
#endif
}
catch (...)
{}
}
}
/// new
void * operator new(std::size_t size)
{
Memory::trackMemory(size);
return Memory::newImpl(size);
}
void * operator new[](std::size_t size)
{
Memory::trackMemory(size);
return Memory::newImpl(size);
}
void * operator new(std::size_t size, const std::nothrow_t &) noexcept
{
Memory::trackMemory(size);
return Memory::newNoExept(size);
}
void * operator new[](std::size_t size, const std::nothrow_t &) noexcept
{
Memory::trackMemory(size);
return Memory::newNoExept(size);
}


@ -169,6 +169,7 @@ class IColumn;
M(Int64, os_thread_priority, 0, "If non zero - set corresponding 'nice' value for query processing threads. Can be used to adjust query priority for OS scheduler.", 0) \
\
M(Bool, log_queries, 1, "Log requests and write the log to the system table.", 0) \
M(Bool, log_formatted_queries, 0, "Log formatted queries and write the log to the system table.", 0) \
M(LogQueriesType, log_queries_min_type, QueryLogElementType::QUERY_START, "Minimal type in query_log to log, possible values (from low to high): QUERY_START, QUERY_FINISH, EXCEPTION_BEFORE_START, EXCEPTION_WHILE_PROCESSING.", 0) \
M(Milliseconds, log_queries_min_query_duration_ms, 0, "Minimal time for the query to run, to get to the query_log/query_thread_log.", 0) \
M(UInt64, log_queries_cut_to_length, 100000, "If query length is greater than specified threshold (in bytes), then cut query when writing to query log. Also limit length of printed query in ordinary text log.", 0) \
@ -499,6 +500,7 @@ class IColumn;
M(Bool, enable_debug_queries, false, "Obsolete setting, does nothing.", 0) \
M(Bool, allow_experimental_database_atomic, true, "Obsolete setting, does nothing.", 0) \
M(Bool, allow_experimental_bigint_types, true, "Obsolete setting, does nothing.", 0) \
M(Bool, allow_experimental_window_functions, true, "Obsolete setting, does nothing.", 0) \
M(HandleKafkaErrorMode, handle_kafka_error_mode, HandleKafkaErrorMode::DEFAULT, "Obsolete setting, does nothing.", 0) \
M(Bool, database_replicated_ddl_output, true, "Obsolete setting, does nothing.", 0) \
/** The section above is for obsolete settings. Do not add anything there. */
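As a hedged illustration of the `log_formatted_queries` setting added above; the `formatted_query` column name in `system.query_log` is an assumption here, not taken from this diff:
```sql
SET log_formatted_queries = 1;   -- also log the formatted query text
SELECT 1;
SYSTEM FLUSH LOGS;
-- 'formatted_query' is an assumed column name; check the query_log schema on your build.
SELECT query, formatted_query FROM system.query_log ORDER BY event_time DESC LIMIT 1;
```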


@ -3,6 +3,7 @@
#include <Common/ProfileEvents.h>
#include <Common/CurrentThread.h>
#include <IO/WriteHelpers.h>
#include <Common/Stopwatch.h>
#include <common/sleep.h>
namespace ProfileEvents
@ -104,14 +105,18 @@ static bool handleOverflowMode(OverflowMode mode, const String & message, int co
}
}
bool ExecutionSpeedLimits::checkTimeLimit(UInt64 elapsed_ns, OverflowMode overflow_mode) const
bool ExecutionSpeedLimits::checkTimeLimit(const Stopwatch & stopwatch, OverflowMode overflow_mode) const
{
if (max_execution_time != 0
&& elapsed_ns > static_cast<UInt64>(max_execution_time.totalMicroseconds()) * 1000)
return handleOverflowMode(overflow_mode,
if (max_execution_time != 0)
{
auto elapsed_ns = stopwatch.elapsed();
if (elapsed_ns > static_cast<UInt64>(max_execution_time.totalMicroseconds()) * 1000)
return handleOverflowMode(overflow_mode,
"Timeout exceeded: elapsed " + toString(static_cast<double>(elapsed_ns) / 1000000000ULL)
+ " seconds, maximum: " + toString(max_execution_time.totalMicroseconds() / 1000000.0),
ErrorCodes::TIMEOUT_EXCEEDED);
}
return true;
}


@ -3,6 +3,7 @@
#include <Poco/Timespan.h>
#include <common/types.h>
#include <DataStreams/SizeLimits.h>
#include <Common/Stopwatch.h>
namespace DB
{
@ -25,7 +26,7 @@ public:
/// Pause execution in case if speed limits were exceeded.
void throttle(size_t read_rows, size_t read_bytes, size_t total_rows_to_read, UInt64 total_elapsed_microseconds) const;
bool checkTimeLimit(UInt64 elapsed_ns, OverflowMode overflow_mode) const;
bool checkTimeLimit(const Stopwatch & stopwatch, OverflowMode overflow_mode) const;
};
}


@ -201,7 +201,7 @@ void IBlockInputStream::updateExtremes(Block & block)
bool IBlockInputStream::checkTimeLimit() const
{
return limits.speed_limits.checkTimeLimit(info.total_stopwatch.elapsed(), limits.timeout_overflow_mode);
return limits.speed_limits.checkTimeLimit(info.total_stopwatch, limits.timeout_overflow_mode);
}


@ -0,0 +1,97 @@
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#if USE_BZIP2
# include <IO/Bzip2ReadBuffer.h>
# include <bzlib.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BZIP2_STREAM_DECODER_FAILED;
}
class Bzip2ReadBuffer::Bzip2StateWrapper
{
public:
Bzip2StateWrapper()
{
memset(&stream, 0, sizeof(stream));
int ret = BZ2_bzDecompressInit(&stream, 0, 0);
if (ret != BZ_OK)
throw Exception(
ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
"bzip2 stream decoder init failed: error code: {}",
ret);
}
~Bzip2StateWrapper()
{
BZ2_bzDecompressEnd(&stream);
}
bz_stream stream;
};
Bzip2ReadBuffer::Bzip2ReadBuffer(std::unique_ptr<ReadBuffer> in_, size_t buf_size, char *existing_memory, size_t alignment)
: BufferWithOwnMemory<ReadBuffer>(buf_size, existing_memory, alignment)
, in(std::move(in_))
, bz(std::make_unique<Bzip2StateWrapper>())
, eof(false)
{
}
Bzip2ReadBuffer::~Bzip2ReadBuffer() = default;
bool Bzip2ReadBuffer::nextImpl()
{
if (eof)
return false;
if (!bz->stream.avail_in)
{
in->nextIfAtEnd();
bz->stream.avail_in = in->buffer().end() - in->position();
bz->stream.next_in = in->position();
}
bz->stream.avail_out = internal_buffer.size();
bz->stream.next_out = internal_buffer.begin();
int ret = BZ2_bzDecompress(&bz->stream);
in->position() = in->buffer().end() - bz->stream.avail_in;
working_buffer.resize(internal_buffer.size() - bz->stream.avail_out);
if (ret == BZ_STREAM_END)
{
if (in->eof())
{
eof = true;
return !working_buffer.empty();
}
else
{
throw Exception(
ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
"bzip2 decoder finished, but input stream has not ended: error code: {}", ret);
}
}
if (ret != BZ_OK)
throw Exception(
ErrorCodes::BZIP2_STREAM_DECODER_FAILED,
"bzip2 stream decoder failed: error code: {}",
ret);
return true;
}
}
#endif

33
src/IO/Bzip2ReadBuffer.h Normal file

@ -0,0 +1,33 @@
#pragma once
#include <IO/ReadBuffer.h>
#include <IO/BufferWithOwnMemory.h>
namespace DB
{
class Bzip2ReadBuffer : public BufferWithOwnMemory<ReadBuffer>
{
public:
Bzip2ReadBuffer(
std::unique_ptr<ReadBuffer> in_,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr,
size_t alignment = 0);
~Bzip2ReadBuffer() override;
private:
bool nextImpl() override;
std::unique_ptr<ReadBuffer> in;
class Bzip2StateWrapper;
std::unique_ptr<Bzip2StateWrapper> bz;
bool eof;
};
}

138
src/IO/Bzip2WriteBuffer.cpp Normal file

@ -0,0 +1,138 @@
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
#endif
#if USE_BZIP2
# include <IO/Bzip2WriteBuffer.h>
# include <bzlib.h>
#include <Common/MemoryTracker.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BZIP2_STREAM_ENCODER_FAILED;
}
class Bzip2WriteBuffer::Bzip2StateWrapper
{
public:
explicit Bzip2StateWrapper(int compression_level)
{
memset(&stream, 0, sizeof(stream));
int ret = BZ2_bzCompressInit(&stream, compression_level, 0, 0);
if (ret != BZ_OK)
throw Exception(
ErrorCodes::BZIP2_STREAM_ENCODER_FAILED,
"bzip2 stream encoder init failed: error code: {}",
ret);
}
~Bzip2StateWrapper()
{
BZ2_bzCompressEnd(&stream);
}
bz_stream stream;
};
Bzip2WriteBuffer::Bzip2WriteBuffer(std::unique_ptr<WriteBuffer> out_, int compression_level, size_t buf_size, char * existing_memory, size_t alignment)
: BufferWithOwnMemory<WriteBuffer>(buf_size, existing_memory, alignment)
, bz(std::make_unique<Bzip2StateWrapper>(compression_level))
, out(std::move(out_))
{
}
Bzip2WriteBuffer::~Bzip2WriteBuffer()
{
/// FIXME move final flush into the caller
MemoryTracker::LockExceptionInThread lock(VariableContext::Global);
finish();
}
void Bzip2WriteBuffer::nextImpl()
{
if (!offset())
{
return;
}
bz->stream.next_in = working_buffer.begin();
bz->stream.avail_in = offset();
try
{
do
{
out->nextIfAtEnd();
bz->stream.next_out = out->position();
bz->stream.avail_out = out->buffer().end() - out->position();
int ret = BZ2_bzCompress(&bz->stream, BZ_RUN);
out->position() = out->buffer().end() - bz->stream.avail_out;
if (ret != BZ_RUN_OK)
throw Exception(
ErrorCodes::BZIP2_STREAM_ENCODER_FAILED,
"bzip2 stream encoder failed: error code: {}",
ret);
}
while (bz->stream.avail_in > 0);
}
catch (...)
{
/// Do not try to write next time after exception.
out->position() = out->buffer().begin();
throw;
}
}
void Bzip2WriteBuffer::finish()
{
if (finished)
return;
try
{
finishImpl();
out->finalize();
finished = true;
}
catch (...)
{
/// Do not try to flush next time after exception.
out->position() = out->buffer().begin();
finished = true;
throw;
}
}
void Bzip2WriteBuffer::finishImpl()
{
next();
out->nextIfAtEnd();
bz->stream.next_out = out->position();
bz->stream.avail_out = out->buffer().end() - out->position();
int ret = BZ2_bzCompress(&bz->stream, BZ_FINISH);
out->position() = out->buffer().end() - bz->stream.avail_out;
if (ret != BZ_STREAM_END && ret != BZ_FINISH_OK)
throw Exception(
ErrorCodes::BZIP2_STREAM_ENCODER_FAILED,
"bzip2 stream encoder failed: error code: {}",
ret);
}
}
#endif

37
src/IO/Bzip2WriteBuffer.h Normal file

@ -0,0 +1,37 @@
#pragma once
#include <IO/WriteBuffer.h>
#include <IO/BufferWithOwnMemory.h>
namespace DB
{
class Bzip2WriteBuffer : public BufferWithOwnMemory<WriteBuffer>
{
public:
Bzip2WriteBuffer(
std::unique_ptr<WriteBuffer> out_,
int compression_level,
size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
char * existing_memory = nullptr,
size_t alignment = 0);
~Bzip2WriteBuffer() override;
void finalize() override { finish(); }
private:
void nextImpl() override;
void finish();
void finishImpl();
class Bzip2StateWrapper;
std::unique_ptr<Bzip2StateWrapper> bz;
std::unique_ptr<WriteBuffer> out;
bool finished = false;
};
}


@ -10,6 +10,8 @@
#include <IO/ZlibInflatingReadBuffer.h>
#include <IO/ZstdDeflatingWriteBuffer.h>
#include <IO/ZstdInflatingReadBuffer.h>
#include <IO/Bzip2ReadBuffer.h>
#include <IO/Bzip2WriteBuffer.h>
#if !defined(ARCADIA_BUILD)
# include <Common/config.h>
@ -40,6 +42,8 @@ std::string toContentEncodingName(CompressionMethod method)
return "xz";
case CompressionMethod::Zstd:
return "zstd";
case CompressionMethod::Bzip2:
return "bz2";
case CompressionMethod::None:
return "";
}
@ -69,11 +73,13 @@ CompressionMethod chooseCompressionMethod(const std::string & path, const std::s
return CompressionMethod::Xz;
if (method_str == "zstd" || method_str == "zst")
return CompressionMethod::Zstd;
if (method_str == "bz2")
return CompressionMethod::Bzip2;
if (hint.empty() || hint == "auto" || hint == "none")
return CompressionMethod::None;
throw Exception(
"Unknown compression method " + hint + ". Only 'auto', 'none', 'gzip', 'deflate', 'br', 'xz', 'zstd' are supported as compression methods",
"Unknown compression method " + hint + ". Only 'auto', 'none', 'gzip', 'deflate', 'br', 'xz', 'zstd', 'bz2' are supported as compression methods",
ErrorCodes::NOT_IMPLEMENTED);
}
@ -91,7 +97,10 @@ std::unique_ptr<ReadBuffer> wrapReadBufferWithCompressionMethod(
return std::make_unique<LZMAInflatingReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
if (method == CompressionMethod::Zstd)
return std::make_unique<ZstdInflatingReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
#if USE_BZIP2
if (method == CompressionMethod::Bzip2)
return std::make_unique<Bzip2ReadBuffer>(std::move(nested), buf_size, existing_memory, alignment);
#endif
if (method == CompressionMethod::None)
return nested;
@ -114,7 +123,10 @@ std::unique_ptr<WriteBuffer> wrapWriteBufferWithCompressionMethod(
if (method == CompressionMethod::Zstd)
return std::make_unique<ZstdDeflatingWriteBuffer>(std::move(nested), level, buf_size, existing_memory, alignment);
#if USE_BZIP2
if (method == CompressionMethod::Bzip2)
return std::make_unique<Bzip2WriteBuffer>(std::move(nested), level, buf_size, existing_memory, alignment);
#endif
if (method == CompressionMethod::None)
return nested;
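A hedged end-to-end sketch of the new `bz2` method, assuming (as for the other methods) that the compression is chosen from the file extension via `chooseCompressionMethod`; the file name and the use of the `file()` table function are illustrative, not taken from this commit:
```sql
-- Hypothetical example: the .bz2 extension is expected to select CompressionMethod::Bzip2.
INSERT INTO TABLE FUNCTION file('sample.csv.bz2', 'CSV', 'n UInt64')
SELECT number AS n FROM numbers(5);

SELECT sum(n) FROM file('sample.csv.bz2', 'CSV', 'n UInt64');
```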


@ -31,7 +31,8 @@ enum class CompressionMethod
/// Zstd compressor
/// This option corresponds to HTTP Content-Encoding: zstd
Zstd,
Brotli
Brotli,
Bzip2
};
/// How the compression method is named in HTTP.


@ -23,6 +23,8 @@ SRCS(
AIOContextPool.cpp
BrotliReadBuffer.cpp
BrotliWriteBuffer.cpp
Bzip2ReadBuffer.cpp
Bzip2WriteBuffer.cpp
CascadeWriteBuffer.cpp
CompressionMethod.cpp
DoubleConverter.cpp


@ -348,7 +348,7 @@ SetPtr makeExplicitSet(
const ASTPtr & left_arg = args.children.at(0);
const ASTPtr & right_arg = args.children.at(1);
auto column_name = left_arg->getColumnName(context->getSettingsRef());
auto column_name = left_arg->getColumnName();
const auto & dag_node = actions.findInIndex(column_name);
const DataTypePtr & left_arg_type = dag_node.result_type;
@ -641,7 +641,7 @@ std::optional<NameAndTypePair> ActionsMatcher::getNameAndTypeFromAST(const ASTPt
{
// If the argument is a literal, we generated a unique column name for it.
// Use it instead of a generic display name.
auto child_column_name = ast->getColumnName(data.getContext()->getSettingsRef());
auto child_column_name = ast->getColumnName();
const auto * as_literal = ast->as<ASTLiteral>();
if (as_literal)
{
@ -698,7 +698,7 @@ ASTs ActionsMatcher::doUntuple(const ASTFunction * function, ActionsMatcher::Dat
auto func = makeASTFunction("tupleElement", tuple_ast, literal);
auto function_builder = FunctionFactory::instance().get(func->name, data.getContext());
data.addFunction(function_builder, {tuple_name_type->name, literal->getColumnName(data.getContext()->getSettingsRef())}, func->getColumnName(data.getContext()->getSettingsRef()));
data.addFunction(function_builder, {tuple_name_type->name, literal->getColumnName()}, func->getColumnName());
columns.push_back(std::move(func));
}
@ -762,7 +762,7 @@ void ActionsMatcher::visit(const ASTIdentifier & identifier, const ASTPtr &, Dat
void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & data)
{
auto column_name = ast->getColumnName(data.getContext()->getSettingsRef());
auto column_name = ast->getColumnName();
if (data.hasColumn(column_name))
return;
@ -778,7 +778,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
ASTPtr arg = node.arguments->children.at(0);
visit(arg, data);
if (!data.only_consts)
data.addArrayJoin(arg->getColumnName(data.getContext()->getSettingsRef()), column_name);
data.addArrayJoin(arg->getColumnName(), column_name);
return;
}
@ -800,7 +800,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
/// We are in the part of the tree that we are not going to compute. You just need to define types.
/// Do not subquery and create sets. We replace "in*" function to "in*IgnoreSet".
auto argument_name = node.arguments->children.at(0)->getColumnName(data.getContext()->getSettingsRef());
auto argument_name = node.arguments->children.at(0)->getColumnName();
data.addFunction(
FunctionFactory::instance().get(node.name + "IgnoreSet", data.getContext()),
@ -929,7 +929,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
if (!prepared_set->empty())
column.name = data.getUniqueName("__set");
else
column.name = child->getColumnName(data.getContext()->getSettingsRef());
column.name = child->getColumnName();
if (!data.hasColumn(column.name))
{
@ -1008,7 +1008,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
visit(lambda->arguments->children.at(1), data);
auto lambda_dag = data.actions_stack.popLevel();
String result_name = lambda->arguments->children.at(1)->getColumnName(data.getContext()->getSettingsRef());
String result_name = lambda->arguments->children.at(1)->getColumnName();
lambda_dag->removeUnusedActions(Names(1, result_name));
auto lambda_actions = std::make_shared<ExpressionActions>(
@ -1023,7 +1023,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
if (findColumn(required_arg, lambda_arguments) == lambda_arguments.end())
captured.push_back(required_arg);
/// We can not name `getColumnName(data.getContext()->getSettingsRef())`,
/// We can not name `getColumnName()`,
/// because it does not uniquely define the expression (the types of arguments can be different).
String lambda_name = data.getUniqueName("__lambda");
@ -1053,7 +1053,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
if (arguments_present)
{
/// Calculate column name here again, because AST may be changed here (in case of untuple).
data.addFunction(function_builder, argument_names, ast->getColumnName(data.getContext()->getSettingsRef()));
data.addFunction(function_builder, argument_names, ast->getColumnName());
}
}
@ -1067,7 +1067,7 @@ void ActionsMatcher::visit(const ASTLiteral & literal, const ASTPtr & /* ast */,
// AST here? Anyway, do not modify the column name if it is set already.
if (literal.unique_column_name.empty())
{
const auto default_name = literal.getColumnName(data.getContext()->getSettingsRef());
const auto default_name = literal.getColumnName();
const auto & index = data.actions_stack.getLastActionsIndex();
const auto * existing_column = index.tryGetNode(default_name);
@ -1147,7 +1147,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
}
/// We get the stream of blocks for the subquery. Create Set and put it in place of the subquery.
String set_id = right_in_operand->getColumnName(data.getContext()->getSettingsRef());
String set_id = right_in_operand->getColumnName();
SubqueryForSet & subquery_for_set = data.subqueries_for_sets[set_id];
@ -1183,7 +1183,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
{
const auto & last_actions = data.actions_stack.getLastActions();
const auto & index = data.actions_stack.getLastActionsIndex();
if (index.contains(left_in_operand->getColumnName(data.getContext()->getSettingsRef())))
if (index.contains(left_in_operand->getColumnName()))
/// An explicit enumeration of values in parentheses.
return makeExplicitSet(&node, last_actions, false, data.getContext(), data.set_size_limit, data.prepared_sets);
else


@ -1193,6 +1193,9 @@ bool Aggregator::checkLimits(size_t result_size, bool & no_more_keys) const
}
}
/// Some aggregate functions cannot throw exceptions on allocations (e.g. from C malloc)
/// but still track memory. Check it here.
CurrentMemoryTracker::check();
return true;
}


@ -779,43 +779,60 @@ void AsynchronousMetrics::update(std::chrono::system_clock::time_point update_ti
uint64_t kb = 0;
readText(kb, *meminfo);
if (kb)
if (!kb)
{
skipWhitespaceIfAny(*meminfo, true);
assertString("kB", *meminfo);
skipToNextLineOrEOF(*meminfo);
continue;
}
uint64_t bytes = kb * 1024;
skipWhitespaceIfAny(*meminfo, true);
if (name == "MemTotal:")
{
new_values["OSMemoryTotal"] = bytes;
}
else if (name == "MemFree:")
{
/// We cannot simply name this metric "Free", because it confuses users.
/// See https://www.linuxatemyram.com/
/// For convenience we also provide OSMemoryFreePlusCached, that should be somewhat similar to OSMemoryAvailable.
/**
* Not all entries in /proc/meminfo contain the kB suffix, e.g.
* HugePages_Total: 0
* HugePages_Free: 0
* We simply skip such entries as they're not needed
*/
if (*meminfo->position() == '\n')
{
skipToNextLineOrEOF(*meminfo);
continue;
}
free_plus_cached_bytes += bytes;
new_values["OSMemoryFreeWithoutCached"] = bytes;
}
else if (name == "MemAvailable:")
{
new_values["OSMemoryAvailable"] = bytes;
}
else if (name == "Buffers:")
{
new_values["OSMemoryBuffers"] = bytes;
}
else if (name == "Cached:")
{
free_plus_cached_bytes += bytes;
new_values["OSMemoryCached"] = bytes;
}
else if (name == "SwapCached:")
{
new_values["OSMemorySwapCached"] = bytes;
}
assertString("kB", *meminfo);
uint64_t bytes = kb * 1024;
if (name == "MemTotal:")
{
new_values["OSMemoryTotal"] = bytes;
}
else if (name == "MemFree:")
{
/// We cannot simply name this metric "Free", because it confuses users.
/// See https://www.linuxatemyram.com/
/// For convenience we also provide OSMemoryFreePlusCached, that should be somewhat similar to OSMemoryAvailable.
free_plus_cached_bytes += bytes;
new_values["OSMemoryFreeWithoutCached"] = bytes;
}
else if (name == "MemAvailable:")
{
new_values["OSMemoryAvailable"] = bytes;
}
else if (name == "Buffers:")
{
new_values["OSMemoryBuffers"] = bytes;
}
else if (name == "Cached:")
{
free_plus_cached_bytes += bytes;
new_values["OSMemoryCached"] = bytes;
}
else if (name == "SwapCached:")
{
new_values["OSMemorySwapCached"] = bytes;
}
skipToNextLineOrEOF(*meminfo);


@ -243,7 +243,7 @@ void ExpressionAnalyzer::analyzeAggregation()
ssize_t size = group_asts.size();
getRootActionsNoMakeSet(group_asts[i], true, temp_actions, false);
const auto & column_name = group_asts[i]->getColumnName(getContext()->getSettingsRef());
const auto & column_name = group_asts[i]->getColumnName();
const auto * node = temp_actions->tryFindInIndex(column_name);
if (!node)
throw Exception("Unknown identifier (in GROUP BY): " + column_name, ErrorCodes::UNKNOWN_IDENTIFIER);
@ -408,7 +408,7 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
auto temp_actions = std::make_shared<ActionsDAG>(columns_after_join);
getRootActions(left_in_operand, true, temp_actions);
if (temp_actions->tryFindInIndex(left_in_operand->getColumnName(getContext()->getSettingsRef())))
if (temp_actions->tryFindInIndex(left_in_operand->getColumnName()))
makeExplicitSet(func, *temp_actions, true, getContext(), settings.size_limits_for_set, prepared_sets);
}
}
@ -456,7 +456,7 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions)
if (node->arguments)
getRootActionsNoMakeSet(node->arguments, true, actions);
aggregate.column_name = node->getColumnName(getContext()->getSettingsRef());
aggregate.column_name = node->getColumnName();
const ASTs & arguments = node->arguments ? node->arguments->children : ASTs();
aggregate.argument_names.resize(arguments.size());
@ -464,7 +464,7 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions)
for (size_t i = 0; i < arguments.size(); ++i)
{
const std::string & name = arguments[i]->getColumnName(getContext()->getSettingsRef());
const std::string & name = arguments[i]->getColumnName();
const auto * dag_node = actions->tryFindInIndex(name);
if (!dag_node)
{
@ -645,7 +645,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
WindowFunctionDescription window_function;
window_function.function_node = function_node;
window_function.column_name
= window_function.function_node->getColumnName(getContext()->getSettingsRef());
= window_function.function_node->getColumnName();
window_function.function_parameters
= window_function.function_node->parameters
? getAggregateFunctionParametersArray(
@ -664,7 +664,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
window_function.argument_names.resize(arguments.size());
for (size_t i = 0; i < arguments.size(); ++i)
{
const std::string & name = arguments[i]->getColumnName(getContext()->getSettingsRef());
const std::string & name = arguments[i]->getColumnName();
const auto * node = actions->tryFindInIndex(name);
if (!node)
@ -961,7 +961,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(
auto & step = chain.lastStep(sourceColumns());
getRootActions(select_query->prewhere(), only_types, step.actions());
String prewhere_column_name = select_query->prewhere()->getColumnName(getContext()->getSettingsRef());
String prewhere_column_name = select_query->prewhere()->getColumnName();
step.addRequiredOutput(prewhere_column_name);
const auto & node = step.actions()->findInIndex(prewhere_column_name);
@ -1047,7 +1047,7 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain,
getRootActions(select_query->where(), only_types, step.actions());
auto where_column_name = select_query->where()->getColumnName(getContext()->getSettingsRef());
auto where_column_name = select_query->where()->getColumnName();
step.addRequiredOutput(where_column_name);
const auto & node = step.actions()->findInIndex(where_column_name);
@ -1072,7 +1072,7 @@ bool SelectQueryExpressionAnalyzer::appendGroupBy(ExpressionActionsChain & chain
ASTs asts = select_query->groupBy()->children;
for (const auto & ast : asts)
{
step.addRequiredOutput(ast->getColumnName(getContext()->getSettingsRef()));
step.addRequiredOutput(ast->getColumnName());
getRootActions(ast, only_types, step.actions());
}
@ -1100,7 +1100,7 @@ void SelectQueryExpressionAnalyzer::appendAggregateFunctionsArguments(Expression
for (const auto & name : desc.argument_names)
step.addRequiredOutput(name);
/// Collect aggregates removing duplicates by node.getColumnName(getContext()->getSettingsRef())
/// Collect aggregates removing duplicates by node.getColumnName()
/// It's not clear why we recollect aggregates (for query parts) while we're able to use previously collected ones (for entire query)
/// @note The original recollection logic didn't remove duplicates.
GetAggregatesVisitor::Data data;
@ -1155,7 +1155,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments(
// (2b) Required function argument columns.
for (const auto & a : f.function_node->arguments->children)
{
step.addRequiredOutput(a->getColumnName(getContext()->getSettingsRef()));
step.addRequiredOutput(a->getColumnName());
}
}
@ -1177,7 +1177,7 @@ bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain,
ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns);
getRootActionsForHaving(select_query->having(), only_types, step.actions());
step.addRequiredOutput(select_query->having()->getColumnName(getContext()->getSettingsRef()));
step.addRequiredOutput(select_query->having()->getColumnName());
return true;
}
@ -1201,7 +1201,7 @@ void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain,
continue;
}
step.addRequiredOutput(child->getColumnName(getContext()->getSettingsRef()));
step.addRequiredOutput(child->getColumnName());
}
}
@ -1229,7 +1229,7 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai
if (!ast || ast->children.empty())
throw Exception("Bad order expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);
ASTPtr order_expression = ast->children.at(0);
step.addRequiredOutput(order_expression->getColumnName(getContext()->getSettingsRef()));
step.addRequiredOutput(order_expression->getColumnName());
if (ast->with_fill)
with_fill = true;
@ -1279,7 +1279,7 @@ bool SelectQueryExpressionAnalyzer::appendLimitBy(ExpressionActionsChain & chain
for (const auto & child : select_query->limitBy()->children)
{
auto child_name = child->getColumnName(getContext()->getSettingsRef());
auto child_name = child->getColumnName();
if (!aggregated_names.count(child_name))
step.addRequiredOutput(std::move(child_name));
}
@ -1295,15 +1295,13 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendProjectResult(ExpressionActio
NamesWithAliases result_columns;
const auto & settings = getContext()->getSettingsRef();
ASTs asts = select_query->select()->children;
for (const auto & ast : asts)
{
String result_name = ast->getAliasOrColumnName(settings);
String result_name = ast->getAliasOrColumnName();
if (required_result_columns.empty() || required_result_columns.count(result_name))
{
std::string source_name = ast->getColumnName(settings);
std::string source_name = ast->getColumnName();
/*
* For temporary columns created by ExpressionAnalyzer for literals,
@ -1345,7 +1343,7 @@ void ExpressionAnalyzer::appendExpression(ExpressionActionsChain & chain, const
{
ExpressionActionsChain::Step & step = chain.lastStep(sourceColumns());
getRootActions(expr, only_types, step.actions());
step.addRequiredOutput(expr->getColumnName(getContext()->getSettingsRef()));
step.addRequiredOutput(expr->getColumnName());
}
@ -1362,13 +1360,12 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_r
else
asts = ASTs(1, query);
const auto & settings = getContext()->getSettingsRef();
for (const auto & ast : asts)
{
std::string name = ast->getColumnName(settings);
std::string name = ast->getColumnName();
std::string alias;
if (add_aliases)
alias = ast->getAliasOrColumnName(settings);
alias = ast->getAliasOrColumnName();
else
alias = name;
result_columns.emplace_back(name, alias);
@ -1497,7 +1494,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
if (auto actions = query_analyzer.appendPrewhere(chain, !first_stage, additional_required_columns_after_prewhere))
{
prewhere_info = std::make_shared<PrewhereInfo>(actions, query.prewhere()->getColumnName(settings));
prewhere_info = std::make_shared<PrewhereInfo>(actions, query.prewhere()->getColumnName());
if (allowEarlyConstantFolding(*prewhere_info->prewhere_actions, settings))
{
@ -1507,7 +1504,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
ExpressionActions(
prewhere_info->prewhere_actions,
ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_prewhere_sample);
auto & column_elem = before_prewhere_sample.getByName(query.prewhere()->getColumnName(settings));
auto & column_elem = before_prewhere_sample.getByName(query.prewhere()->getColumnName());
/// If the filter column is a constant, record it.
if (column_elem.column)
prewhere_constant_filter_description = ConstantFilterDescription(*column_elem.column);
@ -1542,7 +1539,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
ExpressionActions(
before_where,
ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample);
auto & column_elem = before_where_sample.getByName(query.where()->getColumnName(settings));
auto & column_elem = before_where_sample.getByName(query.where()->getColumnName());
/// If the filter column is a constant, record it.
if (column_elem.column)
where_constant_filter_description = ConstantFilterDescription(*column_elem.column);
@ -1633,7 +1630,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
const auto * select_query = query_analyzer.getSelectQuery();
for (const auto & child : select_query->select()->children)
{
step.addRequiredOutput(child->getColumnName(settings));
step.addRequiredOutput(child->getColumnName());
}
}
@ -1689,8 +1686,7 @@ void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, si
if (hasWhere())
{
const auto & settings = chain.getContext()->getSettingsRef();
where_column_name = query.where()->getColumnName(settings);
where_column_name = query.where()->getColumnName();
remove_where_filter = chain.steps.at(where_step_num)->required_output.find(where_column_name)->second;
}
}

View File

@ -141,7 +141,7 @@ String InterpreterSelectQuery::generateFilterActions(ActionsDAGPtr & actions, co
SelectQueryExpressionAnalyzer analyzer(query_ast, syntax_result, context, metadata_snapshot);
actions = analyzer.simpleSelectActions();
auto column_name = expr_list->children.at(0)->getColumnName(context->getSettingsRef());
auto column_name = expr_list->children.at(0)->getColumnName();
actions->removeUnusedActions(NameSet{column_name});
actions->projectInput(false);
@ -782,7 +782,7 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, ContextP
order_descr.reserve(query.orderBy()->children.size());
for (const auto & elem : query.orderBy()->children)
{
String name = elem->children.front()->getColumnName(context->getSettingsRef());
String name = elem->children.front()->getColumnName();
const auto & order_by_elem = elem->as<ASTOrderByElement &>();
std::shared_ptr<Collator> collator;
@ -801,14 +801,14 @@ static SortDescription getSortDescription(const ASTSelectQuery & query, ContextP
return order_descr;
}
static SortDescription getSortDescriptionFromGroupBy(const ASTSelectQuery & query, ContextPtr context)
static SortDescription getSortDescriptionFromGroupBy(const ASTSelectQuery & query)
{
SortDescription order_descr;
order_descr.reserve(query.groupBy()->children.size());
for (const auto & elem : query.groupBy()->children)
{
String name = elem->getColumnName(context->getSettingsRef());
String name = elem->getColumnName();
order_descr.emplace_back(name, 1, 1);
}
@ -1327,24 +1327,29 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
}
bool apply_limit = options.to_stage != QueryProcessingStage::WithMergeableStateAfterAggregation;
bool apply_prelimit = apply_limit &&
query.limitLength() && !query.limit_with_ties &&
!hasWithTotalsInAnySubqueryInFromClause(query) &&
!query.arrayJoinExpressionList() &&
!query.distinct &&
!expressions.hasLimitBy() &&
!settings.extremes &&
!has_withfill;
bool apply_offset = options.to_stage != QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit;
bool has_prelimit = false;
if (apply_limit &&
query.limitLength() && !query.limit_with_ties && !hasWithTotalsInAnySubqueryInFromClause(query) &&
!query.arrayJoinExpressionList() && !query.distinct && !expressions.hasLimitBy() && !settings.extremes &&
!has_withfill)
bool limit_applied = false;
if (apply_prelimit)
{
executePreLimit(query_plan, /* do_not_skip_offset= */!apply_offset);
has_prelimit = true;
limit_applied = true;
}
/** If there was more than one stream,
* then DISTINCT needs to be performed once again after merging all streams.
*/
if (query.distinct)
if (!from_aggregation_stage && query.distinct)
executeDistinct(query_plan, false, expressions.selected_columns, false);
if (expressions.hasLimitBy())
if (!from_aggregation_stage && expressions.hasLimitBy())
{
executeExpression(query_plan, expressions.before_limit_by, "Before LIMIT BY");
executeLimitBy(query_plan);
@ -1354,10 +1359,10 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
/// If we have 'WITH TIES', we need execute limit before projection,
/// because in that case columns from 'ORDER BY' are used.
if (query.limit_with_ties)
if (query.limit_with_ties && apply_offset)
{
executeLimit(query_plan);
has_prelimit = true;
limit_applied = true;
}
/// Projection not be done on the shards, since then initiator will not find column in blocks.
@ -1372,7 +1377,12 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
executeExtremes(query_plan);
/// Limit is no longer needed if there is prelimit.
if (apply_limit && !has_prelimit)
///
/// NOTE: LIMIT cannot be applied if OFFSET should not be applied,
/// since LIMIT will apply OFFSET too.
/// This is the case for various optimizations for distributed queries,
/// and when LIMIT cannot be applied it will be applied on the initiator anyway.
if (apply_limit && !limit_applied && apply_offset)
executeLimit(query_plan);
if (apply_offset)
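The NOTE above is the heart of the distributed LIMIT push-down: a LIMIT step also skips OFFSET rows, so when the offset has to be applied on the initiator (after merging the shard streams), the shards must not skip it and instead return up to limit + offset rows. A small self-contained sketch of that arithmetic (illustrative names only, not the actual LimitStep interface):

#include <algorithm>
#include <cstdint>
#include <iostream>

/// How many rows a preliminary LIMIT should let through on a shard.
/// If OFFSET may not be skipped here (the initiator applies it after merging),
/// the shard still has to produce limit + offset rows.
uint64_t preliminaryLimit(uint64_t limit, uint64_t offset, bool do_not_skip_offset)
{
    return do_not_skip_offset ? limit + offset : limit;
}

int main()
{
    const uint64_t limit = 10;
    const uint64_t offset = 5;

    /// Two shards; the initiator merges their streams and applies OFFSET 5 LIMIT 10.
    const uint64_t rows_per_shard = preliminaryLimit(limit, offset, /*do_not_skip_offset=*/ true);
    const uint64_t merged = 2 * rows_per_shard;
    const uint64_t final_rows = std::min<uint64_t>(limit, merged > offset ? merged - offset : 0);

    std::cout << "each shard returns up to " << rows_per_shard << " rows, "
              << "the initiator outputs " << final_rows << " rows\n";
}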
@ -1918,13 +1928,13 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
{
query_info.projection->order_optimizer = std::make_shared<ReadInOrderOptimizer>(
query_info.projection->group_by_elements_actions,
getSortDescriptionFromGroupBy(query, context),
getSortDescriptionFromGroupBy(query),
query_info.syntax_analyzer_result);
}
else
{
query_info.order_optimizer = std::make_shared<ReadInOrderOptimizer>(
analysis_result.group_by_elements_actions, getSortDescriptionFromGroupBy(query, context), query_info.syntax_analyzer_result);
analysis_result.group_by_elements_actions, getSortDescriptionFromGroupBy(query), query_info.syntax_analyzer_result);
}
}
@ -2005,7 +2015,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
void InterpreterSelectQuery::executeWhere(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter)
{
auto where_step = std::make_unique<FilterStep>(
query_plan.getCurrentDataStream(), expression, getSelectQuery().where()->getColumnName(context->getSettingsRef()), remove_filter);
query_plan.getCurrentDataStream(), expression, getSelectQuery().where()->getColumnName(), remove_filter);
where_step->setStepDescription("WHERE");
query_plan.addStep(std::move(where_step));
@ -2054,7 +2064,7 @@ void InterpreterSelectQuery::executeAggregation(QueryPlan & query_plan, const Ac
SortDescription group_by_sort_description;
if (group_by_info && settings.optimize_aggregation_in_order)
group_by_sort_description = getSortDescriptionFromGroupBy(getSelectQuery(), context);
group_by_sort_description = getSortDescriptionFromGroupBy(getSelectQuery());
else
group_by_info = nullptr;
@ -2102,7 +2112,7 @@ void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool
void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression)
{
auto having_step
= std::make_unique<FilterStep>(query_plan.getCurrentDataStream(), expression, getSelectQuery().having()->getColumnName(context->getSettingsRef()), false);
= std::make_unique<FilterStep>(query_plan.getCurrentDataStream(), expression, getSelectQuery().having()->getColumnName(), false);
having_step->setStepDescription("HAVING");
query_plan.addStep(std::move(having_step));
@ -2118,7 +2128,7 @@ void InterpreterSelectQuery::executeTotalsAndHaving(
query_plan.getCurrentDataStream(),
overflow_row,
expression,
has_having ? getSelectQuery().having()->getColumnName(context->getSettingsRef()) : "",
has_having ? getSelectQuery().having()->getColumnName() : "",
settings.totals_mode,
settings.totals_auto_threshold,
final);
@ -2429,7 +2439,10 @@ void InterpreterSelectQuery::executePreLimit(QueryPlan & query_plan, bool do_not
}
auto limit = std::make_unique<LimitStep>(query_plan.getCurrentDataStream(), limit_length, limit_offset);
limit->setStepDescription("preliminary LIMIT");
if (do_not_skip_offset)
limit->setStepDescription("preliminary LIMIT (with OFFSET)");
else
limit->setStepDescription("preliminary LIMIT (without OFFSET)");
query_plan.addStep(std::move(limit));
}
}
@ -2443,7 +2456,7 @@ void InterpreterSelectQuery::executeLimitBy(QueryPlan & query_plan)
Names columns;
for (const auto & elem : query.limitBy()->children)
columns.emplace_back(elem->getColumnName(context->getSettingsRef()));
columns.emplace_back(elem->getColumnName());
UInt64 length = getLimitUIntValue(query.limitByLength(), context, "LIMIT");
UInt64 offset = (query.limitByOffset() ? getLimitUIntValue(query.limitByOffset(), context, "OFFSET") : 0);

View File

@ -57,6 +57,7 @@ NamesAndTypesList QueryLogElement::getNamesAndTypes()
{"current_database", std::make_shared<DataTypeString>()},
{"query", std::make_shared<DataTypeString>()},
{"formatted_query", std::make_shared<DataTypeString>()},
{"normalized_query_hash", std::make_shared<DataTypeUInt64>()},
{"query_kind", std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>())},
{"databases", std::make_shared<DataTypeArray>(
@ -151,6 +152,7 @@ void QueryLogElement::appendToBlock(MutableColumns & columns) const
columns[i++]->insertData(current_database.data(), current_database.size());
columns[i++]->insertData(query.data(), query.size());
columns[i++]->insertData(formatted_query.data(), formatted_query.size());
columns[i++]->insert(normalized_query_hash);
columns[i++]->insertData(query_kind.data(), query_kind.size());

View File

@ -51,6 +51,7 @@ struct QueryLogElement
String current_database;
String query;
String formatted_query;
UInt64 normalized_query_hash{};
String query_kind;
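The point of normalized_query_hash is to let queries that differ only in literal values be grouped together in system.query_log. The real hashing is done by normalizedQueryHash<false>() a few hunks below; the snippet here is only a toy illustration of the idea, with a deliberately naive normalization (strip quoted strings and digit runs) that is an assumption, not ClickHouse's actual algorithm:

#include <cctype>
#include <cstdint>
#include <functional>
#include <iostream>
#include <string>

/// Toy normalization: replace quoted strings and runs of digits with '?',
/// so queries that differ only in literals hash to the same value.
uint64_t toyNormalizedQueryHash(const std::string & query)
{
    std::string normalized;
    bool in_string = false;
    for (char c : query)
    {
        if (c == '\'')
        {
            if (!in_string)
                normalized += '?';
            in_string = !in_string;
            continue;
        }
        if (in_string)
            continue;
        if (std::isdigit(static_cast<unsigned char>(c)))
        {
            if (normalized.empty() || normalized.back() != '?')
                normalized += '?';
            continue;
        }
        normalized += c;
    }
    return std::hash<std::string>{}(normalized);
}

int main()
{
    /// Prints 1: both queries normalize to "SELECT * FROM t WHERE id = ?"
    std::cout << (toyNormalizedQueryHash("SELECT * FROM t WHERE id = 1")
               == toyNormalizedQueryHash("SELECT * FROM t WHERE id = 42")) << '\n';
}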

View File

@ -609,6 +609,27 @@ std::vector<const ASTFunction *> getWindowFunctions(ASTPtr & query, const ASTSel
return data.window_functions;
}
class MarkTupleLiteralsAsLegacyData
{
public:
using TypeToVisit = ASTLiteral;
static void visit(ASTLiteral & literal, ASTPtr &)
{
if (literal.value.getType() == Field::Types::Tuple)
literal.use_legacy_column_name_of_tuple = true;
}
};
using MarkTupleLiteralsAsLegacyMatcher = OneTypeMatcher<MarkTupleLiteralsAsLegacyData>;
using MarkTupleLiteralsAsLegacyVisitor = InDepthNodeVisitor<MarkTupleLiteralsAsLegacyMatcher, true>;
void markTupleLiteralsAsLegacy(ASTPtr & query)
{
MarkTupleLiteralsAsLegacyVisitor::Data data;
MarkTupleLiteralsAsLegacyVisitor(data).visit(query);
}
}
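MarkTupleLiteralsAsLegacyVisitor follows the usual OneTypeMatcher/InDepthNodeVisitor pattern: walk the whole AST once and apply an action to every node of a single concrete type, here setting the per-literal flag that ASTLiteral later consults when building the column name. A stripped-down sketch of the pattern over a hypothetical miniature AST (not the real IAST classes):

#include <iostream>
#include <memory>
#include <vector>

/// Hypothetical miniature AST: just enough to show the traversal pattern.
struct Node
{
    virtual ~Node() = default;
    std::vector<std::shared_ptr<Node>> children;
};

struct Literal : Node
{
    bool is_tuple = false;
    bool use_legacy_column_name_of_tuple = false;  /// same idea as the flag in ASTLiteral
};

/// "InDepthNodeVisitor": depth-first walk applying Action::visit to every node.
template <typename Action>
void visitInDepth(const std::shared_ptr<Node> & node)
{
    Action::visit(*node);
    for (const auto & child : node->children)
        visitInDepth<Action>(child);
}

/// "OneTypeMatcher": the action only reacts to one concrete node type.
struct MarkTupleLiteralsAsLegacy
{
    static void visit(Node & node)
    {
        if (auto * literal = dynamic_cast<Literal *>(&node); literal && literal->is_tuple)
            literal->use_legacy_column_name_of_tuple = true;
    }
};

int main()
{
    auto root = std::make_shared<Node>();
    auto tuple_literal = std::make_shared<Literal>();
    tuple_literal->is_tuple = true;
    root->children.push_back(tuple_literal);

    visitInDepth<MarkTupleLiteralsAsLegacy>(root);
    std::cout << tuple_literal->use_legacy_column_name_of_tuple << '\n';  /// prints 1
}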
TreeRewriterResult::TreeRewriterResult(
@ -927,6 +948,9 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect(
/// Executing scalar subqueries - replacing them with constant values.
executeScalarSubqueries(query, getContext(), subquery_depth, result.scalars, select_options.only_analyze);
if (settings.legacy_column_name_of_tuple_literal)
markTupleLiteralsAsLegacy(query);
TreeOptimizer::apply(query, result, tables_with_columns, getContext());
/// array_join_alias_to_name, array_join_result_to_source.
@ -994,6 +1018,9 @@ TreeRewriterResultPtr TreeRewriter::analyze(
/// Executing scalar subqueries. Column defaults could be a scalar subquery.
executeScalarSubqueries(query, getContext(), 0, result.scalars, false);
if (settings.legacy_column_name_of_tuple_literal)
markTupleLiteralsAsLegacy(query);
TreeOptimizer::optimizeIf(query, result.aliases, settings.optimize_if_chain_to_multiif);
if (allow_aggregations)

View File

@ -39,7 +39,7 @@ std::pair<Field, std::shared_ptr<const IDataType>> evaluateConstantExpression(co
if (context->getSettingsRef().normalize_function_names)
FunctionNameNormalizer().visit(ast.get());
String name = ast->getColumnName(context->getSettingsRef());
String name = ast->getColumnName();
auto syntax_result = TreeRewriter(context).analyze(ast, source_columns);
ExpressionActionsPtr expr_for_constant_folding = ExpressionAnalyzer(ast, syntax_result, context).getConstActions();

View File

@ -265,7 +265,11 @@ static void onExceptionBeforeStart(const String & query_for_logging, ContextPtr
// Try log query_kind if ast is valid
if (ast)
{
elem.query_kind = ast->getQueryKindString();
if (settings.log_formatted_queries)
elem.formatted_query = queryToString(ast);
}
// We don't calculate databases, tables and columns when the query isn't able to start
@ -641,6 +645,8 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
elem.current_database = context->getCurrentDatabase();
elem.query = query_for_logging;
if (settings.log_formatted_queries)
elem.formatted_query = queryToString(ast);
elem.normalized_query_hash = normalizedQueryHash<false>(query_for_logging);
elem.client_info = client_info;

View File

@ -24,16 +24,6 @@ namespace ErrorCodes
}
void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr) const
{
appendColumnNameImpl(ostr, nullptr);
}
void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const
{
appendColumnNameImpl(ostr, &settings);
}
void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr, const Settings * settings) const
{
if (name == "view")
throw Exception("Table function view cannot be used as an expression", ErrorCodes::UNEXPECTED_EXPRESSION);
@ -48,10 +38,7 @@ void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr, const Settings * sett
if (it != parameters->children.begin())
writeCString(", ", ostr);
if (settings)
(*it)->appendColumnName(ostr, *settings);
else
(*it)->appendColumnName(ostr);
(*it)->appendColumnName(ostr);
}
writeChar(')', ostr);
}
@ -64,10 +51,7 @@ void ASTFunction::appendColumnNameImpl(WriteBuffer & ostr, const Settings * sett
if (it != arguments->children.begin())
writeCString(", ", ostr);
if (settings)
(*it)->appendColumnName(ostr, *settings);
else
(*it)->appendColumnName(ostr);
(*it)->appendColumnName(ostr);
}
}

View File

@ -54,10 +54,6 @@ public:
protected:
void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override;
void appendColumnNameImpl(WriteBuffer & ostr) const override;
void appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const override;
private:
void appendColumnNameImpl(WriteBuffer & ostr, const Settings * settings) const;
};

View File

@ -50,16 +50,14 @@ String FieldVisitorToColumnName::operator() (const Tuple & x) const
}
void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const
{
if (settings.legacy_column_name_of_tuple_literal)
appendColumnNameImplLegacy(ostr);
else
appendColumnNameImpl(ostr);
}
void ASTLiteral::appendColumnNameImpl(WriteBuffer & ostr) const
{
if (use_legacy_column_name_of_tuple)
{
appendColumnNameImplLegacy(ostr);
return;
}
/// 100 - just arbitrary value.
constexpr auto min_elements_for_hashing = 100;

View File

@ -33,6 +33,10 @@ public:
*/
String unique_column_name;
/// For compatibility reasons in distributed queries,
/// we may need to use legacy column name for tuple literal.
bool use_legacy_column_name_of_tuple = false;
/** Get the text that identifies this element. */
String getID(char delim) const override { return "Literal" + (delim + applyVisitor(FieldVisitorDump(), value)); }
@ -44,7 +48,6 @@ protected:
void formatImplWithoutAlias(const FormatSettings & settings, FormatState &, FormatStateStacked) const override;
void appendColumnNameImpl(WriteBuffer & ostr) const override;
void appendColumnNameImpl(WriteBuffer & ostr, const Settings & settings) const override;
private:
/// Legacy version of 'appendColumnNameImpl'. It differs only with tuple literals.

View File

@ -48,14 +48,6 @@ void ASTWithAlias::appendColumnName(WriteBuffer & ostr) const
appendColumnNameImpl(ostr);
}
void ASTWithAlias::appendColumnName(WriteBuffer & ostr, const Settings & settings) const
{
if (prefer_alias_to_column_name && !alias.empty())
writeString(alias, ostr);
else
appendColumnNameImpl(ostr, settings);
}
void ASTWithAlias::appendColumnNameWithoutAlias(WriteBuffer & ostr) const
{
appendColumnNameImpl(ostr);

View File

@ -21,10 +21,8 @@ public:
using IAST::IAST;
void appendColumnName(WriteBuffer & ostr) const final;
void appendColumnName(WriteBuffer & ostr, const Settings & settings) const final;
void appendColumnNameWithoutAlias(WriteBuffer & ostr) const final;
String getAliasOrColumnName() const override { return alias.empty() ? getColumnName() : alias; }
String getAliasOrColumnName(const Settings & settings) const override { return alias.empty() ? getColumnName(settings) : alias; }
String tryGetAlias() const override { return alias; }
void setAlias(const String & to) override { alias = to; }
@ -35,7 +33,6 @@ public:
protected:
virtual void appendColumnNameImpl(WriteBuffer & ostr) const = 0;
virtual void appendColumnNameImpl(WriteBuffer & ostr, const Settings &) const { appendColumnNameImpl(ostr); }
};
/// helper for setting aliases and chaining result to other functions

View File

@ -109,14 +109,6 @@ String IAST::getColumnName() const
}
String IAST::getColumnName(const Settings & settings) const
{
WriteBufferFromOwnString write_buffer;
appendColumnName(write_buffer, settings);
return write_buffer.str();
}
String IAST::getColumnNameWithoutAlias() const
{
WriteBufferFromOwnString write_buffer;

View File

@ -42,7 +42,6 @@ public:
/** Get the canonical name of the column if the element is a column */
String getColumnName() const;
String getColumnName(const Settings & settings) const;
/** Same as the above but ensure no alias names are used. This is for index analysis */
String getColumnNameWithoutAlias() const;
@ -52,8 +51,6 @@ public:
throw Exception("Trying to get name of not a column: " + getID(), ErrorCodes::LOGICAL_ERROR);
}
virtual void appendColumnName(WriteBuffer & ostr, const Settings &) const { appendColumnName(ostr); }
virtual void appendColumnNameWithoutAlias(WriteBuffer &) const
{
throw Exception("Trying to get name of not a column: " + getID(), ErrorCodes::LOGICAL_ERROR);
@ -61,7 +58,6 @@ public:
/** Get the alias, if any, or the canonical name of the column, if it is not. */
virtual String getAliasOrColumnName() const { return getColumnName(); }
virtual String getAliasOrColumnName(const Settings & settings) const { return getColumnName(settings); }
/** Get the alias, if any, or an empty string if it does not exist, or if the element does not support aliases. */
virtual String tryGetAlias() const { return String(); }

View File

@ -992,17 +992,14 @@ void ReadFromMergeTree::initializePipeline(QueryPipeline & pipeline, const Build
});
}
Block cur_header = result_projection ? result_projection->getResultColumns()
: pipe.getHeader();
Block cur_header = pipe.getHeader();
auto append_actions = [&result_projection, &cur_header](ActionsDAGPtr actions)
auto append_actions = [&result_projection](ActionsDAGPtr actions)
{
if (!result_projection)
result_projection = std::move(actions);
else
result_projection = ActionsDAG::merge(std::move(*result_projection), std::move(*actions));
cur_header = result_projection->getResultColumns();
};
/// By the way, if a distributed query or query to a Merge table is made, then the `_sample_factor` column can have different values.
@ -1017,6 +1014,9 @@ void ReadFromMergeTree::initializePipeline(QueryPipeline & pipeline, const Build
append_actions(std::move(adding_column));
}
if (result_projection)
cur_header = result_projection->updateHeader(cur_header);
/// Extra columns may be returned (for example, if sampling is used).
/// Convert pipe to step header structure.
if (!isCompatibleHeader(cur_header, getOutputStream().header))

View File

@ -49,7 +49,7 @@ void SourceWithProgress::setProcessListElement(QueryStatus * elem)
void SourceWithProgress::work()
{
if (!limits.speed_limits.checkTimeLimit(total_stopwatch.elapsed(), limits.timeout_overflow_mode))
if (!limits.speed_limits.checkTimeLimit(total_stopwatch, limits.timeout_overflow_mode))
{
cancel();
}

View File

@ -32,7 +32,7 @@ void LimitsCheckingTransform::transform(Chunk & chunk)
info.started = true;
}
if (!limits.speed_limits.checkTimeLimit(info.total_stopwatch.elapsed(), limits.timeout_overflow_mode))
if (!limits.speed_limits.checkTimeLimit(info.total_stopwatch, limits.timeout_overflow_mode))
{
stopReading();
return;
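Both call sites above now pass the Stopwatch itself to checkTimeLimit() instead of a pre-computed elapsed() value. A plausible motivation (an assumption here, not stated in this diff) is that the check can then avoid reading the clock at all when no time limit is configured; the sketch below shows that shape with hypothetical stand-ins for Stopwatch and the speed-limits struct:

#include <chrono>
#include <cstdint>
#include <iostream>

/// Hypothetical stand-in for Stopwatch.
struct Stopwatch
{
    std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now();
    uint64_t elapsedNanoseconds() const
    {
        return std::chrono::duration_cast<std::chrono::nanoseconds>(
            std::chrono::steady_clock::now() - start).count();
    }
};

/// Hypothetical stand-in for the execution speed limits.
struct SpeedLimits
{
    uint64_t max_execution_time_ns = 0;  /// 0 means "no limit"

    /// Taking the stopwatch (not a precomputed elapsed value) lets the check
    /// skip touching the clock on the hot path when no limit is set.
    bool checkTimeLimit(const Stopwatch & watch) const
    {
        if (max_execution_time_ns == 0)
            return true;
        return watch.elapsedNanoseconds() < max_execution_time_ns;
    }
};

int main()
{
    Stopwatch watch;
    SpeedLimits limits;                   /// no limit: the clock is never read
    std::cout << limits.checkTimeLimit(watch) << '\n';

    limits.max_execution_time_ns = 1;     /// 1 ns: will almost certainly be exceeded
    std::cout << limits.checkTimeLimit(watch) << '\n';
}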

View File

@ -195,8 +195,8 @@ KeeperTCPHandler::KeeperTCPHandler(IServer & server_, const Poco::Net::StreamSoc
, log(&Poco::Logger::get("NuKeeperTCPHandler"))
, global_context(Context::createCopy(server.context()))
, keeper_dispatcher(global_context->getKeeperStorageDispatcher())
, operation_timeout(0, global_context->getConfigRef().getUInt("test_keeper_server.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000)
, session_timeout(0, global_context->getConfigRef().getUInt("test_keeper_server.session_timeout_ms", Coordination::DEFAULT_SESSION_TIMEOUT_MS) * 1000)
, operation_timeout(0, global_context->getConfigRef().getUInt("keeper_server.operation_timeout_ms", Coordination::DEFAULT_OPERATION_TIMEOUT_MS) * 1000)
, session_timeout(0, global_context->getConfigRef().getUInt("keeper_server.session_timeout_ms", Coordination::DEFAULT_SESSION_TIMEOUT_MS) * 1000)
, poll_wrapper(std::make_unique<SocketInterruptablePollWrapper>(socket_))
, responses(std::make_unique<ThreadSafeResponseQueue>())
{

View File

@ -3213,8 +3213,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc
if (!partition_ast.value)
{
if (!MergeTreePartInfo::validatePartitionID(partition_ast.id, format_version))
throw Exception("Invalid partition format: " + partition_ast.id, ErrorCodes::INVALID_PARTITION_VALUE);
MergeTreePartInfo::validatePartitionID(partition_ast.id, format_version);
return partition_ast.id;
}
@ -3225,10 +3224,7 @@ String MergeTreeData::getPartitionIDFromQuery(const ASTPtr & ast, ContextPtr loc
if (partition_lit && partition_lit->value.getType() == Field::Types::String)
{
String partition_id = partition_lit->value.get<String>();
if (partition_id.size() != 6 || !std::all_of(partition_id.begin(), partition_id.end(), isNumericASCII))
throw Exception(
"Invalid partition format: " + partition_id + ". Partition should consist of 6 digits: YYYYMM",
ErrorCodes::INVALID_PARTITION_VALUE);
MergeTreePartInfo::validatePartitionID(partition_id, format_version);
return partition_id;
}
}

View File

@ -9,6 +9,7 @@ namespace DB
namespace ErrorCodes
{
extern const int BAD_DATA_PART_NAME;
extern const int INVALID_PARTITION_VALUE;
}
@ -21,38 +22,25 @@ MergeTreePartInfo MergeTreePartInfo::fromPartName(const String & part_name, Merg
}
bool MergeTreePartInfo::validatePartitionID(const String & partition_id, MergeTreeDataFormatVersion format_version)
void MergeTreePartInfo::validatePartitionID(const String & partition_id, MergeTreeDataFormatVersion format_version)
{
if (partition_id.empty())
return false;
ReadBufferFromString in(partition_id);
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Partition id is empty");
if (format_version < MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING)
{
UInt32 min_yyyymmdd = 0;
UInt32 max_yyyymmdd = 0;
if (!tryReadIntText(min_yyyymmdd, in)
|| !checkChar('_', in)
|| !tryReadIntText(max_yyyymmdd, in)
|| !checkChar('_', in))
{
return false;
}
if (partition_id.size() != 6 || !std::all_of(partition_id.begin(), partition_id.end(), isNumericASCII))
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE,
"Invalid partition format: {}. Partition should consist of 6 digits: YYYYMM",
partition_id);
}
else
{
while (!in.eof())
{
char c;
readChar(c, in);
if (c == '_')
break;
}
auto is_valid_char = [](char c) { return c == '-' || isAlphaNumericASCII(c); };
if (!std::all_of(partition_id.begin(), partition_id.end(), is_valid_char))
throw Exception(ErrorCodes::INVALID_PARTITION_VALUE, "Invalid partition format: {}", partition_id);
}
return in.eof();
}
bool MergeTreePartInfo::tryParsePartName(const String & part_name, MergeTreePartInfo * part_info, MergeTreeDataFormatVersion format_version)
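The rewritten validatePartitionID now throws instead of returning bool, and the accepted format depends on the data format version: the pre-custom-partitioning format requires exactly six digits (YYYYMM), while custom partitioning accepts ASCII letters, digits and '-'. A standalone sketch of just those two rules (plain C++, with an illustrative enum in place of MergeTreeDataFormatVersion; ClickHouse itself uses its own ASCII-only character helpers rather than <cctype>):

#include <algorithm>
#include <cctype>
#include <iostream>
#include <stdexcept>
#include <string>

enum class FormatVersion { Old, CustomPartitioning };  /// illustrative stand-in

void validatePartitionID(const std::string & partition_id, FormatVersion format_version)
{
    if (partition_id.empty())
        throw std::invalid_argument("Partition id is empty");

    if (format_version == FormatVersion::Old)
    {
        if (partition_id.size() != 6
            || !std::all_of(partition_id.begin(), partition_id.end(),
                            [](unsigned char c) { return std::isdigit(c); }))
            throw std::invalid_argument(
                "Invalid partition format: " + partition_id + ". Partition should consist of 6 digits: YYYYMM");
    }
    else
    {
        auto is_valid_char = [](unsigned char c) { return c == '-' || std::isalnum(c); };
        if (!std::all_of(partition_id.begin(), partition_id.end(), is_valid_char))
            throw std::invalid_argument("Invalid partition format: " + partition_id);
    }
}

int main()
{
    validatePartitionID("202108", FormatVersion::Old);               /// ok: YYYYMM
    validatePartitionID("all-1", FormatVersion::CustomPartitioning); /// ok: letters, digits, '-'
    try
    {
        validatePartitionID("2021-08", FormatVersion::Old);          /// throws: not 6 digits
    }
    catch (const std::exception & e)
    {
        std::cout << e.what() << '\n';
    }
}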

View File

@ -88,7 +88,7 @@ struct MergeTreePartInfo
}
/// Simple sanity check for partition ID. Checks that it is not too long or too short and does not contain a lot of '_'.
static bool validatePartitionID(const String & partition_id, MergeTreeDataFormatVersion format_version);
static void validatePartitionID(const String & partition_id, MergeTreeDataFormatVersion format_version);
static MergeTreePartInfo fromPartName(const String & part_name, MergeTreeDataFormatVersion format_version); // -V1071

View File

@ -124,7 +124,7 @@ struct Settings;
M(UInt64, concurrent_part_removal_threshold, 100, "Activate concurrent part removal (see 'max_part_removal_threads') only if the number of inactive data parts is at least this.", 0) \
M(String, storage_policy, "default", "Name of storage disk policy", 0) \
M(Bool, allow_nullable_key, false, "Allow Nullable types as primary keys.", 0) \
M(Bool, allow_remote_fs_zero_copy_replication, false, "Allow Zero-copy replication over remote fs", 0) \
M(Bool, allow_remote_fs_zero_copy_replication, true, "Allow Zero-copy replication over remote fs", 0) \
M(Bool, remove_empty_parts, true, "Remove empty parts after they were pruned by TTL, mutation, or collapsing merge algorithm", 0) \
M(Bool, assign_part_uuids, false, "Generate UUIDs for parts. Before enabling check that all replicas support new format.", 0) \
M(Int64, max_partitions_to_read, -1, "Limit the max number of partitions that can be accessed in one query. <= 0 means unlimited. This setting is the default that can be overridden by the query-level setting with the same name.", 0) \

View File

@ -144,9 +144,14 @@ void ReplicatedMergeTreeMergeStrategyPicker::refreshState()
if (current_replica_index_tmp < 0 || active_replicas_tmp.size() < 2)
{
LOG_WARNING(storage.log, "Can't find current replica in the active replicas list, or too few active replicas to use execute_merges_on_single_replica_time_threshold!");
/// we can reset the settings w/o lock (it's atomic)
execute_merges_on_single_replica_time_threshold = 0;
if (execute_merges_on_single_replica_time_threshold > 0)
{
LOG_WARNING(storage.log, "Can't find current replica in the active replicas list, or too few active replicas to use 'execute_merges_on_single_replica_time_threshold'");
/// we can reset the settings w/o lock (it's atomic)
execute_merges_on_single_replica_time_threshold = 0;
}
/// The default value of remote_fs_execute_merges_on_single_replica_time_threshold is not 0,
/// so we do not log a warning here.
remote_fs_execute_merges_on_single_replica_time_threshold = 0;
return;
}

View File

@ -284,86 +284,6 @@ void replaceConstantExpressions(
visitor.visit(node);
}
/// This is the implementation of optimize_distributed_group_by_sharding_key.
/// It returns up to which stage the query can be processed on a shard, which
/// is one of the following:
/// - QueryProcessingStage::Complete
/// - QueryProcessingStage::WithMergeableStateAfterAggregation
/// - QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit
/// - none (in this case regular WithMergeableState should be used)
std::optional<QueryProcessingStage::Enum> getOptimizedQueryProcessingStage(const SelectQueryInfo & query_info, bool extremes, const Names & sharding_key_columns)
{
const auto & select = query_info.query->as<ASTSelectQuery &>();
auto sharding_block_has = [&](const auto & exprs) -> bool
{
std::unordered_set<std::string> expr_columns;
for (auto & expr : exprs)
{
auto id = expr->template as<ASTIdentifier>();
if (!id)
continue;
expr_columns.emplace(id->name());
}
for (const auto & column : sharding_key_columns)
{
if (!expr_columns.contains(column))
return false;
}
return true;
};
// GROUP BY qualifiers
// - TODO: WITH TOTALS can be implemented
// - TODO: WITH ROLLUP can be implemented (I guess)
if (select.group_by_with_totals || select.group_by_with_rollup || select.group_by_with_cube)
return {};
// Window functions are not supported.
if (query_info.has_window)
return {};
// TODO: extremes support can be implemented
if (extremes)
return {};
// DISTINCT
if (select.distinct)
{
if (!sharding_block_has(select.select()->children))
return {};
}
// GROUP BY
const ASTPtr group_by = select.groupBy();
if (!group_by)
{
if (!select.distinct)
return {};
}
else
{
if (!sharding_block_has(group_by->children))
return {};
}
// ORDER BY
const ASTPtr order_by = select.orderBy();
if (order_by)
return QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit;
// LIMIT BY
// LIMIT
// OFFSET
if (select.limitBy() || select.limitLength() || select.limitOffset())
return QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit;
// Only simple SELECT FROM GROUP BY sharding_key can use Complete state.
return QueryProcessingStage::Complete;
}
size_t getClusterQueriedNodes(const Settings & settings, const ClusterPtr & cluster)
{
size_t num_local_shards = cluster->getLocalShardCount();
@ -527,13 +447,12 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(
{
/// NOTE: distributed_group_by_no_merge=1 does not respect distributed_push_down_limit
/// (since in this case queries processed separately and the initiator is just a proxy in this case).
if (to_stage != QueryProcessingStage::Complete)
throw Exception("Queries with distributed_group_by_no_merge=1 should be processed to Complete stage", ErrorCodes::LOGICAL_ERROR);
return QueryProcessingStage::Complete;
}
}
if (settings.distributed_push_down_limit)
return QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit;
/// Nested distributed query cannot return Complete stage,
/// since the parent query need to aggregate the results after.
if (to_stage == QueryProcessingStage::WithMergeableState)
@ -542,24 +461,107 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(
/// If there is only one node, the query can be fully processed by the
/// shard, initiator will work as a proxy only.
if (getClusterQueriedNodes(settings, cluster) == 1)
return QueryProcessingStage::Complete;
if (settings.optimize_skip_unused_shards &&
settings.optimize_distributed_group_by_sharding_key &&
has_sharding_key &&
(settings.allow_nondeterministic_optimize_skip_unused_shards || sharding_key_is_deterministic))
{
auto stage = getOptimizedQueryProcessingStage(query_info, settings.extremes, sharding_key_expr->getRequiredColumns());
if (stage)
{
LOG_DEBUG(log, "Force processing stage to {}", QueryProcessingStage::toString(*stage));
return *stage;
}
/// In case the query was processed to
/// WithMergeableStateAfterAggregation/WithMergeableStateAfterAggregationAndLimit
/// (which are greater the Complete stage)
/// we cannot return Complete (will break aliases and similar),
/// relevant for Distributed over Distributed
return std::max(to_stage, QueryProcessingStage::Complete);
}
auto optimized_stage = getOptimizedQueryProcessingStage(query_info, settings);
if (optimized_stage)
{
if (*optimized_stage == QueryProcessingStage::Complete)
return std::min(to_stage, *optimized_stage);
return *optimized_stage;
}
return QueryProcessingStage::WithMergeableState;
}
std::optional<QueryProcessingStage::Enum> StorageDistributed::getOptimizedQueryProcessingStage(const SelectQueryInfo & query_info, const Settings & settings) const
{
bool optimize_sharding_key_aggregation =
settings.optimize_skip_unused_shards &&
settings.optimize_distributed_group_by_sharding_key &&
has_sharding_key &&
(settings.allow_nondeterministic_optimize_skip_unused_shards || sharding_key_is_deterministic);
QueryProcessingStage::Enum default_stage = QueryProcessingStage::WithMergeableStateAfterAggregation;
if (settings.distributed_push_down_limit)
default_stage = QueryProcessingStage::WithMergeableStateAfterAggregationAndLimit;
const auto & select = query_info.query->as<ASTSelectQuery &>();
auto expr_contains_sharding_key = [&](const auto & exprs) -> bool
{
std::unordered_set<std::string> expr_columns;
for (auto & expr : exprs)
{
auto id = expr->template as<ASTIdentifier>();
if (!id)
continue;
expr_columns.emplace(id->name());
}
for (const auto & column : sharding_key_expr->getRequiredColumns())
{
if (!expr_columns.contains(column))
return false;
}
return true;
};
// GROUP BY qualifiers
// - TODO: WITH TOTALS can be implemented
// - TODO: WITH ROLLUP can be implemented (I guess)
if (select.group_by_with_totals || select.group_by_with_rollup || select.group_by_with_cube)
return {};
// Window functions are not supported.
if (query_info.has_window)
return {};
// TODO: extremes support can be implemented
if (settings.extremes)
return {};
// DISTINCT
if (select.distinct)
{
if (!optimize_sharding_key_aggregation || !expr_contains_sharding_key(select.select()->children))
return {};
}
// GROUP BY
const ASTPtr group_by = select.groupBy();
if (!query_info.syntax_analyzer_result->aggregates.empty() || group_by)
{
if (!optimize_sharding_key_aggregation || !group_by || !expr_contains_sharding_key(group_by->children))
return {};
}
// LIMIT BY
if (const ASTPtr limit_by = select.limitBy())
{
if (!optimize_sharding_key_aggregation || !expr_contains_sharding_key(limit_by->children))
return {};
}
// ORDER BY
if (const ASTPtr order_by = select.orderBy())
return default_stage;
// LIMIT
// OFFSET
if (select.limitLength() || select.limitOffset())
return default_stage;
// Only simple SELECT FROM GROUP BY sharding_key can use Complete state.
return QueryProcessingStage::Complete;
}
Pipe StorageDistributed::read(
const Names & column_names,
const StorageMetadataPtr & metadata_snapshot,

View File

@ -177,6 +177,24 @@ private:
ClusterPtr
skipUnusedShards(ClusterPtr cluster, const ASTPtr & query_ptr, const StorageMetadataPtr & metadata_snapshot, ContextPtr context) const;
/// This method returns the optimal query processing stage.
///
/// Here is the list of stages (from the less optimal to more optimal):
/// - WithMergeableState
/// - WithMergeableStateAfterAggregation
/// - WithMergeableStateAfterAggregationAndLimit
/// - Complete
///
/// Some simple queries without GROUP BY/DISTINCT can use a more optimal stage.
///
/// Also, with optimize_distributed_group_by_sharding_key=1, queries
/// with GROUP BY/DISTINCT on the sharding key can use a more optimal stage.
/// (see also optimize_skip_unused_shards/allow_nondeterministic_optimize_skip_unused_shards)
///
/// @return QueryProcessingStage or empty std::optional
/// (in this case regular WithMergeableState should be used)
std::optional<QueryProcessingStage::Enum> getOptimizedQueryProcessingStage(const SelectQueryInfo & query_info, const Settings & settings) const;
size_t getRandomShardIndex(const Cluster::ShardsInfo & shards);
const DistributedSettings & getDistributedSettingsRef() const { return distributed_settings; }
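To make the stage list above concrete: for a table sharded by user_id, a plain SELECT ... GROUP BY user_id can be answered completely on each shard (every group lives on exactly one shard) when the sharding-key optimization is enabled, while a GROUP BY on any other column cannot go past WithMergeableState; ORDER BY or LIMIT caps the result at the after-aggregation stages. A condensed sketch of that decision, mirroring the checks in getOptimizedQueryProcessingStage (illustrative types, not the real interface):

#include <iostream>
#include <optional>

enum class Stage { WithMergeableState, AfterAggregation, AfterAggregationAndLimit, Complete };

/// Illustrative query shape; the real code inspects the AST and settings.
struct QueryShape
{
    bool has_group_by = false;
    bool group_by_covers_sharding_key = false;
    bool has_order_by = false;
    bool has_limit_or_offset = false;
    bool push_down_limit = true;  /// distributed_push_down_limit
};

std::optional<Stage> optimizedStage(const QueryShape & q, bool sharding_key_optimization_enabled)
{
    const Stage default_stage = q.push_down_limit ? Stage::AfterAggregationAndLimit
                                                  : Stage::AfterAggregation;

    /// GROUP BY not covered by the sharding key: shards may only return mergeable state.
    if (q.has_group_by && !(sharding_key_optimization_enabled && q.group_by_covers_sharding_key))
        return {};
    if (q.has_order_by || q.has_limit_or_offset)
        return default_stage;
    return Stage::Complete;
}

int main()
{
    QueryShape by_sharding_key;
    by_sharding_key.has_group_by = true;
    by_sharding_key.group_by_covers_sharding_key = true;

    QueryShape by_other_column;
    by_other_column.has_group_by = true;

    std::cout << (optimizedStage(by_sharding_key, true) == Stage::Complete) << '\n';  /// prints 1
    std::cout << optimizedStage(by_other_column, true).has_value() << '\n';           /// prints 0
}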

View File

@ -50,6 +50,7 @@ const char * auto_config_build[]
"USE_LDAP", "@USE_LDAP@",
"TZDATA_VERSION", "@TZDATA_VERSION@",
"USE_KRB5", "@USE_KRB5@",
"USE_BZIP2", "@USE_BZIP2@",
nullptr, nullptr
};

File diff suppressed because it is too large

View File

@ -277,7 +277,7 @@ if __name__ == "__main__":
--volume={base_cfg}:/clickhouse-config --volume={cases_dir}:/ClickHouse/tests/integration \
--volume={src_dir}/Server/grpc_protos:/ClickHouse/src/Server/grpc_protos \
{dockerd_internal_volume} -e DOCKER_CLIENT_TIMEOUT=300 -e COMPOSE_HTTP_TIMEOUT=600 \
{env_tags} {env_cleanup} -e PYTEST_OPTS='{parallel} {opts} {tests_list}' {img} {command}".format(
{env_tags} {env_cleanup} -e PYTEST_OPTS='{parallel} {opts} {tests_list} -vvv' {img} {command}".format(
net=net,
tty=tty,
bin=args.binary,

View File

@ -100,6 +100,7 @@ def test_load_ids(ch_cluster):
if instance.is_built_with_memory_sanitizer():
pytest.skip("Memory Sanitizer cannot work with third-party shared libraries")
instance.query('DROP DICTIONARY IF EXISTS lib_dict_c')
instance.query('''
CREATE DICTIONARY lib_dict_c (key UInt64, value1 UInt64, value2 UInt64, value3 UInt64)
PRIMARY KEY key SOURCE(library(PATH '/etc/clickhouse-server/config.d/dictionaries_lib/dict_lib.so'))
@ -263,6 +264,7 @@ def test_bridge_dies_with_parent(ch_cluster):
assert clickhouse_pid is None
assert bridge_pid is None
instance.start_clickhouse(20)
instance.query('DROP DICTIONARY lib_dict_c')
if __name__ == '__main__':

View File

@ -68,17 +68,22 @@ def drop_table(cluster):
# S3 request will be failed for an appropriate part file write.
FILES_PER_PART_BASE = 5 # partition.dat, default_compression_codec.txt, count.txt, columns.txt, checksums.txt
FILES_PER_PART_WIDE = FILES_PER_PART_BASE + 1 + 1 + 3 * 2 # Primary index, MinMax, Mark and data file for column(s)
# In debug build there are additional requests (from MergeTreeDataPartWriterWide.cpp:554 due to additional validation).
FILES_PER_PART_WIDE_DEBUG = 2 # Additional requests to S3 in debug build
FILES_PER_PART_COMPACT = FILES_PER_PART_BASE + 1 + 1 + 2
FILES_PER_PART_COMPACT_DEBUG = 0
@pytest.mark.parametrize(
"min_bytes_for_wide_part,request_count",
"min_bytes_for_wide_part,request_count,debug_request_count",
[
(0, FILES_PER_PART_WIDE),
(1024 * 1024, FILES_PER_PART_COMPACT)
(0, FILES_PER_PART_WIDE, FILES_PER_PART_WIDE_DEBUG),
(1024 * 1024, FILES_PER_PART_COMPACT, FILES_PER_PART_COMPACT_DEBUG)
]
)
def test_write_failover(cluster, min_bytes_for_wide_part, request_count):
def test_write_failover(cluster, min_bytes_for_wide_part, request_count, debug_request_count):
node = cluster.instances["node"]
node.query(
@ -95,17 +100,24 @@ def test_write_failover(cluster, min_bytes_for_wide_part, request_count):
.format(min_bytes_for_wide_part)
)
for request in range(request_count + 1):
is_debug_mode = False
success_count = 0
for request in range(request_count + debug_request_count + 1):
# Fail N-th request to S3.
fail_request(cluster, request + 1)
data = "('2020-03-01',0,'data'),('2020-03-01',1,'data')"
positive = request == request_count
positive = request >= (request_count + debug_request_count if is_debug_mode else request_count)
try:
node.query("INSERT INTO s3_failover_test VALUES {}".format(data))
assert positive, "Insert query should be failed, request {}".format(request)
success_count += 1
except QueryRuntimeException as e:
if not is_debug_mode and positive:
is_debug_mode = True
positive = False
assert not positive, "Insert query shouldn't be failed, request {}".format(request)
assert str(e).find("Expected Error") != -1, "Unexpected error {}".format(str(e))
@ -114,7 +126,9 @@ def test_write_failover(cluster, min_bytes_for_wide_part, request_count):
fail_request(cluster, 0)
assert node.query("CHECK TABLE s3_failover_test") == '1\n'
assert node.query("SELECT * FROM s3_failover_test FORMAT Values") == data
assert success_count > 1 or node.query("SELECT * FROM s3_failover_test FORMAT Values") == data
assert success_count == (1 if is_debug_mode else debug_request_count + 1), "Insert query should be successful at least once"
# Check that second data part move is ended successfully if first attempt was failed.

View File

@ -59,7 +59,6 @@
<merge_tree>
<min_bytes_for_wide_part>1024000</min_bytes_for_wide_part>
<old_parts_lifetime>1</old_parts_lifetime>
<allow_remote_fs_zero_copy_replication>1</allow_remote_fs_zero_copy_replication>
</merge_tree>
<remote_servers>

View File

@ -21,7 +21,6 @@
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
<allow_remote_fs_zero_copy_replication>1</allow_remote_fs_zero_copy_replication>
</merge_tree>
<remote_servers>

View File

@ -66,7 +66,6 @@
<merge_tree>
<min_bytes_for_wide_part>1024</min_bytes_for_wide_part>
<old_parts_lifetime>1</old_parts_lifetime>
<allow_remote_fs_zero_copy_replication>1</allow_remote_fs_zero_copy_replication>
</merge_tree>
<remote_servers>

View File

@ -2,7 +2,6 @@ import json
import os.path as p
import random
import socket
import subprocess
import threading
import time
import logging
@ -181,32 +180,6 @@ def avro_confluent_message(schema_registry_client, value):
})
return serializer.encode_record_with_schema('test_subject', schema, value)
# Since everything is async and shaky when receiving messages from Kafka,
# we may want to try and check results multiple times in a loop.
def kafka_check_result(result, check=False, ref_file='test_kafka_json.reference'):
fpath = p.join(p.dirname(__file__), ref_file)
with open(fpath) as reference:
if check:
assert TSV(result) == TSV(reference)
else:
return TSV(result) == TSV(reference)
def describe_consumer_group(kafka_cluster, name):
admin_client = KafkaAdminClient(bootstrap_servers="localhost:{}".format(kafka_cluster.kafka_port))
consumer_groups = admin_client.describe_consumer_groups([name])
res = []
for member in consumer_groups[0].members:
member_info = {}
member_info['member_id'] = member.member_id
member_info['client_id'] = member.client_id
member_info['client_host'] = member.client_host
member_topics_assignment = []
for (topic, partitions) in member.member_assignment.assignment:
member_topics_assignment.append({'topic': topic, 'partitions': partitions})
member_info['assignment'] = member_topics_assignment
res.append(member_info)
return res
# Fixtures
@pytest.fixture(scope="module")
@ -262,7 +235,7 @@ kafka_topic_old old
kafka_check_result(result, True)
members = describe_consumer_group('old')
members = describe_consumer_group(kafka_cluster, 'old')
assert members[0]['client_id'] == 'ClickHouse-instance-test-kafka'
# text_desc = kafka_cluster.exec_in_container(kafka_cluster.get_container_id('kafka1'),"kafka-consumer-groups --bootstrap-server localhost:9092 --describe --members --group old --verbose"))
@ -302,7 +275,7 @@ def test_kafka_settings_new_syntax(kafka_cluster):
kafka_check_result(result, True)
members = describe_consumer_group('new')
members = describe_consumer_group(kafka_cluster, 'new')
assert members[0]['client_id'] == 'instance test 1234'
@ -734,82 +707,6 @@ def kafka_setup_teardown():
# Tests
def test_kafka_settings_old_syntax(kafka_cluster):
assert TSV(instance.query("SELECT * FROM system.macros WHERE macro like 'kafka%' ORDER BY macro",
ignore_error=True)) == TSV('''kafka_broker kafka1
kafka_client_id instance
kafka_format_json_each_row JSONEachRow
kafka_group_name_new new
kafka_group_name_old old
kafka_topic_new new
kafka_topic_old old
''')
instance.query('''
CREATE TABLE test.kafka (key UInt64, value UInt64)
ENGINE = Kafka('{kafka_broker}:19092', '{kafka_topic_old}', '{kafka_group_name_old}', '{kafka_format_json_each_row}', '\\n');
''')
# Don't insert malformed messages since old settings syntax
# doesn't support skipping of broken messages.
messages = []
for i in range(50):
messages.append(json.dumps({'key': i, 'value': i}))
kafka_produce(kafka_cluster, 'old', messages)
result = ''
while True:
result += instance.query('SELECT * FROM test.kafka', ignore_error=True)
if kafka_check_result(result):
break
kafka_check_result(result, True)
members = describe_consumer_group(kafka_cluster, 'old')
assert members[0]['client_id'] == 'ClickHouse-instance-test-kafka'
# text_desc = kafka_cluster.exec_in_container(kafka_cluster.get_container_id('kafka1'),"kafka-consumer-groups --bootstrap-server localhost:{} --describe --members --group old --verbose".format(cluster.kafka_port)))
def test_kafka_settings_new_syntax(kafka_cluster):
instance.query('''
CREATE TABLE test.kafka (key UInt64, value UInt64)
ENGINE = Kafka
SETTINGS kafka_broker_list = '{kafka_broker}:19092',
kafka_topic_list = '{kafka_topic_new}',
kafka_group_name = '{kafka_group_name_new}',
kafka_format = '{kafka_format_json_each_row}',
kafka_row_delimiter = '\\n',
kafka_client_id = '{kafka_client_id} test 1234',
kafka_skip_broken_messages = 1;
''')
messages = []
for i in range(25):
messages.append(json.dumps({'key': i, 'value': i}))
kafka_produce(kafka_cluster, 'new', messages)
# Insert couple of malformed messages.
kafka_produce(kafka_cluster, 'new', ['}{very_broken_message,'])
kafka_produce(kafka_cluster, 'new', ['}another{very_broken_message,'])
messages = []
for i in range(25, 50):
messages.append(json.dumps({'key': i, 'value': i}))
kafka_produce(kafka_cluster, 'new', messages)
result = ''
while True:
result += instance.query('SELECT * FROM test.kafka', ignore_error=True)
if kafka_check_result(result):
break
kafka_check_result(result, True)
members = describe_consumer_group(kafka_cluster, 'new')
assert members[0]['client_id'] == 'instance test 1234'
def test_kafka_issue11308(kafka_cluster):
# Check that matview does respect Kafka SETTINGS
kafka_produce(kafka_cluster, 'issue11308', ['{"t": 123, "e": {"x": "woof"} }', '{"t": 123, "e": {"x": "woof"} }',
@ -1585,18 +1482,21 @@ def test_kafka_virtual_columns_with_materialized_view(kafka_cluster):
messages.append(json.dumps({'key': i, 'value': i}))
kafka_produce(kafka_cluster, 'virt2', messages, 0)
while True:
result = instance.query('SELECT kafka_key, key, topic, value, offset, partition, timestamp FROM test.view')
if kafka_check_result(result, False, 'test_kafka_virtual2.reference'):
break
sql = 'SELECT kafka_key, key, topic, value, offset, partition, timestamp FROM test.view ORDER BY kafka_key'
result = instance.query(sql)
iterations = 0
while not kafka_check_result(result, False, 'test_kafka_virtual2.reference') and iterations < 10:
time.sleep(3)
iterations += 1
result = instance.query(sql)
kafka_check_result(result, True, 'test_kafka_virtual2.reference')
instance.query('''
DROP TABLE test.consumer;
DROP TABLE test.view;
''')
kafka_check_result(result, True, 'test_kafka_virtual2.reference')
def test_kafka_insert(kafka_cluster):
instance.query('''

View File

@ -1,22 +1,22 @@
d Date
k UInt64
i32 Int32
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 10 42
d Date
k UInt64
i32 Int32
dt DateTime(\'UTC\')
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\')\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\')\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
dt DateTime(\'UTC\')
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\')\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\')\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 9 41 1992-01-01 08:00:00
2015-01-01 10 42 1970-01-01 00:00:00
d Date
@ -25,14 +25,14 @@ i32 Int32
dt DateTime(\'UTC\')
n.ui8 Array(UInt8)
n.s Array(String)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
dt DateTime(\'UTC\')
n.ui8 Array(UInt8)
n.s Array(String)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14']
2015-01-01 9 41 1992-01-01 08:00:00 [] []
2015-01-01 10 42 1970-01-01 00:00:00 [] []
@ -43,7 +43,7 @@ dt DateTime(\'UTC\')
n.ui8 Array(UInt8)
n.s Array(String)
n.d Array(Date)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
@ -51,7 +51,7 @@ dt DateTime(\'UTC\')
n.ui8 Array(UInt8)
n.s Array(String)
n.d Array(Date)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] ['2000-01-01','2000-01-01','2000-01-03']
2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] ['1970-01-01','1970-01-01','1970-01-01']
2015-01-01 9 41 1992-01-01 08:00:00 [] [] []
@ -64,7 +64,7 @@ n.ui8 Array(UInt8)
n.s Array(String)
n.d Array(Date)
s String DEFAULT \'0\'
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `n.d` Array(Date),\n `s` String DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `n.d` Array(Date),\n `s` String DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
@ -73,7 +73,7 @@ n.ui8 Array(UInt8)
n.s Array(String)
n.d Array(Date)
s String DEFAULT \'0\'
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `n.d` Array(Date),\n `s` String DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `n.d` Array(Date),\n `s` String DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 6 38 2014-07-15 13:26:50 [10,20,30] ['asd','qwe','qwe'] ['2000-01-01','2000-01-01','2000-01-03'] 100500
2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] ['2000-01-01','2000-01-01','2000-01-03'] 0
2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] ['1970-01-01','1970-01-01','1970-01-01'] 0
@ -86,7 +86,7 @@ dt DateTime(\'UTC\')
n.ui8 Array(UInt8)
n.s Array(String)
s Int64 DEFAULT \'0\'
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `s` Int64 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `s` Int64 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
@ -94,7 +94,7 @@ dt DateTime(\'UTC\')
n.ui8 Array(UInt8)
n.s Array(String)
s Int64 DEFAULT \'0\'
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `s` Int64 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `s` Int64 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 6 38 2014-07-15 13:26:50 [10,20,30] ['asd','qwe','qwe'] 100500
2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] 0
2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] 0
@ -108,7 +108,7 @@ n.ui8 Array(UInt8)
n.s Array(String)
s UInt32 DEFAULT \'0\'
n.d Array(Date)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `s` UInt32 DEFAULT \'0\',\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `s` UInt32 DEFAULT \'0\',\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
@ -117,7 +117,7 @@ n.ui8 Array(UInt8)
n.s Array(String)
s UInt32 DEFAULT \'0\'
n.d Array(Date)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `s` UInt32 DEFAULT \'0\',\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.ui8` Array(UInt8),\n `n.s` Array(String),\n `s` UInt32 DEFAULT \'0\',\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 6 38 2014-07-15 13:26:50 [10,20,30] ['asd','qwe','qwe'] 100500 ['1970-01-01','1970-01-01','1970-01-01']
2015-01-01 7 39 2014-07-14 13:26:50 [10,20,30] ['120','130','140'] 0 ['1970-01-01','1970-01-01','1970-01-01']
2015-01-01 8 40 2012-12-12 12:12:12 [1,2,3] ['12','13','14'] 0 ['1970-01-01','1970-01-01','1970-01-01']
@ -129,14 +129,14 @@ i32 Int32
dt DateTime(\'UTC\')
n.s Array(String)
s UInt32 DEFAULT \'0\'
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.s` Array(String),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.s` Array(String),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
dt DateTime(\'UTC\')
n.s Array(String)
s UInt32 DEFAULT \'0\'
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.s` Array(String),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `n.s` Array(String),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 6 38 2014-07-15 13:26:50 ['asd','qwe','qwe'] 100500
2015-01-01 7 39 2014-07-14 13:26:50 ['120','130','140'] 0
2015-01-01 8 40 2012-12-12 12:12:12 ['12','13','14'] 0
@ -147,13 +147,13 @@ k UInt64
i32 Int32
dt DateTime(\'UTC\')
s UInt32 DEFAULT \'0\'
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
dt DateTime(\'UTC\')
s UInt32 DEFAULT \'0\'
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 6 38 2014-07-15 13:26:50 100500
2015-01-01 7 39 2014-07-14 13:26:50 0
2015-01-01 8 40 2012-12-12 12:12:12 0
@ -166,7 +166,7 @@ dt DateTime(\'UTC\')
s UInt32 DEFAULT \'0\'
n.s Array(String)
n.d Array(Date)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\',\n `n.s` Array(String),\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\',\n `n.s` Array(String),\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
@ -174,7 +174,7 @@ dt DateTime(\'UTC\')
s UInt32 DEFAULT \'0\'
n.s Array(String)
n.d Array(Date)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\',\n `n.s` Array(String),\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\',\n `n.s` Array(String),\n `n.d` Array(Date)\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 6 38 2014-07-15 13:26:50 100500 [] []
2015-01-01 7 39 2014-07-14 13:26:50 0 [] []
2015-01-01 8 40 2012-12-12 12:12:12 0 [] []
@ -185,13 +185,13 @@ k UInt64
i32 Int32
dt DateTime(\'UTC\')
s UInt32 DEFAULT \'0\'
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
dt DateTime(\'UTC\')
s UInt32 DEFAULT \'0\'
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` DateTime(\'UTC\'),\n `s` UInt32 DEFAULT \'0\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 6 38 2014-07-15 13:26:50 100500
2015-01-01 7 39 2014-07-14 13:26:50 0
2015-01-01 8 40 2012-12-12 12:12:12 0
@ -202,13 +202,13 @@ k UInt64
i32 Int32
dt Date
s DateTime(\'UTC\') DEFAULT \'1970-01-01 00:00:00\'
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` Date,\n `s` DateTime(\'UTC\') DEFAULT \'1970-01-01 00:00:00\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r1\', d, k, 8192)
CREATE TABLE default.replicated_alter1\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` Date,\n `s` DateTime(\'UTC\') DEFAULT \'1970-01-01 00:00:00\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r1\', d, k, 8192)
d Date
k UInt64
i32 Int32
dt Date
s DateTime(\'UTC\') DEFAULT \'1970-01-01 00:00:00\'
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` Date,\n `s` DateTime(\'UTC\') DEFAULT \'1970-01-01 00:00:00\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/test_00062/alter\', \'r2\', d, k, 8192)
CREATE TABLE default.replicated_alter2\n(\n `d` Date,\n `k` UInt64,\n `i32` Int32,\n `dt` Date,\n `s` DateTime(\'UTC\') DEFAULT \'1970-01-01 00:00:00\'\n)\nENGINE = ReplicatedMergeTree(\'/clickhouse/tables/default/test_00062/alter\', \'r2\', d, k, 8192)
2015-01-01 6 38 2014-07-15 1970-01-02 03:55:00
2015-01-01 7 39 2014-07-14 1970-01-01 00:00:00
2015-01-01 8 40 2012-12-12 1970-01-01 00:00:00
@ -3,8 +3,8 @@ DROP TABLE IF EXISTS replicated_alter2;
SET replication_alter_partitions_sync = 2;
CREATE TABLE replicated_alter1 (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_00062/alter', 'r1', d, k, 8192);
CREATE TABLE replicated_alter2 (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_00062/alter', 'r2', d, k, 8192);
CREATE TABLE replicated_alter1 (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/{database}/test_00062/alter', 'r1', d, k, 8192);
CREATE TABLE replicated_alter2 (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/{database}/test_00062/alter', 'r2', d, k, 8192);
INSERT INTO replicated_alter1 VALUES ('2015-01-01', 10, 42);
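The change repeated across these test files is the same one: the ZooKeeper path given to ReplicatedMergeTree gains a {database} macro, so the same test can run in several databases at once without different runs colliding on one shared ZooKeeper node. The .reference files above show the macro already expanded to default. A minimal sketch of the pattern, with an illustrative table name and the modern PARTITION BY / ORDER BY syntax rather than the deprecated positional arguments used in these tests:

DROP TABLE IF EXISTS example_replicated;

-- {database} is expanded by the server when the table is created, so each
-- database that hosts this table gets its own ZooKeeper subtree.
CREATE TABLE example_replicated (d Date, k UInt64, i32 Int32)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/example_replicated', 'r1')
PARTITION BY toYYYYMM(d)
ORDER BY k;

INSERT INTO example_replicated VALUES ('2015-01-01', 10, 42);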
@ -56,7 +56,7 @@ CREATE TABLE aggregating_merge_tree_with_sampling
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = AggregatingMergeTree(d, sipHash64(a) + b, (a, sipHash64(a) + b), 111);
CREATE TABLE replicated_merge_tree
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_00083/01/replicated_merge_tree/', 'r1', d, (a, b), 111);
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_00083/01/replicated_merge_tree/', 'r1', d, (a, b), 111);
CREATE TABLE replicated_collapsing_merge_tree
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/tables/test_00083/01/replicated_collapsing_merge_tree/', 'r1', d, (a, b), 111, y);
CREATE TABLE replicated_versioned_collapsing_merge_tree
@ -69,7 +69,7 @@ CREATE TABLE replicated_aggregating_merge_tree
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/tables/test_00083/01/replicated_aggregating_merge_tree/', 'r1', d, (a, b), 111);
CREATE TABLE replicated_merge_tree_with_sampling
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_00083/01/replicated_merge_tree_with_sampling/', 'r1', d, sipHash64(a) + b, (a, sipHash64(a) + b), 111);
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_00083/01/replicated_merge_tree_with_sampling/', 'r1', d, sipHash64(a) + b, (a, sipHash64(a) + b), 111);
CREATE TABLE replicated_collapsing_merge_tree_with_sampling
(d Date, a String, b UInt8, x String, y Int8, z UInt32) ENGINE = ReplicatedCollapsingMergeTree('/clickhouse/tables/test_00083/01/replicated_collapsing_merge_tree_with_sampling/', 'r1', d, sipHash64(a) + b, (a, sipHash64(a) + b), 111, y);
CREATE TABLE replicated_versioned_collapsing_merge_tree_with_sampling
@ -1,12 +1,12 @@
DROP TABLE IF EXISTS alter_00121;
CREATE TABLE alter_00121 (d Date, x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/alter_00121/t1', 'r1', d, (d), 8192);
CREATE TABLE alter_00121 (d Date, x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/alter_00121/t1', 'r1', d, (d), 8192);
INSERT INTO alter_00121 VALUES ('2014-01-01', 1);
ALTER TABLE alter_00121 DROP COLUMN x;
DROP TABLE alter_00121;
CREATE TABLE alter_00121 (d Date) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/alter_00121/t2', 'r1', d, (d), 8192);
CREATE TABLE alter_00121 (d Date) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/alter_00121/t2', 'r1', d, (d), 8192);
INSERT INTO alter_00121 VALUES ('2014-01-01');
SELECT * FROM alter_00121 ORDER BY d;
@ -25,6 +25,8 @@ ORDER BY LIMIT
LIMIT BY
0
1
0
1
LIMIT BY LIMIT
0
GROUP BY ORDER BY
@ -1,5 +1,5 @@
DROP TABLE IF EXISTS primary_key;
CREATE TABLE primary_key (d Date DEFAULT today(), x Int8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_00215/primary_key', 'r1', d, -x, 1);
CREATE TABLE primary_key (d Date DEFAULT today(), x Int8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_00215/primary_key', 'r1', d, -x, 1);
INSERT INTO primary_key (x) VALUES (1), (2), (3);
INSERT INTO primary_key (x) VALUES (1), (3), (2);
@ -1,5 +1,5 @@
DROP TABLE IF EXISTS deduplication;
CREATE TABLE deduplication (d Date DEFAULT '2015-01-01', x Int8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_00226/deduplication', 'r1', d, x, 1);
CREATE TABLE deduplication (d Date DEFAULT '2015-01-01', x Int8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_00226/deduplication', 'r1', d, x, 1);
INSERT INTO deduplication (x) VALUES (1);
INSERT INTO deduplication (x) VALUES (1);
@ -3,8 +3,8 @@ SET replication_alter_partitions_sync = 2;
DROP TABLE IF EXISTS attach_r1;
DROP TABLE IF EXISTS attach_r2;
CREATE TABLE attach_r1 (d Date) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_00236/01/attach', 'r1', d, d, 8192);
CREATE TABLE attach_r2 (d Date) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_00236/01/attach', 'r2', d, d, 8192);
CREATE TABLE attach_r1 (d Date) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_00236/01/attach', 'r1', d, d, 8192);
CREATE TABLE attach_r2 (d Date) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test_00236/01/attach', 'r2', d, d, 8192);
INSERT INTO attach_r1 VALUES ('2014-01-01'), ('2014-02-01'), ('2014-03-01');
@ -24,8 +24,8 @@ SELECT '===Replicated case===';
DROP TABLE IF EXISTS clear_column1;
DROP TABLE IF EXISTS clear_column2;
SELECT sleep(1) FORMAT Null;
CREATE TABLE clear_column1 (d Date, i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/test_00446/tables/clear_column', '1') ORDER BY d PARTITION by toYYYYMM(d) SETTINGS min_bytes_for_wide_part = 0;
CREATE TABLE clear_column2 (d Date, i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/test_00446/tables/clear_column', '2') ORDER BY d PARTITION by toYYYYMM(d) SETTINGS min_bytes_for_wide_part = 0;
CREATE TABLE clear_column1 (d Date, i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/{database}/test_00446/tables/clear_column', '1') ORDER BY d PARTITION by toYYYYMM(d) SETTINGS min_bytes_for_wide_part = 0;
CREATE TABLE clear_column2 (d Date, i Int64) ENGINE = ReplicatedMergeTree('/clickhouse/{database}/test_00446/tables/clear_column', '2') ORDER BY d PARTITION by toYYYYMM(d) SETTINGS min_bytes_for_wide_part = 0;
INSERT INTO clear_column1 (d) VALUES ('2000-01-01'), ('2000-02-01');
SYSTEM SYNC REPLICA clear_column2;
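Several of these tests create two replicas of one table on the same server and call SYSTEM SYNC REPLICA so that the second replica is only read after it has fetched everything written through the first. A minimal sketch of that two-replica pattern, with hypothetical table names rather than ones from the tests:

DROP TABLE IF EXISTS sync_sketch_r1;
DROP TABLE IF EXISTS sync_sketch_r2;

-- Both tables share one ZooKeeper path and differ only in the replica name.
CREATE TABLE sync_sketch_r1 (d Date, i Int64)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/sync_sketch', 'r1')
PARTITION BY toYYYYMM(d) ORDER BY d;

CREATE TABLE sync_sketch_r2 (d Date, i Int64)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/sync_sketch', 'r2')
PARTITION BY toYYYYMM(d) ORDER BY d;

INSERT INTO sync_sketch_r1 (d) VALUES ('2000-01-01'), ('2000-02-01');

-- Block until r2 has replicated the parts written to r1, then read from it.
SYSTEM SYNC REPLICA sync_sketch_r2;
SELECT count() FROM sync_sketch_r2;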
@ -4,8 +4,8 @@ SELECT '*** Not partitioned ***';
DROP TABLE IF EXISTS not_partitioned_replica1_00502;
DROP TABLE IF EXISTS not_partitioned_replica2_00502;
CREATE TABLE not_partitioned_replica1_00502(x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/not_partitioned_00502', '1') ORDER BY x;
CREATE TABLE not_partitioned_replica2_00502(x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/not_partitioned_00502', '2') ORDER BY x;
CREATE TABLE not_partitioned_replica1_00502(x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/not_partitioned_00502', '1') ORDER BY x;
CREATE TABLE not_partitioned_replica2_00502(x UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/not_partitioned_00502', '2') ORDER BY x;
INSERT INTO not_partitioned_replica1_00502 VALUES (1), (2), (3);
INSERT INTO not_partitioned_replica1_00502 VALUES (4), (5);
@ -30,8 +30,8 @@ SELECT '*** Partitioned by week ***';
DROP TABLE IF EXISTS partitioned_by_week_replica1;
DROP TABLE IF EXISTS partitioned_by_week_replica2;
CREATE TABLE partitioned_by_week_replica1(d Date, x UInt8) ENGINE ReplicatedMergeTree('/clickhouse/tables/test/partitioned_by_week_00502', '1') PARTITION BY toMonday(d) ORDER BY x;
CREATE TABLE partitioned_by_week_replica2(d Date, x UInt8) ENGINE ReplicatedMergeTree('/clickhouse/tables/test/partitioned_by_week_00502', '2') PARTITION BY toMonday(d) ORDER BY x;
CREATE TABLE partitioned_by_week_replica1(d Date, x UInt8) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test/partitioned_by_week_00502', '1') PARTITION BY toMonday(d) ORDER BY x;
CREATE TABLE partitioned_by_week_replica2(d Date, x UInt8) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test/partitioned_by_week_00502', '2') PARTITION BY toMonday(d) ORDER BY x;
-- 2000-01-03 belongs to a different week than 2000-01-01 and 2000-01-02
INSERT INTO partitioned_by_week_replica1 VALUES ('2000-01-01', 1), ('2000-01-02', 2), ('2000-01-03', 3);
@ -57,8 +57,8 @@ SELECT '*** Partitioned by a (Date, UInt8) tuple ***';
DROP TABLE IF EXISTS partitioned_by_tuple_replica1_00502;
DROP TABLE IF EXISTS partitioned_by_tuple_replica2_00502;
CREATE TABLE partitioned_by_tuple_replica1_00502(d Date, x UInt8, y UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/partitioned_by_tuple_00502', '1') ORDER BY x PARTITION BY (d, x);
CREATE TABLE partitioned_by_tuple_replica2_00502(d Date, x UInt8, y UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/partitioned_by_tuple_00502', '2') ORDER BY x PARTITION BY (d, x);
CREATE TABLE partitioned_by_tuple_replica1_00502(d Date, x UInt8, y UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/partitioned_by_tuple_00502', '1') ORDER BY x PARTITION BY (d, x);
CREATE TABLE partitioned_by_tuple_replica2_00502(d Date, x UInt8, y UInt8) ENGINE = ReplicatedMergeTree('/clickhouse/tables/{database}/test/partitioned_by_tuple_00502', '2') ORDER BY x PARTITION BY (d, x);
INSERT INTO partitioned_by_tuple_replica1_00502 VALUES ('2000-01-01', 1, 1), ('2000-01-01', 2, 2), ('2000-01-02', 1, 3);
INSERT INTO partitioned_by_tuple_replica1_00502 VALUES ('2000-01-02', 1, 4), ('2000-01-01', 1, 5);
@ -84,8 +84,8 @@ SELECT '*** Partitioned by String ***';
DROP TABLE IF EXISTS partitioned_by_string_replica1;
DROP TABLE IF EXISTS partitioned_by_string_replica2;
CREATE TABLE partitioned_by_string_replica1(s String, x UInt8) ENGINE ReplicatedMergeTree('/clickhouse/tables/test/partitioned_by_string_00502', '1') PARTITION BY s ORDER BY x;
CREATE TABLE partitioned_by_string_replica2(s String, x UInt8) ENGINE ReplicatedMergeTree('/clickhouse/tables/test/partitioned_by_string_00502', '2') PARTITION BY s ORDER BY x;
CREATE TABLE partitioned_by_string_replica1(s String, x UInt8) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test/partitioned_by_string_00502', '1') PARTITION BY s ORDER BY x;
CREATE TABLE partitioned_by_string_replica2(s String, x UInt8) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test/partitioned_by_string_00502', '2') PARTITION BY s ORDER BY x;
INSERT INTO partitioned_by_string_replica1 VALUES ('aaa', 1), ('aaa', 2), ('bbb', 3);
INSERT INTO partitioned_by_string_replica1 VALUES ('bbb', 4), ('aaa', 5);
@ -110,8 +110,8 @@ SELECT '*** Table without columns with fixed size ***';
DROP TABLE IF EXISTS without_fixed_size_columns_replica1;
DROP TABLE IF EXISTS without_fixed_size_columns_replica2;
CREATE TABLE without_fixed_size_columns_replica1(s String) ENGINE ReplicatedMergeTree('/clickhouse/tables/test/without_fixed_size_columns_00502', '1') PARTITION BY length(s) ORDER BY s;
CREATE TABLE without_fixed_size_columns_replica2(s String) ENGINE ReplicatedMergeTree('/clickhouse/tables/test/without_fixed_size_columns_00502', '2') PARTITION BY length(s) ORDER BY s;
CREATE TABLE without_fixed_size_columns_replica1(s String) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test/without_fixed_size_columns_00502', '1') PARTITION BY length(s) ORDER BY s;
CREATE TABLE without_fixed_size_columns_replica2(s String) ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test/without_fixed_size_columns_00502', '2') PARTITION BY length(s) ORDER BY s;
INSERT INTO without_fixed_size_columns_replica1 VALUES ('a'), ('aa'), ('b'), ('cc');
@ -5,7 +5,7 @@ SELECT '*** Replicated with sampling ***';
DROP TABLE IF EXISTS replicated_with_sampling;
CREATE TABLE replicated_with_sampling(x UInt8)
ENGINE ReplicatedMergeTree('/clickhouse/tables/test_00509/replicated_with_sampling', 'r1')
ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_00509/replicated_with_sampling', 'r1')
ORDER BY x
SAMPLE BY x;
@ -72,7 +72,7 @@ SELECT '*** Table definition with SETTINGS ***';
DROP TABLE IF EXISTS with_settings;
CREATE TABLE with_settings(x UInt32)
ENGINE ReplicatedMergeTree('/clickhouse/tables/test_00509/with_settings', 'r1')
ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_00509/with_settings', 'r1')
ORDER BY x
SETTINGS replicated_can_become_leader = 0;
@ -4,9 +4,9 @@ DROP TABLE IF EXISTS with_deduplication_mv;
DROP TABLE IF EXISTS without_deduplication_mv;
CREATE TABLE with_deduplication(x UInt32)
ENGINE ReplicatedMergeTree('/clickhouse/tables/test_00510/with_deduplication', 'r1') ORDER BY x;
ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_00510/with_deduplication', 'r1') ORDER BY x;
CREATE TABLE without_deduplication(x UInt32)
ENGINE ReplicatedMergeTree('/clickhouse/tables/test_00510/without_deduplication', 'r1') ORDER BY x SETTINGS replicated_deduplication_window = 0;
ENGINE ReplicatedMergeTree('/clickhouse/tables/{database}/test_00510/without_deduplication', 'r1') ORDER BY x SETTINGS replicated_deduplication_window = 0;
CREATE MATERIALIZED VIEW with_deduplication_mv UUID '00000510-1000-4000-8000-000000000001'
ENGINE = ReplicatedAggregatingMergeTree('/clickhouse/tables/test_00510/with_deduplication_mv', 'r1') ORDER BY dummy
@ -1,6 +1,6 @@
DROP TABLE IF EXISTS deduplication_by_partition;
CREATE TABLE deduplication_by_partition(d Date, x UInt32) ENGINE =
ReplicatedMergeTree('/clickhouse/tables/test_00516/deduplication_by_partition', 'r1', d, x, 8192);
ReplicatedMergeTree('/clickhouse/tables/{database}/test_00516/deduplication_by_partition', 'r1', d, x, 8192);
INSERT INTO deduplication_by_partition VALUES ('2000-01-01', 1);
INSERT INTO deduplication_by_partition VALUES ('2000-01-01', 2), ('2000-01-01', 3);
@ -1,6 +1,6 @@
-- Check that settings are correctly passed through Distributed table
DROP TABLE IF EXISTS simple;
CREATE TABLE simple (d Int8) ENGINE = ReplicatedMergeTree('/clickhouse/test_00563/tables/simple', '1') ORDER BY d;
CREATE TABLE simple (d Int8) ENGINE = ReplicatedMergeTree('/clickhouse/{database}/test_00563/tables/simple', '1') ORDER BY d;
-- TODO: replace '127.0.0.2' -> '127.0.0.1' after a fix
INSERT INTO TABLE FUNCTION remote('127.0.0.2', currentDatabase(), 'simple') VALUES (1);
@ -1,8 +1,8 @@
DROP TABLE IF EXISTS replicated_truncate1;
DROP TABLE IF EXISTS replicated_truncate2;
CREATE TABLE replicated_truncate1 (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_00623/truncate', 'r1', d, k, 8192);
CREATE TABLE replicated_truncate2 (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_00623/truncate', 'r2', d, k, 8192);
CREATE TABLE replicated_truncate1 (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/{database}/test_00623/truncate', 'r1', d, k, 8192);
CREATE TABLE replicated_truncate2 (d Date, k UInt64, i32 Int32) ENGINE=ReplicatedMergeTree('/clickhouse/tables/{database}/test_00623/truncate', 'r2', d, k, 8192);
SELECT '======Before Truncate======';
INSERT INTO replicated_truncate1 VALUES ('2015-01-01', 10, 42);
@ -3,7 +3,7 @@ CREATE TABLE default.cast1
`x` UInt8,
`e` Enum8('hello' = 1, 'world' = 2) DEFAULT CAST(x, 'Enum8(\'hello\' = 1, \'world\' = 2)')
)
ENGINE = ReplicatedMergeTree('/clickhouse/tables/test_00643/cast', 'r1')
ENGINE = ReplicatedMergeTree('/clickhouse/tables/default/test_00643/cast', 'r1')
ORDER BY e
SETTINGS index_granularity = 8192
x UInt8
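The cast1 listing above exercises CAST inside a column DEFAULT expression. A minimal standalone sketch of that pattern, using a hypothetical table name and a plain MergeTree engine instead of the replicated one in the test:

CREATE TABLE cast_default_sketch
(
    x UInt8,
    e Enum8('hello' = 1, 'world' = 2) DEFAULT CAST(x, 'Enum8(\'hello\' = 1, \'world\' = 2)')
)
ENGINE = MergeTree
ORDER BY e;

-- Only x is supplied; e is filled in by the DEFAULT expression.
INSERT INTO cast_default_sketch (x) VALUES (1), (2);
SELECT x, e FROM cast_default_sketch ORDER BY x;   -- expected: 1 hello, 2 world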
Some files were not shown because too many files have changed in this diff.