Merge remote-tracking branch 'ClickHouse/master' into column_level_compress_block

Robert Schulze 2024-01-18 19:12:57 +00:00
commit 15700592f7
775 changed files with 12089 additions and 4744 deletions


@ -8,7 +8,6 @@ on: # yamllint disable-line rule:truthy
schedule:
- cron: '0 */6 * * *'
workflow_dispatch:
workflow_call:
jobs:
KeeperJepsenRelease:
uses: ./.github/workflows/reusable_simple_job.yml


@ -966,13 +966,20 @@ jobs:
#############################################################################################
###################################### JEPSEN TESTS #########################################
#############################################################################################
# This is special test NOT INCLUDED in FinishCheck
# When it's skipped, all dependent tasks will be skipped too.
# DO NOT add it there
Jepsen:
# This is special test NOT INCLUDED in FinishCheck
# When it's skipped, all dependent tasks will be skipped too.
# DO NOT add it there
if: ${{ !failure() && !cancelled() && contains(github.event.pull_request.labels.*.name, 'jepsen-test') }}
# we need concurrency as the job uses dedicated instances in the cloud
concurrency:
group: jepsen
if: ${{ !failure() && !cancelled() }}
needs: [RunConfig, BuilderBinRelease]
uses: ./.github/workflows/jepsen.yml
uses: ./.github/workflows/reusable_test.yml
with:
test_name: ClickHouse Keeper Jepsen
runner_type: style-checker
data: ${{ needs.RunConfig.outputs.data }}
#############################################################################################
####################################### libFuzzer ###########################################
#############################################################################################


@ -58,6 +58,8 @@ jobs:
env:
GITHUB_JOB_OVERRIDDEN: ${{inputs.test_name}}
steps:
- name: DebugInfo
uses: hmarr/debug-action@a701ed95a46e6f2fb0df25e1a558c16356fae35a
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:


@ -99,7 +99,7 @@ public:
};
}
constexpr DB::UInt64 max_uint_mask = std::numeric_limits<DB::UInt64>::max();
constexpr UInt64 max_uint_mask = std::numeric_limits<UInt64>::max();
namespace std
{
@ -114,8 +114,8 @@ namespace std
{
size_t operator()(const DB::Decimal128 & x) const
{
return std::hash<DB::Int64>()(x.value >> 64)
^ std::hash<DB::Int64>()(x.value & max_uint_mask);
return std::hash<Int64>()(x.value >> 64)
^ std::hash<Int64>()(x.value & max_uint_mask);
}
};
@ -134,8 +134,8 @@ namespace std
size_t operator()(const DB::Decimal256 & x) const
{
// FIXME temp solution
return std::hash<DB::Int64>()(static_cast<DB::Int64>(x.value >> 64 & max_uint_mask))
^ std::hash<DB::Int64>()(static_cast<DB::Int64>(x.value & max_uint_mask));
return std::hash<Int64>()(static_cast<Int64>(x.value >> 64 & max_uint_mask))
^ std::hash<Int64>()(static_cast<Int64>(x.value & max_uint_mask));
}
};
}


@ -3,15 +3,6 @@
#include <cstdint>
#include <string>
using Int8 = int8_t;
using Int16 = int16_t;
using Int32 = int32_t;
using Int64 = int64_t;
#ifndef __cpp_char8_t
using char8_t = unsigned char;
#endif
/// This is needed for more strict aliasing. https://godbolt.org/z/xpJBSb https://stackoverflow.com/a/57453713
using UInt8 = char8_t;
@ -19,24 +10,12 @@ using UInt16 = uint16_t;
using UInt32 = uint32_t;
using UInt64 = uint64_t;
using String = std::string;
namespace DB
{
using UInt8 = ::UInt8;
using UInt16 = ::UInt16;
using UInt32 = ::UInt32;
using UInt64 = ::UInt64;
using Int8 = ::Int8;
using Int16 = ::Int16;
using Int32 = ::Int32;
using Int64 = ::Int64;
using Int8 = int8_t;
using Int16 = int16_t;
using Int32 = int32_t;
using Int64 = int64_t;
using Float32 = float;
using Float64 = double;
using String = std::string;
}


@ -82,3 +82,4 @@ if (SANITIZE_COVERAGE)
endif()
set (WITHOUT_COVERAGE_FLAGS "-fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table")
set (WITHOUT_COVERAGE_FLAGS_LIST -fno-profile-instr-generate -fno-coverage-mapping -fno-sanitize-coverage=trace-pc-guard,pc-table)

contrib/NuRaft vendored

@ -1 +1 @@
Subproject commit b7ea89b817a18dc0eafc1f909d568869f02d2d04
Subproject commit 1278e32bb0d5dc489f947e002bdf8c71b0ddaa63

contrib/avro vendored

@ -1 +1 @@
Subproject commit 2fb8a8a6ec0eab9109b68abf3b4857e8c476b918
Subproject commit d43acc84d3d455b016f847d6666fbc3cd27f16a9

contrib/azure vendored

@ -1 +1 @@
Subproject commit 060c54dfb0abe869c065143303a9d3e9c54c29e3
Subproject commit e71395e44f309f97b5a486f5c2c59b82f85dd2d2


@ -44,12 +44,14 @@ set (SRCS_IOSTREAMS
"${LIBRARY_DIR}/libs/iostreams/src/gzip.cpp"
"${LIBRARY_DIR}/libs/iostreams/src/mapped_file.cpp"
"${LIBRARY_DIR}/libs/iostreams/src/zlib.cpp"
"${LIBRARY_DIR}/libs/iostreams/src/zstd.cpp"
)
add_library (_boost_iostreams ${SRCS_IOSTREAMS})
add_library (boost::iostreams ALIAS _boost_iostreams)
target_include_directories (_boost_iostreams PRIVATE ${LIBRARY_DIR})
target_link_libraries (_boost_iostreams PRIVATE ch_contrib::zlib)
target_link_libraries (_boost_iostreams PRIVATE ch_contrib::zstd)
# program_options


@ -34,9 +34,9 @@ if (OS_LINUX)
# avoid spurious latencies and additional work associated with
# MADV_DONTNEED. See
# https://github.com/ClickHouse/ClickHouse/issues/11121 for motivation.
set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000")
set (JEMALLOC_CONFIG_MALLOC_CONF "percpu_arena:percpu,oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000")
else()
set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:5000,dirty_decay_ms:5000")
set (JEMALLOC_CONFIG_MALLOC_CONF "oversize_threshold:0,muzzy_decay_ms:0,dirty_decay_ms:5000")
endif()
# CACHE variable is empty to allow changing defaults without the necessity
# to purge cache
@ -161,6 +161,9 @@ target_include_directories(_jemalloc SYSTEM PRIVATE
target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_NO_PRIVATE_NAMESPACE)
# Because our coverage callbacks call malloc, and recursive call of malloc could not work.
target_compile_options(_jemalloc PRIVATE ${WITHOUT_COVERAGE_FLAGS_LIST})
if (CMAKE_BUILD_TYPE_UC STREQUAL "DEBUG")
target_compile_definitions(_jemalloc PRIVATE
-DJEMALLOC_DEBUG=1


@ -33,7 +33,6 @@ set(SRCS
"${LIBCXX_SOURCE_DIR}/src/optional.cpp"
"${LIBCXX_SOURCE_DIR}/src/random.cpp"
"${LIBCXX_SOURCE_DIR}/src/random_shuffle.cpp"
"${LIBCXX_SOURCE_DIR}/src/regex.cpp"
"${LIBCXX_SOURCE_DIR}/src/ryu/d2fixed.cpp"
"${LIBCXX_SOURCE_DIR}/src/ryu/d2s.cpp"
"${LIBCXX_SOURCE_DIR}/src/ryu/f2s.cpp"

@ -1 +1 @@
Subproject commit 1834e42289c58402c804a87be4d489892b88f3ec
Subproject commit 2568a7cd1297c7c3044b0f3cc0c23a6f6444d856

contrib/rocksdb vendored

@ -1 +1 @@
Subproject commit 66e3cbec31400ed3a23deb878c5d7f56f990f0ae
Subproject commit dead55e60b873d5f70f0e9458fbbba2b2180f430


@ -242,7 +242,7 @@ quit
--create-query-fuzzer-runs=50 \
--queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \
$NEW_TESTS_OPT \
> >(tail -n 100000 > fuzzer.log) \
> fuzzer.log \
2>&1 &
fuzzer_pid=$!
echo "Fuzzer pid is $fuzzer_pid"
@ -390,6 +390,7 @@ rg --text -F '<Fatal>' server.log > fatal.log ||:
dmesg -T > dmesg.log ||:
zstd --threads=0 server.log
zstd --threads=0 fuzzer.log
cat > report.html <<EOF ||:
<!DOCTYPE html>
@ -413,7 +414,7 @@ p.links a { padding: 5px; margin: 3px; background: #FFF; line-height: 2; white-s
<h1>AST Fuzzer for PR <a href="https://github.com/ClickHouse/ClickHouse/pull/${PR_TO_TEST}">#${PR_TO_TEST}</a> @ ${SHA_TO_TEST}</h1>
<p class="links">
<a href="run.log">run.log</a>
<a href="fuzzer.log">fuzzer.log</a>
<a href="fuzzer.log.zst">fuzzer.log.zst</a>
<a href="server.log.zst">server.log.zst</a>
<a href="main.log">main.log</a>
<a href="dmesg.log">dmesg.log</a>


@ -11,14 +11,6 @@ RUN apt-get update -y \
npm \
&& apt-get clean
COPY s3downloader /s3downloader
ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com"
ENV DATASETS="hits visits"
# The following is already done in clickhouse/stateless-test
# RUN npm install -g azurite
# RUN npm install tslib
COPY create.sql /
COPY run.sh /
CMD ["/bin/bash", "/run.sh"]


@ -0,0 +1,333 @@
ATTACH TABLE datasets.hits_v1 UUID '78ebf6a1-d987-4579-b3ec-00c1a087b1f3'
(
WatchID UInt64,
JavaEnable UInt8,
Title String,
GoodEvent Int16,
EventTime DateTime,
EventDate Date,
CounterID UInt32,
ClientIP UInt32,
ClientIP6 FixedString(16),
RegionID UInt32,
UserID UInt64,
CounterClass Int8,
OS UInt8,
UserAgent UInt8,
URL String,
Referer String,
URLDomain String,
RefererDomain String,
Refresh UInt8,
IsRobot UInt8,
RefererCategories Array(UInt16),
URLCategories Array(UInt16),
URLRegions Array(UInt32),
RefererRegions Array(UInt32),
ResolutionWidth UInt16,
ResolutionHeight UInt16,
ResolutionDepth UInt8,
FlashMajor UInt8,
FlashMinor UInt8,
FlashMinor2 String,
NetMajor UInt8,
NetMinor UInt8,
UserAgentMajor UInt16,
UserAgentMinor FixedString(2),
CookieEnable UInt8,
JavascriptEnable UInt8,
IsMobile UInt8,
MobilePhone UInt8,
MobilePhoneModel String,
Params String,
IPNetworkID UInt32,
TraficSourceID Int8,
SearchEngineID UInt16,
SearchPhrase String,
AdvEngineID UInt8,
IsArtifical UInt8,
WindowClientWidth UInt16,
WindowClientHeight UInt16,
ClientTimeZone Int16,
ClientEventTime DateTime,
SilverlightVersion1 UInt8,
SilverlightVersion2 UInt8,
SilverlightVersion3 UInt32,
SilverlightVersion4 UInt16,
PageCharset String,
CodeVersion UInt32,
IsLink UInt8,
IsDownload UInt8,
IsNotBounce UInt8,
FUniqID UInt64,
HID UInt32,
IsOldCounter UInt8,
IsEvent UInt8,
IsParameter UInt8,
DontCountHits UInt8,
WithHash UInt8,
HitColor FixedString(1),
UTCEventTime DateTime,
Age UInt8,
Sex UInt8,
Income UInt8,
Interests UInt16,
Robotness UInt8,
GeneralInterests Array(UInt16),
RemoteIP UInt32,
RemoteIP6 FixedString(16),
WindowName Int32,
OpenerName Int32,
HistoryLength Int16,
BrowserLanguage FixedString(2),
BrowserCountry FixedString(2),
SocialNetwork String,
SocialAction String,
HTTPError UInt16,
SendTiming Int32,
DNSTiming Int32,
ConnectTiming Int32,
ResponseStartTiming Int32,
ResponseEndTiming Int32,
FetchTiming Int32,
RedirectTiming Int32,
DOMInteractiveTiming Int32,
DOMContentLoadedTiming Int32,
DOMCompleteTiming Int32,
LoadEventStartTiming Int32,
LoadEventEndTiming Int32,
NSToDOMContentLoadedTiming Int32,
FirstPaintTiming Int32,
RedirectCount Int8,
SocialSourceNetworkID UInt8,
SocialSourcePage String,
ParamPrice Int64,
ParamOrderID String,
ParamCurrency FixedString(3),
ParamCurrencyID UInt16,
GoalsReached Array(UInt32),
OpenstatServiceName String,
OpenstatCampaignID String,
OpenstatAdID String,
OpenstatSourceID String,
UTMSource String,
UTMMedium String,
UTMCampaign String,
UTMContent String,
UTMTerm String,
FromTag String,
HasGCLID UInt8,
RefererHash UInt64,
URLHash UInt64,
CLID UInt32,
YCLID UInt64,
ShareService String,
ShareURL String,
ShareTitle String,
"ParsedParams.Key1" Array(String),
"ParsedParams.Key2" Array(String),
"ParsedParams.Key3" Array(String),
"ParsedParams.Key4" Array(String),
"ParsedParams.Key5" Array(String),
"ParsedParams.ValueDouble" Array(Float64),
IslandID FixedString(16),
RequestNum UInt32,
RequestTry UInt8
)
ENGINE = MergeTree
PARTITION BY toYYYYMM(EventDate)
ORDER BY (CounterID, EventDate, intHash32(UserID))
SAMPLE BY intHash32(UserID)
SETTINGS disk = disk(type = cache, path = '/var/lib/clickhouse/filesystem_caches/', max_size = '4G',
disk = disk(type = web, endpoint = 'https://clickhouse-datasets-web.s3.us-east-1.amazonaws.com/'));
ATTACH TABLE datasets.visits_v1 UUID '5131f834-711f-4168-98a5-968b691a104b'
(
CounterID UInt32,
StartDate Date,
Sign Int8,
IsNew UInt8,
VisitID UInt64,
UserID UInt64,
StartTime DateTime,
Duration UInt32,
UTCStartTime DateTime,
PageViews Int32,
Hits Int32,
IsBounce UInt8,
Referer String,
StartURL String,
RefererDomain String,
StartURLDomain String,
EndURL String,
LinkURL String,
IsDownload UInt8,
TraficSourceID Int8,
SearchEngineID UInt16,
SearchPhrase String,
AdvEngineID UInt8,
PlaceID Int32,
RefererCategories Array(UInt16),
URLCategories Array(UInt16),
URLRegions Array(UInt32),
RefererRegions Array(UInt32),
IsYandex UInt8,
GoalReachesDepth Int32,
GoalReachesURL Int32,
GoalReachesAny Int32,
SocialSourceNetworkID UInt8,
SocialSourcePage String,
MobilePhoneModel String,
ClientEventTime DateTime,
RegionID UInt32,
ClientIP UInt32,
ClientIP6 FixedString(16),
RemoteIP UInt32,
RemoteIP6 FixedString(16),
IPNetworkID UInt32,
SilverlightVersion3 UInt32,
CodeVersion UInt32,
ResolutionWidth UInt16,
ResolutionHeight UInt16,
UserAgentMajor UInt16,
UserAgentMinor UInt16,
WindowClientWidth UInt16,
WindowClientHeight UInt16,
SilverlightVersion2 UInt8,
SilverlightVersion4 UInt16,
FlashVersion3 UInt16,
FlashVersion4 UInt16,
ClientTimeZone Int16,
OS UInt8,
UserAgent UInt8,
ResolutionDepth UInt8,
FlashMajor UInt8,
FlashMinor UInt8,
NetMajor UInt8,
NetMinor UInt8,
MobilePhone UInt8,
SilverlightVersion1 UInt8,
Age UInt8,
Sex UInt8,
Income UInt8,
JavaEnable UInt8,
CookieEnable UInt8,
JavascriptEnable UInt8,
IsMobile UInt8,
BrowserLanguage UInt16,
BrowserCountry UInt16,
Interests UInt16,
Robotness UInt8,
GeneralInterests Array(UInt16),
Params Array(String),
"Goals.ID" Array(UInt32),
"Goals.Serial" Array(UInt32),
"Goals.EventTime" Array(DateTime),
"Goals.Price" Array(Int64),
"Goals.OrderID" Array(String),
"Goals.CurrencyID" Array(UInt32),
WatchIDs Array(UInt64),
ParamSumPrice Int64,
ParamCurrency FixedString(3),
ParamCurrencyID UInt16,
ClickLogID UInt64,
ClickEventID Int32,
ClickGoodEvent Int32,
ClickEventTime DateTime,
ClickPriorityID Int32,
ClickPhraseID Int32,
ClickPageID Int32,
ClickPlaceID Int32,
ClickTypeID Int32,
ClickResourceID Int32,
ClickCost UInt32,
ClickClientIP UInt32,
ClickDomainID UInt32,
ClickURL String,
ClickAttempt UInt8,
ClickOrderID UInt32,
ClickBannerID UInt32,
ClickMarketCategoryID UInt32,
ClickMarketPP UInt32,
ClickMarketCategoryName String,
ClickMarketPPName String,
ClickAWAPSCampaignName String,
ClickPageName String,
ClickTargetType UInt16,
ClickTargetPhraseID UInt64,
ClickContextType UInt8,
ClickSelectType Int8,
ClickOptions String,
ClickGroupBannerID Int32,
OpenstatServiceName String,
OpenstatCampaignID String,
OpenstatAdID String,
OpenstatSourceID String,
UTMSource String,
UTMMedium String,
UTMCampaign String,
UTMContent String,
UTMTerm String,
FromTag String,
HasGCLID UInt8,
FirstVisit DateTime,
PredLastVisit Date,
LastVisit Date,
TotalVisits UInt32,
"TraficSource.ID" Array(Int8),
"TraficSource.SearchEngineID" Array(UInt16),
"TraficSource.AdvEngineID" Array(UInt8),
"TraficSource.PlaceID" Array(UInt16),
"TraficSource.SocialSourceNetworkID" Array(UInt8),
"TraficSource.Domain" Array(String),
"TraficSource.SearchPhrase" Array(String),
"TraficSource.SocialSourcePage" Array(String),
Attendance FixedString(16),
CLID UInt32,
YCLID UInt64,
NormalizedRefererHash UInt64,
SearchPhraseHash UInt64,
RefererDomainHash UInt64,
NormalizedStartURLHash UInt64,
StartURLDomainHash UInt64,
NormalizedEndURLHash UInt64,
TopLevelDomain UInt64,
URLScheme UInt64,
OpenstatServiceNameHash UInt64,
OpenstatCampaignIDHash UInt64,
OpenstatAdIDHash UInt64,
OpenstatSourceIDHash UInt64,
UTMSourceHash UInt64,
UTMMediumHash UInt64,
UTMCampaignHash UInt64,
UTMContentHash UInt64,
UTMTermHash UInt64,
FromHash UInt64,
WebVisorEnabled UInt8,
WebVisorActivity UInt32,
"ParsedParams.Key1" Array(String),
"ParsedParams.Key2" Array(String),
"ParsedParams.Key3" Array(String),
"ParsedParams.Key4" Array(String),
"ParsedParams.Key5" Array(String),
"ParsedParams.ValueDouble" Array(Float64),
"Market.Type" Array(UInt8),
"Market.GoalID" Array(UInt32),
"Market.OrderID" Array(String),
"Market.OrderPrice" Array(Int64),
"Market.PP" Array(UInt32),
"Market.DirectPlaceID" Array(UInt32),
"Market.DirectOrderID" Array(UInt32),
"Market.DirectBannerID" Array(UInt32),
"Market.GoodID" Array(String),
"Market.GoodName" Array(String),
"Market.GoodQuantity" Array(Int32),
"Market.GoodPrice" Array(Int64),
IslandID FixedString(16)
)
ENGINE = CollapsingMergeTree(Sign)
PARTITION BY toYYYYMM(StartDate)
ORDER BY (CounterID, StartDate, intHash32(UserID), VisitID)
SAMPLE BY intHash32(UserID)
SETTINGS disk = disk(type = cache, path = '/var/lib/clickhouse/filesystem_caches/', max_size = '4G',
disk = disk(type = web, endpoint = 'https://clickhouse-datasets-web.s3.us-east-1.amazonaws.com/'));


@ -97,21 +97,9 @@ start
setup_logs_replication
# shellcheck disable=SC2086 # No quotes because I want to split it into words.
/s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS
chmod 777 -R /var/lib/clickhouse
clickhouse-client --query "SHOW DATABASES"
clickhouse-client --query "ATTACH DATABASE datasets ENGINE = Ordinary"
service clickhouse-server restart
# Wait for server to start accepting connections
for _ in {1..120}; do
clickhouse-client --query "SELECT 1" && break
sleep 1
done
clickhouse-client --query "CREATE DATABASE datasets"
clickhouse-client --multiquery < create.sql
clickhouse-client --query "SHOW TABLES FROM datasets"
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then


@ -1,126 +0,0 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
import sys
import time
import tarfile
import logging
import argparse
import requests
import tempfile
DEFAULT_URL = "https://clickhouse-datasets.s3.amazonaws.com"
AVAILABLE_DATASETS = {
"hits": "hits_v1.tar",
"visits": "visits_v1.tar",
}
RETRIES_COUNT = 5
def _get_temp_file_name():
return os.path.join(
tempfile._get_default_tempdir(), next(tempfile._get_candidate_names())
)
def build_url(base_url, dataset):
return os.path.join(base_url, dataset, "partitions", AVAILABLE_DATASETS[dataset])
def download_with_progress(url, path):
logging.info("Downloading from %s to temp path %s", url, path)
for i in range(RETRIES_COUNT):
try:
with open(path, "wb") as f:
response = requests.get(url, stream=True)
response.raise_for_status()
total_length = response.headers.get("content-length")
if total_length is None or int(total_length) == 0:
logging.info(
"No content-length, will download file without progress"
)
f.write(response.content)
else:
dl = 0
total_length = int(total_length)
logging.info("Content length is %ld bytes", total_length)
for data in response.iter_content(chunk_size=4096):
dl += len(data)
f.write(data)
if sys.stdout.isatty():
done = int(50 * dl / total_length)
percent = int(100 * float(dl) / total_length)
sys.stdout.write(
"\r[{}{}] {}%".format(
"=" * done, " " * (50 - done), percent
)
)
sys.stdout.flush()
break
except Exception as ex:
sys.stdout.write("\n")
time.sleep(3)
logging.info("Exception while downloading %s, retry %s", ex, i + 1)
if os.path.exists(path):
os.remove(path)
else:
raise Exception(
"Cannot download dataset from {}, all retries exceeded".format(url)
)
sys.stdout.write("\n")
logging.info("Downloading finished")
def unpack_to_clickhouse_directory(tar_path, clickhouse_path):
logging.info(
"Will unpack data from temp path %s to clickhouse db %s",
tar_path,
clickhouse_path,
)
with tarfile.open(tar_path, "r") as comp_file:
comp_file.extractall(path=clickhouse_path)
logging.info("Unpack finished")
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO)
parser = argparse.ArgumentParser(
description="Simple tool for dowloading datasets for clickhouse from S3"
)
parser.add_argument(
"--dataset-names",
required=True,
nargs="+",
choices=list(AVAILABLE_DATASETS.keys()),
)
parser.add_argument("--url-prefix", default=DEFAULT_URL)
parser.add_argument("--clickhouse-data-path", default="/var/lib/clickhouse/")
args = parser.parse_args()
datasets = args.dataset_names
logging.info("Will fetch following datasets: %s", ", ".join(datasets))
for dataset in datasets:
logging.info("Processing %s", dataset)
temp_archive_path = _get_temp_file_name()
try:
download_url_for_dataset = build_url(args.url_prefix, dataset)
download_with_progress(download_url_for_dataset, temp_archive_path)
unpack_to_clickhouse_directory(temp_archive_path, args.clickhouse_data_path)
except Exception as ex:
logging.info("Some exception occured %s", str(ex))
raise
finally:
logging.info(
"Will remove downloaded file %s from filesystem if it exists",
temp_archive_path,
)
if os.path.exists(temp_archive_path):
os.remove(temp_archive_path)
logging.info("Processing of %s finished", dataset)
logging.info("Fetch finished, enjoy your tables!")


@ -46,7 +46,7 @@ RUN apt-get update -y \
p7zip-full \
&& apt-get clean
RUN pip3 install numpy scipy pandas Jinja2
RUN pip3 install numpy scipy pandas Jinja2 pyarrow
RUN mkdir -p /tmp/clickhouse-odbc-tmp \
&& wget -nv -O - ${odbc_driver_url} | tar --strip-components=1 -xz -C /tmp/clickhouse-odbc-tmp \


@ -23,8 +23,6 @@ RUN apt-get update -y \
COPY run.sh /
ENV DATASETS="hits visits"
ENV S3_URL="https://clickhouse-datasets.s3.amazonaws.com"
ENV EXPORT_S3_STORAGE_POLICIES=1
CMD ["/bin/bash", "/run.sh"]


@ -59,12 +59,11 @@ start
setup_logs_replication
# shellcheck disable=SC2086 # No quotes because I want to split it into words.
/s3downloader --url-prefix "$S3_URL" --dataset-names $DATASETS
chmod 777 -R /var/lib/clickhouse
clickhouse-client --query "ATTACH DATABASE IF NOT EXISTS datasets ENGINE = Ordinary"
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test"
clickhouse-client --query "CREATE DATABASE datasets"
clickhouse-client --multiquery < create.sql
clickhouse-client --query "SHOW TABLES FROM datasets"
clickhouse-client --query "CREATE DATABASE IF NOT EXISTS test"
stop
mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.initial.log
@ -193,7 +192,7 @@ stop
# Let's enable S3 storage by default
export USE_S3_STORAGE_FOR_MERGE_TREE=1
export $RANDOMIZE_OBJECT_KEY_TYPE=1
export RANDOMIZE_OBJECT_KEY_TYPE=1
export ZOOKEEPER_FAULT_INJECTION=1
configure


@ -78,6 +78,7 @@ remove_keeper_config "create_if_not_exists" "[01]"
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml
rm /etc/clickhouse-server/config.d/storage_conf_02963.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml
@ -117,6 +118,7 @@ sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_defau
rm /etc/clickhouse-server/config.d/merge_tree.xml
rm /etc/clickhouse-server/config.d/enable_wait_for_shutdown_replicated_tables.xml
rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml
rm /etc/clickhouse-server/config.d/storage_conf_02963.xml
rm /etc/clickhouse-server/users.d/nonconst_timezone.xml
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
rm /etc/clickhouse-server/users.d/replicated_ddl_entry.xml


@ -508,7 +508,7 @@ Indexes of type `set` can be utilized by all functions. The other index types ar
| [notEquals(!=, &lt;&gt;)](/docs/en/sql-reference/functions/comparison-functions.md/#notequals) | ✔ | ✔ | ✔ | ✔ | ✔ | ✔ |
| [like](/docs/en/sql-reference/functions/string-search-functions.md/#like) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ |
| [notLike](/docs/en/sql-reference/functions/string-search-functions.md/#notlike) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ |
| [match](/docs/en/sql-reference/functions/string-search-functions.md/#match) | ✗ | ✗ | ✔ | ✔ | ✗ | |
| [match](/docs/en/sql-reference/functions/string-search-functions.md/#match) | ✗ | ✗ | ✔ | ✔ | ✗ | |
| [startsWith](/docs/en/sql-reference/functions/string-functions.md/#startswith) | ✔ | ✔ | ✔ | ✔ | ✗ | ✔ |
| [endsWith](/docs/en/sql-reference/functions/string-functions.md/#endswith) | ✗ | ✗ | ✔ | ✔ | ✗ | ✔ |
| [multiSearchAny](/docs/en/sql-reference/functions/string-search-functions.md/#multisearchany) | ✗ | ✗ | ✔ | ✗ | ✗ | ✔ |


@ -2356,6 +2356,8 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filenam
### Arrow format settings {#parquet-format-settings}
- [output_format_arrow_low_cardinality_as_dictionary](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_low_cardinality_as_dictionary) - enable output ClickHouse LowCardinality type as Dictionary Arrow type. Default value - `false`.
- [output_format_arrow_use_64_bit_indexes_for_dictionary](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_use_64_bit_indexes_for_dictionary) - use 64-bit integer type for Dictionary indexes. Default value - `false`.
- [output_format_arrow_use_signed_indexes_for_dictionary](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_use_signed_indexes_for_dictionary) - use signed integer type for Dictionary indexes. Default value - `true`.
- [output_format_arrow_string_as_string](/docs/en/operations/settings/settings-formats.md/#output_format_arrow_string_as_string) - use Arrow String type instead of Binary for String columns. Default value - `false`.
- [input_format_arrow_case_insensitive_column_matching](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_case_insensitive_column_matching) - ignore case when matching Arrow columns with ClickHouse columns. Default value - `false`.
- [input_format_arrow_allow_missing_columns](/docs/en/operations/settings/settings-formats.md/#input_format_arrow_allow_missing_columns) - allow missing columns while reading Arrow data. Default value - `false`.


@ -0,0 +1,207 @@
---
slug: /en/operations/allocation-profiling
sidebar_label: "Allocation profiling"
title: "Allocation profiling"
---
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
# Allocation profiling
ClickHouse uses [jemalloc](https://github.com/jemalloc/jemalloc) as its global allocator that comes with some tools for allocation sampling and profiling.
To make allocation profiling more convenient, `SYSTEM` commands are provided along with 4LW commands in Keeper.
## Sampling allocations and flushing heap profiles
If we want to sample and profile allocations in `jemalloc`, we need to start ClickHouse/Keeper with profiling enabled using the `MALLOC_CONF` environment variable.
```sh
MALLOC_CONF=background_thread:true,prof:true
```
`jemalloc` will sample allocations and store the information internally.
We can tell `jemalloc` to flush the current profile by running:
<Tabs groupId="binary">
<TabItem value="clickhouse" label="ClickHouse">
SYSTEM JEMALLOC FLUSH PROFILE
</TabItem>
<TabItem value="keeper" label="Keeper">
echo jmfp | nc localhost 9181
</TabItem>
</Tabs>
By default, the heap profile file will be generated in `/tmp/jemalloc_clickhouse._pid_._seqnum_.heap` where `_pid_` is the PID of ClickHouse and `_seqnum_` is the global sequence number for the current heap profile.
For Keeper, the default file is `/tmp/jemalloc_keeper._pid_._seqnum_.heap` following the same rules.
A different location can be defined by appending the `prof_prefix` option to the `MALLOC_CONF` environment variable.
For example, if we want to generate profiles in the `/data` folder where the filename prefix will be `my_current_profile`, we can run ClickHouse/Keeper with the following environment variable:
```sh
MALLOC_CONF=background_thread:true,prof:true,prof_prefix:/data/my_current_profile
```
The generated file will append the PID and sequence number to the prefix.
## Analyzing heap profiles
After we have generated heap profiles, we need to analyze them.
For that, we need to use `jemalloc`'s tool called [jeprof](https://github.com/jemalloc/jemalloc/blob/dev/bin/jeprof.in) which can be installed in multiple ways:
- installing `jemalloc` using the system's package manager
- cloning the [jemalloc repo](https://github.com/jemalloc/jemalloc) and running autogen.sh from the root folder; this will provide you with the `jeprof` script inside the `bin` folder
:::note
`jeprof` uses `addr2line` to generate stacktraces, which can be really slow.
If that's the case, we recommend installing an [alternative implementation](https://github.com/gimli-rs/addr2line) of the tool.
```
git clone https://github.com/gimli-rs/addr2line
cd addr2line
cargo b --examples -r
cp ./target/release/examples/addr2line path/to/current/addr2line
```
:::
There are many different formats that can be generated from the heap profile using `jeprof`.
We recommend running `jeprof --help` to check the usage and the many options the tool provides.
In general, the `jeprof` command will look like this:
```sh
jeprof path/to/binary path/to/heap/profile --output_format [ > output_file]
```
If we want to compare which allocations happened between two profiles, we can set the base argument:
```sh
jeprof path/to/binary --base path/to/first/heap/profile path/to/second/heap/profile --output_format [ > output_file]
```
For example:
- if we want to generate a text file with each procedure written on a separate line:
```sh
jeprof path/to/binary path/to/heap/profile --text > result.txt
```
- if we want to generate a PDF file with a call graph:
```sh
jeprof path/to/binary path/to/heap/profile --pdf > result.pdf
```
### Generating flame graph
`jeprof` allows us to generate collapsed stacks for building flame graphs.
We need to use the `--collapsed` argument:
```sh
jeprof path/to/binary path/to/heap/profile --collapsed > result.collapsed
```
After that, we can use many different tools to visualize the collapsed stacks.
The most popular is [FlameGraph](https://github.com/brendangregg/FlameGraph), which contains a script called `flamegraph.pl`:
```sh
cat result.collapsed | /path/to/FlameGraph/flamegraph.pl --color=mem --title="Allocation Flame Graph" --width 2400 > result.svg
```
Another interesting tool is [speedscope](https://www.speedscope.app/), which allows you to analyze the collected stacks in a more interactive way.
## Controlling allocation profiler during runtime
If ClickHouse/Keeper were started with the profiler enabled, they support additional commands for disabling/enabling allocation profiling at runtime.
Using those commands, it's easier to profile only specific intervals.
Disable profiler:
<Tabs groupId="binary">
<TabItem value="clickhouse" label="ClickHouse">
SYSTEM JEMALLOC DISABLE PROFILE
</TabItem>
<TabItem value="keeper" label="Keeper">
echo jmdp | nc localhost 9181
</TabItem>
</Tabs>
Enable profiler:
<Tabs groupId="binary">
<TabItem value="clickhouse" label="ClickHouse">
SYSTEM JEMALLOC ENABLE PROFILE
</TabItem>
<TabItem value="keeper" label="Keeper">
echo jmep | nc localhost 9181
</TabItem>
</Tabs>
It's also possible to control the initial state of the profiler by setting the `prof_active` option, which is enabled by default.
For example, if we don't want to sample allocations during startup but only after we enable the profiler, we can start ClickHouse/Keeper with the following environment variable:
```sh
MALLOC_CONF=background_thread:true,prof:true,prof_active:false
```
and enable the profiler at a later point.
## Additional options for profiler
`jemalloc` has many different options available related to the profiler, which can be controlled by modifying the `MALLOC_CONF` environment variable.
For example, the interval between allocation samples can be controlled with `lg_prof_sample`.
If you want to dump the heap profile every N bytes, you can enable that with `lg_prof_interval`.
We recommend checking `jemalloc`'s [reference page](https://jemalloc.net/jemalloc.3.html) for such options.
## Other resources
ClickHouse/Keeper expose `jemalloc`-related metrics in many different ways.
:::warning Warning
It's important to be aware that none of these metrics are synchronized with each other and values may drift.
:::
### System table `asynchronous_metrics`
```sql
SELECT *
FROM system.asynchronous_metrics
WHERE metric ILIKE '%jemalloc%'
FORMAT Vertical
```
[Reference](/en/operations/system-tables/asynchronous_metrics)
### System table `jemalloc_bins`
Contains information about memory allocations done via the jemalloc allocator in different size classes (bins), aggregated from all arenas.
[Reference](/en/operations/system-tables/jemalloc_bins)
### Prometheus
All `jemalloc`-related metrics from `asynchronous_metrics` are also exposed via the Prometheus endpoint in both ClickHouse and Keeper.
[Reference](/en/operations/server-configuration-parameters/settings#prometheus)
### `jmst` 4LW command in Keeper
Keeper supports the `jmst` 4LW command, which returns [basic allocator statistics](https://github.com/jemalloc/jemalloc/wiki/Use-Case%3A-Basic-Allocator-Statistics).
Example:
```sh
echo jmst | nc localhost 9181
```


@ -65,6 +65,20 @@ With Cluster Discovery, rather than defining each node explicitly, you simply sp
<cluster_name>
<discovery>
<path>/clickhouse/discovery/cluster_name</path>
<!-- # Optional configuration parameters: -->
<!-- ## Authentication credentials to access all other nodes in cluster: -->
<!-- <user>user1</user> -->
<!-- <password>pass123</password> -->
<!-- ### Alternatively to password, interserver secret may be used: -->
<!-- <secret>secret123</secret> -->
<!-- ## Shard for current node (see below): -->
<!-- <shard>1</shard> -->
<!-- ## Observer mode (see below): -->
<!-- <observer/> -->
</discovery>
</cluster_name>
</remote_servers>


@ -29,10 +29,6 @@ Transactionally inconsistent caching is traditionally provided by client tools o
the same caching logic and configuration is often duplicated. With ClickHouse's query cache, the caching logic moves to the server side.
This reduces maintenance effort and avoids redundancy.
:::note
Security consideration: The cached query result is tied to the user executing it. Authorization checks are performed when the query is executed. This means that if there are any alterations to the user's role or permissions between the time the query is cached and when the cache is accessed, the result will not reflect these changes. We recommend using different users to distinguish between different levels of access, instead of actively toggling roles for a single user between queries, as this practice may lead to unexpected query results.
:::
## Configuration Settings and Usage
Setting [use_query_cache](settings/settings.md#use-query-cache) can be used to control whether a specific query or all queries of the


@ -1,5 +1,5 @@
---
sidebar_label: Settings Overview
title: "Settings Overview"
sidebar_position: 1
slug: /en/operations/settings/
pagination_next: en/operations/settings/settings
@ -16,11 +16,34 @@ There are two main groups of ClickHouse settings:
- Global server settings
- Query-level settings
The main distinction between global server settings and query-level settings is that
global server settings must be set in configuration files while query-level settings
can be set in configuration files or with SQL queries.
The main distinction between global server settings and query-level settings is that global server settings must be set in configuration files, while query-level settings can be set in configuration files or with SQL queries.
Read about [global server settings](/docs/en/operations/server-configuration-parameters/settings.md) to learn more about configuring your ClickHouse server at the global server level.
Read about [query-level settings](/docs/en/operations/settings/settings-query-level.md) to learn more about configuring your ClickHouse server at the query-level.
Read about [query-level settings](/docs/en/operations/settings/settings-query-level.md) to learn more about configuring your ClickHouse server at the query level.
## See non-default settings
To view which settings have been changed from their default value:
```sql
SELECT name, value FROM system.settings WHERE changed
```
If you haven't changed any settings from their default value, then ClickHouse will return nothing.
To check the value of a particular setting, specify the `name` of the setting in your query:
```sql
SELECT name, value FROM system.settings WHERE name = 'max_threads'
```
This command should return something like:
```response
┌─name────────┬─value─────┐
│ max_threads │ 'auto(8)' │
└─────────────┴───────────┘
1 row in set. Elapsed: 0.002 sec.
```


@ -1269,6 +1269,28 @@ Possible values:
Default value: `0`.
### output_format_arrow_use_signed_indexes_for_dictionary {#output_format_arrow_use_signed_indexes_for_dictionary}
Use signed integer types instead of unsigned in `DICTIONARY` type of the [Arrow](../../interfaces/formats.md/#data-format-arrow) format during [LowCardinality](../../sql-reference/data-types/lowcardinality.md) output when `output_format_arrow_low_cardinality_as_dictionary` is enabled.
Possible values:
- 0 — Unsigned integer types are used for indexes in `DICTIONARY` type.
- 1 — Signed integer types are used for indexes in `DICTIONARY` type.
Default value: `1`.
### output_format_arrow_use_64_bit_indexes_for_dictionary {#output_format_arrow_use_64_bit_indexes_for_dictionary}
Use 64-bit integer type in `DICTIONARY` type of the [Arrow](../../interfaces/formats.md/#data-format-arrow) format during [LowCardinality](../../sql-reference/data-types/lowcardinality.md) output when `output_format_arrow_low_cardinality_as_dictionary` is enabled.
Possible values:
- 0 — Type for indexes in `DICTIONARY` type is determined automatically.
- 1 — 64-bit integer type is used for indexes in `DICTIONARY` type.
Default value: `0`.
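For example, a minimal sketch of exporting a `LowCardinality(String)` column with these dictionary settings combined (the `visits` table and `browser` column here are only placeholders):
```sql
-- Force Arrow Dictionary encoding with unsigned indexes whose width is chosen automatically.
SET output_format_arrow_low_cardinality_as_dictionary = 1,
    output_format_arrow_use_signed_indexes_for_dictionary = 0,
    output_format_arrow_use_64_bit_indexes_for_dictionary = 0;

SELECT browser FROM visits FORMAT Arrow;
```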
### output_format_arrow_string_as_string {#output_format_arrow_string_as_string}
Use Arrow String type instead of Binary for String columns.
@ -1575,7 +1597,13 @@ Result:
Use ANSI escape sequences to paint colors in Pretty formats.
Enabled by default.
Possible values:
- `0` — Disabled. Pretty formats do not use ANSI escape sequences.
- `1` — Enabled. Pretty formats will use ANSI escape sequences except for `NoEscapes` formats.
- `auto` — Enabled if `stdout` is a terminal, except for `NoEscapes` formats.
Default value is `auto`.
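For example, a minimal sketch that forces plain (non-colored) output regardless of the terminal; the query itself is arbitrary:
```sql
-- Disable ANSI colors for Pretty formats in this session.
SET output_format_pretty_color = 0;

SELECT number FROM system.numbers LIMIT 3 FORMAT PrettyCompact;
```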
### output_format_pretty_grid_charset {#output_format_pretty_grid_charset}


@ -88,6 +88,7 @@ ClickHouse-specific aggregate functions:
- [quantileTDigestWeighted](/docs/en/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md)
- [quantileBFloat16](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16)
- [quantileBFloat16Weighted](/docs/en/sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16weighted)
- [quantileDDSketch](/docs/en/sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch)
- [simpleLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/simplelinearregression.md)
- [stochasticLinearRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlinearregression.md)
- [stochasticLogisticRegression](/docs/en/sql-reference/aggregate-functions/reference/stochasticlogisticregression.md)


@ -18,6 +18,7 @@ Functions:
- `medianTDigest` — Alias for [quantileTDigest](../../../sql-reference/aggregate-functions/reference/quantiletdigest.md#quantiletdigest).
- `medianTDigestWeighted` — Alias for [quantileTDigestWeighted](../../../sql-reference/aggregate-functions/reference/quantiletdigestweighted.md#quantiletdigestweighted).
- `medianBFloat16` — Alias for [quantileBFloat16](../../../sql-reference/aggregate-functions/reference/quantilebfloat16.md#quantilebfloat16).
- `medianDDSketch` — Alias for [quantileDDSketch](../../../sql-reference/aggregate-functions/reference/quantileddsketch.md#quantileddsketch).
**Example**


@ -0,0 +1,61 @@
---
slug: /en/sql-reference/aggregate-functions/reference/quantileddsketch
sidebar_position: 211
title: quantileDDSketch
---
Computes an approximate [quantile](https://en.wikipedia.org/wiki/Quantile) of a sample with relative-error guarantees. It works by building a [DDSketch](https://www.vldb.org/pvldb/vol12/p2195-masson.pdf).
**Syntax**
``` sql
quantileDDSketch[relative_accuracy, (level)](expr)
```
**Arguments**
- `expr` — Column with numeric data. [Integer](../../../sql-reference/data-types/int-uint.md), [Float](../../../sql-reference/data-types/float.md).
**Parameters**
- `relative_accuracy` — Relative accuracy of the quantile. Possible values are in the range from 0 to 1. [Float](../../../sql-reference/data-types/float.md). The size of the sketch depends on the range of the data and the relative accuracy. The larger the range and the smaller the relative accuracy, the larger the sketch. The rough memory size of the sketch is `log(max_value/min_value)/relative_accuracy`. The recommended value is 0.001 or higher.
- `level` — Level of quantile. Optional. Possible values are in the range from 0 to 1. Default value: 0.5. [Float](../../../sql-reference/data-types/float.md).
**Returned value**
- Approximate quantile of the specified level.
Type: [Float64](../../../sql-reference/data-types/float.md#float32-float64).
**Example**
The input table has an integer and a float column:
``` text
┌─a─┬─────b─┐
│ 1 │ 1.001 │
│ 2 │ 1.002 │
│ 3 │ 1.003 │
│ 4 │ 1.004 │
└───┴───────┘
```
Query to calculate the 0.75-quantile (third quartile):
``` sql
SELECT quantileDDSketch(0.01, 0.75)(a), quantileDDSketch(0.01, 0.75)(b) FROM example_table;
```
Result:
``` text
┌─quantileDDSketch(0.01, 0.75)(a)─┬─quantileDDSketch(0.01, 0.75)(b)─┐
│ 2.974233423476717 │ 1.01 │
└─────────────────────────────────┴─────────────────────────────────┘
```
**See Also**
- [median](../../../sql-reference/aggregate-functions/reference/median.md#median)
- [quantiles](../../../sql-reference/aggregate-functions/reference/quantiles.md#quantiles)


@ -9,7 +9,7 @@ sidebar_position: 201
Syntax: `quantiles(level1, level2, …)(x)`
All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
All the quantile functions also have corresponding quantiles functions: `quantiles`, `quantilesDeterministic`, `quantilesTiming`, `quantilesTimingWeighted`, `quantilesExact`, `quantilesExactWeighted`, `quantileInterpolatedWeighted`, `quantilesTDigest`, `quantilesBFloat16`, `quantilesDDSketch`. These functions calculate all the quantiles of the listed levels in one pass, and return an array of the resulting values.
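For example, a minimal sketch that computes three levels in a single pass over generated data (the input is just the `numbers` table function):
```sql
-- Returns an array with one value per requested level.
SELECT quantiles(0.25, 0.5, 0.9)(number) AS q FROM numbers(1000);
```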
## quantilesExactExclusive


@ -18,6 +18,12 @@ Supported range of values: \[1970-01-01 00:00:00, 2106-02-07 06:28:15\].
Resolution: 1 second.
## Speed
The `Date` datatype is faster than `DateTime` under _most_ conditions.
The `Date` type requires 2 bytes of storage, while `DateTime` requires 4. However, when the data is compressed, this difference is amplified, because the minutes and seconds in `DateTime` are less compressible. Filtering and aggregating on `Date` instead of `DateTime` is also faster.
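A minimal sketch of the uncompressed size difference, using `byteSize`; savings after compression on disk will vary:
```sql
SELECT
    byteSize(toDate('2024-01-18'))              AS date_bytes,      -- 2 bytes
    byteSize(toDateTime('2024-01-18 19:12:57')) AS datetime_bytes;  -- 4 bytes
```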
## Usage Remarks
The point in time is saved as a [Unix timestamp](https://en.wikipedia.org/wiki/Unix_time), regardless of the time zone or daylight saving time. The time zone affects how values of the `DateTime` type are displayed in text format and how values specified as strings are parsed (2020-01-01 05:00:01).


@ -4,11 +4,11 @@ sidebar_position: 55
sidebar_label: Nullable
---
# Nullable(typename)
# Nullable(T)
Allows to store special marker ([NULL](../../sql-reference/syntax.md)) that denotes “missing value” alongside normal values allowed by `TypeName`. For example, a `Nullable(Int8)` type column can store `Int8` type values, and the rows that do not have a value will store `NULL`.
Allows to store special marker ([NULL](../../sql-reference/syntax.md)) that denotes “missing value” alongside normal values allowed by `T`. For example, a `Nullable(Int8)` type column can store `Int8` type values, and the rows that do not have a value will store `NULL`.
For a `TypeName`, you can't use composite data types [Array](../../sql-reference/data-types/array.md), [Map](../../sql-reference/data-types/map.md) and [Tuple](../../sql-reference/data-types/tuple.md). Composite data types can contain `Nullable` type values, such as `Array(Nullable(Int8))`.
`T` can't be any of the composite data types [Array](../../sql-reference/data-types/array.md), [Map](../../sql-reference/data-types/map.md) and [Tuple](../../sql-reference/data-types/tuple.md) but composite data types can contain `Nullable` type values, e.g. `Array(Nullable(Int8))`.
A `Nullable` type field can't be included in table indexes.
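A minimal sketch of the behaviour (the table name is arbitrary):
```sql
-- NULLs are stored alongside normal Int8 values.
CREATE TABLE nullable_demo (x Nullable(Int8)) ENGINE = Memory;
INSERT INTO nullable_demo VALUES (1), (NULL), (3);

-- isNull() distinguishes missing values from real ones.
SELECT x, isNull(x) FROM nullable_demo;
```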


@ -657,6 +657,43 @@ SELECT arraySlice([1, 2, NULL, 4, 5], 2, 3) AS res;
Array elements set to `NULL` are handled as normal values.
## arrayShingles
Generates an array of "shingles", i.e. consecutive sub-arrays of the specified length from the input array.
**Syntax**
``` sql
arrayShingles(array, length)
```
**Arguments**
- `array` — Input array [Array](../../sql-reference/data-types/array.md).
- `length` — The length of each shingle.
**Returned value**
- An array of generated shingles.
Type: [Array](../../sql-reference/data-types/array.md).
**Examples**
Query:
``` sql
SELECT arrayShingles([1,2,3,4], 3) as res;
```
Result:
``` text
┌─res───────────────┐
│ [[1,2,3],[2,3,4]] │
└───────────────────┘
```
## arraySort(\[func,\] arr, …) {#sort}
Sorts the elements of the `arr` array in ascending order. If the `func` function is specified, sorting order is determined by the result of the `func` function applied to the elements of the array. If `func` accepts multiple arguments, the `arraySort` function is passed several arrays that the arguments of `func` will correspond to. Detailed examples are shown at the end of `arraySort` description.


@ -293,6 +293,8 @@ You can't combine both ways in one query.
Along with column descriptions, constraints can be defined:
### CONSTRAINT
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
@ -307,6 +309,30 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
Adding a large number of constraints can negatively affect the performance of big `INSERT` queries.
### ASSUME
The `ASSUME` clause is used to define a `CONSTRAINT` on a table that is assumed to be true. This constraint can then be used by the optimizer to enhance the performance of SQL queries.
Take this example where `ASSUME CONSTRAINT` is used in the creation of the `users_a` table:
```sql
CREATE TABLE users_a (
uid Int16,
name String,
age Int16,
name_len UInt8 MATERIALIZED length(name),
CONSTRAINT c1 ASSUME length(name) = name_len
)
ENGINE=MergeTree
ORDER BY (name_len, name);
```
Here, `ASSUME CONSTRAINT` is used to assert that the `length(name)` function always equals the value of the `name_len` column. This means that whenever `length(name)` is called in a query, ClickHouse can replace it with `name_len`, which should be faster because it avoids calling the `length()` function.
Then, when executing the query `SELECT name FROM users_a WHERE length(name) < 5;`, ClickHouse can optimize it to `SELECT name FROM users_a WHERE name_len < 5;` because of the `ASSUME CONSTRAINT`. This can make the query run faster because it avoids calculating the length of `name` for each row.
`ASSUME CONSTRAINT` **does not enforce the constraint**, it merely informs the optimizer that the constraint holds true. If the constraint is not actually true, the results of the queries may be incorrect. Therefore, you should only use `ASSUME CONSTRAINT` if you are sure that the constraint is true.
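A minimal sketch of inspecting the rewrite, assuming the constraint-related optimizations (`optimize_using_constraints`, `optimize_substitute_columns`) are enabled; the exact output depends on the server version:
```sql
SET optimize_using_constraints = 1, optimize_substitute_columns = 1;

-- The optimizer may substitute length(name) with the materialized name_len column.
EXPLAIN SYNTAX SELECT name FROM users_a WHERE length(name) < 5;
```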
## TTL Expression
Defines storage time for values. Can be specified only for MergeTree-family tables. For the detailed description, see [TTL for columns and tables](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-ttl).


@ -343,13 +343,14 @@ SYSTEM START PULLING REPLICATION LOG [ON CLUSTER cluster_name] [[db.]replicated_
Waits until a `ReplicatedMergeTree` table is synced with the other replicas in a cluster, but no more than `receive_timeout` seconds.
``` sql
SYSTEM SYNC REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT | PULL]
SYSTEM SYNC REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT [FROM 'srcReplica1'[, 'srcReplica2'[, ...]]] | PULL]
```
After running this statement, the `[db.]replicated_merge_tree_family_table_name` fetches commands from the common replicated log into its own replication queue, and then the query waits until the replica processes all of the fetched commands. The following modifiers are supported:
- If a `STRICT` modifier was specified then the query waits for the replication queue to become empty. The `STRICT` version may never succeed if new entries constantly appear in the replication queue.
- If a `LIGHTWEIGHT` modifier was specified then the query waits only for `GET_PART`, `ATTACH_PART`, `DROP_RANGE`, `REPLACE_RANGE` and `DROP_PART` entries to be processed.
- If a `LIGHTWEIGHT` modifier was specified then the query waits only for `GET_PART`, `ATTACH_PART`, `DROP_RANGE`, `REPLACE_RANGE` and `DROP_PART` entries to be processed.
Additionally, the `LIGHTWEIGHT` modifier supports an optional `FROM 'srcReplicas'` clause, where `srcReplicas` is a comma-separated list of source replica names. This extension allows for more targeted synchronization by focusing only on replication tasks originating from the specified source replicas (see the sketch after this list).
- If a `PULL` modifier was specified then the query pulls new replication queue entries from ZooKeeper, but does not wait for anything to be processed.
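A minimal sketch of the targeted `LIGHTWEIGHT ... FROM` form mentioned above (database, table and replica names are placeholders):
```sql
-- Wait only for entries originating from the listed source replicas.
SYSTEM SYNC REPLICA db.replicated_table LIGHTWEIGHT FROM 'replica_one', 'replica_two';
```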
### SYNC DATABASE REPLICA


@ -2796,6 +2796,17 @@ SELECT TOP 3 name, value FROM system.settings;
3. │ max_block_size │ 65505 │
└─────────────────────────┴─────────┘
```
### output_format_pretty_color {#output_format_pretty_color}
Enables/disables ANSI escape sequences in Pretty formats.
Possible values:
- `0` — Disabled. ANSI escape sequences are not used in Pretty formats.
- `1` — Enabled. ANSI escape sequences are used, except for `NoEscapes` formats.
- `auto` — Enabled if `stdout` is a terminal, except for `NoEscapes` formats.
Default value: `auto`.
## system_events_show_zero_values {#system_events_show_zero_values}


@ -280,7 +280,7 @@ SYSTEM START REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge
Waits until a table of the `ReplicatedMergeTree` family is synced with the other replicas in the cluster, but no more than `receive_timeout` seconds:
``` sql
SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT | PULL]
SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT [FROM 'srcReplica1'[, 'srcReplica2'[, ...]]] | PULL]
```
After running this query, the `[db.]replicated_merge_tree_family_table_name` table loads commands from the common replicated log into its own replication queue. The query then waits until the replica processes all of the loaded commands. The following modifiers are supported:


@ -248,7 +248,7 @@ SYSTEM START REPLICATION QUEUES [ON CLUSTER cluster_name] [[db.]replicated_merge
``` sql
SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT | PULL]
SYSTEM SYNC REPLICA [db.]replicated_merge_tree_family_table_name [STRICT | LIGHTWEIGHT [FROM 'srcReplica1'[, 'srcReplica2'[, ...]]] | PULL]
```
### RESTART REPLICA {#query_language-system-restart-replica}


@ -413,13 +413,13 @@ void ReconfigCommand::execute(const DB::ASTKeeperQuery * query, DB::KeeperClient
switch (operation)
{
case static_cast<UInt8>(ReconfigCommand::Operation::ADD):
joining = query->args[1].safeGet<DB::String>();
joining = query->args[1].safeGet<String>();
break;
case static_cast<UInt8>(ReconfigCommand::Operation::REMOVE):
leaving = query->args[1].safeGet<DB::String>();
leaving = query->args[1].safeGet<String>();
break;
case static_cast<UInt8>(ReconfigCommand::Operation::SET):
new_members = query->args[1].safeGet<DB::String>();
new_members = query->args[1].safeGet<String>();
break;
default:
UNREACHABLE();


@ -95,6 +95,7 @@ if (BUILD_STANDALONE_KEEPER)
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/CurrentThread.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollections.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/NamedCollections/NamedCollectionConfiguration.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/Jemalloc.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/IKeeper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Common/ZooKeeper/TestKeeper.cpp
@ -126,15 +127,17 @@ if (BUILD_STANDALONE_KEEPER)
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageTransaction.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/DiskObjectStorageCommon.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageIteratorAsync.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageIterator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/StoredObject.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/registerDiskS3.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3ObjectStorage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/S3Capabilities.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/diskSettings.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/S3/DiskS3Utils.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/ObjectStorageFactory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/MetadataStorageFactory.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/ObjectStorages/RegisterDiskObjectStorage.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/createReadBufferFromFileBase.cpp
${CMAKE_CURRENT_SOURCE_DIR}/../../src/Disks/IO/ReadBufferFromRemoteFSGather.cpp


@ -2,6 +2,7 @@
#include "CatBoostLibraryHandler.h"
#include "CatBoostLibraryHandlerFactory.h"
#include "Common/ProfileEvents.h"
#include "ExternalDictionaryLibraryHandler.h"
#include "ExternalDictionaryLibraryHandlerFactory.h"
@ -44,7 +45,7 @@ namespace
response.setStatusAndReason(HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
if (!response.sent())
*response.send() << message << std::endl;
*response.send() << message << '\n';
LOG_WARNING(&Poco::Logger::get("LibraryBridge"), fmt::runtime(message));
}
@ -96,7 +97,7 @@ ExternalDictionaryLibraryBridgeRequestHandler::ExternalDictionaryLibraryBridgeRe
}
void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
void ExternalDictionaryLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/)
{
LOG_TRACE(log, "Request URI: {}", request.getURI());
HTMLForm params(getContext()->getSettingsRef(), request);
@ -384,7 +385,7 @@ ExternalDictionaryLibraryBridgeExistsHandler::ExternalDictionaryLibraryBridgeExi
}
void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
void ExternalDictionaryLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/)
{
try
{
@ -423,7 +424,7 @@ CatBoostLibraryBridgeRequestHandler::CatBoostLibraryBridgeRequestHandler(
}
void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/)
{
LOG_TRACE(log, "Request URI: {}", request.getURI());
HTMLForm params(getContext()->getSettingsRef(), request);
@ -463,6 +464,9 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ
{
if (method == "catboost_list")
{
auto & read_buf = request.getStream();
params.read(read_buf);
ExternalModelInfos model_infos = CatBoostLibraryHandlerFactory::instance().getModelInfos();
writeIntBinary(static_cast<UInt64>(model_infos.size()), out);
@ -500,6 +504,9 @@ void CatBoostLibraryBridgeRequestHandler::handleRequest(HTTPServerRequest & requ
}
else if (method == "catboost_removeAllModels")
{
auto & read_buf = request.getStream();
params.read(read_buf);
CatBoostLibraryHandlerFactory::instance().removeAllModels();
String res = "1";
@ -621,7 +628,7 @@ CatBoostLibraryBridgeExistsHandler::CatBoostLibraryBridgeExistsHandler(size_t ke
}
void CatBoostLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
void CatBoostLibraryBridgeExistsHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/)
{
try
{

View File

@ -20,7 +20,7 @@ class ExternalDictionaryLibraryBridgeRequestHandler : public HTTPRequestHandler,
public:
ExternalDictionaryLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_);
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
static constexpr inline auto FORMAT = "RowBinary";
@ -36,7 +36,7 @@ class ExternalDictionaryLibraryBridgeExistsHandler : public HTTPRequestHandler,
public:
ExternalDictionaryLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_);
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
const size_t keep_alive_timeout;
@ -65,7 +65,7 @@ class CatBoostLibraryBridgeRequestHandler : public HTTPRequestHandler, WithConte
public:
CatBoostLibraryBridgeRequestHandler(size_t keep_alive_timeout_, ContextPtr context_);
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
const size_t keep_alive_timeout;
@ -79,7 +79,7 @@ class CatBoostLibraryBridgeExistsHandler : public HTTPRequestHandler, WithContex
public:
CatBoostLibraryBridgeExistsHandler(size_t keep_alive_timeout_, ContextPtr context_);
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
const size_t keep_alive_timeout;

View File

@ -69,7 +69,7 @@ namespace
}
void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/)
{
HTMLForm params(getContext()->getSettingsRef(), request, request.getStream());
LOG_TRACE(log, "Request URI: {}", request.getURI());
@ -78,7 +78,7 @@ void ODBCColumnsInfoHandler::handleRequest(HTTPServerRequest & request, HTTPServ
{
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
if (!response.sent())
*response.send() << message << std::endl;
*response.send() << message << '\n';
LOG_WARNING(log, fmt::runtime(message));
};

View File

@ -23,7 +23,7 @@ public:
{
}
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
Poco::Logger * log;

View File

@ -21,7 +21,7 @@
namespace DB
{
void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/)
{
HTMLForm params(getContext()->getSettingsRef(), request, request.getStream());
LOG_TRACE(log, "Request URI: {}", request.getURI());
@ -30,7 +30,7 @@ void IdentifierQuoteHandler::handleRequest(HTTPServerRequest & request, HTTPServ
{
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
if (!response.sent())
*response.send() << message << std::endl;
response.send()->writeln(message);
LOG_WARNING(log, fmt::runtime(message));
};

View File

@ -21,7 +21,7 @@ public:
{
}
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
Poco::Logger * log;

View File

@ -46,12 +46,12 @@ void ODBCHandler::processError(HTTPServerResponse & response, const std::string
{
response.setStatusAndReason(HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
if (!response.sent())
*response.send() << message << std::endl;
*response.send() << message << '\n';
LOG_WARNING(log, fmt::runtime(message));
}
void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
void ODBCHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/)
{
HTMLForm params(getContext()->getSettingsRef(), request);
LOG_TRACE(log, "Request URI: {}", request.getURI());

View File

@ -30,7 +30,7 @@ public:
{
}
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
Poco::Logger * log;

View File

@ -6,7 +6,7 @@
namespace DB
{
void PingHandler::handleRequest(HTTPServerRequest & /* request */, HTTPServerResponse & response)
void PingHandler::handleRequest(HTTPServerRequest & /* request */, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/)
{
try
{

View File

@ -10,7 +10,7 @@ class PingHandler : public HTTPRequestHandler
{
public:
explicit PingHandler(size_t keep_alive_timeout_) : keep_alive_timeout(keep_alive_timeout_) {}
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
size_t keep_alive_timeout;

View File

@ -29,7 +29,7 @@ namespace
}
void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response)
void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & /*write_event*/)
{
HTMLForm params(getContext()->getSettingsRef(), request, request.getStream());
LOG_TRACE(log, "Request URI: {}", request.getURI());
@ -38,7 +38,7 @@ void SchemaAllowedHandler::handleRequest(HTTPServerRequest & request, HTTPServer
{
response.setStatusAndReason(Poco::Net::HTTPResponse::HTTP_INTERNAL_SERVER_ERROR);
if (!response.sent())
*response.send() << message << std::endl;
*response.send() << message << '\n';
LOG_WARNING(log, fmt::runtime(message));
};

View File

@ -24,7 +24,7 @@ public:
{
}
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response) override;
void handleRequest(HTTPServerRequest & request, HTTPServerResponse & response, const ProfileEvents::Event & write_event) override;
private:
Poco::Logger * log;

View File

@ -153,6 +153,18 @@ namespace ProfileEvents
{
extern const Event MainConfigLoads;
extern const Event ServerStartupMilliseconds;
extern const Event InterfaceNativeSendBytes;
extern const Event InterfaceNativeReceiveBytes;
extern const Event InterfaceHTTPSendBytes;
extern const Event InterfaceHTTPReceiveBytes;
extern const Event InterfacePrometheusSendBytes;
extern const Event InterfacePrometheusReceiveBytes;
extern const Event InterfaceInterserverSendBytes;
extern const Event InterfaceInterserverReceiveBytes;
extern const Event InterfaceMySQLSendBytes;
extern const Event InterfaceMySQLReceiveBytes;
extern const Event InterfacePostgreSQLSendBytes;
extern const Event InterfacePostgreSQLReceiveBytes;
}
namespace fs = std::filesystem;
@ -1455,6 +1467,8 @@ try
global_context->reloadAuxiliaryZooKeepersConfigIfChanged(config);
global_context->reloadQueryMaskingRulesIfChanged(config);
std::lock_guard lock(servers_lock);
updateServers(*config, server_pool, async_metrics, servers, servers_to_start_before_tables);
}
@ -2049,7 +2063,7 @@ std::unique_ptr<TCPProtocolStackFactory> Server::buildProtocolStackFromConfig(
auto create_factory = [&](const std::string & type, const std::string & conf_name) -> TCPServerConnectionFactory::Ptr
{
if (type == "tcp")
return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false));
return TCPServerConnectionFactory::Ptr(new TCPHandlerFactory(*this, false, false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes));
if (type == "tls")
#if USE_SSL
@ -2061,20 +2075,20 @@ std::unique_ptr<TCPProtocolStackFactory> Server::buildProtocolStackFromConfig(
if (type == "proxy1")
return TCPServerConnectionFactory::Ptr(new ProxyV1HandlerFactory(*this, conf_name));
if (type == "mysql")
return TCPServerConnectionFactory::Ptr(new MySQLHandlerFactory(*this));
return TCPServerConnectionFactory::Ptr(new MySQLHandlerFactory(*this, ProfileEvents::InterfaceMySQLReceiveBytes, ProfileEvents::InterfaceMySQLSendBytes));
if (type == "postgres")
return TCPServerConnectionFactory::Ptr(new PostgreSQLHandlerFactory(*this));
return TCPServerConnectionFactory::Ptr(new PostgreSQLHandlerFactory(*this, ProfileEvents::InterfacePostgreSQLReceiveBytes, ProfileEvents::InterfacePostgreSQLSendBytes));
if (type == "http")
return TCPServerConnectionFactory::Ptr(
new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"))
new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), ProfileEvents::InterfaceHTTPReceiveBytes, ProfileEvents::InterfaceHTTPSendBytes)
);
if (type == "prometheus")
return TCPServerConnectionFactory::Ptr(
new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"))
new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), ProfileEvents::InterfacePrometheusReceiveBytes, ProfileEvents::InterfacePrometheusSendBytes)
);
if (type == "interserver")
return TCPServerConnectionFactory::Ptr(
new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"))
new HTTPServerConnectionFactory(httpContext(), http_params, createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"), ProfileEvents::InterfaceInterserverReceiveBytes, ProfileEvents::InterfaceInterserverSendBytes)
);
throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Protocol configuration error, unknown protocol name '{}'", type);
@ -2207,7 +2221,7 @@ void Server::createServers(
port_name,
"http://" + address.toString(),
std::make_unique<HTTPServer>(
httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params));
httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params, ProfileEvents::InterfaceHTTPReceiveBytes, ProfileEvents::InterfaceHTTPSendBytes));
});
}
@ -2227,7 +2241,7 @@ void Server::createServers(
port_name,
"https://" + address.toString(),
std::make_unique<HTTPServer>(
httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params));
httpContext(), createHandlerFactory(*this, config, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params, ProfileEvents::InterfaceHTTPReceiveBytes, ProfileEvents::InterfaceHTTPSendBytes));
#else
UNUSED(port);
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "HTTPS protocol is disabled because Poco library was built without NetSSL support.");
@ -2250,7 +2264,7 @@ void Server::createServers(
port_name,
"native protocol (tcp): " + address.toString(),
std::make_unique<TCPServer>(
new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false),
new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes),
server_pool,
socket,
new Poco::Net::TCPServerParams));
@ -2272,7 +2286,7 @@ void Server::createServers(
port_name,
"native protocol (tcp) with PROXY: " + address.toString(),
std::make_unique<TCPServer>(
new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true),
new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes),
server_pool,
socket,
new Poco::Net::TCPServerParams));
@ -2295,7 +2309,7 @@ void Server::createServers(
port_name,
"secure native protocol (tcp_secure): " + address.toString(),
std::make_unique<TCPServer>(
new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false),
new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false, ProfileEvents::InterfaceNativeReceiveBytes, ProfileEvents::InterfaceNativeSendBytes),
server_pool,
socket,
new Poco::Net::TCPServerParams));
@ -2319,7 +2333,7 @@ void Server::createServers(
listen_host,
port_name,
"MySQL compatibility protocol: " + address.toString(),
std::make_unique<TCPServer>(new MySQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
std::make_unique<TCPServer>(new MySQLHandlerFactory(*this, ProfileEvents::InterfaceMySQLReceiveBytes, ProfileEvents::InterfaceMySQLSendBytes), server_pool, socket, new Poco::Net::TCPServerParams));
});
}
@ -2336,7 +2350,7 @@ void Server::createServers(
listen_host,
port_name,
"PostgreSQL compatibility protocol: " + address.toString(),
std::make_unique<TCPServer>(new PostgreSQLHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams));
std::make_unique<TCPServer>(new PostgreSQLHandlerFactory(*this, ProfileEvents::InterfacePostgreSQLReceiveBytes, ProfileEvents::InterfacePostgreSQLSendBytes), server_pool, socket, new Poco::Net::TCPServerParams));
});
}
@ -2370,7 +2384,7 @@ void Server::createServers(
port_name,
"Prometheus: http://" + address.toString(),
std::make_unique<HTTPServer>(
httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params));
httpContext(), createHandlerFactory(*this, config, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params, ProfileEvents::InterfacePrometheusReceiveBytes, ProfileEvents::InterfacePrometheusSendBytes));
});
}
}
@ -2416,7 +2430,9 @@ void Server::createInterserverServers(
createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPHandler-factory"),
server_pool,
socket,
http_params));
http_params,
ProfileEvents::InterfaceInterserverReceiveBytes,
ProfileEvents::InterfaceInterserverSendBytes));
});
}
@ -2439,7 +2455,9 @@ void Server::createInterserverServers(
createHandlerFactory(*this, config, async_metrics, "InterserverIOHTTPSHandler-factory"),
server_pool,
socket,
http_params));
http_params,
ProfileEvents::InterfaceInterserverReceiveBytes,
ProfileEvents::InterfaceInterserverSendBytes));
#else
UNUSED(port);
throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL support for TCP protocol is disabled because Poco library was built without NetSSL support.");

View File

@ -200,6 +200,7 @@ enum class AccessType
M(SYSTEM_UNFREEZE, "SYSTEM UNFREEZE", GLOBAL, SYSTEM) \
M(SYSTEM_FAILPOINT, "SYSTEM ENABLE FAILPOINT, SYSTEM DISABLE FAILPOINT", GLOBAL, SYSTEM) \
M(SYSTEM_LISTEN, "SYSTEM START LISTEN, SYSTEM STOP LISTEN", GLOBAL, SYSTEM) \
M(SYSTEM_JEMALLOC, "SYSTEM JEMALLOC PURGE, SYSTEM JEMALLOC ENABLE PROFILE, SYSTEM JEMALLOC DISABLE PROFILE, SYSTEM JEMALLOC FLUSH PROFILE", GLOBAL, SYSTEM) \
M(SYSTEM, "", GROUP, ALL) /* allows to execute SYSTEM {SHUTDOWN|RELOAD CONFIG|...} */ \
\
M(dictGet, "dictHas, dictGetHierarchy, dictIsIn", DICTIONARY, ALL) /* allows to execute functions dictGet(), dictHas(), dictGetHierarchy(), dictIsIn() */\

View File

@ -179,7 +179,7 @@ ConstStoragePtr MultipleAccessStorage::getStorage(const UUID & id) const
return const_cast<MultipleAccessStorage *>(this)->getStorage(id);
}
StoragePtr MultipleAccessStorage::findStorageByName(const DB::String & storage_name)
StoragePtr MultipleAccessStorage::findStorageByName(const String & storage_name)
{
auto storages = getStoragesInternal();
for (const auto & storage : *storages)
@ -192,13 +192,13 @@ StoragePtr MultipleAccessStorage::findStorageByName(const DB::String & storage_n
}
ConstStoragePtr MultipleAccessStorage::findStorageByName(const DB::String & storage_name) const
ConstStoragePtr MultipleAccessStorage::findStorageByName(const String & storage_name) const
{
return const_cast<MultipleAccessStorage *>(this)->findStorageByName(storage_name);
}
StoragePtr MultipleAccessStorage::getStorageByName(const DB::String & storage_name)
StoragePtr MultipleAccessStorage::getStorageByName(const String & storage_name)
{
auto storage = findStorageByName(storage_name);
if (storage)
@ -208,12 +208,12 @@ StoragePtr MultipleAccessStorage::getStorageByName(const DB::String & storage_na
}
ConstStoragePtr MultipleAccessStorage::getStorageByName(const DB::String & storage_name) const
ConstStoragePtr MultipleAccessStorage::getStorageByName(const String & storage_name) const
{
return const_cast<MultipleAccessStorage *>(this)->getStorageByName(storage_name);
}
StoragePtr MultipleAccessStorage::findExcludingStorage(AccessEntityType type, const DB::String & name, DB::MultipleAccessStorage::StoragePtr exclude) const
StoragePtr MultipleAccessStorage::findExcludingStorage(AccessEntityType type, const String & name, DB::MultipleAccessStorage::StoragePtr exclude) const
{
auto storages = getStoragesInternal();
for (const auto & storage : *storages)

View File

@ -31,7 +31,7 @@ namespace ErrorCodes
template <typename> class QuantileTiming;
template <typename> class QuantileGK;
template <typename> class QuantileDDSketch;
/** Generic aggregate function for calculation of quantiles.
* It depends on quantile calculation data structure. Look at Quantile*.h for various implementations.
@ -64,6 +64,7 @@ private:
using ColVecType = ColumnVectorOrDecimal<Value>;
static constexpr bool returns_float = !(std::is_same_v<FloatReturnType, void>);
static constexpr bool is_quantile_ddsketch = std::is_same_v<Data, QuantileDDSketch<Value>>;
static_assert(!is_decimal<Value> || !returns_float);
QuantileLevels<Float64> levels;
@ -74,6 +75,9 @@ private:
/// Used for the approximate version of the algorithm (Greenwald-Khanna)
ssize_t accuracy = 10000;
/// Used for the quantile sketch
Float64 relative_accuracy = 0.01;
DataTypePtr & argument_type;
public:
@ -87,7 +91,36 @@ public:
if (!returns_many && levels.size() > 1)
throw Exception(ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires one level parameter or less", getName());
if constexpr (has_accuracy_parameter)
if constexpr (is_quantile_ddsketch)
{
if (params.empty())
throw Exception(
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH, "Aggregate function {} requires at least one param", getName());
const auto & relative_accuracy_field = params[0];
if (relative_accuracy_field.getType() != Field::Types::Float64)
throw Exception(
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Aggregate function {} requires relative accuracy parameter with Float64 type", getName());
relative_accuracy = relative_accuracy_field.get<Float64>();
if (relative_accuracy <= 0 || relative_accuracy >= 1 || isNaN(relative_accuracy))
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Aggregate function {} requires relative accuracy parameter with value between 0 and 1 but is {}",
getName(),
relative_accuracy);
// Throw an exception if the relative accuracy is too small, because then the sketch
// cannot allocate enough memory to satisfy the accuracy requirement.
if (relative_accuracy < 1e-6)
throw Exception(
ErrorCodes::BAD_ARGUMENTS,
"Aggregate function {} requires relative accuracy parameter with value greater than 1e-6 but is {}",
getName(),
relative_accuracy);
}
else if constexpr (has_accuracy_parameter)
{
if (params.empty())
throw Exception(
@ -116,7 +149,9 @@ public:
void create(AggregateDataPtr __restrict place) const override /// NOLINT
{
if constexpr (has_accuracy_parameter)
if constexpr (is_quantile_ddsketch)
new (place) Data(relative_accuracy);
else if constexpr (has_accuracy_parameter)
new (place) Data(accuracy);
else
new (place) Data;
@ -147,6 +182,10 @@ public:
{
/// Return normalized state type: quantiles*(1)(...)
Array params{1};
if constexpr (is_quantile_ddsketch)
params = {relative_accuracy, 1};
else if constexpr (has_accuracy_parameter)
params = {accuracy, 1};
AggregateFunctionProperties properties;
return std::make_shared<DataTypeAggregateFunction>(
AggregateFunctionFactory::instance().get(
@ -295,4 +334,7 @@ struct NameQuantilesBFloat16Weighted { static constexpr auto name = "quantilesBF
struct NameQuantileGK { static constexpr auto name = "quantileGK"; };
struct NameQuantilesGK { static constexpr auto name = "quantilesGK"; };
struct NameQuantileDDSketch { static constexpr auto name = "quantileDDSketch"; };
struct NameQuantilesDDSketch { static constexpr auto name = "quantilesDDSketch"; };
}

View File

@ -0,0 +1,61 @@
#include <AggregateFunctions/AggregateFunctionQuantile.h>
#include <AggregateFunctions/QuantileDDSketch.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/Helpers.h>
#include <DataTypes/DataTypeDate.h>
#include <DataTypes/DataTypeDateTime.h>
#include <Core/Field.h>
namespace DB
{
struct Settings;
namespace ErrorCodes
{
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
namespace
{
template <typename Value, bool float_return> using FuncQuantileDDSketch = AggregateFunctionQuantile<Value, QuantileDDSketch<Value>, NameQuantileDDSketch, false, std::conditional_t<float_return, Float64, void>, false, true>;
template <typename Value, bool float_return> using FuncQuantilesDDSketch = AggregateFunctionQuantile<Value, QuantileDDSketch<Value>, NameQuantilesDDSketch, false, std::conditional_t<float_return, Float64, void>, true, true>;
template <template <typename, bool> class Function>
AggregateFunctionPtr createAggregateFunctionQuantile(
const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
{
/// Second argument type check doesn't depend on the type of the first one.
Function<void, true>::assertSecondArg(argument_types);
const DataTypePtr & argument_type = argument_types[0];
WhichDataType which(argument_type);
#define DISPATCH(TYPE) \
if (which.idx == TypeIndex::TYPE) return std::make_shared<Function<TYPE, true>>(argument_types, params);
FOR_BASIC_NUMERIC_TYPES(DISPATCH)
#undef DISPATCH
if (which.idx == TypeIndex::Date) return std::make_shared<Function<DataTypeDate::FieldType, false>>(argument_types, params);
if (which.idx == TypeIndex::DateTime) return std::make_shared<Function<DataTypeDateTime::FieldType, false>>(argument_types, params);
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, "Illegal type {} of argument for aggregate function {}",
argument_type->getName(), name);
}
}
void registerAggregateFunctionsQuantileDDSketch(AggregateFunctionFactory & factory)
{
/// For aggregate functions returning array we cannot return NULL on empty set.
AggregateFunctionProperties properties = { .returns_default_when_only_null = true };
factory.registerFunction(NameQuantileDDSketch::name, createAggregateFunctionQuantile<FuncQuantileDDSketch>);
factory.registerFunction(NameQuantilesDDSketch::name, { createAggregateFunctionQuantile<FuncQuantilesDDSketch>, properties });
/// 'median' is an alias for 'quantile'
factory.registerAlias("medianDDSketch", NameQuantileDDSketch::name);
}
}

View File

@ -0,0 +1,253 @@
#pragma once
#include <memory> // for std::unique_ptr
#include <cmath>
#include <stdexcept>
#include <limits>
#include <iostream>
#include <base/types.h>
#include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h>
#include <AggregateFunctions/DDSketch/Mapping.h>
#include <AggregateFunctions/DDSketch/Store.h>
#include <AggregateFunctions/DDSketch/DDSketchEncoding.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
extern const int INCORRECT_DATA;
}
class DDSketchDenseLogarithmic
{
public:
explicit DDSketchDenseLogarithmic(Float64 relative_accuracy = 0.01)
: mapping(std::make_unique<DDSketchLogarithmicMapping>(relative_accuracy)),
store(std::make_unique<DDSketchDenseStore>()),
negative_store(std::make_unique<DDSketchDenseStore>()),
zero_count(0.0),
count(0.0)
{
}
DDSketchDenseLogarithmic(std::unique_ptr<DDSketchLogarithmicMapping> mapping_,
std::unique_ptr<DDSketchDenseStore> store_,
std::unique_ptr<DDSketchDenseStore> negative_store_,
Float64 zero_count_)
: mapping(std::move(mapping_)),
store(std::move(store_)),
negative_store(std::move(negative_store_)),
zero_count(zero_count_),
count(store->count + negative_store->count + zero_count_)
{
}
void add(Float64 val, Float64 weight = 1.0)
{
if (weight <= 0.0)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "weight must be a positive Float64");
}
if (val > mapping->getMinPossible())
{
store->add(mapping->key(val), weight);
}
else if (val < -mapping->getMinPossible())
{
negative_store->add(mapping->key(-val), weight);
}
else
{
zero_count += weight;
}
count += weight;
}
Float64 get(Float64 quantile) const
{
if (quantile < 0 || quantile > 1 || count == 0)
{
return std::numeric_limits<Float64>::quiet_NaN(); // Return NaN if the conditions are not met
}
Float64 rank = quantile * (count - 1);
Float64 quantile_value;
if (rank < negative_store->count)
{
Float64 reversed_rank = negative_store->count - rank - 1;
int key = negative_store->keyAtRank(reversed_rank, false);
quantile_value = -mapping->value(key);
}
else if (rank < zero_count + negative_store->count)
{
quantile_value = 0;
}
else
{
int key = store->keyAtRank(rank - zero_count - negative_store->count, true);
quantile_value = mapping->value(key);
}
return quantile_value;
}
void copy(const DDSketchDenseLogarithmic& other)
{
Float64 rel_acc = (other.mapping->getGamma() - 1) / (other.mapping->getGamma() + 1);
mapping = std::make_unique<DDSketchLogarithmicMapping>(rel_acc);
store = std::make_unique<DDSketchDenseStore>();
negative_store = std::make_unique<DDSketchDenseStore>();
store->copy(other.store.get());
negative_store->copy(other.negative_store.get());
zero_count = other.zero_count;
count = other.count;
}
void merge(const DDSketchDenseLogarithmic& other)
{
if (mapping->getGamma() != other.mapping->getGamma())
{
// modify the one with higher precision to match the one with lower precision
if (mapping->getGamma() > other.mapping->getGamma())
{
DDSketchDenseLogarithmic new_sketch = other.changeMapping(mapping->getGamma());
this->merge(new_sketch);
return;
}
else
{
DDSketchDenseLogarithmic new_sketch = changeMapping(other.mapping->getGamma());
copy(new_sketch);
}
}
// If the other sketch is empty, do nothing
if (other.count == 0)
{
return;
}
// If this sketch is empty, copy the other sketch
if (count == 0)
{
copy(other);
return;
}
count += other.count;
zero_count += other.zero_count;
store->merge(other.store.get());
negative_store->merge(other.negative_store.get());
}
void serialize(WriteBuffer& buf) const
{
// Write the mapping
writeBinary(enc.FlagIndexMappingBaseLogarithmic.byte, buf);
mapping->serialize(buf);
// Write the positive and negative stores
writeBinary(enc.FlagTypePositiveStore, buf);
store->serialize(buf);
writeBinary(enc.FlagTypeNegativeStore, buf);
negative_store->serialize(buf);
// Write the zero count
writeBinary(enc.FlagZeroCountVarFloat.byte, buf);
writeBinary(zero_count, buf);
}
void deserialize(ReadBuffer& buf)
{
// Read the mapping
UInt8 flag = 0;
readBinary(flag, buf);
if (flag != enc.FlagIndexMappingBaseLogarithmic.byte)
{
throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid flag for mapping");
}
mapping->deserialize(buf);
// Read the positive and negative stores
readBinary(flag, buf);
if (flag != enc.FlagTypePositiveStore)
{
throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid flag for positive store");
}
store->deserialize(buf);
readBinary(flag, buf);
if (flag != enc.FlagTypeNegativeStore)
{
throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid flag for negative store");
}
negative_store->deserialize(buf);
// Read the zero count
readBinary(flag, buf);
if (flag != enc.FlagZeroCountVarFloat.byte)
{
throw Exception(ErrorCodes::INCORRECT_DATA, "Invalid flag for zero count");
}
readBinary(zero_count, buf);
count = static_cast<Float64>(negative_store->count + zero_count + store->count);
}
private:
std::unique_ptr<DDSketchLogarithmicMapping> mapping;
std::unique_ptr<DDSketchDenseStore> store;
std::unique_ptr<DDSketchDenseStore> negative_store;
Float64 zero_count;
Float64 count;
DDSketchEncoding enc;
DDSketchDenseLogarithmic changeMapping(Float64 new_gamma) const
{
auto new_mapping = std::make_unique<DDSketchLogarithmicMapping>((new_gamma - 1) / (new_gamma + 1));
auto new_positive_store = std::make_unique<DDSketchDenseStore>();
auto new_negative_store = std::make_unique<DDSketchDenseStore>();
auto remap_store = [this, &new_mapping](DDSketchDenseStore& old_store, std::unique_ptr<DDSketchDenseStore>& target_store)
{
for (int i = 0; i < old_store.length(); ++i)
{
int old_index = i + old_store.offset;
Float64 old_bin_count = old_store.bins[i];
Float64 in_lower_bound = this->mapping->lowerBound(old_index);
Float64 in_upper_bound = this->mapping->lowerBound(old_index + 1);
Float64 in_size = in_upper_bound - in_lower_bound;
int new_index = new_mapping->key(in_lower_bound);
// Distribute counts to new bins
for (; new_mapping->lowerBound(new_index) < in_upper_bound; ++new_index)
{
Float64 out_lower_bound = new_mapping->lowerBound(new_index);
Float64 out_upper_bound = new_mapping->lowerBound(new_index + 1);
Float64 lower_intersection_bound = std::max(out_lower_bound, in_lower_bound);
Float64 higher_intersection_bound = std::min(out_upper_bound, in_upper_bound);
Float64 intersection_size = higher_intersection_bound - lower_intersection_bound;
Float64 proportion = intersection_size / in_size;
target_store->add(new_index, proportion * old_bin_count);
}
}
};
remap_store(*store, new_positive_store);
remap_store(*negative_store, new_negative_store);
return DDSketchDenseLogarithmic(std::move(new_mapping), std::move(new_positive_store), std::move(new_negative_store), zero_count);
}
};
}
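For orientation, here is a minimal usage sketch of the class added above (illustrative only, assuming it is compiled together with the Mapping/Store headers introduced later in this diff). It shows how values are routed into the three internal stores: negative values into the negative store, values whose magnitude is below the mapping's minimum into the zero bin, and positive values into the positive store.
#include <AggregateFunctions/DDSketch.h>
using namespace DB;
int main()
{
    DDSketchDenseLogarithmic sketch(0.01); // 1% relative accuracy
    sketch.add(-5.0);  // goes to the negative store
    sketch.add(0.0);   // |value| <= min_possible: counted in the zero bin
    sketch.add(5.0);   // goes to the positive store
    // get() walks the negative store, the zero bin and the positive store by rank;
    // with one value in each, the median here is 0.
    Float64 median = sketch.get(0.5);
    (void)median;
    return 0;
}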

View File

@ -0,0 +1,101 @@
#pragma once
#include <vector>
#include <stdexcept>
/**
* An encoded DDSketch comprises multiple contiguous blocks (sequences of bytes).
* Each block is prefixed with a flag that indicates what the block contains and how the data is encoded in the block.
* A flag is a single byte, which itself contains two parts:
* - the flag type (the 2 least significant bits),
* - the subflag (the 6 most significant bits).
*
* There are four flag types, for:
* - sketch features,
* - index mapping,
* - positive value store,
* - negative value store.
*
* The meaning of the subflag depends on the flag type:
* - for the sketch feature flag type, it indicates what feature is encoded,
* - for the index mapping flag type, it indicates what mapping is encoded and how,
* - for the store flag types, it indicates how bins are encoded.
*/
namespace DB
{
class DDSketchEncoding
{
private:
static constexpr UInt8 numBitsForType = 2;
static constexpr UInt8 flagTypeMask = (1 << numBitsForType) - 1;
static constexpr UInt8 subFlagMask = ~flagTypeMask;
static constexpr UInt8 flagTypeSketchFeatures = 0b00;
public:
class Flag
{
public:
UInt8 byte;
Flag(UInt8 t, UInt8 s) : byte(t | s) { }
[[maybe_unused]] UInt8 Type() const { return byte & flagTypeMask; }
[[maybe_unused]] UInt8 SubFlag() const { return byte & subFlagMask; }
};
// FLAG TYPES
static constexpr UInt8 FlagTypeIndexMapping = 0b10;
static constexpr UInt8 FlagTypePositiveStore = 0b01;
static constexpr UInt8 FlagTypeNegativeStore = 0b11;
// SKETCH FEATURES
// Encoding format:
// - [byte] flag
// - [varfloat64] count of the zero bin
const Flag FlagZeroCountVarFloat = Flag(flagTypeSketchFeatures, 1 << numBitsForType);
// INDEX MAPPING
// Encoding format:
// - [byte] flag
// - [float64LE] gamma
// - [float64LE] index offset
const Flag FlagIndexMappingBaseLogarithmic = Flag(FlagTypeIndexMapping, 0 << numBitsForType);
// BINS
// Encoding format:
// - [byte] flag
// - [uvarint64] number of bins N
// - [varint64] index of first bin
// - [varfloat64] count of first bin
// - [varint64] difference between the index of the second bin and the index
// of the first bin
// - [varfloat64] count of second bin
// - ...
// - [varint64] difference between the index of the N-th bin and the index
// of the (N-1)-th bin
// - [varfloat64] count of N-th bin
static constexpr UInt8 BinEncodingIndexDeltasAndCounts = 1 << numBitsForType;
// Encoding format:
// - [byte] flag
// - [uvarint64] number of bins N
// - [varint64] index of first bin
// - [varint64] difference between the index of the second bin and the index
// of the first bin
// - ...
// - [varint64] difference between the index of the N-th bin and the index
// of the (N-1)-th bin
static constexpr UInt8 BinEncodingIndexDeltas = 2 << numBitsForType;
// Encoding format:
// - [byte] flag
// - [uvarint64] number of bins N
// - [varint64] index of first bin
// - [varint64] difference between two successive indexes
// - [varfloat64] count of first bin
// - [varfloat64] count of second bin
// - ...
// - [varfloat64] count of N-th bin
static constexpr UInt8 BinEncodingContiguousCounts = 3 << numBitsForType;
};
}
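To make the flag layout described above concrete, here is a small standalone sketch (illustrative only, not part of the diff) that reproduces the bit arithmetic: the two least significant bits carry the flag type and the six most significant bits carry the subflag.
#include <cstdint>
#include <cassert>
int main()
{
    constexpr uint8_t num_bits_for_type = 2;
    constexpr uint8_t flag_type_mask = (1 << num_bits_for_type) - 1;        // 0b00000011
    constexpr uint8_t sub_flag_mask = static_cast<uint8_t>(~flag_type_mask); // 0b11111100
    // FlagIndexMappingBaseLogarithmic is Flag(FlagTypeIndexMapping, 0 << 2)
    constexpr uint8_t flag_type_index_mapping = 0b10;
    constexpr uint8_t byte = flag_type_index_mapping | (0 << num_bits_for_type);
    assert((byte & flag_type_mask) == flag_type_index_mapping); // what Type() returns
    assert((byte & sub_flag_mask) == 0);                        // what SubFlag() returns
    return 0;
}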

View File

@ -0,0 +1,110 @@
#pragma once
#include <base/types.h>
#include <cmath>
#include <stdexcept>
#include <limits>
#include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
class DDSketchLogarithmicMapping
{
public:
explicit DDSketchLogarithmicMapping(Float64 relative_accuracy_, Float64 offset_ = 0.0)
: relative_accuracy(relative_accuracy_), offset(offset_)
{
if (relative_accuracy <= 0 || relative_accuracy >= 1)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Relative accuracy must be between 0 and 1 but is {}", relative_accuracy);
}
gamma = (1 + relative_accuracy) / (1 - relative_accuracy);
multiplier = 1 / std::log(gamma);
min_possible = std::numeric_limits<Float64>::min() * gamma;
max_possible = std::numeric_limits<Float64>::max() / gamma;
}
~DDSketchLogarithmicMapping() = default;
int key(Float64 value) const
{
if (value < min_possible || value > max_possible)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Value {} is out of range [{}, {}]", value, min_possible, max_possible);
}
return static_cast<int>(logGamma(value) + offset);
}
Float64 value(int key) const
{
return lowerBound(key) * (1 + relative_accuracy);
}
Float64 logGamma(Float64 value) const
{
return std::log(value) * multiplier;
}
Float64 powGamma(Float64 value) const
{
return std::exp(value / multiplier);
}
Float64 lowerBound(int index) const
{
return powGamma(static_cast<Float64>(index) - offset);
}
Float64 getGamma() const
{
return gamma;
}
Float64 getMinPossible() const
{
return min_possible;
}
[[maybe_unused]] Float64 getMaxPossible() const
{
return max_possible;
}
void serialize(WriteBuffer& buf) const
{
writeBinary(gamma, buf);
writeBinary(offset, buf);
}
void deserialize(ReadBuffer& buf)
{
readBinary(gamma, buf);
readBinary(offset, buf);
if (gamma <= 1.0)
{
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid gamma value after deserialization: {}", gamma);
}
multiplier = 1 / std::log(gamma);
min_possible = std::numeric_limits<Float64>::min() * gamma;
max_possible = std::numeric_limits<Float64>::max() / gamma;
}
protected:
Float64 relative_accuracy;
Float64 gamma;
Float64 min_possible;
Float64 max_possible;
Float64 multiplier;
Float64 offset;
};
}
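A quick numeric check of the mapping above (a sketch under the default offset of 0; values are approximate): for relative_accuracy = 0.01, gamma = 1.01/0.99 ≈ 1.0202 and multiplier = 1/ln(gamma) ≈ 50, so the value 100 lands in key ≈ 230 and is reconstructed as ≈ 100.5, which is within the 1% relative error bound.
#include <cmath>
#include <cassert>
int main()
{
    const double relative_accuracy = 0.01;
    const double gamma = (1 + relative_accuracy) / (1 - relative_accuracy); // ~1.0202
    const double multiplier = 1 / std::log(gamma);                          // ~50
    const double value = 100.0;
    const int key = static_cast<int>(std::log(value) * multiplier);         // ~230
    const double lower_bound = std::exp(static_cast<double>(key) / multiplier);
    const double estimate = lower_bound * (1 + relative_accuracy);          // ~100.5
    assert(std::abs(estimate - value) / value <= relative_accuracy);
    return 0;
}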

View File

@ -0,0 +1,260 @@
#pragma once
#include <base/types.h>
#include <vector>
#include <cmath>
#include <limits>
#include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h>
#include <AggregateFunctions/DDSketch/DDSketchEncoding.h>
// We start with 128 bins and grow the number of bins by 128
// each time we need to extend the range of the bins.
// This is done to avoid reallocating the bins vector too often.
constexpr UInt32 CHUNK_SIZE = 128;
namespace DB
{
class DDSketchDenseStore
{
public:
Float64 count = 0;
int min_key = std::numeric_limits<int>::max();
int max_key = std::numeric_limits<int>::min();
int offset = 0;
std::vector<Float64> bins;
explicit DDSketchDenseStore(UInt32 chunk_size_ = CHUNK_SIZE) : chunk_size(chunk_size_) {}
void copy(DDSketchDenseStore* other)
{
bins = other->bins;
count = other->count;
min_key = other->min_key;
max_key = other->max_key;
offset = other->offset;
}
int length()
{
return static_cast<int>(bins.size());
}
void add(int key, Float64 weight)
{
int idx = getIndex(key);
bins[idx] += weight;
count += weight;
}
int keyAtRank(Float64 rank, bool lower)
{
Float64 running_ct = 0.0;
for (size_t i = 0; i < bins.size(); ++i)
{
running_ct += bins[i];
if ((lower && running_ct > rank) || (!lower && running_ct >= rank + 1))
{
return static_cast<int>(i) + offset;
}
}
return max_key;
}
void merge(DDSketchDenseStore* other)
{
if (other->count == 0) return;
if (count == 0)
{
copy(other);
return;
}
if (other->min_key < min_key || other->max_key > max_key)
{
extendRange(other->min_key, other->max_key);
}
for (int key = other->min_key; key <= other->max_key; ++key)
{
bins[key - offset] += other->bins[key - other->offset];
}
count += other->count;
}
void serialize(WriteBuffer& buf) const
{
// Calculate the size of the dense and sparse encodings to choose the smallest one
UInt64 num_bins = 0, num_non_empty_bins = 0;
if (count != 0)
{
num_bins = max_key - min_key + 1;
}
size_t sparse_encoding_overhead = 0;
for (int index = min_key; index <= max_key; ++index)
{
if (bins[index - offset] != 0)
{
num_non_empty_bins++;
sparse_encoding_overhead += 2; // 2 bytes for index delta
}
}
size_t dense_encoding_overhead = (num_bins - num_non_empty_bins) * estimatedFloatSize(0.0);
// Choose the smallest encoding and write to buffer
if (dense_encoding_overhead <= sparse_encoding_overhead)
{
// Write the dense encoding
writeBinary(enc.BinEncodingContiguousCounts, buf); // Flag for dense encoding
writeVarUInt(num_bins, buf);
writeVarInt(min_key, buf);
writeVarInt(1, buf); // indexDelta in dense encoding
for (int index = min_key; index <= max_key; ++index)
{
writeFloatBinary(bins[index - offset], buf);
}
}
else
{
// Write the sparse encoding
writeBinary(enc.BinEncodingIndexDeltasAndCounts, buf); // Flag for sparse encoding
writeVarUInt(num_non_empty_bins, buf);
int previous_index = 0;
for (int index = min_key; index <= max_key; ++index)
{
Float64 bin_count = bins[index - offset];
if (bin_count != 0)
{
writeVarInt(index - previous_index, buf);
writeFloatBinary(bin_count, buf);
previous_index = index;
}
}
}
}
void deserialize(ReadBuffer& buf)
{
UInt8 encoding_mode;
readBinary(encoding_mode, buf);
if (encoding_mode == enc.BinEncodingContiguousCounts)
{
UInt64 num_bins;
readVarUInt(num_bins, buf);
int start_key;
readVarInt(start_key, buf);
int index_delta;
readVarInt(index_delta, buf);
for (UInt64 i = 0; i < num_bins; ++i)
{
Float64 bin_count;
readFloatBinary(bin_count, buf);
add(start_key, bin_count);
start_key += index_delta;
}
}
else
{
UInt64 num_non_empty_bins;
readVarUInt(num_non_empty_bins, buf);
int previous_index = 0;
for (UInt64 i = 0; i < num_non_empty_bins; ++i)
{
int index_delta;
readVarInt(index_delta, buf);
Float64 bin_count;
readFloatBinary(bin_count, buf);
previous_index += index_delta;
add(previous_index, bin_count);
}
}
}
private:
UInt32 chunk_size;
DDSketchEncoding enc;
int getIndex(int key)
{
if (key < min_key || key > max_key)
{
extendRange(key, key);
}
return key - offset;
}
UInt32 getNewLength(int new_min_key, int new_max_key) const
{
int desired_length = new_max_key - new_min_key + 1;
return static_cast<UInt32>(chunk_size * std::ceil(static_cast<Float64>(desired_length) / chunk_size)); // round up to a multiple of chunk_size
}
void extendRange(int key, int second_key)
{
int new_min_key = std::min({key, min_key});
int new_max_key = std::max({second_key, max_key});
if (length() == 0)
{
bins = std::vector<Float64>(getNewLength(new_min_key, new_max_key), 0.0);
offset = new_min_key;
adjust(new_min_key, new_max_key);
}
else if (new_min_key >= offset && new_max_key < offset + length())
{
min_key = new_min_key;
max_key = new_max_key;
}
else
{
UInt32 new_length = getNewLength(new_min_key, new_max_key);
if (new_length > bins.size())
{
bins.resize(new_length);
bins.resize(bins.capacity());
}
adjust(new_min_key, new_max_key);
}
}
void adjust(int new_min_key, int new_max_key)
{
centerBins(new_min_key, new_max_key);
min_key = new_min_key;
max_key = new_max_key;
}
void shiftBins(int shift)
{
int new_offset = offset - shift;
if (new_offset > offset)
std::rotate(bins.begin(), bins.begin() + (new_offset - offset) % bins.size(), bins.end());
else
std::rotate(bins.begin(), bins.end() - (offset - new_offset) % bins.size(), bins.end());
offset = new_offset;
}
void centerBins(int new_min_key, int new_max_key)
{
int margins = length() - (new_max_key - new_min_key + 1);
int new_offset = new_min_key - margins / 2;
shiftBins(offset - new_offset);
}
size_t estimatedFloatSize(Float64 value) const
{
// Assuming IEEE 754 double-precision binary floating-point format: binary64
return sizeof(value);
}
};
}

View File

@ -0,0 +1,108 @@
#pragma once
#include <base/types.h>
#include <base/sort.h>
#include <AggregateFunctions/DDSketch.h>
#include <IO/ReadBuffer.h>
#include <IO/WriteBuffer.h>
namespace DB
{
/**
* A DDSketch is a fully-mergeable quantile sketch with relative-error guarantees. That is, for any value x,
* the value returned by the sketch is guaranteed to be in the (1 +- epsilon) * x range. The sketch is
* parameterized by a relative accuracy epsilon, which is the maximum relative error of any quantile estimate.
*
* The sketch is implemented as a set of logarithmically-spaced bins. Each bin is a pair of a value and a count.
*
* The sketch is fully mergeable, meaning that the merge of two sketches is equivalent to the sketch of the
* union of the input datasets. The memory size of the sketch depends on the range that is covered by
* the input values: the larger that range, the more bins are needed to keep track of the input values.
* As a rough estimate, if working on durations using DDSketches.unboundedDense(0.02) (relative accuracy of 2%),
* about 2kB (275 bins) are needed to cover values between 1 millisecond and 1 minute, and about 6kB (802 bins)
* to cover values between 1 nanosecond and 1 day.
*
* This implementation maintains binary compatibility with the DDSketch ProtoBuf format
* https://github.com/DataDog/sketches-java/blob/master/src/protobuf/proto/DDSketch.proto,
* which enables sending pre-aggregated sketches to the ClickHouse server and calculating the quantiles
* at query time. See DDSketchEncoding.h for byte-level details.
*
*/
template <typename Value>
class QuantileDDSketch
{
public:
using Weight = UInt64;
QuantileDDSketch() = default;
explicit QuantileDDSketch(Float64 relative_accuracy) : data(relative_accuracy) { }
void add(const Value & x)
{
add(x, 1);
}
void add(const Value & x, Weight w)
{
if (!isNaN(x))
data.add(x, w);
}
void merge(const QuantileDDSketch &other)
{
data.merge(other.data);
}
void serialize(WriteBuffer & buf) const
{
data.serialize(buf);
}
void deserialize(ReadBuffer & buf)
{
data.deserialize(buf);
}
Value get(Float64 level) const
{
return getImpl<Value>(level);
}
void getMany(const Float64 * levels, const size_t * indices, size_t size, Value * result) const
{
getManyImpl(levels, indices, size, result);
}
Float64 getFloat(Float64 level) const
{
return getImpl<Float64>(level);
}
void getManyFloat(const Float64 * levels, const size_t * indices, size_t size, Float64 * result) const
{
getManyImpl(levels, indices, size, result);
}
private:
DDSketchDenseLogarithmic data;
template <typename T>
T getImpl(Float64 level) const
{
return static_cast<T>(data.get(level));
}
template <typename T>
void getManyImpl(const Float64 * levels, const size_t *, size_t num_levels, T * result) const
{
for (size_t i = 0; i < num_levels; ++i)
result[i] = getImpl<T>(levels[i]);
}
};
}
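And a minimal sketch of the typed wrapper itself (illustrative only), roughly mirroring how the aggregate function uses it: construct it with the relative accuracy that is passed as the first parameter of quantileDDSketch, feed values, then read a level.
#include <AggregateFunctions/QuantileDDSketch.h>
using namespace DB;
int main()
{
    QuantileDDSketch<Float64> sketch(0.01); // 1% relative accuracy
    for (int i = 1; i <= 10000; ++i)
        sketch.add(static_cast<Float64>(i));
    Float64 p99 = sketch.getFloat(0.99); // ~9900, within 1% relative error
    (void)p99;
    return 0;
}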

View File

@ -31,6 +31,7 @@ void registerAggregateFunctionsQuantileTimingWeighted(AggregateFunctionFactory &
void registerAggregateFunctionsQuantileTDigest(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileTDigestWeighted(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileBFloat16(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileDDSketch(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileBFloat16Weighted(AggregateFunctionFactory &);
void registerAggregateFunctionsQuantileApprox(AggregateFunctionFactory &);
void registerAggregateFunctionsSequenceMatch(AggregateFunctionFactory &);
@ -127,6 +128,7 @@ void registerAggregateFunctions()
registerAggregateFunctionsQuantileTDigest(factory);
registerAggregateFunctionsQuantileTDigestWeighted(factory);
registerAggregateFunctionsQuantileBFloat16(factory);
registerAggregateFunctionsQuantileDDSketch(factory);
registerAggregateFunctionsQuantileBFloat16Weighted(factory);
registerAggregateFunctionsQuantileApprox(factory);
registerAggregateFunctionsSequenceMatch(factory);

View File

@ -1,4 +1,4 @@
#include "AutoFinalOnQueryPass.h"
#include <Analyzer/Passes/AutoFinalOnQueryPass.h>
#include <Storages/IStorage.h>

View File

@ -8,14 +8,12 @@
#include <Analyzer/ConstantNode.h>
#include <Analyzer/Passes/CNF.h>
#include <Analyzer/Utils.h>
#include <Analyzer/HashUtils.h>
#include <Storages/IStorage.h>
#include <Functions/FunctionFactory.h>
#include "Analyzer/HashUtils.h"
#include "Analyzer/IQueryTreeNode.h"
#include "Interpreters/ComparisonGraph.h"
#include "base/types.h"
#include <Interpreters/ComparisonGraph.h>
namespace DB
{

View File

@ -61,6 +61,8 @@ public:
return;
auto & count_distinct_argument_column = count_distinct_arguments_nodes[0];
if (count_distinct_argument_column->getNodeType() != QueryTreeNodeType::COLUMN)
return;
auto & count_distinct_argument_column_typed = count_distinct_argument_column->as<ColumnNode &>();
/// Build subquery SELECT count_distinct_argument_column FROM table_expression GROUP BY count_distinct_argument_column

View File

@ -49,6 +49,9 @@ public:
if (!first_argument_column_node)
return;
if (first_argument_column_node->getColumnName() == "__grouping_set")
return;
auto column_source = first_argument_column_node->getColumnSource();
auto * table_node = column_source->as<TableNode>();

View File

@ -227,19 +227,20 @@ void resolveGroupingFunctions(QueryTreeNodePtr & query_node, ContextPtr context)
visitor.visit(query_node);
}
class GroupingFunctionsResolveVisitor : public InDepthQueryTreeVisitor<GroupingFunctionsResolveVisitor>
class GroupingFunctionsResolveVisitor : public InDepthQueryTreeVisitorWithContext<GroupingFunctionsResolveVisitor>
{
using Base = InDepthQueryTreeVisitorWithContext<GroupingFunctionsResolveVisitor>;
public:
explicit GroupingFunctionsResolveVisitor(ContextPtr context_)
: context(std::move(context_))
: Base(std::move(context_))
{}
void visitImpl(QueryTreeNodePtr & node)
void enterImpl(QueryTreeNodePtr & node)
{
if (node->getNodeType() != QueryTreeNodeType::QUERY)
return;
resolveGroupingFunctions(node, context);
resolveGroupingFunctions(node, getContext());
}
private:

View File

@ -91,6 +91,9 @@ public:
const auto * column_id = func_node->getArguments().getNodes()[0]->as<ColumnNode>();
if (!column_id) return;
if (column_id->getColumnName() == "__grouping_set")
return;
const auto * column_type = column_id->getColumnType().get();
if (!isDateOrDate32(column_type) && !isDateTime(column_type) && !isDateTime64(column_type)) return;

View File

@ -121,6 +121,7 @@ namespace ErrorCodes
extern const int FUNCTION_CANNOT_HAVE_PARAMETERS;
extern const int SYNTAX_ERROR;
extern const int UNEXPECTED_EXPRESSION;
extern const int INVALID_IDENTIFIER;
}
/** Query analyzer implementation overview. Please check documentation in QueryAnalysisPass.h first.
@ -2423,7 +2424,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveTableIdentifierFromDatabaseCatalog(con
{
size_t parts_size = table_identifier.getPartsSize();
if (parts_size < 1 || parts_size > 2)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
throw Exception(ErrorCodes::INVALID_IDENTIFIER,
"Expected table identifier to contain 1 or 2 parts. Actual '{}'",
table_identifier.getFullName());
@ -2820,7 +2821,7 @@ bool QueryAnalyzer::tryBindIdentifierToTableExpression(const IdentifierLookup &
{
size_t parts_size = identifier_lookup.identifier.getPartsSize();
if (parts_size != 1 && parts_size != 2)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
throw Exception(ErrorCodes::INVALID_IDENTIFIER,
"Expected identifier '{}' to contain 1 or 2 parts to be resolved as table expression. In scope {}",
identifier_lookup.identifier.getFullName(),
table_expression_node->formatASTForErrorMessage());
@ -3048,7 +3049,7 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromTableExpression(const Id
{
size_t parts_size = identifier_lookup.identifier.getPartsSize();
if (parts_size != 1 && parts_size != 2)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
throw Exception(ErrorCodes::INVALID_IDENTIFIER,
"Expected identifier '{}' to contain 1 or 2 parts to be resolved as table expression. In scope {}",
identifier_lookup.identifier.getFullName(),
table_expression_node->formatASTForErrorMessage());
@ -3139,6 +3140,64 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
}
}
auto check_nested_column_not_in_using = [&join_using_column_name_to_column_node, &identifier_lookup](const QueryTreeNodePtr & node)
{
/** tldr: When an identifier is resolved into the function `nested` or `getSubcolumn`, and
* some column in its argument is in the USING list and its type has to be updated, we throw an error to avoid overcomplication.
*
* Identifiers can be resolved into functions in the case of nested columns or subcolumns.
* For example, `t.t.t` can be resolved into the `getSubcolumn(t, 't.t')` function when `t` is a `Tuple`.
* So, `t` in the USING list is resolved from the JOIN itself and has the supertype of the columns from the left and right tables.
* But `t` in the `getSubcolumn` argument is still resolved from the table, and we need to update its type.
*
* Example:
*
* SELECT t.t FROM (
* SELECT ((1, 's'), 's') :: Tuple(t Tuple(t UInt32, s1 String), s1 String) as t
* ) AS a FULL JOIN (
* SELECT ((1, 's'), 's') :: Tuple(t Tuple(t Int32, s2 String), s2 String) as t
* ) AS b USING t;
*
* The result type of `t` is `Tuple(Tuple(Int64, String), String)` (a different type, with no names for subcolumns),
* so it may be tricky to produce a correct type for `t.t`, which is resolved into `getSubcolumn(t, 't')`.
*
* It can be more complicated in the case of Nested subcolumns, e.g. in the query:
* SELECT t FROM ... JOIN ... USING (t.t)
* Here, `t` is resolved into the function `nested(['t', 's'], t.t, t.s)`, so `t.t` should come from the JOIN and `t.s` from the table.
*
* Updating the type accordingly is pretty complicated, so we just forbid such cases.
*
* It may still work for storages that support selecting subcolumns directly, without the `getSubcolumn` function:
* SELECT t, t.t, toTypeName(t), toTypeName(t.t) FROM t1 AS a FULL JOIN t2 AS b USING t.t;
* We support this only as a best effort: `t` will keep the original type from the table, but `t.t` will have the supertype from the JOIN.
* It would probably be good to prohibit such cases as well, but it's not clear how to check this in the general case.
*/
if (node->getNodeType() != QueryTreeNodeType::FUNCTION)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected node type {}, expected function node", node->getNodeType());
const auto & function_argument_nodes = node->as<FunctionNode &>().getArguments().getNodes();
for (const auto & argument_node : function_argument_nodes)
{
if (argument_node->getNodeType() == QueryTreeNodeType::COLUMN)
{
const auto & column_name = argument_node->as<ColumnNode &>().getColumnName();
if (join_using_column_name_to_column_node.contains(column_name))
throw Exception(ErrorCodes::AMBIGUOUS_IDENTIFIER,
"Cannot select subcolumn for identifier '{}' while joining using column '{}'",
identifier_lookup.identifier, column_name);
}
else if (argument_node->getNodeType() == QueryTreeNodeType::CONSTANT)
{
continue;
}
else
{
throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected node type {} for argument node in {}",
argument_node->getNodeType(), node->formatASTForErrorMessage());
}
}
};
std::optional<JoinTableSide> resolved_side;
QueryTreeNodePtr resolved_identifier;
@ -3172,12 +3231,23 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
if (left_resolved_identifier && right_resolved_identifier)
{
auto & left_resolved_column = left_resolved_identifier->as<ColumnNode &>();
auto & right_resolved_column = right_resolved_identifier->as<ColumnNode &>();
auto using_column_node_it = join_using_column_name_to_column_node.end();
if (left_resolved_identifier->getNodeType() == QueryTreeNodeType::COLUMN && right_resolved_identifier->getNodeType() == QueryTreeNodeType::COLUMN)
{
auto & left_resolved_column = left_resolved_identifier->as<ColumnNode &>();
auto & right_resolved_column = right_resolved_identifier->as<ColumnNode &>();
if (left_resolved_column.getColumnName() == right_resolved_column.getColumnName())
using_column_node_it = join_using_column_name_to_column_node.find(left_resolved_column.getColumnName());
}
else
{
if (left_resolved_identifier->getNodeType() != QueryTreeNodeType::COLUMN)
check_nested_column_not_in_using(left_resolved_identifier);
if (right_resolved_identifier->getNodeType() != QueryTreeNodeType::COLUMN)
check_nested_column_not_in_using(right_resolved_identifier);
}
auto using_column_node_it = join_using_column_name_to_column_node.find(left_resolved_column.getColumnName());
if (using_column_node_it != join_using_column_name_to_column_node.end()
&& left_resolved_column.getColumnName() == right_resolved_column.getColumnName())
if (using_column_node_it != join_using_column_name_to_column_node.end())
{
JoinTableSide using_column_inner_column_table_side = isRight(join_kind) ? JoinTableSide::Right : JoinTableSide::Left;
auto & using_column_node = using_column_node_it->second->as<ColumnNode &>();
@ -3252,39 +3322,45 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo
else if (left_resolved_identifier)
{
resolved_side = JoinTableSide::Left;
auto & left_resolved_column = left_resolved_identifier->as<ColumnNode &>();
resolved_identifier = left_resolved_identifier;
auto using_column_node_it = join_using_column_name_to_column_node.find(left_resolved_column.getColumnName());
if (using_column_node_it != join_using_column_name_to_column_node.end() &&
!using_column_node_it->second->getColumnType()->equals(*left_resolved_column.getColumnType()))
if (left_resolved_identifier->getNodeType() != QueryTreeNodeType::COLUMN)
{
auto left_resolved_column_clone = std::static_pointer_cast<ColumnNode>(left_resolved_column.clone());
left_resolved_column_clone->setColumnType(using_column_node_it->second->getColumnType());
resolved_identifier = std::move(left_resolved_column_clone);
check_nested_column_not_in_using(left_resolved_identifier);
}
else
{
resolved_identifier = left_resolved_identifier;
auto & left_resolved_column = left_resolved_identifier->as<ColumnNode &>();
auto using_column_node_it = join_using_column_name_to_column_node.find(left_resolved_column.getColumnName());
if (using_column_node_it != join_using_column_name_to_column_node.end() &&
!using_column_node_it->second->getColumnType()->equals(*left_resolved_column.getColumnType()))
{
auto left_resolved_column_clone = std::static_pointer_cast<ColumnNode>(left_resolved_column.clone());
left_resolved_column_clone->setColumnType(using_column_node_it->second->getColumnType());
resolved_identifier = std::move(left_resolved_column_clone);
}
}
}
else if (right_resolved_identifier)
{
resolved_side = JoinTableSide::Right;
auto & right_resolved_column = right_resolved_identifier->as<ColumnNode &>();
resolved_identifier = right_resolved_identifier;
auto using_column_node_it = join_using_column_name_to_column_node.find(right_resolved_column.getColumnName());
if (using_column_node_it != join_using_column_name_to_column_node.end() &&
!using_column_node_it->second->getColumnType()->equals(*right_resolved_column.getColumnType()))
if (right_resolved_identifier->getNodeType() != QueryTreeNodeType::COLUMN)
{
auto right_resolved_column_clone = std::static_pointer_cast<ColumnNode>(right_resolved_column.clone());
right_resolved_column_clone->setColumnType(using_column_node_it->second->getColumnType());
resolved_identifier = std::move(right_resolved_column_clone);
check_nested_column_not_in_using(right_resolved_identifier);
}
else
{
resolved_identifier = right_resolved_identifier;
auto & right_resolved_column = right_resolved_identifier->as<ColumnNode &>();
auto using_column_node_it = join_using_column_name_to_column_node.find(right_resolved_column.getColumnName());
if (using_column_node_it != join_using_column_name_to_column_node.end() &&
!using_column_node_it->second->getColumnType()->equals(*right_resolved_column.getColumnType()))
{
auto right_resolved_column_clone = std::static_pointer_cast<ColumnNode>(right_resolved_column.clone());
right_resolved_column_clone->setColumnType(using_column_node_it->second->getColumnType());
resolved_identifier = std::move(right_resolved_column_clone);
}
}
}
@ -4768,7 +4844,7 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
{
size_t parts_size = identifier.getPartsSize();
if (parts_size < 1 || parts_size > 2)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
throw Exception(ErrorCodes::INVALID_IDENTIFIER,
"Expected {} function first argument identifier to contain 1 or 2 parts. Actual '{}'. In scope {}",
function_name,
identifier.getFullName(),

View File

@ -52,6 +52,9 @@ public:
return;
auto & column_node = node->as<ColumnNode &>();
if (column_node.getColumnName() == "__grouping_set")
return;
auto column_source_node = column_node.getColumnSource();
auto column_source_node_type = column_source_node->getNodeType();

View File

@ -1,4 +1,4 @@
#include "UniqToCountPass.h"
#include <Analyzer/Passes/UniqToCountPass.h>
#include <AggregateFunctions/AggregateFunctionFactory.h>
#include <AggregateFunctions/IAggregateFunction.h>

View File

@ -190,6 +190,12 @@ void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node)
}
}
void QueryTreePassManager::runOnlyResolve(QueryTreeNodePtr query_tree_node)
{
// Run only QueryAnalysisPass and GroupingFunctionsResolvePass passes.
run(query_tree_node, 2);
}
void QueryTreePassManager::run(QueryTreeNodePtr query_tree_node, size_t up_to_pass_index)
{
size_t passes_size = passes.size();
@ -243,6 +249,8 @@ void QueryTreePassManager::dump(WriteBuffer & buffer, size_t up_to_pass_index)
void addQueryTreePasses(QueryTreePassManager & manager)
{
manager.addPass(std::make_unique<QueryAnalysisPass>());
manager.addPass(std::make_unique<GroupingFunctionsResolvePass>());
manager.addPass(std::make_unique<RemoveUnusedProjectionColumnsPass>());
manager.addPass(std::make_unique<FunctionToSubcolumnsPass>());
@ -278,7 +286,6 @@ void addQueryTreePasses(QueryTreePassManager & manager)
manager.addPass(std::make_unique<LogicalExpressionOptimizerPass>());
manager.addPass(std::make_unique<GroupingFunctionsResolvePass>());
manager.addPass(std::make_unique<AutoFinalOnQueryPass>());
manager.addPass(std::make_unique<CrossToInnerJoinPass>());
manager.addPass(std::make_unique<ShardNumColumnToFunctionPass>());

View File

@ -27,6 +27,9 @@ public:
/// Run query tree passes on query tree
void run(QueryTreeNodePtr query_tree_node);
/// Run only query tree passes responsible to name resolution.
void runOnlyResolve(QueryTreeNodePtr query_tree_node);
/** Run query tree passes on query tree up to up_to_pass_index.
* Throws exception if up_to_pass_index is greater than passes size.
*/

View File

@ -118,18 +118,18 @@ ConnectionPoolWithFailover::Status ConnectionPoolWithFailover::getStatus() const
return result;
}
std::vector<IConnectionPool::Entry> ConnectionPoolWithFailover::getMany(const ConnectionTimeouts & timeouts,
const Settings & settings,
PoolMode pool_mode,
AsyncCallback async_callback,
std::optional<bool> skip_unavailable_endpoints)
std::vector<IConnectionPool::Entry> ConnectionPoolWithFailover::getMany(
const ConnectionTimeouts & timeouts,
const Settings & settings,
PoolMode pool_mode,
AsyncCallback async_callback,
std::optional<bool> skip_unavailable_endpoints,
GetPriorityForLoadBalancing::Func priority_func)
{
TryGetEntryFunc try_get_entry = [&](NestedPool & pool, std::string & fail_message)
{
return tryGetEntry(pool, timeouts, fail_message, settings, nullptr, async_callback);
};
{ return tryGetEntry(pool, timeouts, fail_message, settings, nullptr, async_callback); };
std::vector<TryResult> results = getManyImpl(settings, pool_mode, try_get_entry, skip_unavailable_endpoints);
std::vector<TryResult> results = getManyImpl(settings, pool_mode, try_get_entry, skip_unavailable_endpoints, priority_func);
std::vector<Entry> entries;
entries.reserve(results.size());
@ -153,17 +153,17 @@ std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::g
std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::getManyChecked(
const ConnectionTimeouts & timeouts,
const Settings & settings, PoolMode pool_mode,
const Settings & settings,
PoolMode pool_mode,
const QualifiedTableName & table_to_check,
AsyncCallback async_callback,
std::optional<bool> skip_unavailable_endpoints)
std::optional<bool> skip_unavailable_endpoints,
GetPriorityForLoadBalancing::Func priority_func)
{
TryGetEntryFunc try_get_entry = [&](NestedPool & pool, std::string & fail_message)
{
return tryGetEntry(pool, timeouts, fail_message, settings, &table_to_check, async_callback);
};
{ return tryGetEntry(pool, timeouts, fail_message, settings, &table_to_check, async_callback); };
return getManyImpl(settings, pool_mode, try_get_entry, skip_unavailable_endpoints);
return getManyImpl(settings, pool_mode, try_get_entry, skip_unavailable_endpoints, priority_func);
}
ConnectionPoolWithFailover::Base::GetPriorityFunc ConnectionPoolWithFailover::makeGetPriorityFunc(const Settings & settings)
@ -175,14 +175,16 @@ ConnectionPoolWithFailover::Base::GetPriorityFunc ConnectionPoolWithFailover::ma
}
std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::getManyImpl(
const Settings & settings,
PoolMode pool_mode,
const TryGetEntryFunc & try_get_entry,
std::optional<bool> skip_unavailable_endpoints)
const Settings & settings,
PoolMode pool_mode,
const TryGetEntryFunc & try_get_entry,
std::optional<bool> skip_unavailable_endpoints,
GetPriorityForLoadBalancing::Func priority_func)
{
if (nested_pools.empty())
throw DB::Exception(DB::ErrorCodes::ALL_CONNECTION_TRIES_FAILED,
"Cannot get connection from ConnectionPoolWithFailover cause nested pools are empty");
throw DB::Exception(
DB::ErrorCodes::ALL_CONNECTION_TRIES_FAILED,
"Cannot get connection from ConnectionPoolWithFailover cause nested pools are empty");
if (!skip_unavailable_endpoints.has_value())
skip_unavailable_endpoints = settings.skip_unavailable_shards;
@ -203,14 +205,13 @@ std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::g
else
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Unknown pool allocation mode");
GetPriorityFunc get_priority = makeGetPriorityFunc(settings);
if (!priority_func)
priority_func = makeGetPriorityFunc(settings);
UInt64 max_ignored_errors = settings.distributed_replica_max_ignored_errors.value;
bool fallback_to_stale_replicas = settings.fallback_to_stale_replicas_for_distributed_queries.value;
return Base::getMany(min_entries, max_entries, max_tries,
max_ignored_errors, fallback_to_stale_replicas,
try_get_entry, get_priority);
return Base::getMany(min_entries, max_entries, max_tries, max_ignored_errors, fallback_to_stale_replicas, try_get_entry, priority_func);
}
ConnectionPoolWithFailover::TryResult
@ -251,11 +252,14 @@ ConnectionPoolWithFailover::tryGetEntry(
return result;
}
std::vector<ConnectionPoolWithFailover::Base::ShuffledPool> ConnectionPoolWithFailover::getShuffledPools(const Settings & settings)
std::vector<ConnectionPoolWithFailover::Base::ShuffledPool>
ConnectionPoolWithFailover::getShuffledPools(const Settings & settings, GetPriorityForLoadBalancing::Func priority_func)
{
GetPriorityFunc get_priority = makeGetPriorityFunc(settings);
if (!priority_func)
priority_func = makeGetPriorityFunc(settings);
UInt64 max_ignored_errors = settings.distributed_replica_max_ignored_errors.value;
return Base::getShuffledPools(max_ignored_errors, get_priority);
return Base::getShuffledPools(max_ignored_errors, priority_func);
}
}
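The hunks above thread an optional GetPriorityForLoadBalancing::Func through getMany()/getManyChecked()/getManyImpl()/getShuffledPools(), falling back to makeGetPriorityFunc(settings) whenever the caller passes an empty function. Below is a minimal standalone sketch of that "empty std::function means use the default" pattern; the types and the pickReplica() helper are hypothetical stand-ins, not ClickHouse classes.

#include <cstdio>
#include <functional>

// Stand-in for GetPriorityForLoadBalancing::Func; lower value = more preferred replica.
using PriorityFunc = std::function<int(size_t index)>;

// Hypothetical picker mirroring `if (!priority_func) priority_func = makeGetPriorityFunc(settings);`.
static size_t pickReplica(size_t pool_size, PriorityFunc priority_func = {})
{
    if (!priority_func)                                // an empty std::function is falsy
        priority_func = [](size_t) { return 0; };      // default: all replicas equally preferred
    size_t best = 0;
    for (size_t i = 1; i < pool_size; ++i)
        if (priority_func(i) < priority_func(best))
            best = i;
    return best;
}

int main()
{
    std::printf("%zu\n", pickReplica(4));                                          // 0 (default)
    std::printf("%zu\n", pickReplica(4, [](size_t i) { return i == 2 ? 0 : 1; })); // 2 (custom)
}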

View File

@ -54,10 +54,13 @@ public:
/** Allocates up to the specified number of connections to work.
* Connections provide access to different replicas of one shard.
*/
std::vector<Entry> getMany(const ConnectionTimeouts & timeouts,
const Settings & settings, PoolMode pool_mode,
AsyncCallback async_callback = {},
std::optional<bool> skip_unavailable_endpoints = std::nullopt);
std::vector<Entry> getMany(
const ConnectionTimeouts & timeouts,
const Settings & settings,
PoolMode pool_mode,
AsyncCallback async_callback = {},
std::optional<bool> skip_unavailable_endpoints = std::nullopt,
GetPriorityForLoadBalancing::Func priority_func = {});
/// The same as getMany(), but return std::vector<TryResult>.
std::vector<TryResult> getManyForTableFunction(const ConnectionTimeouts & timeouts,
@ -69,12 +72,13 @@ public:
/// The same as getMany(), but check that replication delay for table_to_check is acceptable.
/// Delay threshold is taken from settings.
std::vector<TryResult> getManyChecked(
const ConnectionTimeouts & timeouts,
const Settings & settings,
PoolMode pool_mode,
const QualifiedTableName & table_to_check,
AsyncCallback async_callback = {},
std::optional<bool> skip_unavailable_endpoints = std::nullopt);
const ConnectionTimeouts & timeouts,
const Settings & settings,
PoolMode pool_mode,
const QualifiedTableName & table_to_check,
AsyncCallback async_callback = {},
std::optional<bool> skip_unavailable_endpoints = std::nullopt,
GetPriorityForLoadBalancing::Func priority_func = {});
struct NestedPoolStatus
{
@ -87,7 +91,7 @@ public:
using Status = std::vector<NestedPoolStatus>;
Status getStatus() const;
std::vector<Base::ShuffledPool> getShuffledPools(const Settings & settings);
std::vector<Base::ShuffledPool> getShuffledPools(const Settings & settings, GetPriorityFunc priority_func = {});
size_t getMaxErrorCup() const { return Base::max_error_cap; }
@ -96,13 +100,16 @@ public:
Base::updateSharedErrorCounts(shuffled_pools);
}
size_t getPoolSize() const { return Base::getPoolSize(); }
private:
/// Get the values of relevant settings and call Base::getMany()
std::vector<TryResult> getManyImpl(
const Settings & settings,
PoolMode pool_mode,
const TryGetEntryFunc & try_get_entry,
std::optional<bool> skip_unavailable_endpoints = std::nullopt);
const Settings & settings,
PoolMode pool_mode,
const TryGetEntryFunc & try_get_entry,
std::optional<bool> skip_unavailable_endpoints = std::nullopt,
GetPriorityForLoadBalancing::Func priority_func = {});
/// Try to get a connection from the pool and check that it is good.
/// If table_to_check is not null and the check is enabled in settings, check that replication delay
@ -115,7 +122,7 @@ private:
const QualifiedTableName * table_to_check = nullptr,
AsyncCallback async_callback = {});
GetPriorityFunc makeGetPriorityFunc(const Settings & settings);
GetPriorityForLoadBalancing::Func makeGetPriorityFunc(const Settings & settings);
GetPriorityForLoadBalancing get_priority_load_balancing;
};

View File

@ -28,16 +28,18 @@ HedgedConnections::HedgedConnections(
const ThrottlerPtr & throttler_,
PoolMode pool_mode,
std::shared_ptr<QualifiedTableName> table_to_check_,
AsyncCallback async_callback)
AsyncCallback async_callback,
GetPriorityForLoadBalancing::Func priority_func)
: hedged_connections_factory(
pool_,
context_->getSettingsRef(),
timeouts_,
context_->getSettingsRef().connections_with_failover_max_tries.value,
context_->getSettingsRef().fallback_to_stale_replicas_for_distributed_queries.value,
context_->getSettingsRef().max_parallel_replicas.value,
context_->getSettingsRef().skip_unavailable_shards.value,
table_to_check_)
pool_,
context_->getSettingsRef(),
timeouts_,
context_->getSettingsRef().connections_with_failover_max_tries.value,
context_->getSettingsRef().fallback_to_stale_replicas_for_distributed_queries.value,
context_->getSettingsRef().max_parallel_replicas.value,
context_->getSettingsRef().skip_unavailable_shards.value,
table_to_check_,
priority_func)
, context(std::move(context_))
, settings(context->getSettingsRef())
, throttler(throttler_)

View File

@ -70,13 +70,15 @@ public:
size_t index;
};
HedgedConnections(const ConnectionPoolWithFailoverPtr & pool_,
ContextPtr context_,
const ConnectionTimeouts & timeouts_,
const ThrottlerPtr & throttler,
PoolMode pool_mode,
std::shared_ptr<QualifiedTableName> table_to_check_ = nullptr,
AsyncCallback async_callback = {});
HedgedConnections(
const ConnectionPoolWithFailoverPtr & pool_,
ContextPtr context_,
const ConnectionTimeouts & timeouts_,
const ThrottlerPtr & throttler,
PoolMode pool_mode,
std::shared_ptr<QualifiedTableName> table_to_check_ = nullptr,
AsyncCallback async_callback = {},
GetPriorityForLoadBalancing::Func priority_func = {});
void sendScalarsData(Scalars & data) override;

View File

@ -29,7 +29,8 @@ HedgedConnectionsFactory::HedgedConnectionsFactory(
bool fallback_to_stale_replicas_,
UInt64 max_parallel_replicas_,
bool skip_unavailable_shards_,
std::shared_ptr<QualifiedTableName> table_to_check_)
std::shared_ptr<QualifiedTableName> table_to_check_,
GetPriorityForLoadBalancing::Func priority_func)
: pool(pool_)
, timeouts(timeouts_)
, table_to_check(table_to_check_)
@ -39,7 +40,7 @@ HedgedConnectionsFactory::HedgedConnectionsFactory(
, max_parallel_replicas(max_parallel_replicas_)
, skip_unavailable_shards(skip_unavailable_shards_)
{
shuffled_pools = pool->getShuffledPools(settings_);
shuffled_pools = pool->getShuffledPools(settings_, priority_func);
for (auto shuffled_pool : shuffled_pools)
replicas.emplace_back(std::make_unique<ConnectionEstablisherAsync>(shuffled_pool.pool, &timeouts, settings_, log, table_to_check.get()));
}
@ -323,8 +324,7 @@ HedgedConnectionsFactory::State HedgedConnectionsFactory::processFinishedConnect
else
{
ShuffledPool & shuffled_pool = shuffled_pools[index];
LOG_WARNING(
log, "Connection failed at try №{}, reason: {}", (shuffled_pool.error_count + 1), fail_message);
LOG_INFO(log, "Connection failed at try №{}, reason: {}", (shuffled_pool.error_count + 1), fail_message);
ProfileEvents::increment(ProfileEvents::DistributedConnectionFailTry);
shuffled_pool.error_count = std::min(pool->getMaxErrorCup(), shuffled_pool.error_count + 1);

View File

@ -53,7 +53,8 @@ public:
bool fallback_to_stale_replicas_,
UInt64 max_parallel_replicas_,
bool skip_unavailable_shards_,
std::shared_ptr<QualifiedTableName> table_to_check_ = nullptr);
std::shared_ptr<QualifiedTableName> table_to_check_ = nullptr,
GetPriorityForLoadBalancing::Func priority_func = {});
/// Create and return active connections according to pool_mode.
std::vector<Connection *> getManyConnections(PoolMode pool_mode, AsyncCallback async_callback = {});

View File

@ -106,7 +106,7 @@ SparseFilterDescription::SparseFilterDescription(const IColumn & column)
throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER,
"Illegal type {} of column for sparse filter. Must be Sparse(UInt8)", column.getName());
filter_indices = &column_sparse->getOffsetsColumn();
filter_indices = &assert_cast<const ColumnUInt64 &>(column_sparse->getOffsetsColumn());
}
}

View File

@ -2,6 +2,7 @@
#include <Columns/IColumn.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnsNumber.h>
namespace DB
@ -22,9 +23,15 @@ struct ConstantFilterDescription
struct IFilterDescription
{
/// has_one can be pre-computed while creating the filter description in some cases
Int64 has_one = -1;
virtual ColumnPtr filter(const IColumn & column, ssize_t result_size_hint) const = 0;
virtual size_t countBytesInFilter() const = 0;
virtual ~IFilterDescription() = default;
bool hasOne() { return has_one >= 0 ? has_one : hasOneImpl();}
protected:
/// Calculate whether the filter contains at least one non-zero value; may update has_one
virtual bool hasOneImpl() = 0;
};
/// Obtain a filter from non constant Column, that may have type: UInt8, Nullable(UInt8).
@ -37,15 +44,19 @@ struct FilterDescription final : public IFilterDescription
ColumnPtr filter(const IColumn & column, ssize_t result_size_hint) const override { return column.filter(*data, result_size_hint); }
size_t countBytesInFilter() const override { return DB::countBytesInFilter(*data); }
protected:
bool hasOneImpl() override { return data ? (has_one = !memoryIsZero(data->data(), 0, data->size())) : false; }
};
struct SparseFilterDescription final : public IFilterDescription
{
const IColumn * filter_indices = nullptr;
const ColumnUInt64 * filter_indices = nullptr;
explicit SparseFilterDescription(const IColumn & column);
ColumnPtr filter(const IColumn & column, ssize_t) const override { return column.index(*filter_indices, 0); }
size_t countBytesInFilter() const override { return filter_indices->size(); }
protected:
bool hasOneImpl() override { return filter_indices && !filter_indices->empty(); }
};
struct ColumnWithTypeAndName;
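The new hasOne()/hasOneImpl() pair caches the answer to "does this filter select at least one row?" in has_one, so repeated calls do not rescan the filter column. A self-contained sketch of the same lazy-memoization pattern, using a plain std::vector in place of the ClickHouse column types:

#include <cstdint>
#include <iostream>
#include <vector>

// Base class caching whether the filter selects anything.
struct IFilter
{
    int64_t has_one = -1;                                   // -1: not computed yet
    virtual ~IFilter() = default;
    bool hasOne() { return has_one >= 0 ? has_one != 0 : hasOneImpl(); }
protected:
    virtual bool hasOneImpl() = 0;                          // may memoize into has_one
};

// Concrete filter over a byte vector (stand-in for ColumnUInt8 data).
struct VectorFilter final : IFilter
{
    std::vector<uint8_t> data;
protected:
    bool hasOneImpl() override
    {
        bool any = false;
        for (uint8_t v : data)
            if (v) { any = true; break; }
        has_one = any;                                      // cache for later calls
        return any;
    }
};

int main()
{
    VectorFilter f;
    f.data = {0, 0, 1};
    std::cout << f.hasOne() << '\n';                        // 1: scanned once, then cached
    std::cout << f.hasOne() << '\n';                        // 1: served from has_one
}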

View File

@ -173,7 +173,7 @@ public:
throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Method hasEqualValues is not supported for ColumnUnique.");
}
/** Given some value (usually, of type @e ColumnType) @p value that is convertible to DB::StringRef, obtains its
/** Given some value (usually, of type @e ColumnType) @p value that is convertible to StringRef, obtains its
* index in the DB::ColumnUnique::reverse_index hashtable.
*
* The reverse index (StringRef => UInt64) is built lazily, so there are two variants:

View File

@ -48,11 +48,11 @@ void prefaultPages([[maybe_unused]] void * buf_, [[maybe_unused]] size_t len_)
return;
auto [buf, len] = adjustToPageSize(buf_, len_, page_size);
if (auto res = ::madvise(buf, len, MADV_POPULATE_WRITE); res < 0)
if (::madvise(buf, len, MADV_POPULATE_WRITE) < 0)
LOG_TRACE(
LogFrequencyLimiter(&Poco::Logger::get("Allocator"), 1),
"Attempt to populate pages failed: {} (EINVAL is expected for kernels < 5.14)",
errnoToString(res));
errnoToString(errno));
#endif
}
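The prefaultPages() fix reflects how madvise() reports errors: the call returns -1 and the error code lives in errno, so logging errnoToString(res) printed the wrong value. A minimal Linux-only sketch of the corrected pattern (the buffer setup here is illustrative, not the Allocator code):

#include <sys/mman.h>
#include <unistd.h>
#include <cerrno>
#include <cstring>
#include <cstdio>

int main()
{
    const size_t len = static_cast<size_t>(sysconf(_SC_PAGESIZE));
    void * buf = ::mmap(nullptr, len, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (buf == MAP_FAILED)
        return 1;
#ifdef MADV_POPULATE_WRITE
    // madvise() returns -1 on failure and stores the error in errno; read errno, not the return value.
    if (::madvise(buf, len, MADV_POPULATE_WRITE) < 0)
        std::fprintf(stderr, "Attempt to populate pages failed: %s (EINVAL is expected for kernels < 5.14)\n",
                     std::strerror(errno));
#endif
    ::munmap(buf, len);
    return 0;
}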

View File

@ -36,13 +36,36 @@ static constexpr size_t PRINT_MESSAGE_EACH_N_SECONDS = 5;
void logAboutProgress(Poco::Logger * log, size_t processed, size_t total, AtomicStopwatch & watch)
{
if (processed % PRINT_MESSAGE_EACH_N_OBJECTS == 0 || watch.compareAndRestart(PRINT_MESSAGE_EACH_N_SECONDS))
if (total && (processed % PRINT_MESSAGE_EACH_N_OBJECTS == 0 || watch.compareAndRestart(PRINT_MESSAGE_EACH_N_SECONDS)))
{
LOG_INFO(log, "Processed: {}%", processed * 100.0 / total);
watch.restart();
}
}
AsyncLoader::Pool::Pool(const AsyncLoader::PoolInitializer & init)
: name(init.name)
, priority(init.priority)
, thread_pool(std::make_unique<ThreadPool>(
init.metric_threads,
init.metric_active_threads,
init.metric_scheduled_threads,
/* max_threads = */ std::numeric_limits<size_t>::max(), // Unlimited number of threads, we do worker management ourselves
/* max_free_threads = */ 0, // We do not require free threads
/* queue_size = */0)) // Unlimited queue to avoid blocking during worker spawning
, max_threads(init.max_threads > 0 ? init.max_threads : getNumberOfPhysicalCPUCores())
{}
AsyncLoader::Pool::Pool(Pool&& o) noexcept
: name(o.name)
, priority(o.priority)
, thread_pool(std::move(o.thread_pool))
, ready_queue(std::move(o.ready_queue))
, max_threads(o.max_threads)
, workers(o.workers)
, suspended_workers(o.suspended_workers.load()) // All these constructors are needed because std::atomic is neither copy-constructible, nor move-constructible. We never move pools after init, so it is safe.
{}
void cancelOnDependencyFailure(const LoadJobPtr & self, const LoadJobPtr & dependency, std::exception_ptr & cancel)
{
cancel = std::make_exception_ptr(Exception(ErrorCodes::ASYNC_LOAD_CANCELED,
@ -84,39 +107,38 @@ size_t LoadJob::waitersCount() const
return waiters;
}
size_t LoadJob::ok()
void LoadJob::ok()
{
std::unique_lock lock{mutex};
load_status = LoadStatus::OK;
return finish();
finish();
}
size_t LoadJob::failed(const std::exception_ptr & ptr)
void LoadJob::failed(const std::exception_ptr & ptr)
{
std::unique_lock lock{mutex};
load_status = LoadStatus::FAILED;
load_exception = ptr;
return finish();
finish();
}
size_t LoadJob::canceled(const std::exception_ptr & ptr)
void LoadJob::canceled(const std::exception_ptr & ptr)
{
std::unique_lock lock{mutex};
load_status = LoadStatus::CANCELED;
load_exception = ptr;
return finish();
finish();
}
size_t LoadJob::finish()
void LoadJob::finish()
{
// To ensure functions are destructed before `AsyncLoader::wait()` return
// To ensure functions are destructed before `AsyncLoader::wait()` returns
func = {};
dependency_failure = {};
finish_time = std::chrono::system_clock::now();
if (waiters > 0)
finished.notify_all();
return std::exchange(suspended_waiters, 0);
}
void LoadJob::scheduled(UInt64 job_id_)
@ -134,7 +156,7 @@ void LoadJob::enqueued()
void LoadJob::execute(AsyncLoader & loader, size_t pool, const LoadJobPtr & self)
{
execution_pool_id = pool;
execution_pool_id.store(pool);
start_time = std::chrono::system_clock::now();
func(loader, self);
}
@ -187,19 +209,7 @@ AsyncLoader::AsyncLoader(std::vector<PoolInitializer> pool_initializers, bool lo
{
pools.reserve(pool_initializers.size());
for (auto && init : pool_initializers)
pools.push_back({
.name = init.name,
.priority = init.priority,
.thread_pool = std::make_unique<ThreadPool>(
init.metric_threads,
init.metric_active_threads,
init.metric_scheduled_threads,
/* max_threads = */ std::numeric_limits<size_t>::max(), // Unlimited number of threads, we do worker management ourselves
/* max_free_threads = */ 0, // We do not require free threads
/* queue_size = */0), // Unlimited queue to avoid blocking during worker spawning
.ready_queue = {},
.max_threads = init.max_threads > 0 ? init.max_threads : getNumberOfPhysicalCPUCores()
});
pools.push_back(Pool(init));
}
AsyncLoader::~AsyncLoader()
@ -498,6 +508,11 @@ std::vector<AsyncLoader::JobState> AsyncLoader::getJobStates() const
return result;
}
size_t AsyncLoader::suspendedWorkersCount(size_t pool_id)
{
return pools[pool_id].suspended_workers.load();
}
void AsyncLoader::checkCycle(const LoadJobSet & jobs, std::unique_lock<std::mutex> & lock)
{
LoadJobSet left = jobs;
@ -538,20 +553,12 @@ void AsyncLoader::finish(const LoadJobPtr & job, LoadStatus status, std::excepti
chassert(scheduled_jobs.contains(job)); // Job was pending
// Notify waiters
size_t resumed_workers = 0; // Number of workers resumed in the execution pool of the job
if (status == LoadStatus::OK)
resumed_workers = job->ok();
job->ok();
else if (status == LoadStatus::FAILED)
resumed_workers = job->failed(reason);
job->failed(reason);
else if (status == LoadStatus::CANCELED)
resumed_workers = job->canceled(reason);
// Adjust suspended workers count
if (resumed_workers)
{
Pool & pool = pools[job->executionPool()];
pool.suspended_workers -= resumed_workers;
}
job->canceled(reason);
Info & info = scheduled_jobs[job];
if (info.isReady())
@ -637,9 +644,6 @@ void AsyncLoader::prioritize(const LoadJobPtr & job, size_t new_pool_id, std::un
}
job->pool_id.store(new_pool_id);
// TODO(serxa): we should adjust suspended_workers and suspended_waiters here.
// Otherwise suspended_workers we be left inconsistent. Fix it and add a test.
// Scenario: schedule a job A, wait for it from a job B in the same pool, prioritize A
// Recurse into dependencies
for (const auto & dep : job->dependencies)
@ -697,6 +701,8 @@ void AsyncLoader::wait(std::unique_lock<std::mutex> & job_lock, const LoadJobPtr
if (job->job_id == 0)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Load job '{}' waits for not scheduled load job '{}'", current_load_job->name, job->name);
scope_guard suspended_lock;
// Deadlock detection and resolution
if (current_load_job && job->load_status == LoadStatus::PENDING)
{
@ -719,11 +725,30 @@ void AsyncLoader::wait(std::unique_lock<std::mutex> & job_lock, const LoadJobPtr
if (worker_pool == job->pool_id)
{
job_lock.unlock(); // Avoid reverse locking order
workerIsSuspendedByWait(worker_pool, job);
std::unique_lock lock{mutex};
job_lock.lock();
// Rechecks are required because we have reacquired mutexes
if (job->load_status != LoadStatus::PENDING)
return; // Job is already done, no wait required
if (worker_pool == job->pool_id)
{
// To resolve "blocked pool" deadlocks we spawn a new worker for every suspended worker, if required
// This can lead to a visible excess of `max_threads` specified for a pool,
// but actual number of NOT suspended workers may exceed `max_threads` ONLY in intermittent state.
Pool & pool = pools[worker_pool];
pool.suspended_workers.fetch_add(1);
suspended_lock = [&pool] { chassert(pool.suspended_workers.load()); pool.suspended_workers.fetch_sub(1); };
if (canSpawnWorker(pool, lock))
spawn(pool, lock);
}
}
}
if (job->load_status != LoadStatus::PENDING) // Shortcut just to avoid incrementing ProfileEvents
return;
Stopwatch watch;
job->waiters++;
job->finished.wait(job_lock, [&] { return job->load_status != LoadStatus::PENDING; });
@ -731,34 +756,12 @@ void AsyncLoader::wait(std::unique_lock<std::mutex> & job_lock, const LoadJobPtr
ProfileEvents::increment(ProfileEvents::AsyncLoaderWaitMicroseconds, watch.elapsedMicroseconds());
}
void AsyncLoader::workerIsSuspendedByWait(size_t pool_id, const LoadJobPtr & job)
{
std::unique_lock lock{mutex};
std::unique_lock job_lock{job->mutex};
if (job->load_status != LoadStatus::PENDING)
return; // Job is already done, worker can continue execution
// To resolve "blocked pool" deadlocks we spawn a new worker for every suspended worker, if required
// This can lead to a visible excess of `max_threads` specified for a pool,
// but actual number of NOT suspended workers may exceed `max_threads` ONLY in intermittent state.
Pool & pool = pools[pool_id];
pool.suspended_workers++;
job->suspended_waiters++;
if (canSpawnWorker(pool, lock))
spawn(pool, lock);
// TODO(serxa): it is a good idea to propagate `job` and all its dependencies in `pool.ready_queue` by introducing
// key {suspended_waiters, ready_seqno} instead of plain `ready_seqno`, to force newly spawn workers to work on jobs
// that are being waited. But it doesn't affect correctness. So let's not complicate it for time being.
}
bool AsyncLoader::canSpawnWorker(Pool & pool, std::unique_lock<std::mutex> &)
{
// TODO(serxa): optimization: we should not spawn new worker on the first enqueue during `finish()` because current worker will take this job.
return is_running
&& !pool.ready_queue.empty()
&& pool.workers < pool.max_threads + pool.suspended_workers
&& pool.workers < pool.max_threads + pool.suspended_workers.load()
&& (!current_priority || *current_priority >= pool.priority);
}
@ -766,7 +769,7 @@ bool AsyncLoader::canWorkerLive(Pool & pool, std::unique_lock<std::mutex> &)
{
return is_running
&& !pool.ready_queue.empty()
&& pool.workers <= pool.max_threads + pool.suspended_workers
&& pool.workers <= pool.max_threads + pool.suspended_workers.load()
&& (!current_priority || *current_priority >= pool.priority);
}
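In the reworked wait() path, a worker that blocks on a job from its own pool bumps pool.suspended_workers and stores a guard in suspended_lock, so the counter is decremented again no matter how the wait exits, while an extra worker may be spawned to keep the pool from starving. A small hedged sketch of that guard-based accounting (ScopeGuard and waitInsideSamePool() are illustrative stand-ins, not the AsyncLoader API):

#include <atomic>
#include <cstdio>
#include <functional>

// Tiny stand-in for scope_guard: runs a callback when it leaves scope.
struct ScopeGuard
{
    std::function<void()> fn;
    ~ScopeGuard() { if (fn) fn(); }
};

std::atomic<size_t> suspended_workers{0};

// Hypothetical worker-side wait: the suspended-worker counter is raised for the duration of
// the wait and is guaranteed to drop back even if the wait throws.
void waitInsideSamePool()
{
    suspended_workers.fetch_add(1);
    ScopeGuard suspended_lock{[] { suspended_workers.fetch_sub(1); }};

    // ... block on the dependency here; meanwhile an extra worker may be spawned
    //     to resolve the "blocked pool" deadlock ...
}

int main()
{
    waitInsideSamePool();
    std::printf("%zu\n", suspended_workers.load());         // 0: the guard restored the counter
}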

View File

@ -98,10 +98,10 @@ public:
private:
friend class AsyncLoader;
[[nodiscard]] size_t ok();
[[nodiscard]] size_t failed(const std::exception_ptr & ptr);
[[nodiscard]] size_t canceled(const std::exception_ptr & ptr);
[[nodiscard]] size_t finish();
void ok();
void failed(const std::exception_ptr & ptr);
void canceled(const std::exception_ptr & ptr);
void finish();
void scheduled(UInt64 job_id_);
void enqueued();
@ -122,8 +122,7 @@ private:
mutable std::mutex mutex;
mutable std::condition_variable finished;
mutable size_t waiters = 0; // All waiters, including suspended
mutable size_t suspended_waiters = 0;
mutable size_t waiters = 0;
LoadStatus load_status{LoadStatus::PENDING};
std::exception_ptr load_exception;
@ -282,6 +281,20 @@ inline LoadTaskPtr makeLoadTask(AsyncLoader & loader, LoadJobSet && jobs, LoadJo
// 8) The job is destructed.
class AsyncLoader : private boost::noncopyable
{
public:
using Metric = CurrentMetrics::Metric;
// Helper struct for AsyncLoader construction
struct PoolInitializer
{
String name;
Metric metric_threads;
Metric metric_active_threads;
Metric metric_scheduled_threads;
size_t max_threads; // Zero means use all CPU cores
Priority priority;
};
private:
// Thread pool for job execution.
// Pools control the following aspects of job execution:
@ -296,8 +309,10 @@ private:
std::map<UInt64, LoadJobPtr> ready_queue; // FIFO queue of jobs to be executed in this pool. Map is used for faster erasing. Key is `ready_seqno`
size_t max_threads; // Max number of workers to be spawn
size_t workers = 0; // Number of currently executing workers
size_t suspended_workers = 0; // Number of workers that are blocked by `wait()` call on a job executing in the same pool (for deadlock resolution)
std::atomic<size_t> suspended_workers{0}; // Number of workers that are blocked by `wait()` call on a job executing in the same pool (for deadlock resolution)
explicit Pool(const PoolInitializer & init);
Pool(Pool&& o) noexcept;
bool isActive() const { return workers > 0 || !ready_queue.empty(); }
};
@ -315,19 +330,6 @@ private:
};
public:
using Metric = CurrentMetrics::Metric;
// Helper struct for AsyncLoader construction
struct PoolInitializer
{
String name;
Metric metric_threads;
Metric metric_active_threads;
Metric metric_scheduled_threads;
size_t max_threads; // Zero means use all CPU cores
Priority priority;
};
AsyncLoader(std::vector<PoolInitializer> pool_initializers, bool log_failures_, bool log_progress_);
// Stops AsyncLoader before destruction
@ -360,12 +362,16 @@ public:
void schedule(const LoadTaskPtrs & tasks);
// Increase priority of a job and all its dependencies recursively.
// Jobs from higher (than `new_pool`) priority pools are not changed.
// Jobs from pools with priority higher than `new_pool` are not changed.
void prioritize(const LoadJobPtr & job, size_t new_pool);
// Sync wait for a pending job to be finished: OK, FAILED or CANCELED status.
// Throws if job is FAILED or CANCELED unless `no_throw` is set. Returns or throws immediately if called on non-pending job.
// If job was not scheduled, it will be implicitly scheduled before the wait (deadlock auto-resolution).
// Waiting for a not scheduled job is considered to be LOGICAL_ERROR, use waitLoad() helper instead to make sure the job is scheduled.
// There are more rules if `wait()` is called from another job:
// 1) waiting on a dependent job is considered to be LOGICAL_ERROR;
// 2) waiting on a job in the same pool might lead to more workers spawned in that pool to resolve "blocked pool" deadlock;
// 3) waiting on a job with lower priority leads to priority inheritance to avoid priority inversion.
void wait(const LoadJobPtr & job, bool no_throw = false);
// Remove finished jobs, cancel scheduled jobs, wait for executing jobs to finish and remove them.
@ -393,9 +399,7 @@ public:
// For introspection and debug only, see `system.asynchronous_loader` table.
std::vector<JobState> getJobStates() const;
// For deadlock resolution. Should not be used directly.
void workerIsSuspendedByWait(size_t pool_id, const LoadJobPtr & job);
size_t suspendedWorkersCount(size_t pool_id);
private:
void checkCycle(const LoadJobSet & jobs, std::unique_lock<std::mutex> & lock);
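Pool now carries an std::atomic<size_t> suspended_workers, which is why the explicit Pool(const PoolInitializer &) and Pool(Pool &&) constructors were added: std::atomic is neither copyable nor movable, so the move constructor has to load the value from the source and re-initialize it in the destination. A minimal sketch of that pattern, under the same assumption that objects are only moved during single-threaded initialization:

#include <atomic>
#include <string>
#include <utility>

struct Pool
{
    std::string name;
    std::atomic<size_t> suspended_workers{0};

    Pool() = default;
    // Hand-written move: transfer the atomic's value explicitly. Safe only while nothing
    // else touches `o` concurrently (pools are moved once, at construction time).
    Pool(Pool && o) noexcept
        : name(std::move(o.name))
        , suspended_workers(o.suspended_workers.load())
    {}
};

int main()
{
    Pool a;
    a.suspended_workers.store(3);
    Pool b(std::move(a));
    return static_cast<int>(b.suspended_workers.load());    // 3
}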

View File

@ -5,15 +5,15 @@
#include <Common/LRUCachePolicy.h>
#include <Common/SLRUCachePolicy.h>
#include <base/UUID.h>
#include <base/defines.h>
#include <atomic>
#include <cassert>
#include <chrono>
#include <memory>
#include <mutex>
#include <optional>
#include <unordered_map>
#include <base/defines.h>
namespace DB
{
@ -227,10 +227,10 @@ public:
cache_policy->setMaxSizeInBytes(max_size_in_bytes);
}
void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries)
void setQuotaForUser(const UUID & user_id, size_t max_size_in_bytes, size_t max_entries)
{
std::lock_guard lock(mutex);
cache_policy->setQuotaForUser(user_name, max_size_in_bytes, max_entries);
cache_policy->setQuotaForUser(user_id, max_size_in_bytes, max_entries);
}
virtual ~CacheBase() = default;

View File

@ -24,7 +24,7 @@ namespace CurrentMetrics
{
/// Metric identifier (index in array).
using Metric = StrongTypedef<size_t, struct MetricTag>;
using Value = DB::Int64;
using Value = Int64;
/// Get name of metric by identifier. Returns statically allocated string.
const char * getName(Metric event);

View File

@ -69,14 +69,14 @@ void handle_error_code([[maybe_unused]] const std::string & msg, int code, bool
Exception::MessageMasked::MessageMasked(const std::string & msg_)
: msg(msg_)
{
if (auto * masker = SensitiveDataMasker::getInstance())
if (auto masker = SensitiveDataMasker::getInstance())
masker->wipeSensitiveData(msg);
}
Exception::MessageMasked::MessageMasked(std::string && msg_)
: msg(std::move(msg_))
{
if (auto * masker = SensitiveDataMasker::getInstance())
if (auto masker = SensitiveDataMasker::getInstance())
masker->wipeSensitiveData(msg);
}

View File

@ -3,6 +3,7 @@
#include <Storages/CheckResults.h>
#include <map>
#include <base/types.h>
#include <memory>
#include <mutex>
namespace Poco { class Logger; }

View File

@ -9,7 +9,8 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
std::function<Priority(size_t index)> GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const
GetPriorityForLoadBalancing::Func
GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const
{
std::function<Priority(size_t index)> get_priority;
switch (load_balance)
@ -33,19 +34,26 @@ std::function<Priority(size_t index)> GetPriorityForLoadBalancing::getPriorityFu
get_priority = [offset](size_t i) { return i != offset ? Priority{1} : Priority{0}; };
break;
case LoadBalancing::ROUND_ROBIN:
if (last_used >= pool_size)
last_used = 0;
auto local_last_used = last_used % pool_size;
++last_used;
/* Consider pool_size equals to 5
* last_used = 1 -> get_priority: 0 1 2 3 4
* last_used = 2 -> get_priority: 4 0 1 2 3
* last_used = 3 -> get_priority: 4 3 0 1 2
* ...
* */
get_priority = [this, pool_size](size_t i)
// Example: pool_size = 5
// | local_last_used | i=0 | i=1 | i=2 | i=3 | i=4 |
// | 0 | 4 | 0 | 1 | 2 | 3 |
// | 1 | 3 | 4 | 0 | 1 | 2 |
// | 2 | 2 | 3 | 4 | 0 | 1 |
// | 3 | 1 | 2 | 3 | 4 | 0 |
// | 4 | 0 | 1 | 2 | 3 | 4 |
get_priority = [pool_size, local_last_used](size_t i)
{
++i; // To make `i` indexing start with 1 instead of 0 as `last_used` does
return Priority{static_cast<Int64>(i < last_used ? pool_size - i : i - last_used)};
size_t priority = pool_size - 1;
if (i < local_last_used)
priority = pool_size - 1 - (local_last_used - i);
if (i > local_last_used)
priority = i - local_last_used - 1;
return Priority{static_cast<Int64>(priority)};
};
break;
}

View File

@ -8,7 +8,12 @@ namespace DB
class GetPriorityForLoadBalancing
{
public:
explicit GetPriorityForLoadBalancing(LoadBalancing load_balancing_) : load_balancing(load_balancing_) {}
using Func = std::function<Priority(size_t index)>;
explicit GetPriorityForLoadBalancing(LoadBalancing load_balancing_, size_t last_used_ = 0)
: load_balancing(load_balancing_), last_used(last_used_)
{
}
GetPriorityForLoadBalancing() = default;
bool operator == (const GetPriorityForLoadBalancing & other) const
@ -23,7 +28,7 @@ public:
return !(*this == other);
}
std::function<Priority(size_t index)> getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const;
Func getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const;
std::vector<size_t> hostname_prefix_distance; /// Prefix distances from name of this host to the names of hosts of pools.
std::vector<size_t> hostname_levenshtein_distance; /// Levenshtein Distances from name of this host to the names of hosts of pools.

View File

@ -24,7 +24,7 @@
/** Taken from MurmurHash. This is Murmur finalizer.
* Faster than intHash32 when inserting into the hash table UInt64 -> UInt64, where the key is the visitor ID.
*/
inline DB::UInt64 intHash64(DB::UInt64 x)
inline UInt64 intHash64(UInt64 x)
{
x ^= x >> 33;
x *= 0xff51afd7ed558ccdULL;
@ -60,7 +60,7 @@ inline DB::UInt64 intHash64(DB::UInt64 x)
/// NOTE: Intel intrinsic can be confusing.
/// - https://code.google.com/archive/p/sse-intrinsics/wikis/PmovIntrinsicBug.wiki
/// - https://stackoverflow.com/questions/15752770/mm-crc32-u64-poorly-defined
inline DB::UInt64 intHashCRC32(DB::UInt64 x)
inline UInt64 intHashCRC32(UInt64 x)
{
#ifdef __SSE4_2__
return _mm_crc32_u64(-1ULL, x);
@ -76,7 +76,7 @@ inline DB::UInt64 intHashCRC32(DB::UInt64 x)
return intHash64(x);
#endif
}
inline DB::UInt64 intHashCRC32(DB::UInt64 x, DB::UInt64 updated_value)
inline UInt64 intHashCRC32(UInt64 x, UInt64 updated_value)
{
#ifdef __SSE4_2__
return _mm_crc32_u64(updated_value, x);
@ -93,14 +93,14 @@ inline DB::UInt64 intHashCRC32(DB::UInt64 x, DB::UInt64 updated_value)
}
template <typename T>
requires std::has_unique_object_representations_v<T> && (sizeof(T) % sizeof(DB::UInt64) == 0)
inline DB::UInt64 intHashCRC32(const T & x, DB::UInt64 updated_value)
requires std::has_unique_object_representations_v<T> && (sizeof(T) % sizeof(UInt64) == 0)
inline UInt64 intHashCRC32(const T & x, UInt64 updated_value)
{
const auto * begin = reinterpret_cast<const char *>(&x);
for (size_t i = 0; i < sizeof(T); i += sizeof(UInt64))
{
updated_value = intHashCRC32(unalignedLoad<DB::UInt64>(begin), updated_value);
begin += sizeof(DB::UInt64);
updated_value = intHashCRC32(unalignedLoad<UInt64>(begin), updated_value);
begin += sizeof(UInt64);
}
return updated_value;
@ -108,7 +108,7 @@ inline DB::UInt64 intHashCRC32(const T & x, DB::UInt64 updated_value)
template <std::floating_point T>
requires(sizeof(T) <= sizeof(UInt64))
inline DB::UInt64 intHashCRC32(T x, DB::UInt64 updated_value)
inline UInt64 intHashCRC32(T x, UInt64 updated_value)
{
static_assert(std::numeric_limits<T>::is_iec559);
@ -126,7 +126,7 @@ inline DB::UInt64 intHashCRC32(T x, DB::UInt64 updated_value)
return intHashCRC32(repr, updated_value);
}
inline UInt32 updateWeakHash32(const DB::UInt8 * pos, size_t size, DB::UInt32 updated_value)
inline UInt32 updateWeakHash32(const UInt8 * pos, size_t size, UInt32 updated_value)
{
if (size < 8)
{
@ -206,12 +206,12 @@ inline UInt32 updateWeakHash32(const DB::UInt8 * pos, size_t size, DB::UInt32 up
{
/// If string size is not divisible by 8.
/// Lets' assume the string was 'abcdefghXYZ', so it's tail is 'XYZ'.
DB::UInt8 tail_size = end - pos;
UInt8 tail_size = end - pos;
/// Load tailing 8 bytes. Word is 'defghXYZ'.
auto word = unalignedLoadLittleEndian<UInt64>(end - 8);
/// Prepare mask which will set other 5 bytes to 0. It is 0xFFFFFFFFFFFFFFFF << 5 = 0xFFFFFF0000000000.
/// word & mask = '\0\0\0\0\0XYZ' (bytes are reversed because of little ending)
word &= (~UInt64(0)) << DB::UInt8(8 * (8 - tail_size));
word &= (~UInt64(0)) << UInt8(8 * (8 - tail_size));
/// Use least byte to store tail length.
word |= tail_size;
/// Now word is '\3\0\0\0\0XYZ'
@ -225,11 +225,11 @@ template <typename T>
requires (sizeof(T) <= sizeof(UInt64))
inline size_t DefaultHash64(T key)
{
DB::UInt64 out {0};
UInt64 out {0};
if constexpr (std::endian::native == std::endian::little)
std::memcpy(&out, &key, sizeof(T));
else
std::memcpy(reinterpret_cast<char*>(&out) + sizeof(DB::UInt64) - sizeof(T), &key, sizeof(T));
std::memcpy(reinterpret_cast<char*>(&out) + sizeof(UInt64) - sizeof(T), &key, sizeof(T));
return intHash64(out);
}
@ -284,9 +284,9 @@ template <typename T> struct HashCRC32;
template <typename T>
requires (sizeof(T) <= sizeof(UInt64))
inline size_t hashCRC32(T key, DB::UInt64 updated_value = -1)
inline size_t hashCRC32(T key, UInt64 updated_value = -1)
{
DB::UInt64 out {0};
UInt64 out {0};
if constexpr (std::endian::native == std::endian::little)
std::memcpy(&out, &key, sizeof(T));
else
@ -296,7 +296,7 @@ inline size_t hashCRC32(T key, DB::UInt64 updated_value = -1)
template <typename T>
requires (sizeof(T) > sizeof(UInt64))
inline size_t hashCRC32(T key, DB::UInt64 updated_value = -1)
inline size_t hashCRC32(T key, UInt64 updated_value = -1)
{
return intHashCRC32(key, updated_value);
}
@ -310,20 +310,20 @@ template <> struct HashCRC32<T>\
}\
};
DEFINE_HASH(DB::UInt8)
DEFINE_HASH(DB::UInt16)
DEFINE_HASH(DB::UInt32)
DEFINE_HASH(DB::UInt64)
DEFINE_HASH(DB::UInt128)
DEFINE_HASH(DB::UInt256)
DEFINE_HASH(DB::Int8)
DEFINE_HASH(DB::Int16)
DEFINE_HASH(DB::Int32)
DEFINE_HASH(DB::Int64)
DEFINE_HASH(DB::Int128)
DEFINE_HASH(DB::Int256)
DEFINE_HASH(DB::Float32)
DEFINE_HASH(DB::Float64)
DEFINE_HASH(UInt8)
DEFINE_HASH(UInt16)
DEFINE_HASH(UInt32)
DEFINE_HASH(UInt64)
DEFINE_HASH(UInt128)
DEFINE_HASH(UInt256)
DEFINE_HASH(Int8)
DEFINE_HASH(Int16)
DEFINE_HASH(Int32)
DEFINE_HASH(Int64)
DEFINE_HASH(Int128)
DEFINE_HASH(Int256)
DEFINE_HASH(Float32)
DEFINE_HASH(Float64)
DEFINE_HASH(DB::UUID)
DEFINE_HASH(DB::IPv4)
DEFINE_HASH(DB::IPv6)
@ -464,10 +464,10 @@ struct UInt256HashCRC32 : public UInt256Hash {};
#endif
template <>
struct DefaultHash<DB::UInt128> : public UInt128Hash {};
struct DefaultHash<UInt128> : public UInt128Hash {};
template <>
struct DefaultHash<DB::UInt256> : public UInt256Hash {};
struct DefaultHash<UInt256> : public UInt256Hash {};
template <>
struct DefaultHash<DB::UUID> : public UUIDHash {};
@ -501,8 +501,8 @@ struct TrivialHash
* NOTE As mentioned, this function is slower than intHash64.
* But occasionally, it is faster, when written in a loop and loop is vectorized.
*/
template <DB::UInt64 salt>
inline DB::UInt32 intHash32(DB::UInt64 key)
template <UInt64 salt>
inline UInt32 intHash32(UInt64 key)
{
key ^= salt;
@ -518,7 +518,7 @@ inline DB::UInt32 intHash32(DB::UInt64 key)
/// For containers.
template <typename T, DB::UInt64 salt = 0>
template <typename T, UInt64 salt = 0>
struct IntHash32
{
size_t operator() (const T & key) const
@ -533,11 +533,11 @@ struct IntHash32
}
else if constexpr (sizeof(T) <= sizeof(UInt64))
{
DB::UInt64 out {0};
UInt64 out {0};
if constexpr (std::endian::native == std::endian::little)
std::memcpy(&out, &key, sizeof(T));
else
std::memcpy(reinterpret_cast<char*>(&out) + sizeof(DB::UInt64) - sizeof(T), &key, sizeof(T));
std::memcpy(reinterpret_cast<char*>(&out) + sizeof(UInt64) - sizeof(T), &key, sizeof(T));
return intHash32<salt>(out);
}
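The hunks above only drop the redundant DB:: qualification; the hashing scheme itself is unchanged: keys of up to 8 bytes are widened into a UInt64 with memcpy and passed through the MurmurHash3 64-bit finalizer. A small standalone demo of that scheme (little-endian placement only; the header also handles the big-endian case):

#include <cstdint>
#include <cstdio>
#include <cstring>

// MurmurHash3 64-bit finalizer, as referenced by intHash64() above.
inline uint64_t intHash64(uint64_t x)
{
    x ^= x >> 33;
    x *= 0xff51afd7ed558ccdULL;
    x ^= x >> 33;
    x *= 0xc4ceb9fe1a85ec53ULL;
    x ^= x >> 33;
    return x;
}

// Sketch of DefaultHash64 for little-endian hosts: widen the key to 8 bytes, then finalize.
template <typename T>
uint64_t defaultHash64(T key)
{
    static_assert(sizeof(T) <= sizeof(uint64_t));
    uint64_t out = 0;
    std::memcpy(&out, &key, sizeof(T));
    return intHash64(out);
}

int main()
{
    std::printf("%llu\n", static_cast<unsigned long long>(defaultHash64<uint32_t>(42)));
    std::printf("%llu\n", static_cast<unsigned long long>(defaultHash64<int16_t>(-1)));
}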

View File

@ -9,7 +9,7 @@
using StringKey8 = UInt64;
using StringKey16 = DB::UInt128;
using StringKey16 = UInt128;
struct StringKey24
{
UInt64 a;

View File

@ -2,10 +2,11 @@
#include <Common/Exception.h>
#include <Common/ICachePolicyUserQuota.h>
#include <base/UUID.h>
#include <functional>
#include <memory>
#include <mutex>
#include <optional>
namespace DB
{
@ -43,7 +44,7 @@ public:
virtual void setMaxCount(size_t /*max_count*/) = 0;
virtual void setMaxSizeInBytes(size_t /*max_size_in_bytes*/) = 0;
virtual void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries) { user_quotas->setQuotaForUser(user_name, max_size_in_bytes, max_entries); }
virtual void setQuotaForUser(const UUID & user_id, size_t max_size_in_bytes, size_t max_entries) { user_quotas->setQuotaForUser(user_id, max_size_in_bytes, max_entries); }
/// HashFunction usually hashes the entire key and the found key will be equal the provided key. In such cases, use get(). It is also
/// possible to store other, non-hashed data in the key. In that case, the found key is potentially different from the provided key.

View File

@ -1,5 +1,6 @@
#pragma once
#include <base/UUID.h>
#include <base/types.h>
namespace DB
@ -15,14 +16,17 @@ class ICachePolicyUserQuota
{
public:
/// Register or update the user's quota for the given resource.
virtual void setQuotaForUser(const String & user_name, size_t max_size_in_bytes, size_t max_entries) = 0;
virtual void setQuotaForUser(const UUID & user_id, size_t max_size_in_bytes, size_t max_entries) = 0;
/// Update the actual resource usage for the given user.
virtual void increaseActual(const String & user_name, size_t entry_size_in_bytes) = 0;
virtual void decreaseActual(const String & user_name, size_t entry_size_in_bytes) = 0;
virtual void increaseActual(const UUID & user_id, size_t entry_size_in_bytes) = 0;
virtual void decreaseActual(const UUID & user_id, size_t entry_size_in_bytes) = 0;
/// Is the user allowed to write a new entry into the cache?
virtual bool approveWrite(const String & user_name, size_t entry_size_in_bytes) const = 0;
virtual bool approveWrite(const UUID & user_id, size_t entry_size_in_bytes) const = 0;
/// Clears the policy contents
virtual void clear() = 0;
virtual ~ICachePolicyUserQuota() = default;
};
@ -33,10 +37,11 @@ using CachePolicyUserQuotaPtr = std::unique_ptr<ICachePolicyUserQuota>;
class NoCachePolicyUserQuota : public ICachePolicyUserQuota
{
public:
void setQuotaForUser(const String & /*user_name*/, size_t /*max_size_in_bytes*/, size_t /*max_entries*/) override {}
void increaseActual(const String & /*user_name*/, size_t /*entry_size_in_bytes*/) override {}
void decreaseActual(const String & /*user_name*/, size_t /*entry_size_in_bytes*/) override {}
bool approveWrite(const String & /*user_name*/, size_t /*entry_size_in_bytes*/) const override { return true; }
void setQuotaForUser(const UUID & /*user_id*/, size_t /*max_size_in_bytes*/, size_t /*max_entries*/) override {}
void increaseActual(const UUID & /*user_id*/, size_t /*entry_size_in_bytes*/) override {}
void decreaseActual(const UUID & /*user_id*/, size_t /*entry_size_in_bytes*/) override {}
bool approveWrite(const UUID & /*user_id*/, size_t /*entry_size_in_bytes*/) const override { return true; }
void clear() override {}
};
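The quota interface is now keyed by the user's UUID instead of the user name. Below is a hedged sketch of what a concrete policy behind this interface could look like; FakeUUID, PerUserQuota, and the "zero limit means unlimited" convention are illustrative assumptions, not the ClickHouse implementation.

#include <cstdint>
#include <cstdio>
#include <map>
#include <utility>

using FakeUUID = std::pair<uint64_t, uint64_t>;   // stand-in for DB::UUID

class PerUserQuota
{
    struct Quota { size_t max_bytes = 0; size_t max_entries = 0; size_t used_bytes = 0; size_t used_entries = 0; };
    std::map<FakeUUID, Quota> quotas;

public:
    // Register or update the user's limits (0 treated as "unlimited" in this sketch).
    void setQuotaForUser(const FakeUUID & user_id, size_t max_bytes, size_t max_entries)
    {
        auto & q = quotas[user_id];
        q.max_bytes = max_bytes;
        q.max_entries = max_entries;
    }

    // Would writing one more entry of entry_bytes keep the user within their quota?
    bool approveWrite(const FakeUUID & user_id, size_t entry_bytes) const
    {
        auto it = quotas.find(user_id);
        if (it == quotas.end())
            return true;                            // no quota registered: allow
        const Quota & q = it->second;
        return (q.max_bytes == 0 || q.used_bytes + entry_bytes <= q.max_bytes)
            && (q.max_entries == 0 || q.used_entries + 1 <= q.max_entries);
    }

    // Record an accepted write against the user's usage.
    void increaseActual(const FakeUUID & user_id, size_t entry_bytes)
    {
        auto & q = quotas[user_id];
        q.used_bytes += entry_bytes;
        ++q.used_entries;
    }
};

int main()
{
    PerUserQuota policy;
    FakeUUID alice{1, 2};
    policy.setQuotaForUser(alice, /*max_bytes=*/100, /*max_entries=*/1);
    std::printf("%d\n", policy.approveWrite(alice, 50));   // 1: within quota
    policy.increaseActual(alice, 50);
    std::printf("%d\n", policy.approveWrite(alice, 10));   // 0: entry limit reached
}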

Some files were not shown because too many files have changed in this diff