Merge branch 'master' into async-loader-waiters-limit

serxa 2024-03-21 10:58:22 +00:00
commit c31b958f88
1313 changed files with 30045 additions and 24258 deletions

View File

@ -5,128 +5,128 @@
# a) the new check is not controversial (this includes many checks in readability-* and google-*) or
# b) too noisy (checks with > 100 new warnings are considered noisy, this includes e.g. cppcoreguidelines-*).
# TODO: Once clang(-tidy) 17 is the minimum, we can convert this list to YAML
# See https://releases.llvm.org/17.0.1/tools/clang/tools/extra/docs/ReleaseNotes.html#improvements-to-clang-tidy
HeaderFilterRegex: '^.*/(base|src|programs|utils)/.*(h|hpp)$'
# TODO Let clang-tidy check headers in further directories
# --> HeaderFilterRegex: '^.*/(src|base|programs|utils)/.*(h|hpp)$'
HeaderFilterRegex: '^.*/(base|programs|utils)/.*(h|hpp)$'
Checks: [
'*',
Checks: '*,
-abseil-*,
'-abseil-*',
-altera-*,
'-altera-*',
-android-*,
'-android-*',
-bugprone-assignment-in-if-condition,
-bugprone-branch-clone,
-bugprone-easily-swappable-parameters,
-bugprone-exception-escape,
-bugprone-implicit-widening-of-multiplication-result,
-bugprone-narrowing-conversions,
-bugprone-not-null-terminated-result,
-bugprone-reserved-identifier, # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged
-bugprone-unchecked-optional-access,
'-bugprone-assignment-in-if-condition',
'-bugprone-branch-clone',
'-bugprone-easily-swappable-parameters',
'-bugprone-exception-escape',
'-bugprone-forward-declaration-namespace',
'-bugprone-implicit-widening-of-multiplication-result',
'-bugprone-narrowing-conversions',
'-bugprone-not-null-terminated-result',
'-bugprone-reserved-identifier', # useful but too slow, TODO retry when https://reviews.llvm.org/rG1c282052624f9d0bd273bde0b47b30c96699c6c7 is merged
'-bugprone-unchecked-optional-access',
-cert-dcl16-c,
-cert-dcl37-c,
-cert-dcl51-cpp,
-cert-err58-cpp,
-cert-msc32-c,
-cert-msc51-cpp,
-cert-oop54-cpp,
-cert-oop57-cpp,
'-cert-dcl16-c',
'-cert-dcl37-c',
'-cert-dcl51-cpp',
'-cert-err58-cpp',
'-cert-msc32-c',
'-cert-msc51-cpp',
'-cert-oop54-cpp',
'-cert-oop57-cpp',
-clang-analyzer-unix.Malloc,
'-clang-analyzer-optin.performance.Padding',
-cppcoreguidelines-*, # impractical in a codebase as large as ClickHouse, also slow
'-clang-analyzer-unix.Malloc',
-darwin-*,
'-cppcoreguidelines-*', # impractical in a codebase as large as ClickHouse, also slow
-fuchsia-*,
'-darwin-*',
-google-build-using-namespace,
-google-readability-braces-around-statements,
-google-readability-casting,
-google-readability-function-size,
-google-readability-namespace-comments,
-google-readability-todo,
'-fuchsia-*',
-hicpp-avoid-c-arrays,
-hicpp-avoid-goto,
-hicpp-braces-around-statements,
-hicpp-explicit-conversions,
-hicpp-function-size,
-hicpp-member-init,
-hicpp-move-const-arg,
-hicpp-multiway-paths-covered,
-hicpp-named-parameter,
-hicpp-no-array-decay,
-hicpp-no-assembler,
-hicpp-no-malloc,
-hicpp-signed-bitwise,
-hicpp-special-member-functions,
-hicpp-uppercase-literal-suffix,
-hicpp-use-auto,
-hicpp-use-emplace,
-hicpp-vararg,
'-google-build-using-namespace',
'-google-readability-braces-around-statements',
'-google-readability-casting',
'-google-readability-function-size',
'-google-readability-namespace-comments',
'-google-readability-todo',
-linuxkernel-*,
'-hicpp-avoid-c-arrays',
'-hicpp-avoid-goto',
'-hicpp-braces-around-statements',
'-hicpp-explicit-conversions',
'-hicpp-function-size',
'-hicpp-member-init',
'-hicpp-move-const-arg',
'-hicpp-multiway-paths-covered',
'-hicpp-named-parameter',
'-hicpp-no-array-decay',
'-hicpp-no-assembler',
'-hicpp-no-malloc',
'-hicpp-signed-bitwise',
'-hicpp-special-member-functions',
'-hicpp-uppercase-literal-suffix',
'-hicpp-use-auto',
'-hicpp-use-emplace',
'-hicpp-vararg',
-llvm-*,
'-linuxkernel-*',
-llvmlibc-*,
'-llvm-*',
-openmp-*,
'-llvmlibc-*',
-misc-const-correctness,
-misc-include-cleaner, # useful but far too many occurrences
-misc-no-recursion,
-misc-non-private-member-variables-in-classes,
-misc-confusable-identifiers, # useful but slooow
-misc-use-anonymous-namespace,
'-openmp-*',
-modernize-avoid-c-arrays,
-modernize-concat-nested-namespaces,
-modernize-macro-to-enum,
-modernize-pass-by-value,
-modernize-return-braced-init-list,
-modernize-use-auto,
-modernize-use-default-member-init,
-modernize-use-emplace,
-modernize-use-nodiscard,
-modernize-use-override,
-modernize-use-trailing-return-type,
'-misc-const-correctness',
'-misc-include-cleaner', # useful but far too many occurrences
'-misc-no-recursion',
'-misc-non-private-member-variables-in-classes',
'-misc-confusable-identifiers', # useful but slooow
'-misc-use-anonymous-namespace',
-performance-inefficient-string-concatenation,
-performance-no-int-to-ptr,
-performance-avoid-endl,
-performance-unnecessary-value-param,
'-modernize-avoid-c-arrays',
'-modernize-concat-nested-namespaces',
'-modernize-macro-to-enum',
'-modernize-pass-by-value',
'-modernize-return-braced-init-list',
'-modernize-use-auto',
'-modernize-use-default-member-init',
'-modernize-use-emplace',
'-modernize-use-nodiscard',
'-modernize-use-override',
'-modernize-use-trailing-return-type',
-portability-simd-intrinsics,
'-performance-inefficient-string-concatenation',
'-performance-no-int-to-ptr',
'-performance-avoid-endl',
'-performance-unnecessary-value-param',
-readability-avoid-unconditional-preprocessor-if,
-readability-braces-around-statements,
-readability-convert-member-functions-to-static,
-readability-else-after-return,
-readability-function-cognitive-complexity,
-readability-function-size,
-readability-identifier-length,
-readability-identifier-naming, # useful but too slow
-readability-implicit-bool-conversion,
-readability-isolate-declaration,
-readability-magic-numbers,
-readability-named-parameter,
-readability-redundant-declaration,
-readability-simplify-boolean-expr,
-readability-static-accessed-through-instance,
-readability-suspicious-call-argument,
-readability-uppercase-literal-suffix,
-readability-use-anyofallof,
'-portability-simd-intrinsics',
-zircon-*,
'
'-readability-avoid-unconditional-preprocessor-if',
'-readability-braces-around-statements',
'-readability-convert-member-functions-to-static',
'-readability-else-after-return',
'-readability-function-cognitive-complexity',
'-readability-function-size',
'-readability-identifier-length',
'-readability-identifier-naming', # useful but too slow
'-readability-implicit-bool-conversion',
'-readability-isolate-declaration',
'-readability-magic-numbers',
'-readability-named-parameter',
'-readability-redundant-declaration',
'-readability-simplify-boolean-expr',
'-readability-static-accessed-through-instance',
'-readability-suspicious-call-argument',
'-readability-uppercase-literal-suffix',
'-readability-use-anyofallof',
'-zircon-*'
]
WarningsAsErrors: '*'
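
For checks that remain enabled, individual occurrences can still be silenced in source with clang-tidy's NOLINT comments — the same mechanism the metrohash header further down in this diff uses via NOLINTBEGIN/NOLINTEND. A minimal illustration with a made-up declaration:

// Suppress one enabled check on a single declaration.
void legacyApi(const int value); // NOLINT(readability-avoid-const-params-in-decls)

// Suppress it for a whole region.
// NOLINTBEGIN(readability-avoid-const-params-in-decls)
void legacyApiA(const int value);
void legacyApiB(const int value);
// NOLINTEND(readability-avoid-const-params-in-decls)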

View File

@ -45,62 +45,3 @@ jobs:
with:
data: "${{ needs.RunConfig.outputs.data }}"
set_latest: true
SonarCloud:
runs-on: [self-hosted, builder]
env:
SONAR_SCANNER_VERSION: 4.8.0.2856
SONAR_SERVER_URL: "https://sonarcloud.io"
BUILD_WRAPPER_OUT_DIR: build_wrapper_output_directory # Directory where build-wrapper output will be placed
CC: clang-17
CXX: clang++-17
steps:
- name: Check out repository code
uses: ClickHouse/checkout@v1
with:
clear-repository: true
fetch-depth: 0 # Shallow clones should be disabled for a better relevancy of analysis
filter: tree:0
submodules: true
- name: Set up JDK 11
uses: actions/setup-java@v1
with:
java-version: 11
- name: Download and set up sonar-scanner
env:
SONAR_SCANNER_DOWNLOAD_URL: https://binaries.sonarsource.com/Distribution/sonar-scanner-cli/sonar-scanner-cli-${{ env.SONAR_SCANNER_VERSION }}-linux.zip
run: |
mkdir -p "$HOME/.sonar"
curl -sSLo "$HOME/.sonar/sonar-scanner.zip" "${{ env.SONAR_SCANNER_DOWNLOAD_URL }}"
unzip -o "$HOME/.sonar/sonar-scanner.zip" -d "$HOME/.sonar/"
echo "$HOME/.sonar/sonar-scanner-${{ env.SONAR_SCANNER_VERSION }}-linux/bin" >> "$GITHUB_PATH"
- name: Download and set up build-wrapper
env:
BUILD_WRAPPER_DOWNLOAD_URL: ${{ env.SONAR_SERVER_URL }}/static/cpp/build-wrapper-linux-x86.zip
run: |
curl -sSLo "$HOME/.sonar/build-wrapper-linux-x86.zip" "${{ env.BUILD_WRAPPER_DOWNLOAD_URL }}"
unzip -o "$HOME/.sonar/build-wrapper-linux-x86.zip" -d "$HOME/.sonar/"
echo "$HOME/.sonar/build-wrapper-linux-x86" >> "$GITHUB_PATH"
- name: Set Up Build Tools
run: |
sudo apt-get update
sudo apt-get install -yq git cmake ccache ninja-build python3 yasm nasm
sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
- name: Run build-wrapper
run: |
mkdir build
cd build
cmake ..
cd ..
build-wrapper-linux-x86-64 --out-dir ${{ env.BUILD_WRAPPER_OUT_DIR }} cmake --build build/
- name: Run sonar-scanner
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
run: |
sonar-scanner \
--define sonar.host.url="${{ env.SONAR_SERVER_URL }}" \
--define sonar.cfamily.build-wrapper-output="${{ env.BUILD_WRAPPER_OUT_DIR }}" \
--define sonar.projectKey="ClickHouse_ClickHouse" \
--define sonar.organization="clickhouse-java" \
--define sonar.cfamily.cpp23.enabled=true \
--define sonar.exclusions="**/*.java,**/*.ts,**/*.js,**/*.css,**/*.sql"

View File

@ -172,6 +172,7 @@ jobs:
run: |
cd "$GITHUB_WORKSPACE/tests/ci"
python3 finish_check.py
python3 merge_pr.py --check-approved
#############################################################################################

View File

@ -43,8 +43,7 @@ jobs:
runs-on: [self-hosted, '${{inputs.runner_type}}']
steps:
- name: Check out repository code
# WIP: temporary try commit with limited parallelization of checkout
uses: ClickHouse/checkout@0be3f7b3098bae494d3ef5d29d2e0676fb606232
uses: ClickHouse/checkout@v1
with:
clear-repository: true
ref: ${{ fromJson(inputs.data).git_ref }}

View File

@ -56,16 +56,21 @@ option(ENABLE_CHECK_HEAVY_BUILDS "Don't allow C++ translation units to compile t
if (ENABLE_CHECK_HEAVY_BUILDS)
# set DATA (since RSS does not work since 2.6.x+) to 5G
set (RLIMIT_DATA 5000000000)
# set VIRT (RLIMIT_AS) to 10G (DATA*10)
# set VIRT (RLIMIT_AS) to 10G (DATA*2)
set (RLIMIT_AS 10000000000)
# set CPU time limit to 1000 seconds
set (RLIMIT_CPU 1000)
# -fsanitize=memory is too heavy
if (SANITIZE STREQUAL "memory")
# Sanitizers are too heavy
if (SANITIZE OR SANITIZE_COVERAGE OR WITH_COVERAGE)
set (RLIMIT_DATA 10000000000) # 10G
endif()
# For some files currently building RISCV64 might be too slow. TODO: Improve compilation times per file
if (ARCH_RISCV64)
set (RLIMIT_CPU 1800)
endif()
set (CMAKE_CXX_COMPILER_LAUNCHER prlimit --as=${RLIMIT_AS} --data=${RLIMIT_DATA} --cpu=${RLIMIT_CPU} ${CMAKE_CXX_COMPILER_LAUNCHER})
endif ()
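
The limits above are enforced by launching the compiler through prlimit. As a rough illustration only (the CMake launcher above does the real work), the equivalent setrlimit calls a wrapper process could make before exec'ing the compiler would look like this:

#include <sys/resource.h>

// Illustrative sketch: apply the same RLIMIT_DATA / RLIMIT_AS / RLIMIT_CPU caps
// that `prlimit --data=... --as=... --cpu=...` sets for the compiler process.
static bool applyHeavyBuildLimits()
{
    const rlimit data_limit{5'000'000'000, 5'000'000'000};   // DATA: 5G
    const rlimit as_limit{10'000'000'000, 10'000'000'000};   // VIRT (AS): 10G
    const rlimit cpu_limit{1000, 1000};                      // CPU: 1000 seconds
    return setrlimit(RLIMIT_DATA, &data_limit) == 0
        && setrlimit(RLIMIT_AS, &as_limit) == 0
        && setrlimit(RLIMIT_CPU, &cpu_limit) == 0;
}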
@ -110,11 +115,6 @@ endif()
# - sanitize.cmake
add_library(global-libs INTERFACE)
# We don't want to instrument everything with fuzzer, but only specific targets (see below),
# also, since we build our own llvm, we specifically don't want to instrument
# libFuzzer library itself - it would result in infinite recursion
#include (cmake/fuzzer.cmake)
include (cmake/sanitize.cmake)
option(ENABLE_COLORED_BUILD "Enable colors in compiler output" ON)
@ -554,7 +554,9 @@ if (ENABLE_RUST)
endif()
endif()
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO" AND NOT SANITIZE AND NOT SANITIZE_COVERAGE AND OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64))
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO"
AND NOT SANITIZE AND NOT SANITIZE_COVERAGE AND NOT ENABLE_FUZZING
AND OS_LINUX AND (ARCH_AMD64 OR ARCH_AARCH64))
set(CHECK_LARGE_OBJECT_SIZES_DEFAULT ON)
else ()
set(CHECK_LARGE_OBJECT_SIZES_DEFAULT OFF)
@ -577,10 +579,7 @@ if (FUZZER)
if (NOT(target_type STREQUAL "INTERFACE_LIBRARY" OR target_type STREQUAL "UTILITY"))
target_compile_options(${target} PRIVATE "-fsanitize=fuzzer-no-link")
endif()
# clickhouse fuzzer isn't working correctly
# initial PR https://github.com/ClickHouse/ClickHouse/pull/27526
#if (target MATCHES ".+_fuzzer" OR target STREQUAL "clickhouse")
if (target_type STREQUAL "EXECUTABLE" AND target MATCHES ".+_fuzzer")
if (target_type STREQUAL "EXECUTABLE" AND (target MATCHES ".+_fuzzer" OR target STREQUAL "clickhouse"))
message(STATUS "${target} instrumented with fuzzer")
target_link_libraries(${target} PUBLIC ch_contrib::fuzzer)
# Add to fuzzers bundle
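
With this change every executable matching .+_fuzzer (and the clickhouse binary itself) is compiled with -fsanitize=fuzzer-no-link and linked against ch_contrib::fuzzer. For reference, a minimal libFuzzer-style target looks like the sketch below; LLVMFuzzerTestOneInput is the real libFuzzer entry point, while parseSomething is a hypothetical function under test:

#include <cstddef>
#include <cstdint>

// Hypothetical code under test.
static bool parseSomething(const uint8_t * data, size_t size)
{
    return size > 0 && data[0] == 'x';   // stand-in for real parsing logic
}

// libFuzzer repeatedly calls this entry point with generated inputs;
// the driver itself comes from the linked fuzzer runtime.
extern "C" int LLVMFuzzerTestOneInput(const uint8_t * data, size_t size)
{
    parseSomething(data, size);
    return 0;
}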

View File

@ -20,6 +20,7 @@ set (SRCS
getPageSize.cpp
getThreadId.cpp
int8_to_string.cpp
itoa.cpp
JSON.cpp
mremap.cpp
phdr_cache.cpp

View File

@ -1,8 +1,7 @@
#pragma once
#include <base/strong_typedef.h>
#include <base/extended_types.h>
#include <Common/formatIPv6.h>
#include <base/strong_typedef.h>
#include <Common/memcmpSmall.h>
namespace DB
@ -62,7 +61,8 @@ namespace std
{
size_t operator()(const DB::IPv6 & x) const
{
return std::hash<std::string_view>{}(std::string_view(reinterpret_cast<const char*>(&x.toUnderType()), IPV6_BINARY_LENGTH));
return std::hash<std::string_view>{}(
std::string_view(reinterpret_cast<const char *>(&x.toUnderType()), sizeof(DB::IPv6::UnderlyingType)));
}
};
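
The updated specialization hashes the raw bytes of the IPv6 strong typedef through std::hash<std::string_view> and sizes the view by the underlying type instead of a separate length constant. The same technique works for any trivially copyable value; a minimal generic sketch (the helper name is made up):

#include <functional>
#include <string_view>
#include <type_traits>

// Hash a trivially copyable value by viewing its object representation as bytes.
template <typename T>
size_t hashBytes(const T & value)
{
    static_assert(std::is_trivially_copyable_v<T>);
    return std::hash<std::string_view>{}(
        std::string_view(reinterpret_cast<const char *>(&value), sizeof(T)));
}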

View File

@ -1,7 +1,7 @@
#include "coverage.h"
#include <sys/mman.h>
#pragma GCC diagnostic ignored "-Wreserved-identifier"
#pragma clang diagnostic ignored "-Wreserved-identifier"
/// WITH_COVERAGE enables the default implementation of code coverage,

View File

@ -108,16 +108,22 @@
{
[[noreturn]] void abortOnFailedAssertion(const String & description);
}
#define chassert(x) do { static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x); } while (0)
#define chassert_1(x, ...) do { static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(#x); } while (0)
#define chassert_2(x, comment, ...) do { static_cast<bool>(x) ? void(0) : ::DB::abortOnFailedAssertion(comment); } while (0)
#define UNREACHABLE() abort()
// clang-format off
#else
/// Here sizeof() trick is used to suppress unused warning for result,
/// since simple "(void)x" will evaluate the expression, while
/// "sizeof(!(x))" will not.
#define chassert(x) (void)sizeof(!(x))
#define chassert_1(x, ...) (void)sizeof(!(x))
#define chassert_2(x, comment, ...) (void)sizeof(!(x))
#define UNREACHABLE() __builtin_unreachable()
#endif
#define CHASSERT_DISPATCH(_1,_2, N,...) N(_1, _2)
#define CHASSERT_INVOKE(tuple) CHASSERT_DISPATCH tuple
#define chassert(...) CHASSERT_INVOKE((__VA_ARGS__, chassert_2, chassert_1))
#endif
/// Macros for Clang Thread Safety Analysis (TSA). They can be safely ignored by other compilers.
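
The dispatch above lets chassert accept an optional message: chassert(cond) expands to chassert_1 and reports the stringified condition, while chassert(cond, comment) expands to chassert_2 and reports the comment instead; in builds where the no-op branch is active, neither form evaluates the condition. A short usage sketch (the surrounding function is hypothetical):

#include <cstddef>
#include <base/defines.h>

void reserveSlots(size_t required, size_t capacity)
{
    // One-argument form: on failure, aborts printing the stringified condition.
    chassert(required > 0);

    // Two-argument form: on failure, aborts printing the supplied comment.
    chassert(required <= capacity, "requested more slots than the pool capacity");
}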

View File

@ -50,9 +50,6 @@ std::optional<uint64_t> getCgroupsV2MemoryLimit()
}
/** Returns the size of physical memory (RAM) in bytes.
* Returns 0 on unsupported platform
*/
uint64_t getMemoryAmountOrZero()
{
int64_t num_pages = sysconf(_SC_PHYS_PAGES);

View File

@ -2,11 +2,10 @@
#include <cstdint>
/** Returns the size of physical memory (RAM) in bytes.
* Returns 0 on unsupported platform or if it cannot determine the size of physical memory.
*/
/// Returns the size in bytes of physical memory (RAM) available to the process. The value can
/// be smaller than the total RAM available to the system due to cgroups settings.
/// Returns 0 on unsupported platform or if it cannot determine the size of physical memory.
uint64_t getMemoryAmountOrZero();
/** Throws exception if it cannot determine the size of physical memory.
*/
/// Throws exception if it cannot determine the size of physical memory.
uint64_t getMemoryAmount();
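
A minimal usage sketch, assuming the header is reachable as <base/getMemoryAmount.h> like the other base/ headers in this diff:

#include <cstdio>
#include <base/getMemoryAmount.h>

int main()
{
    // Cgroup-aware: the value may be lower than the machine's total RAM.
    if (uint64_t ram = getMemoryAmountOrZero())
        std::printf("usable RAM: %llu bytes\n", static_cast<unsigned long long>(ram));
    else
        std::printf("could not determine the amount of RAM\n");
}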

503
base/base/itoa.cpp Normal file
View File

@ -0,0 +1,503 @@
// Based on https://github.com/amdn/itoa and combined with our optimizations
//
//=== itoa.cpp - Fast integer to ascii conversion --*- C++ -*-//
//
// The MIT License (MIT)
// Copyright (c) 2016 Arturo Martin-de-Nicolas
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//===----------------------------------------------------------------------===//
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>
#include <base/defines.h>
#include <base/extended_types.h>
#include <base/itoa.h>
namespace
{
template <typename T>
ALWAYS_INLINE inline constexpr T pow10(size_t x)
{
return x ? 10 * pow10<T>(x - 1) : 1;
}
// Division by a power of 10 is implemented using a multiplicative inverse.
// This strength reduction is also done by optimizing compilers, but
// presently the fastest results are produced by using the values
// for the multiplication and the shift as given by the algorithm
// described by Agner Fog in "Optimizing Subroutines in Assembly Language"
//
// http://www.agner.org/optimize/optimizing_assembly.pdf
//
// "Integer division by a constant (all processors)
// A floating point number can be divided by a constant by multiplying
// with the reciprocal. If we want to do the same with integers, we have
// to scale the reciprocal by 2n and then shift the product to the right
// by n. There are various algorithms for finding a suitable value of n
// and compensating for rounding errors. The algorithm described below
// was invented by Terje Mathisen, Norway, and not published elsewhere."
/// Division by constant is performed by:
/// 1. Adding 1 if needed;
/// 2. Multiplying by another constant;
/// 3. Shifting right by another constant.
template <typename UInt, bool add_, UInt multiplier_, unsigned shift_>
struct Division
{
static constexpr bool add{add_};
static constexpr UInt multiplier{multiplier_};
static constexpr unsigned shift{shift_};
};
/// Select a type with appropriate number of bytes from the list of types.
/// First parameter is the number of bytes requested. Then goes a list of types with 1, 2, 4, ... number of bytes.
/// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t.
template <size_t N, typename T, typename... Ts>
struct SelectType
{
using Result = typename SelectType<N / 2, Ts...>::Result;
};
template <typename T, typename... Ts>
struct SelectType<1, T, Ts...>
{
using Result = T;
};
/// Division by 10^N where N is the size of the type.
template <size_t N>
using DivisionBy10PowN = typename SelectType<
N,
Division<uint8_t, false, 205U, 11>, /// divide by 10
Division<uint16_t, true, 41943U, 22>, /// divide by 100
Division<uint32_t, false, 3518437209U, 45>, /// divide by 10000
Division<uint64_t, false, 12379400392853802749ULL, 90> /// divide by 100000000
>::Result;
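// Illustrative aside (not part of the original file): for the uint8_t entry above,
// Division<uint8_t, false, 205U, 11>, the identity being exploited is
//     v / 10 == (v * 205) >> 11   for every v in [0, 255],
// with the multiplication done in the next wider type, exactly as split() does below.
static_assert((uint16_t(255) * 205U) >> 11 == 255 / 10);
static_assert((uint16_t(199) * 205U) >> 11 == 199 / 10);
static_assert((uint16_t(10) * 205U) >> 11 == 10 / 10);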
template <size_t N>
using UnsignedOfSize = typename SelectType<N, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>::Result;
/// Holds the result of dividing an unsigned N-byte variable by 10^N: the quotient and the remainder.
template <size_t N>
struct QuotientAndRemainder
{
UnsignedOfSize<N> quotient; // quotient with fewer than 2*N decimal digits
UnsignedOfSize<N / 2> remainder; // remainder with at most N decimal digits
};
template <size_t N>
QuotientAndRemainder<N> inline split(UnsignedOfSize<N> value)
{
constexpr DivisionBy10PowN<N> division;
UnsignedOfSize<N> quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift;
UnsignedOfSize<N / 2> remainder = static_cast<UnsignedOfSize<N / 2>>(value - quotient * pow10<UnsignedOfSize<N / 2>>(N));
return {quotient, remainder};
}
ALWAYS_INLINE inline char * outDigit(char * p, uint8_t value)
{
*p = '0' + value;
++p;
return p;
}
// Using a lookup table to convert binary numbers from 0 to 99
// into ascii characters as described by Andrei Alexandrescu in
// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/
const char digits[201] = "00010203040506070809"
"10111213141516171819"
"20212223242526272829"
"30313233343536373839"
"40414243444546474849"
"50515253545556575859"
"60616263646566676869"
"70717273747576777879"
"80818283848586878889"
"90919293949596979899";
ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value)
{
memcpy(p, &digits[value * 2], 2);
p += 2;
return p;
}
namespace convert
{
template <typename UInt, size_t N = sizeof(UInt)>
char * head(char * p, UInt u);
template <typename UInt, size_t N = sizeof(UInt)>
char * tail(char * p, UInt u);
//===----------------------------------------------------------===//
// head: find most significant digit, skip leading zeros
//===----------------------------------------------------------===//
// "x" contains quotient and remainder after division by 10^N
// quotient is less than 10^N
template <size_t N>
ALWAYS_INLINE inline char * head(char * p, QuotientAndRemainder<N> x)
{
p = head(p, UnsignedOfSize<N / 2>(x.quotient));
p = tail(p, x.remainder);
return p;
}
// "u" is less than 10^2*N
template <typename UInt, size_t N>
ALWAYS_INLINE inline char * head(char * p, UInt u)
{
return u < pow10<UnsignedOfSize<N>>(N) ? head(p, UnsignedOfSize<N / 2>(u)) : head<N>(p, split<N>(u));
}
// recursion base case, selected when "u" is one byte
template <>
ALWAYS_INLINE inline char * head<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
{
return u < 10 ? outDigit(p, u) : outTwoDigits(p, u);
}
//===----------------------------------------------------------===//
// tail: produce all digits including leading zeros
//===----------------------------------------------------------===//
// recursive step, "u" is less than 10^2*N
template <typename UInt, size_t N>
ALWAYS_INLINE inline char * tail(char * p, UInt u)
{
QuotientAndRemainder<N> x = split<N>(u);
p = tail(p, UnsignedOfSize<N / 2>(x.quotient));
p = tail(p, x.remainder);
return p;
}
// recursion base case, selected when "u" is one byte
template <>
ALWAYS_INLINE inline char * tail<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
{
return outTwoDigits(p, u);
}
//===----------------------------------------------------------===//
// large values are >= 10^2*N
// where x contains quotient and remainder after division by 10^N
//===----------------------------------------------------------===//
template <size_t N>
ALWAYS_INLINE inline char * large(char * p, QuotientAndRemainder<N> x)
{
QuotientAndRemainder<N> y = split<N>(x.quotient);
p = head(p, UnsignedOfSize<N / 2>(y.quotient));
p = tail(p, y.remainder);
p = tail(p, x.remainder);
return p;
}
//===----------------------------------------------------------===//
// handle values of "u" that might be >= 10^2*N
// where N is the size of "u" in bytes
//===----------------------------------------------------------===//
template <typename UInt, size_t N = sizeof(UInt)>
ALWAYS_INLINE inline char * uitoa(char * p, UInt u)
{
if (u < pow10<UnsignedOfSize<N>>(N))
return head(p, UnsignedOfSize<N / 2>(u));
QuotientAndRemainder<N> x = split<N>(u);
return u < pow10<UnsignedOfSize<N>>(2 * N) ? head<N>(p, x) : large<N>(p, x);
}
// selected when "u" is one byte
template <>
ALWAYS_INLINE inline char * uitoa<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
{
if (u < 10)
return outDigit(p, u);
else if (u < 100)
return outTwoDigits(p, u);
else
{
p = outDigit(p, u / 100);
p = outTwoDigits(p, u % 100);
return p;
}
}
//===----------------------------------------------------------===//
// handle unsigned and signed integral operands
//===----------------------------------------------------------===//
// itoa: handle unsigned integral operands (selected by SFINAE)
template <typename U, std::enable_if_t<!std::is_signed_v<U> && std::is_integral_v<U>> * = nullptr>
ALWAYS_INLINE inline char * itoa(U u, char * p)
{
return convert::uitoa(p, u);
}
// itoa: handle signed integral operands (selected by SFINAE)
template <typename I, size_t N = sizeof(I), std::enable_if_t<std::is_signed_v<I> && std::is_integral_v<I>> * = nullptr>
ALWAYS_INLINE inline char * itoa(I i, char * p)
{
// Need "mask" to be filled with a copy of the sign bit.
// If "i" is a negative value, then the result of "operator >>"
// is implementation-defined, though usually it is an arithmetic
// right shift that replicates the sign bit.
// Use a conditional expression to be portable,
// a good optimizing compiler generates an arithmetic right shift
// and avoids the conditional branch.
UnsignedOfSize<N> mask = i < 0 ? ~UnsignedOfSize<N>(0) : 0;
// Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize<N>.
// Cannot use std::abs() because the result is undefined
// in 2's complement systems for the most-negative value.
// Want to avoid conditional branch for performance reasons since
// CPU branch prediction will be ineffective when negative values
// occur randomly.
// Let "u" be "i" cast to unsigned type UnsignedOfSize<N>.
// Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative.
// This yields the absolute value with the desired type without
// using a conditional branch and without invoking undefined or
// implementation defined behavior:
UnsignedOfSize<N> u = ((2 * UnsignedOfSize<N>(i)) & ~mask) - UnsignedOfSize<N>(i);
// Unconditionally store a minus sign when producing digits
// in a forward direction and increment the pointer only if
// the value is in fact negative.
// This avoids a conditional branch and is safe because we will
// always produce at least one digit and it will overwrite the
// minus sign when the value is not negative.
*p = '-';
p += (mask & 1);
p = convert::uitoa(p, u);
return p;
}
}
const uint64_t max_multiple_of_hundred_that_fits_in_64_bits = 1'00'00'00'00'00'00'00'00'00ull;
const int max_multiple_of_hundred_blocks = 9;
static_assert(max_multiple_of_hundred_that_fits_in_64_bits % 100 == 0);
ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p)
{
/// If the highest 64-bit item is empty, we can print just the lowest item as u64
if (_x.items[UInt128::_impl::little(1)] == 0)
return convert::itoa(_x.items[UInt128::_impl::little(0)], p);
/// Doing operations using __int128 is faster and we already rely on this feature
using T = unsigned __int128;
T x = (T(_x.items[UInt128::_impl::little(1)]) << 64) + T(_x.items[UInt128::_impl::little(0)]);
/// We are going to accumulate blocks of 2 digits to print until the number is small enough to be printed as u64
/// To do this we could do: x / 100, x % 100
/// But these would mean doing many iterations with long integers, so instead we divide by a much longer integer
/// multiple of 100 (100^9) and then get the blocks out of it (as u64)
/// Once we reach u64::max we can stop and use the fast method to print that in the front
static const T large_divisor = max_multiple_of_hundred_that_fits_in_64_bits;
static const T largest_uint64 = std::numeric_limits<uint64_t>::max();
uint8_t two_values[20] = {0}; // 39 Max characters / 2
int current_block = 0;
while (x > largest_uint64)
{
uint64_t u64_remainder = uint64_t(x % large_divisor);
x /= large_divisor;
int pos = current_block;
while (u64_remainder)
{
two_values[pos] = uint8_t(u64_remainder % 100);
pos++;
u64_remainder /= 100;
}
current_block += max_multiple_of_hundred_blocks;
}
char * highest_part_print = convert::itoa(uint64_t(x), p);
for (int i = 0; i < current_block; i++)
{
outTwoDigits(highest_part_print, two_values[current_block - 1 - i]);
highest_part_print += 2;
}
return highest_part_print;
}
ALWAYS_INLINE inline char * writeUIntText(UInt256 _x, char * p)
{
/// If possible, treat it as a smaller integer as they are much faster to print
if (_x.items[UInt256::_impl::little(3)] == 0 && _x.items[UInt256::_impl::little(2)] == 0)
return writeUIntText(UInt128{_x.items[UInt256::_impl::little(0)], _x.items[UInt256::_impl::little(1)]}, p);
/// If available (x86) we transform from our custom class to _BitInt(256) which has better support in the compiler
/// and produces better code
using T =
#if defined(__x86_64__)
# pragma clang diagnostic push
# pragma clang diagnostic ignored "-Wbit-int-extension"
unsigned _BitInt(256)
# pragma clang diagnostic pop
#else
UInt256
#endif
;
#if defined(__x86_64__)
T x = (T(_x.items[UInt256::_impl::little(3)]) << 192) + (T(_x.items[UInt256::_impl::little(2)]) << 128)
+ (T(_x.items[UInt256::_impl::little(1)]) << 64) + T(_x.items[UInt256::_impl::little(0)]);
#else
T x = _x;
#endif
/// Similar to writeUIntText(UInt128) only that in this case we will stop as soon as we reach the largest u128
/// and switch to that function
uint8_t two_values[39] = {0}; // 78 Max characters / 2
int current_pos = 0;
static const T large_divisor = max_multiple_of_hundred_that_fits_in_64_bits;
static const T largest_uint128 = T(std::numeric_limits<uint64_t>::max()) << 64 | T(std::numeric_limits<uint64_t>::max());
while (x > largest_uint128)
{
uint64_t u64_remainder = uint64_t(x % large_divisor);
x /= large_divisor;
int pos = current_pos;
while (u64_remainder)
{
two_values[pos] = uint8_t(u64_remainder % 100);
pos++;
u64_remainder /= 100;
}
current_pos += max_multiple_of_hundred_blocks;
}
#if defined(__x86_64__)
UInt128 pending{uint64_t(x), uint64_t(x >> 64)};
#else
UInt128 pending{x.items[UInt256::_impl::little(0)], x.items[UInt256::_impl::little(1)]};
#endif
char * highest_part_print = writeUIntText(pending, p);
for (int i = 0; i < current_pos; i++)
{
outTwoDigits(highest_part_print, two_values[current_pos - 1 - i]);
highest_part_print += 2;
}
return highest_part_print;
}
ALWAYS_INLINE inline char * writeLeadingMinus(char * pos)
{
*pos = '-';
return pos + 1;
}
template <typename T>
ALWAYS_INLINE inline char * writeSIntText(T x, char * pos)
{
static_assert(std::is_same_v<T, Int128> || std::is_same_v<T, Int256>);
using UnsignedT = make_unsigned_t<T>;
static constexpr T min_int = UnsignedT(1) << (sizeof(T) * 8 - 1);
if (unlikely(x == min_int))
{
if constexpr (std::is_same_v<T, Int128>)
{
const char * res = "-170141183460469231731687303715884105728";
memcpy(pos, res, strlen(res));
return pos + strlen(res);
}
else if constexpr (std::is_same_v<T, Int256>)
{
const char * res = "-57896044618658097711785492504343953926634992332820282019728792003956564819968";
memcpy(pos, res, strlen(res));
return pos + strlen(res);
}
}
if (x < 0)
{
x = -x;
pos = writeLeadingMinus(pos);
}
return writeUIntText(UnsignedT(x), pos);
}
}
char * itoa(UInt8 i, char * p)
{
return convert::itoa(uint8_t(i), p);
}
char * itoa(Int8 i, char * p)
{
return convert::itoa(int8_t(i), p);
}
char * itoa(UInt128 i, char * p)
{
return writeUIntText(i, p);
}
char * itoa(Int128 i, char * p)
{
return writeSIntText(i, p);
}
char * itoa(UInt256 i, char * p)
{
return writeUIntText(i, p);
}
char * itoa(Int256 i, char * p)
{
return writeSIntText(i, p);
}
#define DEFAULT_ITOA(T) \
char * itoa(T i, char * p) \
{ \
return convert::itoa(i, p); \
}
#define FOR_MISSING_INTEGER_TYPES(M) \
M(uint8_t) \
M(UInt16) \
M(UInt32) \
M(UInt64) \
M(int8_t) \
M(Int16) \
M(Int32) \
M(Int64)
FOR_MISSING_INTEGER_TYPES(DEFAULT_ITOA)
#if defined(OS_DARWIN)
DEFAULT_ITOA(unsigned long)
DEFAULT_ITOA(long)
#endif
#undef FOR_MISSING_INTEGER_TYPES
#undef DEFAULT_ITOA
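
The public contract from itoa.h is unchanged: itoa(value, buffer) writes decimal digits (with a leading minus for negative values) into buffer and returns a pointer one past the last character written, without a terminating null. A minimal usage sketch; sizing the buffer is the caller's responsibility:

#include <string>
#include <base/extended_types.h>
#include <base/itoa.h>

std::string toDecimalString(Int128 value)
{
    char buf[80];                   // 80 bytes covers even Int256 (up to 78 digits plus a sign)
    char * end = itoa(value, buf);  // past-the-end pointer, no null terminator
    return std::string(buf, end);
}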

View File

@ -1,446 +1,30 @@
#pragma once
// Based on https://github.com/amdn/itoa and combined with our optimizations
//
//=== itoa.h - Fast integer to ascii conversion --*- C++ -*-//
//
// The MIT License (MIT)
// Copyright (c) 2016 Arturo Martin-de-Nicolas
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included
// in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//===----------------------------------------------------------------------===//
#include <cstdint>
#include <cstddef>
#include <cstring>
#include <type_traits>
#include <base/extended_types.h>
#define FOR_INTEGER_TYPES(M) \
M(uint8_t) \
M(UInt8) \
M(UInt16) \
M(UInt32) \
M(UInt64) \
M(UInt128) \
M(UInt256) \
M(int8_t) \
M(Int8) \
M(Int16) \
M(Int32) \
M(Int64) \
M(Int128) \
M(Int256)
template <typename T>
inline int digits10(T x)
{
if (x < 10ULL)
return 1;
if (x < 100ULL)
return 2;
if (x < 1000ULL)
return 3;
#define INSTANTIATION(T) char * itoa(T i, char * p);
FOR_INTEGER_TYPES(INSTANTIATION)
if (x < 1000000000000ULL)
{
if (x < 100000000ULL)
{
if (x < 1000000ULL)
{
if (x < 10000ULL)
return 4;
else
return 5 + (x >= 100000ULL);
}
#if defined(OS_DARWIN)
INSTANTIATION(unsigned long)
INSTANTIATION(long)
#endif
return 7 + (x >= 10000000ULL);
}
if (x < 10000000000ULL)
return 9 + (x >= 1000000000ULL);
return 11 + (x >= 100000000000ULL);
}
return 12 + digits10(x / 1000000000000ULL);
}
namespace impl
{
template <typename T>
static constexpr T pow10(size_t x)
{
return x ? 10 * pow10<T>(x - 1) : 1;
}
// Division by a power of 10 is implemented using a multiplicative inverse.
// This strength reduction is also done by optimizing compilers, but
// presently the fastest results are produced by using the values
// for the multiplication and the shift as given by the algorithm
// described by Agner Fog in "Optimizing Subroutines in Assembly Language"
//
// http://www.agner.org/optimize/optimizing_assembly.pdf
//
// "Integer division by a constant (all processors)
// A floating point number can be divided by a constant by multiplying
// with the reciprocal. If we want to do the same with integers, we have
// to scale the reciprocal by 2n and then shift the product to the right
// by n. There are various algorithms for finding a suitable value of n
// and compensating for rounding errors. The algorithm described below
// was invented by Terje Mathisen, Norway, and not published elsewhere."
/// Division by constant is performed by:
/// 1. Adding 1 if needed;
/// 2. Multiplying by another constant;
/// 3. Shifting right by another constant.
template <typename UInt, bool add_, UInt multiplier_, unsigned shift_>
struct Division
{
static constexpr bool add{add_};
static constexpr UInt multiplier{multiplier_};
static constexpr unsigned shift{shift_};
};
/// Select a type with appropriate number of bytes from the list of types.
/// First parameter is the number of bytes requested. Then goes a list of types with 1, 2, 4, ... number of bytes.
/// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t.
template <size_t N, typename T, typename... Ts>
struct SelectType
{
using Result = typename SelectType<N / 2, Ts...>::Result;
};
template <typename T, typename... Ts>
struct SelectType<1, T, Ts...>
{
using Result = T;
};
/// Division by 10^N where N is the size of the type.
template <size_t N>
using DivisionBy10PowN = typename SelectType
<
N,
Division<uint8_t, false, 205U, 11>, /// divide by 10
Division<uint16_t, true, 41943U, 22>, /// divide by 100
Division<uint32_t, false, 3518437209U, 45>, /// divide by 10000
Division<uint64_t, false, 12379400392853802749ULL, 90> /// divide by 100000000
>::Result;
template <size_t N>
using UnsignedOfSize = typename SelectType
<
N,
uint8_t,
uint16_t,
uint32_t,
uint64_t,
__uint128_t
>::Result;
/// Holds the result of dividing an unsigned N-byte variable by 10^N: the quotient and the remainder.
template <size_t N>
struct QuotientAndRemainder
{
UnsignedOfSize<N> quotient; // quotient with fewer than 2*N decimal digits
UnsignedOfSize<N / 2> remainder; // remainder with at most N decimal digits
};
template <size_t N>
QuotientAndRemainder<N> static inline split(UnsignedOfSize<N> value)
{
constexpr DivisionBy10PowN<N> division;
UnsignedOfSize<N> quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift;
UnsignedOfSize<N / 2> remainder = static_cast<UnsignedOfSize<N / 2>>(value - quotient * pow10<UnsignedOfSize<N / 2>>(N));
return {quotient, remainder};
}
static inline char * outDigit(char * p, uint8_t value)
{
*p = '0' + value;
++p;
return p;
}
// Using a lookup table to convert binary numbers from 0 to 99
// into ascii characters as described by Andrei Alexandrescu in
// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/
static const char digits[201] = "00010203040506070809"
"10111213141516171819"
"20212223242526272829"
"30313233343536373839"
"40414243444546474849"
"50515253545556575859"
"60616263646566676869"
"70717273747576777879"
"80818283848586878889"
"90919293949596979899";
static inline char * outTwoDigits(char * p, uint8_t value)
{
memcpy(p, &digits[value * 2], 2);
p += 2;
return p;
}
namespace convert
{
template <typename UInt, size_t N = sizeof(UInt)> static char * head(char * p, UInt u);
template <typename UInt, size_t N = sizeof(UInt)> static char * tail(char * p, UInt u);
//===----------------------------------------------------------===//
// head: find most significant digit, skip leading zeros
//===----------------------------------------------------------===//
// "x" contains quotient and remainder after division by 10^N
// quotient is less than 10^N
template <size_t N>
static inline char * head(char * p, QuotientAndRemainder<N> x)
{
p = head(p, UnsignedOfSize<N / 2>(x.quotient));
p = tail(p, x.remainder);
return p;
}
// "u" is less than 10^2*N
template <typename UInt, size_t N>
static inline char * head(char * p, UInt u)
{
return u < pow10<UnsignedOfSize<N>>(N)
? head(p, UnsignedOfSize<N / 2>(u))
: head<N>(p, split<N>(u));
}
// recursion base case, selected when "u" is one byte
template <>
inline char * head<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
{
return u < 10
? outDigit(p, u)
: outTwoDigits(p, u);
}
//===----------------------------------------------------------===//
// tail: produce all digits including leading zeros
//===----------------------------------------------------------===//
// recursive step, "u" is less than 10^2*N
template <typename UInt, size_t N>
static inline char * tail(char * p, UInt u)
{
QuotientAndRemainder<N> x = split<N>(u);
p = tail(p, UnsignedOfSize<N / 2>(x.quotient));
p = tail(p, x.remainder);
return p;
}
// recursion base case, selected when "u" is one byte
template <>
inline char * tail<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
{
return outTwoDigits(p, u);
}
//===----------------------------------------------------------===//
// large values are >= 10^2*N
// where x contains quotient and remainder after division by 10^N
//===----------------------------------------------------------===//
template <size_t N>
static inline char * large(char * p, QuotientAndRemainder<N> x)
{
QuotientAndRemainder<N> y = split<N>(x.quotient);
p = head(p, UnsignedOfSize<N / 2>(y.quotient));
p = tail(p, y.remainder);
p = tail(p, x.remainder);
return p;
}
//===----------------------------------------------------------===//
// handle values of "u" that might be >= 10^2*N
// where N is the size of "u" in bytes
//===----------------------------------------------------------===//
template <typename UInt, size_t N = sizeof(UInt)>
static inline char * uitoa(char * p, UInt u)
{
if (u < pow10<UnsignedOfSize<N>>(N))
return head(p, UnsignedOfSize<N / 2>(u));
QuotientAndRemainder<N> x = split<N>(u);
return u < pow10<UnsignedOfSize<N>>(2 * N)
? head<N>(p, x)
: large<N>(p, x);
}
// selected when "u" is one byte
template <>
inline char * uitoa<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
{
if (u < 10)
return outDigit(p, u);
else if (u < 100)
return outTwoDigits(p, u);
else
{
p = outDigit(p, u / 100);
p = outTwoDigits(p, u % 100);
return p;
}
}
//===----------------------------------------------------------===//
// handle unsigned and signed integral operands
//===----------------------------------------------------------===//
// itoa: handle unsigned integral operands (selected by SFINAE)
template <typename U, std::enable_if_t<!std::is_signed_v<U> && std::is_integral_v<U>> * = nullptr>
static inline char * itoa(U u, char * p)
{
return convert::uitoa(p, u);
}
// itoa: handle signed integral operands (selected by SFINAE)
template <typename I, size_t N = sizeof(I), std::enable_if_t<std::is_signed_v<I> && std::is_integral_v<I>> * = nullptr>
static inline char * itoa(I i, char * p)
{
// Need "mask" to be filled with a copy of the sign bit.
// If "i" is a negative value, then the result of "operator >>"
// is implementation-defined, though usually it is an arithmetic
// right shift that replicates the sign bit.
// Use a conditional expression to be portable,
// a good optimizing compiler generates an arithmetic right shift
// and avoids the conditional branch.
UnsignedOfSize<N> mask = i < 0 ? ~UnsignedOfSize<N>(0) : 0;
// Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize<N>.
// Cannot use std::abs() because the result is undefined
// in 2's complement systems for the most-negative value.
// Want to avoid conditional branch for performance reasons since
// CPU branch prediction will be ineffective when negative values
// occur randomly.
// Let "u" be "i" cast to unsigned type UnsignedOfSize<N>.
// Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative.
// This yields the absolute value with the desired type without
// using a conditional branch and without invoking undefined or
// implementation defined behavior:
UnsignedOfSize<N> u = ((2 * UnsignedOfSize<N>(i)) & ~mask) - UnsignedOfSize<N>(i);
// Unconditionally store a minus sign when producing digits
// in a forward direction and increment the pointer only if
// the value is in fact negative.
// This avoids a conditional branch and is safe because we will
// always produce at least one digit and it will overwrite the
// minus sign when the value is not negative.
*p = '-';
p += (mask & 1);
p = convert::uitoa(p, u);
return p;
}
}
template <typename T>
static inline char * writeUIntText(T x, char * p)
{
static_assert(is_unsigned_v<T>);
int len = digits10(x);
auto * pp = p + len;
while (x >= 100)
{
const auto i = x % 100;
x /= 100;
pp -= 2;
outTwoDigits(pp, i);
}
if (x < 10)
*p = '0' + x;
else
outTwoDigits(p, x);
return p + len;
}
static inline char * writeLeadingMinus(char * pos)
{
*pos = '-';
return pos + 1;
}
template <typename T>
static inline char * writeSIntText(T x, char * pos)
{
static_assert(std::is_same_v<T, Int128> || std::is_same_v<T, Int256>);
using UnsignedT = make_unsigned_t<T>;
static constexpr T min_int = UnsignedT(1) << (sizeof(T) * 8 - 1);
if (unlikely(x == min_int))
{
if constexpr (std::is_same_v<T, Int128>)
{
const char * res = "-170141183460469231731687303715884105728";
memcpy(pos, res, strlen(res));
return pos + strlen(res);
}
else if constexpr (std::is_same_v<T, Int256>)
{
const char * res = "-57896044618658097711785492504343953926634992332820282019728792003956564819968";
memcpy(pos, res, strlen(res));
return pos + strlen(res);
}
}
if (x < 0)
{
x = -x;
pos = writeLeadingMinus(pos);
}
return writeUIntText(UnsignedT(x), pos);
}
}
template <typename I>
char * itoa(I i, char * p)
{
return impl::convert::itoa(i, p);
}
template <>
inline char * itoa(char8_t i, char * p)
{
return impl::convert::itoa(uint8_t(i), p);
}
template <>
inline char * itoa(UInt128 i, char * p)
{
return impl::writeUIntText(i, p);
}
template <>
inline char * itoa(Int128 i, char * p)
{
return impl::writeSIntText(i, p);
}
template <>
inline char * itoa(UInt256 i, char * p)
{
return impl::writeUIntText(i, p);
}
template <>
inline char * itoa(Int256 i, char * p)
{
return impl::writeSIntText(i, p);
}
#undef FOR_INTEGER_TYPES
#undef INSTANTIATION

View File

@ -59,8 +59,8 @@ using ComparatorWrapper = Comparator;
#endif
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wold-style-cast"
#include <miniselect/floyd_rivest_select.h>
@ -115,7 +115,7 @@ void partial_sort(RandomIt first, RandomIt middle, RandomIt last)
::partial_sort(first, middle, last, comparator());
}
#pragma GCC diagnostic pop
#pragma clang diagnostic pop
template <typename RandomIt, typename Compare>
void sort(RandomIt first, RandomIt last, Compare compare)

View File

@ -0,0 +1,75 @@
//
// FPEnvironment_SUN.h
//
// Library: Foundation
// Package: Core
// Module: FPEnvironment
//
// Definitions of class FPEnvironmentImpl for Solaris.
//
// Copyright (c) 2005-2006, Applied Informatics Software Engineering GmbH.
// and Contributors.
//
// SPDX-License-Identifier: BSL-1.0
//
#ifndef Foundation_FPEnvironment_SUN_INCLUDED
#define Foundation_FPEnvironment_SUN_INCLUDED
#include <ieeefp.h>
#include "Poco/Foundation.h"
namespace Poco
{
class FPEnvironmentImpl
{
protected:
enum RoundingModeImpl
{
FP_ROUND_DOWNWARD_IMPL = FP_RM,
FP_ROUND_UPWARD_IMPL = FP_RP,
FP_ROUND_TONEAREST_IMPL = FP_RN,
FP_ROUND_TOWARDZERO_IMPL = FP_RZ
};
enum FlagImpl
{
FP_DIVIDE_BY_ZERO_IMPL = FP_X_DZ,
FP_INEXACT_IMPL = FP_X_IMP,
FP_OVERFLOW_IMPL = FP_X_OFL,
FP_UNDERFLOW_IMPL = FP_X_UFL,
FP_INVALID_IMPL = FP_X_INV
};
FPEnvironmentImpl();
FPEnvironmentImpl(const FPEnvironmentImpl & env);
~FPEnvironmentImpl();
FPEnvironmentImpl & operator=(const FPEnvironmentImpl & env);
void keepCurrentImpl();
static void clearFlagsImpl();
static bool isFlagImpl(FlagImpl flag);
static void setRoundingModeImpl(RoundingModeImpl mode);
static RoundingModeImpl getRoundingModeImpl();
static bool isInfiniteImpl(float value);
static bool isInfiniteImpl(double value);
static bool isInfiniteImpl(long double value);
static bool isNaNImpl(float value);
static bool isNaNImpl(double value);
static bool isNaNImpl(long double value);
static float copySignImpl(float target, float source);
static double copySignImpl(double target, double source);
static long double copySignImpl(long double target, long double source);
private:
fp_rnd _rnd;
fp_except _exc;
};
} // namespace Poco
#endif // Foundation_FPEnvironment_SUN_INCLUDED

View File

@ -0,0 +1,139 @@
//
// FPEnvironment_SUN.cpp
//
// Library: Foundation
// Package: Core
// Module: FPEnvironment
//
// Copyright (c) 2005-2006, Applied Informatics Software Engineering GmbH.
// and Contributors.
//
// SPDX-License-Identifier: BSL-1.0
//
#include <math.h>
#include "Poco/FPEnvironment_SUN.h"
namespace Poco {
FPEnvironmentImpl::FPEnvironmentImpl()
{
_rnd = fpgetround();
_exc = fpgetmask();
}
FPEnvironmentImpl::FPEnvironmentImpl(const FPEnvironmentImpl& env)
{
_rnd = env._rnd;
_exc = env._exc;
}
FPEnvironmentImpl::~FPEnvironmentImpl()
{
fpsetround(_rnd);
fpsetmask(_exc);
}
FPEnvironmentImpl& FPEnvironmentImpl::operator = (const FPEnvironmentImpl& env)
{
_rnd = env._rnd;
_exc = env._exc;
return *this;
}
bool FPEnvironmentImpl::isInfiniteImpl(float value)
{
int cls = fpclass(value);
return cls == FP_PINF || cls == FP_NINF;
}
bool FPEnvironmentImpl::isInfiniteImpl(double value)
{
int cls = fpclass(value);
return cls == FP_PINF || cls == FP_NINF;
}
bool FPEnvironmentImpl::isInfiniteImpl(long double value)
{
int cls = fpclass(value);
return cls == FP_PINF || cls == FP_NINF;
}
bool FPEnvironmentImpl::isNaNImpl(float value)
{
return isnanf(value) != 0;
}
bool FPEnvironmentImpl::isNaNImpl(double value)
{
return isnan(value) != 0;
}
bool FPEnvironmentImpl::isNaNImpl(long double value)
{
return isnan((double) value) != 0;
}
float FPEnvironmentImpl::copySignImpl(float target, float source)
{
return (float) copysign(target, source);
}
double FPEnvironmentImpl::copySignImpl(double target, double source)
{
return copysign(target, source);
}
long double FPEnvironmentImpl::copySignImpl(long double target, long double source)
{
return (source > 0 && target > 0) || (source < 0 && target < 0) ? target : -target;
}
void FPEnvironmentImpl::keepCurrentImpl()
{
fpsetround(_rnd);
fpsetmask(_exc);
}
void FPEnvironmentImpl::clearFlagsImpl()
{
fpsetsticky(0);
}
bool FPEnvironmentImpl::isFlagImpl(FlagImpl flag)
{
return (fpgetsticky() & flag) != 0;
}
void FPEnvironmentImpl::setRoundingModeImpl(RoundingModeImpl mode)
{
fpsetround((fp_rnd) mode);
}
FPEnvironmentImpl::RoundingModeImpl FPEnvironmentImpl::getRoundingModeImpl()
{
return (FPEnvironmentImpl::RoundingModeImpl) fpgetround();
}
} // namespace Poco

View File

@ -30,7 +30,6 @@ namespace Net
class HTTPServerRequest;
class HTTPServerResponse;
class HTTPRequestHandler;

View File

@ -93,7 +93,7 @@ void TCPServerDispatcher::release()
void TCPServerDispatcher::run()
{
AutoPtr<TCPServerDispatcher> guard(this, true); // ensure object stays alive
AutoPtr<TCPServerDispatcher> guard(this); // ensure object stays alive
int idleTime = (int) _pParams->getThreadIdleTime().totalMilliseconds();
@ -149,11 +149,13 @@ void TCPServerDispatcher::enqueue(const StreamSocket& socket)
{
try
{
this->duplicate();
_threadPool.startWithPriority(_pParams->getThreadPriority(), *this, threadName);
++_currentThreads;
}
catch (Poco::Exception& exc)
{
this->release();
++_refusedConnections;
std::cerr << "Got exception while starting thread for connection. Error code: "
<< exc.code() << ", message: '" << exc.displayText() << "'" << std::endl;
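
The reference for the worker thread is now taken explicitly in enqueue(): duplicate() before startWithPriority(), release() if starting the thread throws, while run() adopts that reference into its guard instead of taking another one. In isolation, the pattern looks like the hedged sketch below (the types are made up, not Poco's API):

#include <atomic>

// Illustrative stand-in for a Poco-style reference-counted object.
struct RefCounted
{
    std::atomic<int> counter{1};
    void duplicate() { ++counter; }
    void release() { if (--counter == 0) delete this; }
};

template <typename Pool>
void enqueueSelf(RefCounted & self, Pool & pool)
{
    self.duplicate();            // the worker thread will own one reference
    try
    {
        pool.start(self);        // may throw, e.g. when no thread can be started
    }
    catch (...)
    {
        self.release();          // undo the bump: the worker never took ownership
        throw;                   // caller still accounts for the refused connection
    }
}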

View File

@ -1,17 +0,0 @@
# see ./CMakeLists.txt for variable declaration
if (FUZZER)
if (FUZZER STREQUAL "libfuzzer")
# NOTE: Eldar Zaitov decided to name it "libfuzzer" instead of "fuzzer" to keep in mind other possible fuzzer backends.
# NOTE: no-link means that all the targets are built with instrumentation for fuzzer, but only some of them
# (tests) have entry point for fuzzer and it's not checked.
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link -DFUZZER=1")
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${SAN_FLAGS} -fsanitize=fuzzer-no-link -DFUZZER=1")
# NOTE: oss-fuzz can change LIB_FUZZING_ENGINE variable
if (NOT LIB_FUZZING_ENGINE)
set (LIB_FUZZING_ENGINE "-fsanitize=fuzzer")
endif ()
else ()
message (FATAL_ERROR "Unknown fuzzer type: ${FUZZER}")
endif ()
endif()

2
contrib/curl vendored

@ -1 +1 @@
Subproject commit 5ce164e0e9290c96eb7d502173426c0a135ec008
Subproject commit 1a05e833f8f7140628b27882b10525fd9ec4b873

2
contrib/libhdfs3 vendored

@ -1 +1 @@
Subproject commit b9598e6016720a7c088bfe85ce1fa0410f9d2103
Subproject commit 0d04201c45359f0d0701fb1e8297d25eff7cfecf

View File

@ -17,6 +17,8 @@
#ifndef METROHASH_METROHASH_128_H
#define METROHASH_METROHASH_128_H
// NOLINTBEGIN(readability-avoid-const-params-in-decls)
#include <stdint.h>
class MetroHash128
@ -68,5 +70,6 @@ private:
void metrohash128_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out);
void metrohash128_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * out);
// NOLINTEND(readability-avoid-const-params-in-decls)
#endif // #ifndef METROHASH_METROHASH_128_H

View File

@ -26,13 +26,13 @@ const uint8_t MetroHash64::test_seed_1[8] = { 0x3B, 0x0D, 0x48, 0x1C, 0xF4, 0x
MetroHash64::MetroHash64(const uint64_t seed)
MetroHash64::MetroHash64(uint64_t seed)
{
Initialize(seed);
}
void MetroHash64::Initialize(const uint64_t seed)
void MetroHash64::Initialize(uint64_t seed)
{
vseed = (static_cast<uint64_t>(seed) + k2) * k0;
@ -47,7 +47,7 @@ void MetroHash64::Initialize(const uint64_t seed)
}
void MetroHash64::Update(const uint8_t * const buffer, const uint64_t length)
void MetroHash64::Update(const uint8_t * const buffer, uint64_t length)
{
const uint8_t * ptr = reinterpret_cast<const uint8_t*>(buffer);
const uint8_t * const end = ptr + length;
@ -62,7 +62,7 @@ void MetroHash64::Update(const uint8_t * const buffer, const uint64_t length)
memcpy(input.b + (bytes % 32), ptr, static_cast<size_t>(fill));
ptr += fill;
bytes += fill;
// input buffer is still partially filled
if ((bytes % 32) != 0) return;
@ -72,7 +72,7 @@ void MetroHash64::Update(const uint8_t * const buffer, const uint64_t length)
state.v[2] += read_u64(&input.b[16]) * k2; state.v[2] = rotate_right(state.v[2],29) + state.v[0];
state.v[3] += read_u64(&input.b[24]) * k3; state.v[3] = rotate_right(state.v[3],29) + state.v[1];
}
// bulk update
bytes += static_cast<uint64_t>(end - ptr);
while (ptr <= (end - 32))
@ -83,14 +83,14 @@ void MetroHash64::Update(const uint8_t * const buffer, const uint64_t length)
state.v[2] += read_u64(ptr) * k2; ptr += 8; state.v[2] = rotate_right(state.v[2],29) + state.v[0];
state.v[3] += read_u64(ptr) * k3; ptr += 8; state.v[3] = rotate_right(state.v[3],29) + state.v[1];
}
// store remaining bytes in input buffer
if (ptr < end)
memcpy(input.b, ptr, static_cast<size_t>(end - ptr));
}
void MetroHash64::Finalize(uint8_t * const hash)
void MetroHash64::Finalize(uint8_t * hash)
{
// finalize bulk loop, if used
if (bytes >= 32)
@ -102,11 +102,11 @@ void MetroHash64::Finalize(uint8_t * const hash)
state.v[0] = vseed + (state.v[0] ^ state.v[1]);
}
// process any bytes remaining in the input buffer
const uint8_t * ptr = reinterpret_cast<const uint8_t*>(input.b);
const uint8_t * const end = ptr + (bytes % 32);
if ((end - ptr) >= 16)
{
state.v[1] = state.v[0] + (read_u64(ptr) * k2); ptr += 8; state.v[1] = rotate_right(state.v[1],29) * k3;
@ -139,7 +139,7 @@ void MetroHash64::Finalize(uint8_t * const hash)
state.v[0] += read_u8 (ptr) * k3;
state.v[0] ^= rotate_right(state.v[0], 37) * k1;
}
state.v[0] ^= rotate_right(state.v[0], 28);
state.v[0] *= k0;
state.v[0] ^= rotate_right(state.v[0], 29);
@ -152,7 +152,7 @@ void MetroHash64::Finalize(uint8_t * const hash)
}
void MetroHash64::Hash(const uint8_t * buffer, const uint64_t length, uint8_t * const hash, const uint64_t seed)
void MetroHash64::Hash(const uint8_t * buffer, uint64_t length, uint8_t * const hash, uint64_t seed)
{
const uint8_t * ptr = reinterpret_cast<const uint8_t*>(buffer);
const uint8_t * const end = ptr + length;
@ -238,7 +238,7 @@ bool MetroHash64::ImplementationVerified()
// verify incremental implementation
MetroHash64 metro;
metro.Initialize(0);
metro.Update(reinterpret_cast<const uint8_t *>(MetroHash64::test_string), strlen(MetroHash64::test_string));
metro.Finalize(hash);
@ -262,9 +262,9 @@ void metrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o
const uint8_t * ptr = reinterpret_cast<const uint8_t*>(key);
const uint8_t * const end = ptr + len;
uint64_t hash = ((static_cast<uint64_t>(seed) + k2) * k0) + len;
if (len >= 32)
{
uint64_t v[4];
@ -272,7 +272,7 @@ void metrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o
v[1] = hash;
v[2] = hash;
v[3] = hash;
do
{
v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2];
@ -288,7 +288,7 @@ void metrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o
v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 33) * k0;
hash += v[0] ^ v[1];
}
if ((end - ptr) >= 16)
{
uint64_t v0 = hash + (read_u64(ptr) * k0); ptr += 8; v0 = rotate_right(v0,33) * k1;
@ -297,32 +297,32 @@ void metrohash64_1(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o
v1 ^= rotate_right(v1 * k3, 35) + v0;
hash += v1;
}
if ((end - ptr) >= 8)
{
hash += read_u64(ptr) * k3; ptr += 8;
hash ^= rotate_right(hash, 33) * k1;
}
if ((end - ptr) >= 4)
{
hash += read_u32(ptr) * k3; ptr += 4;
hash ^= rotate_right(hash, 15) * k1;
}
if ((end - ptr) >= 2)
{
hash += read_u16(ptr) * k3; ptr += 2;
hash ^= rotate_right(hash, 13) * k1;
}
if ((end - ptr) >= 1)
{
hash += read_u8 (ptr) * k3;
hash ^= rotate_right(hash, 25) * k1;
}
hash ^= rotate_right(hash, 33);
hash *= k0;
hash ^= rotate_right(hash, 33);
@ -336,13 +336,13 @@ void metrohash64_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o
static const uint64_t k0 = 0xD6D018F5;
static const uint64_t k1 = 0xA2AA033B;
static const uint64_t k2 = 0x62992FC1;
static const uint64_t k3 = 0x30BC5B29;
const uint8_t * ptr = reinterpret_cast<const uint8_t*>(key);
const uint8_t * const end = ptr + len;
uint64_t hash = ((static_cast<uint64_t>(seed) + k2) * k0) + len;
if (len >= 32)
{
uint64_t v[4];
@ -350,7 +350,7 @@ void metrohash64_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o
v[1] = hash;
v[2] = hash;
v[3] = hash;
do
{
v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2];
@ -366,7 +366,7 @@ void metrohash64_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o
v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 30) * k0;
hash += v[0] ^ v[1];
}
if ((end - ptr) >= 16)
{
uint64_t v0 = hash + (read_u64(ptr) * k2); ptr += 8; v0 = rotate_right(v0,29) * k3;
@ -375,31 +375,31 @@ void metrohash64_2(const uint8_t * key, uint64_t len, uint32_t seed, uint8_t * o
v1 ^= rotate_right(v1 * k3, 34) + v0;
hash += v1;
}
if ((end - ptr) >= 8)
{
hash += read_u64(ptr) * k3; ptr += 8;
hash ^= rotate_right(hash, 36) * k1;
}
if ((end - ptr) >= 4)
{
hash += read_u32(ptr) * k3; ptr += 4;
hash ^= rotate_right(hash, 15) * k1;
}
if ((end - ptr) >= 2)
{
hash += read_u16(ptr) * k3; ptr += 2;
hash ^= rotate_right(hash, 15) * k1;
}
if ((end - ptr) >= 1)
{
hash += read_u8 (ptr) * k3;
hash ^= rotate_right(hash, 23) * k1;
}
hash ^= rotate_right(hash, 28);
hash *= k0;
hash ^= rotate_right(hash, 29);

View File

@ -25,24 +25,24 @@ public:
static const uint32_t bits = 64;
// Constructor initializes the same as Initialize()
explicit MetroHash64(const uint64_t seed=0);
explicit MetroHash64(uint64_t seed=0);
// Initializes internal state for new hash with optional seed
void Initialize(const uint64_t seed=0);
void Initialize(uint64_t seed=0);
// Update the hash state with a string of bytes. If the length
// is sufficiently long, the implementation switches to a bulk
// hashing algorithm directly on the argument buffer for speed.
void Update(const uint8_t * buffer, const uint64_t length);
void Update(const uint8_t * buffer, uint64_t length);
// Constructs the final hash and writes it to the argument buffer.
// After a hash is finalized, this instance must be Initialized()-ed
// again or the behavior of Update() and Finalize() is undefined.
void Finalize(uint8_t * const hash);
void Finalize(uint8_t * hash);
// A non-incremental function implementation. This can be significantly
// faster than the incremental implementation for some usage patterns.
static void Hash(const uint8_t * buffer, const uint64_t length, uint8_t * const hash, const uint64_t seed=0);
static void Hash(const uint8_t * buffer, uint64_t length, uint8_t * hash, uint64_t seed=0);
// Does implementation correctly execute test vectors?
static bool ImplementationVerified();
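For context, ClickHouse exposes this algorithm at the SQL level as `metroHash64()` (an assumption based on the shared name; the header above only defines the C++ API), so the quickest way to exercise it end to end is a query rather than the C++ calls above. A minimal sketch:
```sql
-- metroHash64() returns a UInt64; with several arguments the inputs are
-- combined into one hash, which generally differs from hashing their
-- concatenation.
SELECT
    metroHash64('ClickHouse')             AS one_arg,
    metroHash64('Click', 'House')         AS two_args,
    toTypeName(metroHash64('ClickHouse')) AS result_type;  -- UInt64
```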

View File

@ -34,7 +34,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.2.1.2248"
ARG VERSION="24.2.2.71"
ARG PACKAGES="clickhouse-keeper"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -28,7 +28,6 @@ lrwxrwxrwx 1 root root 10 clickhouse-benchmark -> clickhouse
lrwxrwxrwx 1 root root 10 clickhouse-clang -> clickhouse
lrwxrwxrwx 1 root root 10 clickhouse-client -> clickhouse
lrwxrwxrwx 1 root root 10 clickhouse-compressor -> clickhouse
lrwxrwxrwx 1 root root 10 clickhouse-copier -> clickhouse
lrwxrwxrwx 1 root root 10 clickhouse-extract-from-config -> clickhouse
lrwxrwxrwx 1 root root 10 clickhouse-format -> clickhouse
lrwxrwxrwx 1 root root 10 clickhouse-lld -> clickhouse

View File

@ -4,6 +4,9 @@ FROM clickhouse/fasttest:$FROM_TAG
ENV CC=clang-${LLVM_VERSION}
ENV CXX=clang++-${LLVM_VERSION}
# If the cctools is updated, then first build it in the CI, then update here in a different commit
COPY --from=clickhouse/cctools:d9e3596e706b /cctools /cctools
# Rust toolchain and libraries
ENV RUSTUP_HOME=/rust/rustup
ENV CARGO_HOME=/rust/cargo
@ -73,9 +76,6 @@ RUN curl -Lo /usr/bin/clang-tidy-cache \
"https://raw.githubusercontent.com/matus-chochlik/ctcache/$CLANG_TIDY_SHA1/clang-tidy-cache" \
&& chmod +x /usr/bin/clang-tidy-cache
# If the cctools is updated, then first build it in the CI, then update here in a different commit
COPY --from=clickhouse/cctools:5a908f73878a /cctools /cctools
RUN mkdir /workdir && chmod 777 /workdir
WORKDIR /workdir

View File

@ -2,7 +2,7 @@
# It's based on the assumption that we don't care of the cctools version so much
# It event does not depend on the clickhouse/fasttest in the `docker/images.json`
ARG FROM_TAG=latest
FROM clickhouse/fasttest:$FROM_TAG
FROM clickhouse/fasttest:$FROM_TAG as builder
ENV CC=clang-${LLVM_VERSION}
ENV CXX=clang++-${LLVM_VERSION}
@ -29,3 +29,6 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \
&& make install -j$(nproc) \
&& cd ../.. \
&& rm -rf cctools-port
FROM scratch
COPY --from=builder /cctools /cctools

View File

@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
# lts / testing / prestable / etc
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
ARG VERSION="24.2.1.2248"
ARG VERSION="24.2.2.71"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
ARG DIRECT_DOWNLOAD_URLS=""

View File

@ -27,7 +27,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list
ARG REPO_CHANNEL="stable"
ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
ARG VERSION="24.2.1.2248"
ARG VERSION="24.2.2.71"
ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"
# set non-empty deb_location_url url to create a docker image

View File

@ -33,6 +33,9 @@ ENV TSAN_OPTIONS='halt_on_error=1 abort_on_error=1 history_size=7 memory_limit_m
ENV UBSAN_OPTIONS='print_stacktrace=1'
ENV MSAN_OPTIONS='abort_on_error=1 poison_in_dtor=1'
# for external_symbolizer_path
RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8
ENV LC_ALL en_US.UTF-8

View File

@ -14,7 +14,6 @@ RUN apt-get update \
libclang-${LLVM_VERSION}-dev \
libclang-rt-${LLVM_VERSION}-dev \
lld-${LLVM_VERSION} \
llvm-${LLVM_VERSION} \
llvm-${LLVM_VERSION}-dev \
lsof \
ninja-build \
@ -37,8 +36,6 @@ RUN pip3 install numpy==1.26.3 scipy==1.12.0 pandas==1.5.3 Jinja2==3.1.3
# This symlink is required by gcc to find the lld linker
RUN ln -s /usr/bin/lld-${LLVM_VERSION} /usr/bin/ld.lld
# for external_symbolizer_path
RUN ln -s /usr/bin/llvm-symbolizer-${LLVM_VERSION} /usr/bin/llvm-symbolizer
# FIXME: workaround for "The imported target "merge-fdata" references the file" error
# https://salsa.debian.org/pkg-llvm-team/llvm-toolchain/-/commit/992e52c0b156a5ba9c6a8a54f8c4857ddd3d371d
RUN sed -i '/_IMPORT_CHECK_FILES_FOR_\(mlir-\|llvm-bolt\|merge-fdata\|MLIR\)/ {s|^|#|}' /usr/lib/llvm-${LLVM_VERSION}/lib/cmake/llvm/LLVMExports-*.cmake

View File

@ -173,9 +173,15 @@ function fuzz
mkdir -p /var/run/clickhouse-server
# NOTE: we use process substitution here to preserve keep $! as a pid of clickhouse-server
clickhouse-server --config-file db/config.xml --pid-file /var/run/clickhouse-server/clickhouse-server.pid -- --path db > server.log 2>&1 &
server_pid=$!
# server.log -> All server logs, including sanitizer
# stderr.log -> Process logs (sanitizer) only
clickhouse-server \
--config-file db/config.xml \
--pid-file /var/run/clickhouse-server/clickhouse-server.pid \
-- --path db \
--logger.console=0 \
--logger.log=server.log 2>&1 | tee -a stderr.log >> server.log 2>&1 &
server_pid=$(pidof clickhouse-server)
kill -0 $server_pid
@ -343,7 +349,7 @@ quit
# which is confusing.
task_exit_code=$fuzzer_exit_code
echo "failure" > status.txt
echo "Achtung!" > description.txt
echo "Let op!" > description.txt
echo "Fuzzer went wrong with error code: ($fuzzer_exit_code). Its process died somehow when the server stayed alive. The server log probably won't tell you much so try to find information in other files." >>description.txt
{ rg -ao "Found error:.*" fuzzer.log || rg -ao "Exception:.*" fuzzer.log; } | tail -1 >>description.txt
fi
@ -427,6 +433,7 @@ p.links a { padding: 5px; margin: 3px; background: #FFF; line-height: 2; white-s
<a href="run.log">run.log</a>
<a href="fuzzer.log.zst">fuzzer.log.zst</a>
<a href="server.log.zst">server.log.zst</a>
<a href="stderr.log">stderr.log</a>
<a href="main.log">main.log</a>
<a href="dmesg.log">dmesg.log</a>
${CORE_LINK}

View File

@ -126,7 +126,6 @@ RUN set -x \
COPY modprobe.sh /usr/local/bin/modprobe
COPY dockerd-entrypoint.sh /usr/local/bin/
COPY compose/ /compose/
COPY misc/ /misc/

View File

@ -51,22 +51,22 @@ fi
config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml
if [[ -n "$BUGFIX_VALIDATE_CHECK" ]] && [[ "$BUGFIX_VALIDATE_CHECK" -eq 1 ]]; then
sudo cat /etc/clickhouse-server/config.d/zookeeper.xml \
| sed "/<use_compression>1<\/use_compression>/d" \
> /etc/clickhouse-server/config.d/zookeeper.xml.tmp
sudo mv /etc/clickhouse-server/config.d/zookeeper.xml.tmp /etc/clickhouse-server/config.d/zookeeper.xml
sudo sed -i "/<use_compression>1<\/use_compression>/d" /etc/clickhouse-server/config.d/zookeeper.xml
# it contains some new settings, but we can safely remove it
rm /etc/clickhouse-server/config.d/handlers.yaml
rm /etc/clickhouse-server/users.d/s3_cache_new.xml
rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml
#todo: remove these after 24.3 released.
sudo sed -i "s|<object_storage_type>azure<|<object_storage_type>azure_blob_storage<|" /etc/clickhouse-server/config.d/azure_storage_conf.xml
#todo: remove these after 24.3 released.
sudo sed -i "s|<object_storage_type>local<|<object_storage_type>local_blob_storage<|" /etc/clickhouse-server/config.d/storage_conf.xml
function remove_keeper_config()
{
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
| sed "/<$1>$2<\/$1>/d" \
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
sudo sed -i "/<$1>$2<\/$1>/d" /etc/clickhouse-server/config.d/keeper_port.xml
}
# commit_logs_cache_size_threshold setting doesn't exist on some older versions
remove_keeper_config "commit_logs_cache_size_threshold" "[[:digit:]]\+"
@ -77,7 +77,7 @@ fi
if [ "$NUM_TRIES" -gt "1" ]; then
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
export THREAD_FUZZER_SLEEP_TIME_US=100000
export THREAD_FUZZER_SLEEP_TIME_US_MAX=100000
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1
@ -88,10 +88,10 @@ if [ "$NUM_TRIES" -gt "1" ]; then
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US_MAX=10000
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US_MAX=10000
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US_MAX=10000
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US_MAX=10000
mkdir -p /var/run/clickhouse-server
# simplest way to forward env variables to server
@ -101,25 +101,13 @@ else
fi
if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
sudo cat /etc/clickhouse-server1/config.d/filesystem_caches_path.xml \
| sed "s|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches/</filesystem_caches_path>|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches_1/</filesystem_caches_path>|" \
> /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp
mv /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server1/config.d/filesystem_caches_path.xml
sudo sed -i "s|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches/</filesystem_caches_path>|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches_1/</filesystem_caches_path>|" /etc/clickhouse-server1/config.d/filesystem_caches_path.xml
sudo cat /etc/clickhouse-server2/config.d/filesystem_caches_path.xml \
| sed "s|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches/</filesystem_caches_path>|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches_2/</filesystem_caches_path>|" \
> /etc/clickhouse-server2/config.d/filesystem_caches_path.xml.tmp
mv /etc/clickhouse-server2/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server2/config.d/filesystem_caches_path.xml
sudo sed -i "s|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches/</filesystem_caches_path>|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches_2/</filesystem_caches_path>|" /etc/clickhouse-server2/config.d/filesystem_caches_path.xml
sudo cat /etc/clickhouse-server1/config.d/filesystem_caches_path.xml \
| sed "s|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches/</custom_cached_disks_base_directory>|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches_1/</custom_cached_disks_base_directory>|" \
> /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp
mv /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server1/config.d/filesystem_caches_path.xml
sudo sed -i "s|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches/</custom_cached_disks_base_directory>|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches_1/</custom_cached_disks_base_directory>|" /etc/clickhouse-server1/config.d/filesystem_caches_path.xml
sudo cat /etc/clickhouse-server2/config.d/filesystem_caches_path.xml \
| sed "s|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches/</custom_cached_disks_base_directory>|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches_2/</custom_cached_disks_base_directory>|" \
> /etc/clickhouse-server2/config.d/filesystem_caches_path.xml.tmp
mv /etc/clickhouse-server2/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server2/config.d/filesystem_caches_path.xml
sudo sed -i "s|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches/</custom_cached_disks_base_directory>|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches_2/</custom_cached_disks_base_directory>|" /etc/clickhouse-server2/config.d/filesystem_caches_path.xml
mkdir -p /var/run/clickhouse-server1
sudo chown clickhouse:clickhouse /var/run/clickhouse-server1

View File

@ -215,7 +215,7 @@ function check_server_start()
function check_logs_for_critical_errors()
{
# Sanitizer asserts
sed -n '/WARNING:.*anitizer/,/^$/p' >> /test_output/tmp
sed -n '/WARNING:.*anitizer/,/^$/p' /var/log/clickhouse-server/stderr.log >> /test_output/tmp
rg -Fav -e "ASan doesn't fully support makecontext/swapcontext functions" -e "DB::Exception" /test_output/tmp > /dev/null \
&& echo -e "Sanitizer assert (in stderr.log)$FAIL$(head_escaped /test_output/tmp)" >> /test_output/test_results.tsv \
|| echo -e "No sanitizer asserts$OK" >> /test_output/test_results.tsv

View File

@ -27,7 +27,7 @@ install_packages package_folder
# and find more potential issues.
export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000
export THREAD_FUZZER_SLEEP_PROBABILITY=0.1
export THREAD_FUZZER_SLEEP_TIME_US=100000
export THREAD_FUZZER_SLEEP_TIME_US_MAX=100000
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1
export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1
@ -38,11 +38,11 @@ export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US_MAX=10000
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000
export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US_MAX=10000
export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US_MAX=10000
export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US_MAX=10000
export THREAD_FUZZER_EXPLICIT_SLEEP_PROBABILITY=0.01
export THREAD_FUZZER_EXPLICIT_MEMORY_EXCEPTION_PROBABILITY=0.01

View File

@ -8,20 +8,22 @@ ARG apt_archive="http://archive.ubuntu.com"
RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list
RUN apt-get update && env DEBIAN_FRONTEND=noninteractive apt-get install --yes \
aspell \
curl \
git \
file \
libxml2-utils \
moreutils \
python3-fuzzywuzzy \
python3-pip \
yamllint \
locales \
&& pip3 install black==23.12.0 boto3 codespell==2.2.1 mypy==1.8.0 PyGithub unidiff pylint==3.1.0 \
requests types-requests \
aspell \
curl \
git \
file \
libxml2-utils \
moreutils \
python3-fuzzywuzzy \
python3-pip \
yamllint \
locales \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/* \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*
# python-magic is the same version as in Ubuntu 22.04
RUN pip3 install black==23.12.0 boto3 codespell==2.2.1 mypy==1.8.0 PyGithub unidiff pylint==3.1.0 \
python-magic==0.4.24 requests types-requests \
&& rm -rf /root/.cache/pip
RUN echo "en_US.UTF-8 UTF-8" > /etc/locale.gen && locale-gen en_US.UTF-8

View File

@ -67,10 +67,7 @@ configure
function remove_keeper_config()
{
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
| sed "/<$1>$2<\/$1>/d" \
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
sudo sed -i "/<$1>$2<\/$1>/d" /etc/clickhouse-server/config.d/keeper_port.xml
}
# async_replication setting doesn't exist on some older versions
@ -80,16 +77,10 @@ remove_keeper_config "async_replication" "1"
remove_keeper_config "create_if_not_exists" "[01]"
#todo: remove these after 24.3 released.
sudo cat /etc/clickhouse-server/config.d/azure_storage_conf.xml \
| sed "s|<object_storage_type>azure|<object_storage_type>azure_blob_storage|" \
> /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp
sudo mv /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp /etc/clickhouse-server/config.d/azure_storage_conf.xml
sudo sed -i "s|<object_storage_type>azure<|<object_storage_type>azure_blob_storage<|" /etc/clickhouse-server/config.d/azure_storage_conf.xml
#todo: remove these after 24.3 released.
sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
| sed "s|<object_storage_type>local|<object_storage_type>local_blob_storage|" \
> /etc/clickhouse-server/config.d/storage_conf.xml.tmp
sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
sudo sed -i "s|<object_storage_type>local<|<object_storage_type>local_blob_storage<|" /etc/clickhouse-server/config.d/storage_conf.xml
# latest_logs_cache_size_threshold setting doesn't exist on some older versions
remove_keeper_config "latest_logs_cache_size_threshold" "[[:digit:]]\+"
@ -120,22 +111,13 @@ export ZOOKEEPER_FAULT_INJECTION=0
configure
# force_sync=false doesn't work correctly on some older versions
sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
| sed "s|<force_sync>false</force_sync>|<force_sync>true</force_sync>|" \
> /etc/clickhouse-server/config.d/keeper_port.xml.tmp
sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
sudo sed -i "s|<force_sync>false</force_sync>|<force_sync>true</force_sync>|" /etc/clickhouse-server/config.d/keeper_port.xml
#todo: remove these after 24.3 released.
sudo cat /etc/clickhouse-server/config.d/azure_storage_conf.xml \
| sed "s|<object_storage_type>azure|<object_storage_type>azure_blob_storage|" \
> /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp
sudo mv /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp /etc/clickhouse-server/config.d/azure_storage_conf.xml
sudo sed -i "s|<object_storage_type>azure<|<object_storage_type>azure_blob_storage<|" /etc/clickhouse-server/config.d/azure_storage_conf.xml
#todo: remove these after 24.3 released.
sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
| sed "s|<object_storage_type>local|<object_storage_type>local_blob_storage|" \
> /etc/clickhouse-server/config.d/storage_conf.xml.tmp
sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
sudo sed -i "s|<object_storage_type>local<|<object_storage_type>local_blob_storage<|" /etc/clickhouse-server/config.d/storage_conf.xml
# async_replication setting doesn't exist on some older versions
remove_keeper_config "async_replication" "1"
@ -150,10 +132,7 @@ remove_keeper_config "latest_logs_cache_size_threshold" "[[:digit:]]\+"
remove_keeper_config "commit_logs_cache_size_threshold" "[[:digit:]]\+"
# But we still need default disk because some tables loaded only into it
sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \
| sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \
> /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo sed -i "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
@ -256,10 +235,7 @@ then
fi
# Just in case previous version left some garbage in zk
sudo cat /etc/clickhouse-server/config.d/lost_forever_check.xml \
| sed "s|>1<|>0<|g" \
> /etc/clickhouse-server/config.d/lost_forever_check.xml.tmp
sudo mv /etc/clickhouse-server/config.d/lost_forever_check.xml.tmp /etc/clickhouse-server/config.d/lost_forever_check.xml
sudo sed -i "s|>1<|>0<|g" /etc/clickhouse-server/config.d/lost_forever_check.xml \
rm /etc/clickhouse-server/config.d/filesystem_caches_path.xml
start 500

View File

@ -26,6 +26,8 @@ RUN apt-get update \
&& export CODENAME="$(lsb_release --codename --short | tr 'A-Z' 'a-z')" \
&& echo "deb https://apt.llvm.org/${CODENAME}/ llvm-toolchain-${CODENAME}-${LLVM_VERSION} main" >> \
/etc/apt/sources.list \
&& apt-get update \
&& apt-get install --yes --no-install-recommends --verbose-versions llvm-${LLVM_VERSION} \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/* /var/cache/debconf /tmp/*

View File

@ -0,0 +1,64 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v23.12.5.81-stable (a0fbe3ae813) FIXME as compared to v23.12.4.15-stable (4233d111d20)
#### Improvement
* Backported in [#60290](https://github.com/ClickHouse/ClickHouse/issues/60290): Copy S3 file GCP fallback to buffer copy in case GCP returned `Internal Error` with `GATEWAY_TIMEOUT` HTTP error code. [#60164](https://github.com/ClickHouse/ClickHouse/pull/60164) ([Maksim Kita](https://github.com/kitaisreal)).
* Backported in [#60830](https://github.com/ClickHouse/ClickHouse/issues/60830): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
#### Build/Testing/Packaging Improvement
* Backported in [#59883](https://github.com/ClickHouse/ClickHouse/issues/59883): If you want to run initdb scripts every time the ClickHouse container starts, initialize the environment variable CLICKHOUSE_ALWAYS_RUN_INITDB_SCRIPTS. [#59808](https://github.com/ClickHouse/ClickHouse/pull/59808) ([Alexander Nikolaev](https://github.com/AlexNik)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix_kql_issue_found_by_wingfuzz [#59626](https://github.com/ClickHouse/ClickHouse/pull/59626) ([Yong Wang](https://github.com/kashwy)).
* Fix error "Read beyond last offset" for AsynchronousBoundedReadBuffer [#59630](https://github.com/ClickHouse/ClickHouse/pull/59630) ([Vitaly Baranov](https://github.com/vitlibar)).
* Fix query start time on non initial queries [#59662](https://github.com/ClickHouse/ClickHouse/pull/59662) ([Raúl Marín](https://github.com/Algunenano)).
* rabbitmq: fix having neither acked nor nacked messages [#59775](https://github.com/ClickHouse/ClickHouse/pull/59775) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix parsing of partition expressions surrounded by parens [#59901](https://github.com/ClickHouse/ClickHouse/pull/59901) ([János Benjamin Antal](https://github.com/antaljanosbenjamin)).
* Fix optimize_uniq_to_count removing the column alias [#60026](https://github.com/ClickHouse/ClickHouse/pull/60026) ([Raúl Marín](https://github.com/Algunenano)).
* Fix cosineDistance crash with Nullable [#60150](https://github.com/ClickHouse/ClickHouse/pull/60150) ([Raúl Marín](https://github.com/Algunenano)).
* Hide sensitive info for s3queue [#60233](https://github.com/ClickHouse/ClickHouse/pull/60233) ([Kseniia Sumarokova](https://github.com/kssenii)).
* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
* Reduce the number of read rows from `system.numbers` [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)).
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).
#### CI Fix or Improvement (changelog entry is not required)
* Backported in [#60767](https://github.com/ClickHouse/ClickHouse/issues/60767): Decoupled changes from [#60408](https://github.com/ClickHouse/ClickHouse/issues/60408). [#60553](https://github.com/ClickHouse/ClickHouse/pull/60553) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#60582](https://github.com/ClickHouse/ClickHouse/issues/60582): Arm and amd docker build jobs use similar job names and thus overwrite job reports - aarch64 and amd64 suffixes added to fix this. [#60554](https://github.com/ClickHouse/ClickHouse/pull/60554) ([Max K.](https://github.com/maxknv)).
* Backported in [#61041](https://github.com/ClickHouse/ClickHouse/issues/61041): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#61030](https://github.com/ClickHouse/ClickHouse/issues/61030): ... [#61022](https://github.com/ClickHouse/ClickHouse/pull/61022) ([Max K.](https://github.com/maxknv)).
* Backported in [#61224](https://github.com/ClickHouse/ClickHouse/issues/61224): ... [#61183](https://github.com/ClickHouse/ClickHouse/pull/61183) ([Han Fei](https://github.com/hanfei1991)).
* Backported in [#61190](https://github.com/ClickHouse/ClickHouse/issues/61190): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)).
#### NO CL ENTRY
* NO CL ENTRY: 'Revert "Backport [#59798](https://github.com/ClickHouse/ClickHouse/issues/59798) to 23.12: CI: do not reuse builds on release branches"'. [#59979](https://github.com/ClickHouse/ClickHouse/pull/59979) ([Max K.](https://github.com/maxknv)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* CI: move ci-specifics from job scripts to ci.py [#58516](https://github.com/ClickHouse/ClickHouse/pull/58516) ([Max K.](https://github.com/maxknv)).
* Make ZooKeeper actually sequentialy consistent [#59735](https://github.com/ClickHouse/ClickHouse/pull/59735) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Fix special build reports in release branches [#59797](https://github.com/ClickHouse/ClickHouse/pull/59797) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* CI: do not reuse builds on release branches [#59798](https://github.com/ClickHouse/ClickHouse/pull/59798) ([Max K.](https://github.com/maxknv)).
* Fix mark release ready [#59994](https://github.com/ClickHouse/ClickHouse/pull/59994) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Ability to detect undead ZooKeeper sessions [#60044](https://github.com/ClickHouse/ClickHouse/pull/60044) ([Alexander Tokmakov](https://github.com/tavplubix)).
* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)).
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
* Remove broken test while we fix it [#60547](https://github.com/ClickHouse/ClickHouse/pull/60547) ([Raúl Marín](https://github.com/Algunenano)).
* Speed up cctools building [#61011](https://github.com/ClickHouse/ClickHouse/pull/61011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -0,0 +1,24 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v23.3.21.26-lts (d9672a3731f) FIXME as compared to v23.3.20.27-lts (cc974ba4f81)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix reading from sparse columns after restart [#49660](https://github.com/ClickHouse/ClickHouse/pull/49660) ([Anton Popov](https://github.com/CurtizJ)).
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#57104](https://github.com/ClickHouse/ClickHouse/pull/57104) ([Kruglov Pavel](https://github.com/Avogar)).
* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)).
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).

View File

@ -0,0 +1,30 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v23.8.11.28-lts (31879d2ab4c) FIXME as compared to v23.8.10.43-lts (a278225bba9)
#### Improvement
* Backported in [#60828](https://github.com/ClickHouse/ClickHouse/issues/60828): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
#### NO CL ENTRY
* NO CL ENTRY: 'Use the current branch test-utils to build cctools'. [#61276](https://github.com/ClickHouse/ClickHouse/pull/61276) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#57104](https://github.com/ClickHouse/ClickHouse/pull/57104) ([Kruglov Pavel](https://github.com/Avogar)).
* Detect io_uring in tests [#60373](https://github.com/ClickHouse/ClickHouse/pull/60373) ([Azat Khuzhin](https://github.com/azat)).
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).

View File

@ -0,0 +1,26 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.1.7.18-stable (90925babd78) FIXME as compared to v24.1.6.52-stable (fa09f677bc9)
#### Bug Fix (user-visible misbehavior in an official stable release)
* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).
#### CI Fix or Improvement (changelog entry is not required)
* Backported in [#61043](https://github.com/ClickHouse/ClickHouse/issues/61043): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#61168](https://github.com/ClickHouse/ClickHouse/issues/61168): Just a preparation for the merge queue support. [#61099](https://github.com/ClickHouse/ClickHouse/pull/61099) ([Max K.](https://github.com/maxknv)).
* Backported in [#61192](https://github.com/ClickHouse/ClickHouse/issues/61192): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).

View File

@ -0,0 +1,55 @@
---
sidebar_position: 1
sidebar_label: 2024
---
# 2024 Changelog
### ClickHouse release v24.2.2.71-stable (9293d361e72) FIXME as compared to v24.2.1.2248-stable (891689a4150)
#### Improvement
* Backported in [#60834](https://github.com/ClickHouse/ClickHouse/issues/60834): Update tzdata to 2024a. [#60768](https://github.com/ClickHouse/ClickHouse/pull/60768) ([Raúl Marín](https://github.com/Algunenano)).
#### Bug Fix (user-visible misbehavior in an official stable release)
* PartsSplitter invalid ranges for the same part [#60041](https://github.com/ClickHouse/ClickHouse/pull/60041) ([Maksim Kita](https://github.com/kitaisreal)).
* Try to avoid calculation of scalar subqueries for CREATE TABLE. [#60464](https://github.com/ClickHouse/ClickHouse/pull/60464) ([Nikolai Kochetov](https://github.com/KochetovNicolai)).
* Fix deadlock in parallel parsing when lots of rows are skipped due to errors [#60516](https://github.com/ClickHouse/ClickHouse/pull/60516) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix_max_query_size_for_kql_compound_operator: [#60534](https://github.com/ClickHouse/ClickHouse/pull/60534) ([Yong Wang](https://github.com/kashwy)).
* Reduce the number of read rows from `system.numbers` [#60546](https://github.com/ClickHouse/ClickHouse/pull/60546) ([JackyWoo](https://github.com/JackyWoo)).
* Don't output number tips for date types [#60577](https://github.com/ClickHouse/ClickHouse/pull/60577) ([Raúl Marín](https://github.com/Algunenano)).
* Fix buffer overflow in CompressionCodecMultiple [#60731](https://github.com/ClickHouse/ClickHouse/pull/60731) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Remove nonsense from SQL/JSON [#60738](https://github.com/ClickHouse/ClickHouse/pull/60738) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Prevent setting custom metadata headers on unsupported multipart upload operations [#60748](https://github.com/ClickHouse/ClickHouse/pull/60748) ([Francisco J. Jurado Moreno](https://github.com/Beetelbrox)).
* Fix crash in arrayEnumerateRanked [#60764](https://github.com/ClickHouse/ClickHouse/pull/60764) ([Raúl Marín](https://github.com/Algunenano)).
* Fix crash when using input() in INSERT SELECT JOIN [#60765](https://github.com/ClickHouse/ClickHouse/pull/60765) ([Kruglov Pavel](https://github.com/Avogar)).
* Fix crash with different allow_experimental_analyzer value in subqueries [#60770](https://github.com/ClickHouse/ClickHouse/pull/60770) ([Dmitry Novik](https://github.com/novikd)).
* Remove recursion when reading from S3 [#60849](https://github.com/ClickHouse/ClickHouse/pull/60849) ([Antonio Andelic](https://github.com/antonio2368)).
* Fix multiple bugs in groupArraySorted [#61203](https://github.com/ClickHouse/ClickHouse/pull/61203) ([Raúl Marín](https://github.com/Algunenano)).
* Fix Keeper reconfig for standalone binary [#61233](https://github.com/ClickHouse/ClickHouse/pull/61233) ([Antonio Andelic](https://github.com/antonio2368)).
#### CI Fix or Improvement (changelog entry is not required)
* Backported in [#60758](https://github.com/ClickHouse/ClickHouse/issues/60758): Decoupled changes from [#60408](https://github.com/ClickHouse/ClickHouse/issues/60408). [#60553](https://github.com/ClickHouse/ClickHouse/pull/60553) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#60706](https://github.com/ClickHouse/ClickHouse/issues/60706): Eliminates the need to provide input args to docker server jobs to clean yml files. [#60602](https://github.com/ClickHouse/ClickHouse/pull/60602) ([Max K.](https://github.com/maxknv)).
* Backported in [#61045](https://github.com/ClickHouse/ClickHouse/issues/61045): Debug and fix markreleaseready. [#60611](https://github.com/ClickHouse/ClickHouse/pull/60611) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
* Backported in [#60721](https://github.com/ClickHouse/ClickHouse/issues/60721): Fix build_report job so that it's defined by ci_config only (not yml file). [#60613](https://github.com/ClickHouse/ClickHouse/pull/60613) ([Max K.](https://github.com/maxknv)).
* Backported in [#60668](https://github.com/ClickHouse/ClickHouse/issues/60668): Do not await CI pending jobs on release branches; decrease the wait timeout to fit into the GitHub job timeout. [#60652](https://github.com/ClickHouse/ClickHouse/pull/60652) ([Max K.](https://github.com/maxknv)).
* Backported in [#60863](https://github.com/ClickHouse/ClickHouse/issues/60863): Set limited number of builds for "special build check" report in backports. [#60850](https://github.com/ClickHouse/ClickHouse/pull/60850) ([Max K.](https://github.com/maxknv)).
* Backported in [#60946](https://github.com/ClickHouse/ClickHouse/issues/60946): ... [#60935](https://github.com/ClickHouse/ClickHouse/pull/60935) ([Max K.](https://github.com/maxknv)).
* Backported in [#60972](https://github.com/ClickHouse/ClickHouse/issues/60972): ... [#60952](https://github.com/ClickHouse/ClickHouse/pull/60952) ([Max K.](https://github.com/maxknv)).
* Backported in [#60980](https://github.com/ClickHouse/ClickHouse/issues/60980): ... [#60958](https://github.com/ClickHouse/ClickHouse/pull/60958) ([Max K.](https://github.com/maxknv)).
* Backported in [#61170](https://github.com/ClickHouse/ClickHouse/issues/61170): Just a preparation for the merge queue support. [#61099](https://github.com/ClickHouse/ClickHouse/pull/61099) ([Max K.](https://github.com/maxknv)).
* Backported in [#61181](https://github.com/ClickHouse/ClickHouse/issues/61181): ... [#61172](https://github.com/ClickHouse/ClickHouse/pull/61172) ([Max K.](https://github.com/maxknv)).
* Backported in [#61228](https://github.com/ClickHouse/ClickHouse/issues/61228): ... [#61183](https://github.com/ClickHouse/ClickHouse/pull/61183) ([Han Fei](https://github.com/hanfei1991)).
* Backported in [#61194](https://github.com/ClickHouse/ClickHouse/issues/61194): ... [#61185](https://github.com/ClickHouse/ClickHouse/pull/61185) ([Max K.](https://github.com/maxknv)).
* Backported in [#61244](https://github.com/ClickHouse/ClickHouse/issues/61244): ... [#61214](https://github.com/ClickHouse/ClickHouse/pull/61214) ([Max K.](https://github.com/maxknv)).
* Backported in [#61388](https://github.com/ClickHouse/ClickHouse/issues/61388):. [#61373](https://github.com/ClickHouse/ClickHouse/pull/61373) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
#### NOT FOR CHANGELOG / INSIGNIFICANT
* CI: make workflow yml abstract [#60421](https://github.com/ClickHouse/ClickHouse/pull/60421) ([Max K.](https://github.com/maxknv)).
* Cancel PipelineExecutor properly in case of exception in spawnThreads [#60499](https://github.com/ClickHouse/ClickHouse/pull/60499) ([Kruglov Pavel](https://github.com/Avogar)).
* General sanity in function `seriesOutliersDetectTukey` [#60535](https://github.com/ClickHouse/ClickHouse/pull/60535) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
* Speed up cctools building [#61011](https://github.com/ClickHouse/ClickHouse/pull/61011) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).

View File

@ -6,6 +6,11 @@ sidebar_label: JDBC
# JDBC
:::note
clickhouse-jdbc-bridge contains experimental codes and is no longer supported. It may contain reliability issues and security vulnerabilities. Use it at your own risk.
ClickHouse recommends using the built-in table functions in ClickHouse, which provide a better alternative for ad-hoc querying scenarios (Postgres, MySQL, MongoDB, etc.).
:::
Allows ClickHouse to connect to external databases via [JDBC](https://en.wikipedia.org/wiki/Java_Database_Connectivity).
To implement the JDBC connection, ClickHouse uses the separate program [clickhouse-jdbc-bridge](https://github.com/ClickHouse/clickhouse-jdbc-bridge) that should run as a daemon.
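As a hedged sketch of how this fits together (assuming a clickhouse-jdbc-bridge daemon is already running with its default settings; the MySQL host, credentials, and table names below are purely illustrative):
```sql
-- proxies reads from the external MySQL table `test.my_table`
-- through the clickhouse-jdbc-bridge daemon
CREATE TABLE jdbc_mysql_proxy
ENGINE = JDBC('jdbc:mysql://mysql-host:3306/?user=root&password=root', 'test', 'my_table');

SELECT * FROM jdbc_mysql_proxy LIMIT 10;
```
Per the note above, prefer the built-in table functions (for example the MySQL or PostgreSQL ones) where they cover the use case.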

View File

@ -18,8 +18,8 @@ This engine allows integrating ClickHouse with [RabbitMQ](https://www.rabbitmq.c
``` sql
CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
(
name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1],
name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
name1 [type1],
name2 [type2],
...
) ENGINE = RabbitMQ SETTINGS
rabbitmq_host_port = 'host:port' [or rabbitmq_address = 'amqp(s)://guest:guest@localhost/vhost'],
@ -198,6 +198,10 @@ Additional virtual columns when `kafka_handle_error_mode='stream'`:
Note: the `_raw_message` and `_error` virtual columns are filled only if an exception occurs during parsing; they are always `NULL` when the message was parsed successfully.
## Caveats {#caveats}
Even though you may specify [default column expressions](/docs/en/sql-reference/statements/create/table.md/#default_values) (such as `DEFAULT`, `MATERIALIZED`, `ALIAS`) in the table definition, these will be ignored. Instead, the columns will be filled with the default values for their types.
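A minimal sketch of this caveat (exchange and column names are illustrative; `rabbitmq_exchange_name` and `rabbitmq_format` are the engine's usual required settings):
```sql
CREATE TABLE rabbitmq_events
(
    id       UInt64,
    -- accepted by the parser, but ignored by the engine: if `priority`
    -- is missing from a message, the column becomes 0 (the UInt8 type
    -- default), not 100
    priority UInt8 DEFAULT 100
)
ENGINE = RabbitMQ
SETTINGS
    rabbitmq_host_port = 'localhost:5672',
    rabbitmq_exchange_name = 'events',
    rabbitmq_format = 'JSONEachRow';
```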
## Data formats support {#data-formats-support}
RabbitMQ engine supports all [formats](../../../interfaces/formats.md) supported in ClickHouse.

View File

@ -946,96 +946,6 @@ You could change storage policy after table creation with [ALTER TABLE ... MODIF
The number of threads performing background moves of data parts can be changed by [background_move_pool_size](/docs/en/operations/server-configuration-parameters/settings.md/#background_move_pool_size) setting.
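For example, a sketch of such a policy change via `ALTER TABLE ... MODIFY SETTING` (the table name and target policy are illustrative; the new policy is expected to contain the disks and volumes of the old one):
```sql
-- existing parts are not rewritten immediately; they migrate later
-- via background moves or TTL move rules
ALTER TABLE hits MODIFY SETTING storage_policy = 'moving_from_ssd_to_hdd';
```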
### Dynamic Storage
This example query shows how to attach a table stored at a URL and configure the
remote storage within the query. The web storage is not configured in the ClickHouse
configuration files; all the settings are in the CREATE/ATTACH query.
:::note
The example uses `type=web`, but any disk type can be configured as dynamic, even a local disk. Local disks require the `path` argument to be located inside the directory set by the server configuration parameter `custom_local_disks_base_directory`, which has no default, so set that parameter as well when using a local disk.
:::
#### Example dynamic web storage
:::tip
A [demo dataset](https://github.com/ClickHouse/web-tables-demo) is hosted in GitHub. To prepare your own tables for web storage see the tool [clickhouse-static-files-uploader](/docs/en/operations/storing-data.md/#storing-data-on-webserver)
:::
In this `ATTACH TABLE` query the `UUID` provided matches the directory name of the data, and the endpoint is the URL for the raw GitHub content.
```sql
# highlight-next-line
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
(
price UInt32,
date Date,
postcode1 LowCardinality(String),
postcode2 LowCardinality(String),
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
is_new UInt8,
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
addr1 String,
addr2 String,
street LowCardinality(String),
locality LowCardinality(String),
town LowCardinality(String),
district LowCardinality(String),
county LowCardinality(String)
)
ENGINE = MergeTree
ORDER BY (postcode1, postcode2, addr1, addr2)
# highlight-start
SETTINGS disk = disk(
type=web,
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
);
# highlight-end
```
### Nested Dynamic Storage
This example query builds on the above dynamic disk configuration and shows how to
use a local disk to cache data from a table stored at a URL. Neither the cache disk
nor the web storage is configured in the ClickHouse configuration files; both are
configured in the CREATE/ATTACH query settings.
In the settings highlighted below notice that the disk of `type=web` is nested within
the disk of `type=cache`.
```sql
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
(
price UInt32,
date Date,
postcode1 LowCardinality(String),
postcode2 LowCardinality(String),
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
is_new UInt8,
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
addr1 String,
addr2 String,
street LowCardinality(String),
locality LowCardinality(String),
town LowCardinality(String),
district LowCardinality(String),
county LowCardinality(String)
)
ENGINE = MergeTree
ORDER BY (postcode1, postcode2, addr1, addr2)
# highlight-start
SETTINGS disk = disk(
type=cache,
max_size='1Gi',
path='/var/lib/clickhouse/custom_disk_cache/',
disk=disk(
type=web,
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
)
);
# highlight-end
```
### Details {#details}
In the case of `MergeTree` tables, data gets to disk in different ways:
@ -1064,13 +974,11 @@ During this time, they are not moved to other volumes or disks. Therefore, until
Users can assign new big parts to different disks of a [JBOD](https://en.wikipedia.org/wiki/Non-RAID_drive_architectures) volume in a balanced way using the [min_bytes_to_rebalance_partition_over_jbod](/docs/en/operations/settings/merge-tree-settings.md/#min-bytes-to-rebalance-partition-over-jbod) setting.
## Using S3 for Data Storage {#table_engine-mergetree-s3}
## Using External Storage for Data Storage {#table_engine-mergetree-s3}
:::note
Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs).
:::
[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) family table engines can store data in `S3`, `AzureBlobStorage`, and `HDFS` using disks of type `s3`, `azure_blob_storage`, and `hdfs` respectively. See [configuring external storage options](/docs/en/operations/storing-data.md/#configuring-external-storage) for more details.
`MergeTree` family table engines can store data to [S3](https://aws.amazon.com/s3/) using a disk with type `s3`.
The following example uses [S3](https://aws.amazon.com/s3/) as external storage with a disk of type `s3`.
Configuration markup:
``` xml
@ -1112,253 +1020,12 @@ Configuration markup:
</storage_configuration>
```
Also see [configuring external storage options](/docs/en/operations/storing-data.md/#configuring-external-storage).
:::note cache configuration
ClickHouse versions 22.3 through 22.7 use a different cache configuration, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) if you are using one of those versions.
:::
### Configuring the S3 disk
Required parameters (see the SQL sketch after this parameter list for a minimal inline example):
- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data.
- `access_key_id` — S3 access key id.
- `secret_access_key` — S3 secret access key.
Optional parameters:
- `region` — S3 region name.
- `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS), as GCS does not support batch deletes; disabling the check avoids error messages in the logs.
- `use_environment_credentials` — Reads AWS credentials from the environment variables `AWS_ACCESS_KEY_ID`, `AWS_SECRET_ACCESS_KEY`, and `AWS_SESSION_TOKEN` if they exist. Default value is `false`.
- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`.
- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`.
- `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL.
- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`.
- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`.
- `retry_attempts` — Number of retry attempts in case of failed request. Default value is `10`.
- `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`.
- `min_bytes_for_seek` — Minimum number of bytes to use a seek operation instead of a sequential read. Default value is `1 Mb`.
- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times.
- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
- `server_side_encryption_kms_key_id` - If specified, required headers for accessing S3 objects with [SSE-KMS encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) will be set. If an empty string is specified, the AWS managed S3 key will be used. Optional.
- `server_side_encryption_kms_encryption_context` - If specified alongside `server_side_encryption_kms_key_id`, the given encryption context header for SSE-KMS will be set. Optional.
- `server_side_encryption_kms_bucket_key_enabled` - If specified alongside `server_side_encryption_kms_key_id`, the header to enable S3 bucket keys for SSE-KMS will be set. Optional, can be `true` or `false`, defaults to nothing (matches the bucket-level setting).
- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited).
- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_put_rps`.
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting request per second limit. By default (`0` value) equals to `s3_max_get_rps`.
- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
- `key_template` — Defines the format with which object keys are generated. By default, ClickHouse takes the `root path` from the `endpoint` option and adds a randomly generated suffix: a directory of 3 random symbols and a file name of 29 random symbols. With this option you have full control over how object keys are generated. Some usage scenarios require random symbols in the prefix or in the middle of the object key, for example: `[a-z]{3}-prefix-random/constant-part/random-middle-[a-z]{3}/random-suffix-[a-z]{29}`. The value is parsed with [`re2`](https://github.com/google/re2/wiki/Syntax); only a subset of the syntax is supported, so check that your preferred format is supported before using this option. The disk is not initialized if ClickHouse is unable to generate a key from the value of `key_template`. This option requires the feature flag [storage_metadata_write_full_object_key](/docs/en/operations/settings/settings#storage_metadata_write_full_object_key) to be enabled, forbids declaring the `root path` in the `endpoint` option, and requires the option `key_compatibility_prefix` to be defined.
- `key_compatibility_prefix` — This option is required when `key_template` is in use. To be able to read object keys that were stored in metadata files with a metadata version lower than `VERSION_FULL_OBJECT_KEY`, set the previous `root path` from the `endpoint` option here.
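As referenced above, the required parameters can also be supplied inline with the dynamic-disk syntax shown earlier on this page. A minimal sketch (the endpoint and credentials are placeholders, and none of the tuning options are shown):
```sql
CREATE TABLE s3_backed_table
(
    id UInt64,
    s  String
)
ENGINE = MergeTree
ORDER BY id
SETTINGS disk = disk(
    type = s3,
    endpoint = 'https://my-bucket.s3.amazonaws.com/root-path/',  -- placeholder
    access_key_id = 'your_access_key_id',                        -- placeholder
    secret_access_key = 'your_secret_access_key');               -- placeholder
```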
### Configuring the cache
This is the cache configuration from above:
```xml
<s3_cache>
<type>cache</type>
<disk>s3</disk>
<path>/var/lib/clickhouse/disks/s3_cache/</path>
<max_size>10Gi</max_size>
</s3_cache>
```
These parameters define the cache layer:
- `type` — If a disk is of type `cache` it caches mark and index files in memory.
- `disk` — The name of the disk that will be cached.
Cache parameters:
- `path` — The path where metadata for the cache is stored.
- `max_size` — The size (amount of disk space) that the cache can grow to.
:::tip
There are several other cache parameters that you can use to tune your storage, see [using local cache](/docs/en/operations/storing-data.md/#using-local-cache) for the details.
:::
S3 disk can be configured as `main` or `cold` storage:
``` xml
<storage_configuration>
...
<disks>
<s3>
<type>s3</type>
<endpoint>https://clickhouse-public-datasets.s3.amazonaws.com/my-bucket/root-path/</endpoint>
<access_key_id>your_access_key_id</access_key_id>
<secret_access_key>your_secret_access_key</secret_access_key>
</s3>
</disks>
<policies>
<s3_main>
<volumes>
<main>
<disk>s3</disk>
</main>
</volumes>
</s3_main>
<s3_cold>
<volumes>
<main>
<disk>default</disk>
</main>
<external>
<disk>s3</disk>
</external>
</volumes>
<move_factor>0.2</move_factor>
</s3_cold>
</policies>
...
</storage_configuration>
```
With the `cold` policy, data can be moved to S3 when the free space on the local disk becomes smaller than `move_factor * disk_size`, or by a TTL move rule.
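A sketch of a table that uses the `s3_cold` policy above and also ages data out to the S3-backed `external` volume with a TTL move rule (table and column names are illustrative):
```sql
CREATE TABLE events
(
    event_date Date,
    id         UInt64,
    payload    String
)
ENGINE = MergeTree
ORDER BY (event_date, id)
-- parts whose data is older than 30 days are moved to the 'external'
-- volume of the policy, i.e. onto the s3 disk
TTL event_date + INTERVAL 30 DAY TO VOLUME 'external'
SETTINGS storage_policy = 's3_cold';
```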
## Using Azure Blob Storage for Data Storage {#table_engine-mergetree-azure-blob-storage}
`MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`.
As of February 2022, this feature is still a fresh addition, so expect that some Azure Blob Storage functionalities might be unimplemented.
Configuration markup:
``` xml
<storage_configuration>
...
<disks>
<blob_storage_disk>
<type>azure_blob_storage</type>
<storage_account_url>http://account.blob.core.windows.net</storage_account_url>
<container_name>container</container_name>
<account_name>account</account_name>
<account_key>pass123</account_key>
<metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
<cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
<skip_access_check>false</skip_access_check>
</blob_storage_disk>
</disks>
...
</storage_configuration>
```
Connection parameters:
* `endpoint` — AzureBlobStorage endpoint URL with container and prefix. Optionally it can contain the account_name if the authentication method in use needs it (`http://account.blob.core.windows.net:{port}/[account_name]{container_name}/{data_prefix}`). Alternatively, these parameters can be provided separately using `storage_account_url`, `account_name` and `container_name`. To specify a prefix, `endpoint` should be used (see the sketch after the parameter lists below).
* `endpoint_contains_account_name` - Specifies whether the endpoint contains the account_name; it is only needed for certain authentication methods. (Default: `true`)
* `storage_account_url` - Required if `endpoint` is not specified. Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`.
* `container_name` - Target container name, defaults to `default-container`.
* `container_already_exists` - If set to `false`, a new container `container_name` is created in the storage account; if set to `true`, the disk connects to the container directly; and if left unset, the disk connects to the account, checks whether the container `container_name` exists, and creates it if it doesn't exist yet.
Authentication parameters (the disk will try all available methods **and** Managed Identity Credential):
* `connection_string` - For authentication using a connection string.
* `account_name` and `account_key` - For authentication using Shared Key.
Limit parameters (mainly for internal usage):
* `s3_max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage.
* `min_bytes_for_seek` - Limits the size of a seekable region.
* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage.
* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage.
* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated.
* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object.
Other parameters:
* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`.
* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
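For reference, a sketch of a disk configured through a single `endpoint` value instead of `storage_account_url` plus `container_name`; the account, container, prefix, and key below are placeholders, assuming an Azurite-style endpoint where the account name is part of the path:
``` xml
<blob_storage_disk>
    <type>azure_blob_storage</type>
    <!-- account URL, account name, container name and data prefix combined in one value -->
    <endpoint>http://azurite1:10000/devstoreaccount1/data-container/data-prefix/</endpoint>
    <account_name>devstoreaccount1</account_name>
    <account_key>pass123</account_key>
    <metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
</blob_storage_disk>
```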
Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)).
:::note Zero-copy replication is not ready for production
Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.
:::
## HDFS storage {#hdfs-storage}
In this sample configuration:
- the disk is of type `hdfs`
- the data is hosted at `hdfs://hdfs1:9000/clickhouse/`
```xml
<clickhouse>
<storage_configuration>
<disks>
<hdfs>
<type>hdfs</type>
<endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
<skip_access_check>true</skip_access_check>
</hdfs>
<hdd>
<type>local</type>
<path>/</path>
</hdd>
</disks>
<policies>
<hdfs>
<volumes>
<main>
<disk>hdfs</disk>
</main>
<external>
<disk>hdd</disk>
</external>
</volumes>
</hdfs>
</policies>
</storage_configuration>
</clickhouse>
```
## Web storage (read-only) {#web-storage}
Web storage can be used for read-only purposes. An example use is for hosting sample
data, or for migrating data.
:::tip
Storage can also be configured temporarily within a query, if a web dataset is not expected
to be used routinely, see [dynamic storage](#dynamic-storage) and skip editing the
configuration file.
:::
In this sample configuration:
- the disk is of type `web`
- the data is hosted at `http://nginx:80/test1/`
- a cache on local storage is used
```xml
<clickhouse>
<storage_configuration>
<disks>
<web>
<type>web</type>
<endpoint>http://nginx:80/test1/</endpoint>
</web>
<cached_web>
<type>cache</type>
<disk>web</disk>
<path>cached_web_cache/</path>
<max_size>100000000</max_size>
</cached_web>
</disks>
<policies>
<web>
<volumes>
<main>
<disk>web</disk>
</main>
</volumes>
</web>
<cached_web>
<volumes>
<main>
<disk>cached_web</disk>
</main>
</volumes>
</cached_web>
</policies>
</storage_configuration>
</clickhouse>
```
## Virtual Columns {#virtual-columns}
- `_part` — Name of a part.

View File

@ -21,3 +21,79 @@ When restarting a server, data disappears from the table and the table becomes e
Normally, using this table engine is not justified. However, it can be used for tests, and for tasks where maximum speed is required on a relatively small number of rows (up to approximately 100,000,000).
The Memory engine is used by the system for temporary tables with external query data (see the section “External data for processing a query”), and for implementing `GLOBAL IN` (see the section “IN operators”).
Upper and lower bounds can be specified to limit Memory engine table size, effectively allowing it to act as a circular buffer (see [Engine Parameters](#engine-parameters)).
## Engine Parameters {#engine-parameters}
- `min_bytes_to_keep` — Minimum bytes to keep when memory table is size-capped.
- Default value: `0`
- Requires `max_bytes_to_keep`
- `max_bytes_to_keep` — Maximum bytes to keep within the memory table, where the oldest rows are deleted on each insertion (i.e. a circular buffer). The maximum can be exceeded if the oldest batch of rows to remove falls under the `min_bytes_to_keep` limit when adding a large block.
- Default value: `0`
- `min_rows_to_keep` — Minimum rows to keep when memory table is size-capped.
- Default value: `0`
- Requires `max_rows_to_keep`
- `max_rows_to_keep` — Maximum rows to keep within the memory table, where the oldest rows are deleted on each insertion (i.e. a circular buffer). The maximum can be exceeded if the oldest batch of rows to remove falls under the `min_rows_to_keep` limit when adding a large block.
- Default value: `0`
## Usage {#usage}
**Initialize settings**
``` sql
CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_rows_to_keep = 100, max_rows_to_keep = 1000;
```
**Note:** Both `bytes` and `rows` capping parameters can be set at the same time; however, the lower bounds of `max` and `min` will be adhered to.
## Examples {#examples}
``` sql
CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_bytes_to_keep = 4096, max_bytes_to_keep = 16384;
/* 1. testing oldest block doesn't get deleted due to min-threshold - 3000 rows */
INSERT INTO memory SELECT * FROM numbers(0, 1600); -- 8'192 bytes
/* 2. adding block that doesn't get deleted */
INSERT INTO memory SELECT * FROM numbers(1000, 100); -- 1'024 bytes
/* 3. testing oldest block gets deleted - 9216 bytes - 1100 */
INSERT INTO memory SELECT * FROM numbers(9000, 1000); -- 8'192 bytes
/* 4. checking a very large block overrides all */
INSERT INTO memory SELECT * FROM numbers(9000, 10000); -- 65'536 bytes
SELECT total_bytes, total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase();
```
``` text
┌─total_bytes─┬─total_rows─┐
│ 65536 │ 10000 │
└─────────────┴────────────┘
```
Also, for rows:
``` sql
CREATE TABLE memory (i UInt32) ENGINE = Memory SETTINGS min_rows_to_keep = 4000, max_rows_to_keep = 10000;
/* 1. testing oldest block doesn't get deleted due to min-threshold - 3000 rows */
INSERT INTO memory SELECT * FROM numbers(0, 1600); -- 1'600 rows
/* 2. adding block that doesn't get deleted */
INSERT INTO memory SELECT * FROM numbers(1000, 100); -- 100 rows
/* 3. testing oldest block gets deleted - 9216 bytes - 1100 */
INSERT INTO memory SELECT * FROM numbers(9000, 1000); -- 1'000 rows
/* 4. checking a very large block overrides all */
INSERT INTO memory SELECT * FROM numbers(9000, 10000); -- 10'000 rows
SELECT total_bytes, total_rows FROM system.tables WHERE name = 'memory' and database = currentDatabase();
```
``` text
┌─total_bytes─┬─total_rows─┐
│ 65536 │ 10000 │
└─────────────┴────────────┘
```

View File

@ -55,7 +55,7 @@ CREATE TABLE criteo_log (
) ENGINE = Log;
```
Download the data:
Insert the data:
``` bash
$ for i in {00..23}; do echo $i; zcat datasets/criteo/day_${i#0}.gz | sed -r 's/^/2000-01-'${i/00/24}'\t/' | clickhouse-client --host=example-perftest01j --query="INSERT INTO criteo_log FORMAT TabSeparated"; done

View File

@ -10,10 +10,14 @@ The embeddings and the metadata are stored in separate files in the raw data. A
converts them to CSV and imports them into ClickHouse. You can use the following `download.sh` script for that:
```bash
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${1}.npy # download image embedding
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${1}.npy # download text embedding
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${1}.parquet # download metadata
python3 process.py ${1} # merge files and convert to CSV
number=${1}
if [[ $number == '' ]]; then
number=1
fi;
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/img_emb/img_emb_${number}.npy # download image embedding
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/text_emb/text_emb_${number}.npy # download text embedding
wget --tries=100 https://deploy.laion.ai/8f83b608504d46bb81708ec86e912220/embeddings/metadata/metadata_${number}.parquet # download metadata
python3 process.py $number # merge files and convert to CSV
```
Script `process.py` is defined as follows:

View File

@ -248,6 +248,9 @@ Some of the files might not download fully. Check the file sizes and re-download
``` bash
$ curl -O https://datasets.clickhouse.com/trips_mergetree/partitions/trips_mergetree.tar
# Validate the checksum
$ md5sum trips_mergetree.tar
# Checksum should be equal to: f3b8d469b41d9a82da064ded7245d12c
$ tar xvf trips_mergetree.tar -C /var/lib/clickhouse # path to ClickHouse data directory
$ # check permissions of unpacked data, fix if required
$ sudo service clickhouse-server restart

View File

@ -0,0 +1,293 @@
---
slug: /en/getting-started/example-datasets/tw-weather
sidebar_label: Taiwan Historical Weather Datasets
sidebar_position: 1
description: 131 million rows of weather observation data for the last 128 yrs
---
# Taiwan Historical Weather Datasets
This dataset contains historical meteorological observations for the last 128 years. Each row is a measurement at a point in time for a weather station.
The origin of this dataset is available [here](https://github.com/Raingel/historical_weather) and the list of weather station numbers can be found [here](https://github.com/Raingel/weather_station_list).
> The sources of meteorological datasets include the meteorological stations that are established by the Central Weather Administration (station code is beginning with C0, C1, and 4) and the agricultural meteorological stations belonging to the Council of Agriculture (station code other than those mentioned above):
- StationId
- MeasuredDate, the observation time
- StnPres, the station air pressure
- SeaPres, the sea level pressure
- Td, the dew point temperature
- RH, the relative humidity
- Other elements where available
## Downloading the data
- A [pre-processed version](#pre-processed-data) of the data for ClickHouse, which has been cleaned, re-structured, and enriched. This dataset covers the years from 1896 to 2023.
- [Download the original raw data](#original-raw-data) and convert it to the format required by ClickHouse. Users wanting to add their own columns may wish to explore or extend this approach.
### Pre-processed data
The dataset has also been re-structured from a measurement per line to a row per weather station id and measured date, i.e.
```csv
StationId,MeasuredDate,StnPres,Tx,RH,WS,WD,WSGust,WDGust,Precp,GloblRad,TxSoil0cm,TxSoil5cm,TxSoil20cm,TxSoil50cm,TxSoil100cm,SeaPres,Td,PrecpHour,SunShine,TxSoil10cm,EvapA,Visb,UVI,Cloud Amount,TxSoil30cm,TxSoil200cm,TxSoil300cm,TxSoil500cm,VaporPressure
C0X100,2016-01-01 01:00:00,1022.1,16.1,72,1.1,8.0,,,,,,,,,,,,,,,,,,,,,,,
C0X100,2016-01-01 02:00:00,1021.6,16.0,73,1.2,358.0,,,,,,,,,,,,,,,,,,,,,,,
C0X100,2016-01-01 03:00:00,1021.3,15.8,74,1.5,353.0,,,,,,,,,,,,,,,,,,,,,,,
C0X100,2016-01-01 04:00:00,1021.2,15.8,74,1.7,8.0,,,,,,,,,,,,,,,,,,,,,,,
```
This makes the data easy to query and keeps the resulting table less sparse; some elements are null because they were not available to be measured at that weather station.
This dataset is available in the following Google Cloud Storage location. Either download the dataset to your local filesystem (and insert it with the ClickHouse client) or insert it directly into ClickHouse (see [Inserting from URL](#inserting-from-url)).
To download:
```bash
wget https://storage.googleapis.com/taiwan-weather-observaiton-datasets/preprocessed_weather_daily_1896_2023.tar.gz
# Option: Validate the checksum
md5sum preprocessed_weather_daily_1896_2023.tar.gz
# Checksum should be equal to: 11b484f5bd9ddafec5cfb131eb2dd008
tar -xzvf preprocessed_weather_daily_1896_2023.tar.gz
daily_weather_preprocessed_1896_2023.csv
# Option: Validate the checksum
md5sum daily_weather_preprocessed_1896_2023.csv
# Checksum should be equal to: 1132248c78195c43d93f843753881754
```
### Original raw data
The following details describe the steps to download the original raw data so you can transform and convert it as you want.
#### Download
To download the original raw data:
```bash
mkdir tw_raw_weather_data && cd tw_raw_weather_data
wget https://storage.googleapis.com/taiwan-weather-observaiton-datasets/raw_data_weather_daily_1896_2023.tar.gz
# Option: Validate the checksum
md5sum raw_data_weather_daily_1896_2023.tar.gz
# Checksum should be equal to: b66b9f137217454d655e3004d7d1b51a
tar -xzvf raw_data_weather_daily_1896_2023.tar.gz
466920_1928.csv
466920_1929.csv
466920_1930.csv
466920_1931.csv
...
# Option: Validate the checksum
cat *.csv | md5sum
# Checksum should be equal to: b26db404bf84d4063fac42e576464ce1
```
#### Retrieve the Taiwan weather stations
```bash
wget -O weather_sta_list.csv https://github.com/Raingel/weather_station_list/raw/main/data/weather_sta_list.csv
# Option: Convert the UTF-8-BOM to UTF-8 encoding
sed -i '1s/^\xEF\xBB\xBF//' weather_sta_list.csv
```
## Create table schema
Create the MergeTree table in ClickHouse (from the ClickHouse client).
```sql
CREATE TABLE tw_weather_data (
StationId String null,
MeasuredDate DateTime64,
StnPres Float64 null,
SeaPres Float64 null,
Tx Float64 null,
Td Float64 null,
RH Float64 null,
WS Float64 null,
WD Float64 null,
WSGust Float64 null,
WDGust Float64 null,
Precp Float64 null,
PrecpHour Float64 null,
SunShine Float64 null,
GloblRad Float64 null,
TxSoil0cm Float64 null,
TxSoil5cm Float64 null,
TxSoil10cm Float64 null,
TxSoil20cm Float64 null,
TxSoil50cm Float64 null,
TxSoil100cm Float64 null,
TxSoil30cm Float64 null,
TxSoil200cm Float64 null,
TxSoil300cm Float64 null,
TxSoil500cm Float64 null,
VaporPressure Float64 null,
UVI Float64 null,
"Cloud Amount" Float64 null,
EvapA Float64 null,
Visb Float64 null
)
ENGINE = MergeTree
ORDER BY (MeasuredDate);
```
## Inserting into ClickHouse
### Inserting from local file
Data can be inserted from a local file as follows (from the ClickHouse client):
```sql
INSERT INTO tw_weather_data FROM INFILE '/path/to/daily_weather_preprocessed_1896_2023.csv'
```
where `/path/to` represents the specific user path to the local file on the disk.
The sample response output after inserting data into ClickHouse is as follows:
```response
Query id: 90e4b524-6e14-4855-817c-7e6f98fbeabb
Ok.
131985329 rows in set. Elapsed: 71.770 sec. Processed 131.99 million rows, 10.06 GB (1.84 million rows/s., 140.14 MB/s.)
Peak memory usage: 583.23 MiB.
```
### Inserting from URL
```sql
INSERT INTO tw_weather_data SELECT *
FROM url('https://storage.googleapis.com/taiwan-weather-observaiton-datasets/daily_weather_preprocessed_1896_2023.csv', 'CSVWithNames')
```
To know how to speed this up, please see our blog post on [tuning large data loads](https://clickhouse.com/blog/supercharge-your-clickhouse-data-loads-part2).
## Check data rows and sizes
1. Let's see how many rows were inserted:
```sql
SELECT formatReadableQuantity(count())
FROM tw_weather_data;
```
```response
┌─formatReadableQuantity(count())─┐
│ 131.99 million │
└─────────────────────────────────┘
```
2. Let's see how much disk space is used by this table:
```sql
SELECT
formatReadableSize(sum(bytes)) AS disk_size,
formatReadableSize(sum(data_uncompressed_bytes)) AS uncompressed_size
FROM system.parts
WHERE (`table` = 'tw_weather_data') AND active
```
```response
┌─disk_size─┬─uncompressed_size─┐
│ 2.13 GiB │ 32.94 GiB │
└───────────┴───────────────────┘
```
## Sample queries
### Q1: Retrieve the highest dew point temperature for each weather station in a specific year
```sql
SELECT
StationId,
max(Td) AS max_td
FROM tw_weather_data
WHERE (year(MeasuredDate) = 2023) AND (Td IS NOT NULL)
GROUP BY StationId
```
```response
┌─StationId─┬─max_td─┐
│ 466940 │ 1 │
│ 467300 │ 1 │
│ 467540 │ 1 │
│ 467490 │ 1 │
│ 467080 │ 1 │
│ 466910 │ 1 │
│ 467660 │ 1 │
│ 467270 │ 1 │
│ 467350 │ 1 │
│ 467571 │ 1 │
│ 466920 │ 1 │
│ 467650 │ 1 │
│ 467550 │ 1 │
│ 467480 │ 1 │
│ 467610 │ 1 │
│ 467050 │ 1 │
│ 467590 │ 1 │
│ 466990 │ 1 │
│ 467060 │ 1 │
│ 466950 │ 1 │
│ 467620 │ 1 │
│ 467990 │ 1 │
│ 466930 │ 1 │
│ 467110 │ 1 │
│ 466881 │ 1 │
│ 467410 │ 1 │
│ 467441 │ 1 │
│ 467420 │ 1 │
│ 467530 │ 1 │
│ 466900 │ 1 │
└───────────┴────────┘
30 rows in set. Elapsed: 0.045 sec. Processed 6.41 million rows, 187.33 MB (143.92 million rows/s., 4.21 GB/s.)
```
### Q2: Raw data fetching for a specific time range, fields, and weather station
```sql
SELECT
StnPres,
SeaPres,
Tx,
Td,
RH,
WS,
WD,
WSGust,
WDGust,
Precp,
PrecpHour
FROM tw_weather_data
WHERE (StationId = 'C0UB10') AND (MeasuredDate >= '2023-12-23') AND (MeasuredDate < '2023-12-24')
ORDER BY MeasuredDate ASC
LIMIT 10
```
```response
┌─StnPres─┬─SeaPres─┬───Tx─┬───Td─┬─RH─┬──WS─┬──WD─┬─WSGust─┬─WDGust─┬─Precp─┬─PrecpHour─┐
│ 1029.5 │ ᴺᵁᴸᴸ │ 11.8 │ ᴺᵁᴸᴸ │ 78 │ 2.7 │ 271 │ 5.5 │ 275 │ -99.8 │ -99.8 │
│ 1029.8 │ ᴺᵁᴸᴸ │ 12.3 │ ᴺᵁᴸᴸ │ 78 │ 2.7 │ 289 │ 5.5 │ 308 │ -99.8 │ -99.8 │
│ 1028.6 │ ᴺᵁᴸᴸ │ 12.3 │ ᴺᵁᴸᴸ │ 79 │ 2.3 │ 251 │ 6.1 │ 289 │ -99.8 │ -99.8 │
│ 1028.2 │ ᴺᵁᴸᴸ │ 13 │ ᴺᵁᴸᴸ │ 75 │ 4.3 │ 312 │ 7.5 │ 316 │ -99.8 │ -99.8 │
│ 1027.8 │ ᴺᵁᴸᴸ │ 11.1 │ ᴺᵁᴸᴸ │ 89 │ 7.1 │ 310 │ 11.6 │ 322 │ -99.8 │ -99.8 │
│ 1027.8 │ ᴺᵁᴸᴸ │ 11.6 │ ᴺᵁᴸᴸ │ 90 │ 3.1 │ 269 │ 10.7 │ 295 │ -99.8 │ -99.8 │
│ 1027.9 │ ᴺᵁᴸᴸ │ 12.3 │ ᴺᵁᴸᴸ │ 89 │ 4.7 │ 296 │ 8.1 │ 310 │ -99.8 │ -99.8 │
│ 1028.2 │ ᴺᵁᴸᴸ │ 12.2 │ ᴺᵁᴸᴸ │ 94 │ 2.5 │ 246 │ 7.1 │ 283 │ -99.8 │ -99.8 │
│ 1028.4 │ ᴺᵁᴸᴸ │ 12.5 │ ᴺᵁᴸᴸ │ 94 │ 3.1 │ 265 │ 4.8 │ 297 │ -99.8 │ -99.8 │
│ 1028.3 │ ᴺᵁᴸᴸ │ 13.6 │ ᴺᵁᴸᴸ │ 91 │ 1.2 │ 273 │ 4.4 │ 256 │ -99.8 │ -99.8 │
└─────────┴─────────┴──────┴──────┴────┴─────┴─────┴────────┴────────┴───────┴───────────┘
10 rows in set. Elapsed: 0.009 sec. Processed 91.70 thousand rows, 2.33 MB (9.67 million rows/s., 245.31 MB/s.)
```
## Credits
We would like to acknowledge the efforts of the Central Weather Administration and Agricultural Meteorological Observation Network (Station) of the Council of Agriculture for preparing, cleaning, and distributing this dataset. We appreciate your efforts.
Ou, J.-H., Kuo, C.-H., Wu, Y.-F., Lin, G.-C., Lee, M.-H., Chen, R.-K., Chou, H.-P., Wu, H.-Y., Chu, S.-C., Lai, Q.-J., Tsai, Y.-C., Lin, C.-C., Kuo, C.-C., Liao, C.-T., Chen, Y.-N., Chu, Y.-W., Chen, C.-Y., 2023. Application-oriented deep learning model for early warning of rice blast in Taiwan. Ecological Informatics 73, 101950. https://doi.org/10.1016/j.ecoinf.2022.101950 [13/12/2022]

View File

@ -78,8 +78,8 @@ It is recommended to use official pre-compiled `deb` packages for Debian or Ubun
#### Setup the Debian repository
``` bash
sudo apt-get install -y apt-transport-https ca-certificates dirmngr
sudo gpg --no-default-keyring --keyring /usr/share/keyrings/clickhouse-keyring.gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 8919F6BD2B48D754
sudo apt-get install -y apt-transport-https ca-certificates curl gnupg
curl -fsSL 'https://packages.clickhouse.com/rpm/lts/repodata/repomd.xml.key' | sudo gpg --dearmor -o /usr/share/keyrings/clickhouse-keyring.gpg
echo "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" | sudo tee \
/etc/apt/sources.list.d/clickhouse.list

View File

@ -178,7 +178,7 @@ You can pass parameters to `clickhouse-client` (all parameters have a default va
- `--password` The password. Default value: empty string.
- `--ask-password` - Prompt the user to enter a password.
- `--query, -q` The query to process when using non-interactive mode. `--query` can be specified multiple times, e.g. `--query "SELECT 1" --query "SELECT 2"`. Cannot be used simultaneously with `--queries-file`.
- `--queries-file` file path with queries to execute. `--queries-file` can be specified multiple times, e.g. `--query queries1.sql --query queries2.sql`. Cannot be used simultaneously with `--query`.
- `--queries-file` file path with queries to execute. `--queries-file` can be specified multiple times, e.g. `--queries-file queries1.sql --queries-file queries2.sql`. Cannot be used simultaneously with `--query`.
- `--multiquery, -n` If specified, multiple queries separated by semicolons can be listed after the `--query` option. For convenience, it is also possible to omit `--query` and pass the queries directly after `--multiquery`.
- `--multiline, -m` If specified, allow multiline queries (do not send the query on Enter).
- `--database, -d` Select the current default database. Default value: the current database from the server settings (default by default).

View File

@ -170,7 +170,7 @@ RESTORE TABLE test.table PARTITIONS '2', '3'
### Backups as tar archives
Backups can also be stored as tar archives. The functionality is the same as for zip, except that a password is not supported.
Write a backup as a tar:
```
@ -444,10 +444,6 @@ Often data that is ingested into ClickHouse is delivered through some sort of pe
Some local filesystems provide snapshot functionality (for example, [ZFS](https://en.wikipedia.org/wiki/ZFS)), but they might not be the best choice for serving live queries. A possible solution is to create additional replicas with this kind of filesystem and exclude them from the [Distributed](../engines/table-engines/special/distributed.md) tables that are used for `SELECT` queries. Snapshots on such replicas will be out of reach of any queries that modify data. As a bonus, these replicas might have special hardware configurations with more disks attached per server, which would be cost-effective.
### clickhouse-copier {#clickhouse-copier}
[clickhouse-copier](../operations/utilities/clickhouse-copier.md) is a versatile tool that was initially created to re-shard petabyte-sized tables. It can also be used for backup and restore purposes because it reliably copies data between ClickHouse tables and clusters.
For smaller volumes of data, a simple `INSERT INTO ... SELECT ...` to remote tables might work as well.
### Manipulations with Parts {#manipulations-with-parts}

View File

@ -95,9 +95,11 @@ which is equal to
## Substituting Configuration {#substitution}
The config can also define “substitutions”. If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/clickhouse/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md#macros)).
The config can define substitutions. There are two types of substitutions:
If you want to replace an entire element with a substitution use `include` as the element name.
- If an element has the `incl` attribute, the corresponding substitution from the file will be used as the value. By default, the path to the file with substitutions is `/etc/metrika.xml`. This can be changed in the [include_from](../operations/server-configuration-parameters/settings.md#server_configuration_parameters-include_from) element in the server config. The substitution values are specified in `/clickhouse/substitution_name` elements in this file. If a substitution specified in `incl` does not exist, it is recorded in the log. To prevent ClickHouse from logging missing substitutions, specify the `optional="true"` attribute (for example, settings for [macros](../operations/server-configuration-parameters/settings.md#macros)).
- If you want to replace an entire element with a substitution, use `include` as the element name. Substitutions can also be performed from ZooKeeper by specifying the attribute `from_zk = "/path/to/node"`. In this case, the element value is replaced with the contents of the ZooKeeper node at `/path/to/node`. This also works if you store an entire XML subtree as a ZooKeeper node; it will be fully inserted into the source element.
XML substitution example:
@ -114,7 +116,7 @@ XML substitution example:
</clickhouse>
```
Substitutions can also be performed from ZooKeeper. To do this, specify the attribute `from_zk = "/path/to/node"`. The element value is replaced with the contents of the node at `/path/to/node` in ZooKeeper. You can also put an entire XML subtree on the ZooKeeper node, and it will be fully inserted into the source element.
If you want to merge the substituting content with the existing configuration instead of appending, you can use the attribute `merge="true"`, for example: `<include from_zk="/some_path" merge="true">`. In this case, the existing configuration will be merged with the content from the substitution, and the existing configuration settings will be replaced with values from the substitution.
## Encrypting and Hiding Configuration {#encryption}

View File

@ -4337,6 +4337,18 @@ Possible values:
Default value: `0`.
## function_locate_has_mysql_compatible_argument_order {#function-locate-has-mysql-compatible-argument-order}
Controls the order of arguments in function [locate](../../sql-reference/functions/string-search-functions.md#locate).
Possible values:
- 0 — Function `locate` accepts arguments `(haystack, needle[, start_pos])`.
- 1 — Function `locate` accepts arguments `(needle, haystack[, start_pos])` (MySQL-compatible behavior).
Default value: `1`.
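Like other query-level settings, it can also be pinned in a user profile; a hedged sketch (the profile name `default` is an assumption):
``` xml
<clickhouse>
    <profiles>
        <default>
            <!-- restore the (haystack, needle[, start_pos]) argument order -->
            <function_locate_has_mysql_compatible_argument_order>0</function_locate_has_mysql_compatible_argument_order>
        </default>
    </profiles>
</clickhouse>
```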
## date_time_overflow_behavior {#date_time_overflow_behavior}
Defines the behavior when [Date](../../sql-reference/data-types/date.md), [Date32](../../sql-reference/data-types/date32.md), [DateTime](../../sql-reference/data-types/datetime.md), [DateTime64](../../sql-reference/data-types/datetime64.md) or integers are converted into Date, Date32, DateTime or DateTime64 but the value cannot be represented in the result type.

View File

@ -5,26 +5,416 @@ sidebar_label: "External Disks for Storing Data"
title: "External Disks for Storing Data"
---
Data, processed in ClickHouse, is usually stored in the local file system — on the same machine with the ClickHouse server. That requires large-capacity disks, which can be expensive enough. To avoid that you can store the data remotely — on [Amazon S3](https://aws.amazon.com/s3/) disks or in the Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html)).
Data processed in ClickHouse is usually stored in the local file system — on the same machine as the ClickHouse server. That requires large-capacity disks, which can be quite expensive. To avoid that you can store the data remotely. Various storages are supported:
1. [Amazon S3](https://aws.amazon.com/s3/) object storage.
2. The Hadoop Distributed File System ([HDFS](https://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-hdfs/HdfsDesign.html))
3. [Azure Blob Storage](https://azure.microsoft.com/en-us/products/storage/blobs).
To work with data stored on `Amazon S3` disks use [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine, and to work with data in the Hadoop Distributed File System — [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine.
:::note ClickHouse also has support for external table engines, which are different from the external storage option described on this page: they allow reading data stored in some general file format (like Parquet), while on this page we are describing storage configuration for ClickHouse `MergeTree` family or `Log` family tables.
1. To work with data stored on `Amazon S3` disks, use the [S3](/docs/en/engines/table-engines/integrations/s3.md) table engine.
2. To work with data in the Hadoop Distributed File System, use the [HDFS](/docs/en/engines/table-engines/integrations/hdfs.md) table engine.
3. To work with data stored in Azure Blob Storage, use the [AzureBlobStorage](/docs/en/engines/table-engines/integrations/azureBlobStorage.md) table engine.
:::
To load data from a web server with static files use a disk with type [web](#storing-data-on-webserver).
## Configuring external storage {#configuring-external-storage}
## Configuring HDFS {#configuring-hdfs}
[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to `S3`, `AzureBlobStorage`, or `HDFS` using a disk with type `s3`, `azure_blob_storage`, or `hdfs`, respectively.
[MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) family table engines can store data to HDFS using a disk with type `HDFS`.
Disk configuration requires:
1. `type` section, equal to one of `s3`, `azure_blob_storage`, `hdfs`, `local_blob_storage`, `web`.
2. Configuration of a specific external storage type.
Configuration markup:
Starting from ClickHouse version 24.1, it is possible to use a new configuration option.
It requires specifying:
1. `type` equal to `object_storage`
2. `object_storage_type`, equal to one of `s3`, `azure_blob_storage` (or just `azure` from `24.3`), `hdfs`, `local_blob_storage` (or just `local` from `24.3`), `web`.
Optionally, `metadata_type` can be specified (it is `local` by default), but it can also be set to `plain` or `web`.
Usage of the `plain` metadata type is described in the [plain storage section](/docs/en/operations/storing-data.md/#plain-storage); the `web` metadata type can be used only with the `web` object storage type; the `local` metadata type stores metadata files locally (each metadata file contains a mapping to files in object storage and some additional meta information about them).
E.g. configuration option
``` xml
<s3>
<type>s3</type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
    <use_environment_credentials>1</use_environment_credentials>
</s3>
```
is equal to configuration (from `24.1`):
``` xml
<s3>
<type>object_storage</type>
<object_storage_type>s3</object_storage_type>
<metadata_type>local</metadata_type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
    <use_environment_credentials>1</use_environment_credentials>
</s3>
```
Configuration
``` xml
<s3_plain>
<type>s3_plain</type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
    <use_environment_credentials>1</use_environment_credentials>
</s3_plain>
```
is equal to
``` xml
<s3_plain>
<type>object_storage</type>
<object_storage_type>s3</object_storage_type>
<metadata_type>plain</metadata_type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
    <use_environment_credentials>1</use_environment_credentials>
</s3_plain>
```
A full storage configuration example looks like:
``` xml
<clickhouse>
<storage_configuration>
<disks>
<s3>
<type>s3</type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
                <use_environment_credentials>1</use_environment_credentials>
</s3>
</disks>
<policies>
<s3>
<volumes>
<main>
<disk>s3</disk>
</main>
</volumes>
</s3>
</policies>
</storage_configuration>
</clickhouse>
```
Starting with ClickHouse version 24.1, it can also look like:
``` xml
<clickhouse>
<storage_configuration>
<disks>
<s3>
<type>object_storage</type>
<object_storage_type>s3</object_storage_type>
<metadata_type>local</metadata_type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
                <use_environment_credentials>1</use_environment_credentials>
</s3>
</disks>
<policies>
<s3>
<volumes>
<main>
<disk>s3</disk>
</main>
</volumes>
</s3>
</policies>
</storage_configuration>
</clickhouse>
```
In order to make a specific kind of storage the default option for all `MergeTree` tables, add the following section to the configuration file:
``` xml
<clickhouse>
<merge_tree>
<storage_policy>s3</storage_policy>
</merge_tree>
</clickhouse>
```
If you want to configure a specific storage policy for a specific table only, you can define it in settings while creating the table:
``` sql
CREATE TABLE test (a Int32, b String)
ENGINE = MergeTree() ORDER BY a
SETTINGS storage_policy = 's3';
```
You can also use `disk` instead of `storage_policy`. In this case the `storage_policy` section is not required in the configuration file; a `disk` section is enough.
``` sql
CREATE TABLE test (a Int32, b String)
ENGINE = MergeTree() ORDER BY a
SETTINGS disk = 's3';
```
## Dynamic Configuration {#dynamic-configuration}
It is also possible to specify a storage configuration without a predefined disk in the configuration file; instead, the disk can be configured in the `CREATE`/`ATTACH` query settings.
The following example query builds on the above dynamic disk configuration and shows how to use a local disk to cache data from a table stored at a URL.
```sql
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
(
price UInt32,
date Date,
postcode1 LowCardinality(String),
postcode2 LowCardinality(String),
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
is_new UInt8,
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
addr1 String,
addr2 String,
street LowCardinality(String),
locality LowCardinality(String),
town LowCardinality(String),
district LowCardinality(String),
county LowCardinality(String)
)
ENGINE = MergeTree
ORDER BY (postcode1, postcode2, addr1, addr2)
# highlight-start
SETTINGS disk = disk(
type=web,
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
);
# highlight-end
```
The example below adds cache to external storage.
```sql
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
(
price UInt32,
date Date,
postcode1 LowCardinality(String),
postcode2 LowCardinality(String),
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
is_new UInt8,
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
addr1 String,
addr2 String,
street LowCardinality(String),
locality LowCardinality(String),
town LowCardinality(String),
district LowCardinality(String),
county LowCardinality(String)
)
ENGINE = MergeTree
ORDER BY (postcode1, postcode2, addr1, addr2)
# highlight-start
SETTINGS disk = disk(
type=cache,
max_size='1Gi',
path='/var/lib/clickhouse/custom_disk_cache/',
disk=disk(
type=web,
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
)
);
# highlight-end
```
In the settings highlighted below notice that the disk of `type=web` is nested within
the disk of `type=cache`.
:::note
The example uses `type=web`, but any disk type can be configured as dynamic, even Local disk. Local disks require a path argument to be inside the server config parameter `custom_local_disks_base_directory`, which has no default, so set that also when using local disk.
:::
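A hedged sketch of that server setting; the directory value is an arbitrary example:
``` xml
<clickhouse>
    <!-- dynamically created local disks must have their path under this directory -->
    <custom_local_disks_base_directory>/var/lib/clickhouse/user_defined_disks/</custom_local_disks_base_directory>
</clickhouse>
```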
A combination of config-file-based and SQL-defined configuration is also possible:
```sql
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
(
price UInt32,
date Date,
postcode1 LowCardinality(String),
postcode2 LowCardinality(String),
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
is_new UInt8,
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
addr1 String,
addr2 String,
street LowCardinality(String),
locality LowCardinality(String),
town LowCardinality(String),
district LowCardinality(String),
county LowCardinality(String)
)
ENGINE = MergeTree
ORDER BY (postcode1, postcode2, addr1, addr2)
# highlight-start
SETTINGS disk = disk(
type=cache,
max_size='1Gi',
path='/var/lib/clickhouse/custom_disk_cache/',
disk=disk(
type=web,
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
)
);
# highlight-end
```
where `web` is a disk from a server configuration file:
``` xml
<storage_configuration>
<disks>
<web>
<type>web</type>
            <endpoint>https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/</endpoint>
</web>
</disks>
</storage_configuration>
```
### Using S3 Storage {#s3-storage}
Required parameters:
- `endpoint` — S3 endpoint URL in `path` or `virtual hosted` [styles](https://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html). Endpoint URL should contain a bucket and root path to store data.
- `access_key_id` — S3 access key id.
- `secret_access_key` — S3 secret access key.
Optional parameters:
- `region` — S3 region name.
- `support_batch_delete` — This controls the check to see if batch deletes are supported. Set this to `false` when using Google Cloud Storage (GCS), as GCS does not support batch deletes; disabling the check prevents error messages in the logs.
- `use_environment_credentials` — Reads AWS credentials from the Environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY and AWS_SESSION_TOKEN if they exist. Default value is `false`.
- `use_insecure_imds_request` — If set to `true`, S3 client will use insecure IMDS request while obtaining credentials from Amazon EC2 metadata. Default value is `false`.
- `expiration_window_seconds` — Grace period for checking if expiration-based credentials have expired. Optional, default value is `120`.
- `proxy` — Proxy configuration for S3 endpoint. Each `uri` element inside `proxy` block should contain a proxy URL.
- `connect_timeout_ms` — Socket connect timeout in milliseconds. Default value is `10 seconds`.
- `request_timeout_ms` — Request timeout in milliseconds. Default value is `5 seconds`.
- `retry_attempts` — Number of retry attempts in case of failed request. Default value is `10`.
- `single_read_retries` — Number of retry attempts in case of connection drop during read. Default value is `4`.
- `min_bytes_for_seek` — Minimal number of bytes to use seek operation instead of sequential read. Default value is `1 Mb`.
- `metadata_path` — Path on local FS to store metadata files for S3. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
- `skip_access_check` — If true, disk access checks will not be performed on disk start-up. Default value is `false`.
- `header` — Adds specified HTTP header to a request to given endpoint. Optional, can be specified multiple times.
- `server_side_encryption_customer_key_base64` — If specified, required headers for accessing S3 objects with SSE-C encryption will be set.
- `server_side_encryption_kms_key_id` - If specified, required headers for accessing S3 objects with [SSE-KMS encryption](https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingKMSEncryption.html) will be set. If an empty string is specified, the AWS managed S3 key will be used. Optional.
- `server_side_encryption_kms_encryption_context` - If specified alongside `server_side_encryption_kms_key_id`, the given encryption context header for SSE-KMS will be set. Optional.
- `server_side_encryption_kms_bucket_key_enabled` - If specified alongside `server_side_encryption_kms_key_id`, the header to enable S3 bucket keys for SSE-KMS will be set. Optional, can be `true` or `false`, defaults to nothing (matches the bucket-level setting).
- `s3_max_put_rps` — Maximum PUT requests per second rate before throttling. Default value is `0` (unlimited).
- `s3_max_put_burst` — Max number of requests that can be issued simultaneously before hitting the request-per-second limit. By default (`0` value) it equals `s3_max_put_rps`.
- `s3_max_get_rps` — Maximum GET requests per second rate before throttling. Default value is `0` (unlimited).
- `s3_max_get_burst` — Max number of requests that can be issued simultaneously before hitting the request-per-second limit. By default (`0` value) it equals `s3_max_get_rps`.
- `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
- `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
- `key_template` — Defines the format with which object keys are generated. By default, ClickHouse takes the `root path` from the `endpoint` option and adds a randomly generated suffix. That suffix is a directory with 3 random symbols and a file name with 29 random symbols. With this option you have full control over how object keys are generated. Some usage scenarios require having random symbols in the prefix or in the middle of the object key, for example: `[a-z]{3}-prefix-random/constant-part/random-middle-[a-z]{3}/random-suffix-[a-z]{29}`. The value is parsed with [`re2`](https://github.com/google/re2/wiki/Syntax). Only a subset of the syntax is supported, so check whether your preferred format is supported before using this option. The disk isn't initialized if ClickHouse is unable to generate a key from the value of `key_template`. It requires the enabled feature flag [storage_metadata_write_full_object_key](/docs/en/operations/settings/settings#storage_metadata_write_full_object_key). It forbids declaring the `root path` in the `endpoint` option. It requires the option `key_compatibility_prefix` to be defined.
- `key_compatibility_prefix` — This option is required when `key_template` is in use. To be able to read object keys which were stored in metadata files with a metadata version lower than `VERSION_FULL_OBJECT_KEY`, the previous `root path` from the `endpoint` option should be set here.
:::note
Google Cloud Storage (GCS) is also supported using the type `s3`. See [GCS backed MergeTree](/docs/en/integrations/gcs).
:::
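As a sketch, a GCS-backed disk could look like this; the bucket name and HMAC credentials are placeholders:
``` xml
<gcs>
    <type>s3</type>
    <!-- GCS exposes an S3-compatible endpoint -->
    <endpoint>https://storage.googleapis.com/example-bucket/data/</endpoint>
    <access_key_id>your_hmac_key</access_key_id>
    <secret_access_key>your_hmac_secret</secret_access_key>
    <!-- GCS does not support batch deletes, so disable the check to avoid log noise -->
    <support_batch_delete>false</support_batch_delete>
</gcs>
```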
### Using Plain Storage {#plain-storage}
In `22.10` a new disk type `s3_plain` was introduced, which provides write-once storage. Its configuration parameters are the same as for the `s3` disk type.
Unlike the `s3` disk type, it stores data as is: instead of randomly-generated blob names it uses normal file names (the same way ClickHouse stores files on a local disk), and it does not store any metadata locally; instead, the metadata is derived from the data on `s3`.
This disk type allows keeping a static version of the table, as it does not allow executing merges on the existing data or inserting new data.
A use case for this disk type is to create backups on it, which can be done via `BACKUP TABLE data TO Disk('plain_disk_name', 'backup_name')`. Afterwards you can do `RESTORE TABLE data AS data_restored FROM Disk('plain_disk_name', 'backup_name')` or use `ATTACH TABLE data (...) ENGINE = MergeTree() SETTINGS disk = 'plain_disk_name'`.
Configuration:
``` xml
<s3_plain>
<type>s3_plain</type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
    <use_environment_credentials>1</use_environment_credentials>
</s3_plain>
```
Starting from `24.1` it is possible to configure any object storage disk (`s3`, `azure`, `hdfs`, `local`) using the `plain` metadata type.
Configuration:
``` xml
<s3_plain>
<type>object_storage</type>
<object_storage_type>azure</object_storage_type>
<metadata_type>plain</metadata_type>
<endpoint>https://s3.eu-west-1.amazonaws.com/clickhouse-eu-west-1.clickhouse.com/data/</endpoint>
    <use_environment_credentials>1</use_environment_credentials>
</s3_plain>
```
### Using Azure Blob Storage {#azure-blob-storage}
`MergeTree` family table engines can store data to [Azure Blob Storage](https://azure.microsoft.com/en-us/services/storage/blobs/) using a disk with type `azure_blob_storage`.
As of February 2022, this feature is still a fresh addition, so expect that some Azure Blob Storage functionalities might be unimplemented.
Configuration markup:
``` xml
<storage_configuration>
...
<disks>
<blob_storage_disk>
<type>azure_blob_storage</type>
<storage_account_url>http://account.blob.core.windows.net</storage_account_url>
<container_name>container</container_name>
<account_name>account</account_name>
<account_key>pass123</account_key>
<metadata_path>/var/lib/clickhouse/disks/blob_storage_disk/</metadata_path>
<cache_path>/var/lib/clickhouse/disks/blob_storage_disk/cache/</cache_path>
<skip_access_check>false</skip_access_check>
</blob_storage_disk>
</disks>
...
</storage_configuration>
```
Connection parameters:
* `storage_account_url` - **Required**, Azure Blob Storage account URL, like `http://account.blob.core.windows.net` or `http://azurite1:10000/devstoreaccount1`.
* `container_name` - Target container name, defaults to `default-container`.
* `container_already_exists` - If set to `false`, a new container `container_name` is created in the storage account; if set to `true`, the disk connects to the container directly; and if left unset, the disk connects to the account, checks whether the container `container_name` exists, and creates it if it doesn't exist yet.
Authentication parameters (the disk will try all available methods **and** Managed Identity Credential):
* `connection_string` - For authentication using a connection string.
* `account_name` and `account_key` - For authentication using Shared Key.
Limit parameters (mainly for internal usage):
* `s3_max_single_part_upload_size` - Limits the size of a single block upload to Blob Storage.
* `min_bytes_for_seek` - Limits the size of a seekable region.
* `max_single_read_retries` - Limits the number of attempts to read a chunk of data from Blob Storage.
* `max_single_download_retries` - Limits the number of attempts to download a readable buffer from Blob Storage.
* `thread_pool_size` - Limits the number of threads with which `IDiskRemote` is instantiated.
* `s3_max_inflight_parts_for_one_file` - Limits the number of put requests that can be run concurrently for one object.
Other parameters:
* `metadata_path` - Path on local FS to store metadata files for Blob Storage. Default value is `/var/lib/clickhouse/disks/<disk_name>/`.
* `skip_access_check` - If true, disk access checks will not be performed on disk start-up. Default value is `false`.
* `read_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of read requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
* `write_resource` — Resource name to be used for [scheduling](/docs/en/operations/workload-scheduling.md) of write requests to this disk. Default value is empty string (IO scheduling is not enabled for this disk).
Examples of working configurations can be found in integration tests directory (see e.g. [test_merge_tree_azure_blob_storage](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_merge_tree_azure_blob_storage/configs/config.d/storage_conf.xml) or [test_azure_blob_storage_zero_copy_replication](https://github.com/ClickHouse/ClickHouse/blob/master/tests/integration/test_azure_blob_storage_zero_copy_replication/configs/config.d/storage_conf.xml)).
:::note Zero-copy replication is not ready for production
Zero-copy replication is disabled by default in ClickHouse version 22.8 and higher. This feature is not recommended for production use.
:::
## Using HDFS storage {#hdfs-storage}
In this sample configuration:
- the disk is of type `hdfs`
- the data is hosted at `hdfs://hdfs1:9000/clickhouse/`
```xml
<clickhouse>
<storage_configuration>
<disks>
<hdfs>
<type>hdfs</type>
<endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
<skip_access_check>true</skip_access_check>
</hdfs>
<hdd>
<type>local</type>
<path>/</path>
</hdd>
</disks>
<policies>
<hdfs>
@ -32,26 +422,17 @@ Configuration markup:
<main>
<disk>hdfs</disk>
</main>
<external>
<disk>hdd</disk>
</external>
</volumes>
</hdfs>
</policies>
</storage_configuration>
<merge_tree>
<min_bytes_for_wide_part>0</min_bytes_for_wide_part>
</merge_tree>
</clickhouse>
```
Required parameters:
- `endpoint` — HDFS endpoint URL in `path` format. Endpoint URL should contain a root path to store data.
Optional parameters:
- `min_bytes_for_seek` — The minimal number of bytes to use seek operation instead of sequential read. Default value: `1 Mb`.
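Since `hdfs` is also accepted as an `object_storage_type` in the `object_storage` syntax described earlier on this page (24.1+), the same disk could presumably be declared like this; a sketch, not verbatim from the docs:
``` xml
<hdfs>
    <type>object_storage</type>
    <object_storage_type>hdfs</object_storage_type>
    <metadata_type>local</metadata_type>
    <endpoint>hdfs://hdfs1:9000/clickhouse/</endpoint>
    <skip_access_check>true</skip_access_check>
</hdfs>
```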
## Using Virtual File System for Data Encryption {#encrypted-virtual-file-system}
### Using Data Encryption {#encrypted-virtual-file-system}
You can encrypt the data stored on [S3](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3), or [HDFS](#configuring-hdfs) external disks, or on a local disk. To turn on the encryption mode, in the configuration file you must define a disk with the type `encrypted` and choose a disk on which the data will be saved. An `encrypted` disk ciphers all written files on the fly, and when you read files from an `encrypted` disk it deciphers them automatically. So you can work with an `encrypted` disk like with a normal one.
@ -112,7 +493,7 @@ Example of disk configuration:
</clickhouse>
```
## Using local cache {#using-local-cache}
### Using local cache {#using-local-cache}
It is possible to configure local cache over disks in storage configuration starting from version 22.3.
For versions 22.3 - 22.7 cache is supported only for `s3` disk type. For versions >= 22.8 cache is supported for any disk type: S3, Azure, Local, Encrypted, etc.
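A minimal sketch of such a cache disk wrapping an object storage disk; the disk names, path, and size are illustrative:
``` xml
<s3_cache>
    <type>cache</type>
    <!-- the underlying disk whose data is cached locally -->
    <disk>s3</disk>
    <path>/var/lib/clickhouse/disks/s3_cache/</path>
    <max_size>10Gi</max_size>
</s3_cache>
```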
@ -275,23 +656,92 @@ Cache profile events:
- `CachedWriteBufferCacheWriteBytes`, `CachedWriteBufferCacheWriteMicroseconds`
## Using in-memory cache (userspace page cache) {#userspace-page-cache}
The File Cache described above stores cached data in local files. Alternatively, object-store-based disks can be configured to use "Userspace Page Cache", which is RAM-only. Userspace page cache is recommended only if file cache can't be used for some reason, e.g. if the machine doesn't have a local disk at all. Note that file cache effectively uses RAM for caching too, since the OS caches contents of local files.
To enable userspace page cache for disks that don't use file cache, use setting `use_page_cache_for_disks_without_file_cache`.
By default, on Linux, the userspace page cache will use all available memory, similar to the OS page cache. In tools like `top` and `ps`, the clickhouse server process will typically show resident set size near 100% of the machine's RAM - this is normal, and most of this memory is actually reclaimable by the OS on memory pressure (`MADV_FREE`). This behavior can be disabled with server setting `page_cache_use_madv_free = 0`, making the userspace page cache just use a fixed amount of memory `page_cache_size` with no special interaction with the OS. On Mac OS, `page_cache_use_madv_free` is always disabled as it doesn't have lazy `MADV_FREE`.
Unfortunately, `page_cache_use_madv_free` makes it difficult to tell if the server is close to running out of memory, since the RSS metric becomes useless. Async metric `UnreclaimableRSS` shows the amount of physical memory used by the server, excluding the memory reclaimable by the OS: `select value from system.asynchronous_metrics where metric = 'UnreclaimableRSS'`. Use it for monitoring instead of RSS. This metric is only available if `page_cache_use_madv_free` is enabled.
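A hedged sketch of the server settings mentioned above; the size value (assumed to be in bytes) is an arbitrary example:
``` xml
<clickhouse>
    <!-- fixed amount of memory for the userspace page cache -->
    <page_cache_size>10737418240</page_cache_size>
    <!-- disable MADV_FREE-based reclaim so the RSS metric stays meaningful -->
    <page_cache_use_madv_free>0</page_cache_use_madv_free>
</clickhouse>
```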
## Storing Data on Web Server {#storing-data-on-webserver}
There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`.
### Using static Web storage (read-only) {#web-storage}
This is a read-only disk. Its data is only read and never modified. A new table is loaded to this disk via `ATTACH TABLE` query (see example below). Local disk is not actually used, each `SELECT` query will result in a `http` request to fetch required data. All modification of the table data will result in an exception, i.e. the following types of queries are not allowed: [CREATE TABLE](/docs/en/sql-reference/statements/create/table.md), [ALTER TABLE](/docs/en/sql-reference/statements/alter/index.md), [RENAME TABLE](/docs/en/sql-reference/statements/rename.md/#misc_operations-rename_table), [DETACH TABLE](/docs/en/sql-reference/statements/detach.md) and [TRUNCATE TABLE](/docs/en/sql-reference/statements/truncate.md).
Web storage can be used for read-only purposes. An example use is for hosting sample data, or for migrating data.
There is a tool `clickhouse-static-files-uploader`, which prepares a data directory for a given table (`SELECT data_paths FROM system.tables WHERE name = 'table_name'`). For each table you need, you get a directory of files. These files can be uploaded to, for example, a web server with static files. After this preparation, you can load this table into any ClickHouse server via `DiskWeb`.
Web server storage is supported only for the [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) and [Log](/docs/en/engines/table-engines/log-family/log.md) engine families. To access the data stored on a `web` disk, use the [storage_policy](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#terms) setting when executing the query. For example, `ATTACH TABLE table_web UUID '{}' (id Int32) ENGINE = MergeTree() ORDER BY id SETTINGS storage_policy = 'web'`.
In this sample configuration:
- the disk is of type `web`
- the data is hosted at `http://nginx:80/test1/`
- a cache on local storage is used
```xml
<clickhouse>
<storage_configuration>
<disks>
<web>
<type>web</type>
<endpoint>http://nginx:80/test1/</endpoint>
</web>
<cached_web>
<type>cache</type>
<disk>web</disk>
<path>cached_web_cache/</path>
<max_size>100000000</max_size>
</cached_web>
</disks>
<policies>
<web>
<volumes>
<main>
<disk>web</disk>
</main>
</volumes>
</web>
<cached_web>
<volumes>
<main>
<disk>cached_web</disk>
</main>
</volumes>
</cached_web>
</policies>
</storage_configuration>
</clickhouse>
```
:::tip
Storage can also be configured temporarily within a query, if a web dataset is not expected
to be used routinely, see [dynamic configuration](#dynamic-configuration) and skip editing the
configuration file.
:::
:::tip
A [demo dataset](https://github.com/ClickHouse/web-tables-demo) is hosted on GitHub. To prepare your own tables for web storage, see the tool [clickhouse-static-files-uploader](/docs/en/operations/storing-data.md/#storing-data-on-webserver).
:::
In this `ATTACH TABLE` query the `UUID` provided matches the directory name of the data, and the endpoint is the URL for the raw GitHub content.
```sql
# highlight-next-line
ATTACH TABLE uk_price_paid UUID 'cf712b4f-2ca8-435c-ac23-c4393efe52f7'
(
price UInt32,
date Date,
postcode1 LowCardinality(String),
postcode2 LowCardinality(String),
type Enum8('other' = 0, 'terraced' = 1, 'semi-detached' = 2, 'detached' = 3, 'flat' = 4),
is_new UInt8,
duration Enum8('unknown' = 0, 'freehold' = 1, 'leasehold' = 2),
addr1 String,
addr2 String,
street LowCardinality(String),
locality LowCardinality(String),
town LowCardinality(String),
district LowCardinality(String),
county LowCardinality(String)
)
ENGINE = MergeTree
ORDER BY (postcode1, postcode2, addr1, addr2)
# highlight-start
SETTINGS disk = disk(
type=web,
endpoint='https://raw.githubusercontent.com/ClickHouse/web-tables-demo/main/web/'
);
# highlight-end
```
Here is a ready-made test case. You need to add this configuration to the server config:
@ -487,7 +937,7 @@ If URL is not reachable on disk load when the server is starting up tables, then
Use the [http_max_single_read_retries](/docs/en/operations/settings/settings.md/#http-max-single-read-retries) setting to limit the maximum number of retries during a single HTTP read.
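As a sketch, the limit can also be raised for an individual query (the table name `table_web` is taken from the `ATTACH TABLE` example above; the retry count is an arbitrary illustration):
```sql
-- Allow up to 16 retries for each single HTTP read from the web disk
SELECT count()
FROM table_web
SETTINGS http_max_single_read_retries = 16;
```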
## Zero-copy Replication (not ready for production) {#zero-copy}
### Zero-copy Replication (not ready for production) {#zero-copy}
Zero-copy replication is possible, but not recommended, with `S3` and `HDFS` disks. Zero-copy replication means that if the data is stored remotely on several machines and needs to be synchronized, then only the metadata is replicated (paths to the data parts), but not the data itself.

View File

@ -513,10 +513,6 @@ Part was moved to another disk and should be deleted in own destructor.
Not an active data part with an identity refcounter; it is currently being deleted by a cleaner.
### PartsInMemory
In-memory parts.
### PartsOutdated
Not an active data part, but it can still be used by currently running SELECTs and can be deleted after those SELECTs finish.

View File

@ -26,7 +26,9 @@ priority: 0
is_active: 0
active_children: 0
dequeued_requests: 67
canceled_requests: 0
dequeued_cost: 4692272
canceled_cost: 0
busy_periods: 63
vruntime: 938454.1999999989
system_vruntime: ᴺᵁᴸᴸ
@ -54,7 +56,9 @@ Columns:
- `is_active` (`UInt8`) - Whether this node is currently active - has resource requests to be dequeued and constraints satisfied.
- `active_children` (`UInt64`) - The number of children in active state.
- `dequeued_requests` (`UInt64`) - The total number of resource requests dequeued from this node.
- `canceled_requests` (`UInt64`) - The total number of resource requests canceled from this node.
- `dequeued_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests dequeued from this node.
- `canceled_cost` (`UInt64`) - The sum of costs (e.g. size in bytes) of all requests canceled from this node.
- `busy_periods` (`UInt64`) - The total number of deactivations of this node.
- `vruntime` (`Nullable(Float64)`) - For children of `fair` nodes only. Virtual runtime of a node used by SFQ algorithm to select the next child to process in a max-min fair manner.
- `system_vruntime` (`Nullable(Float64)`) - For `fair` nodes only. Virtual runtime showing `vruntime` of the last processed resource request. Used during child activation as the new value of `vruntime`.
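As a quick check, the per-node counters described above can be inspected directly (a minimal sketch that uses only the columns documented on this page):
```sql
-- Scheduling activity per node; busy_periods counts deactivations of a node
SELECT
    is_active,
    active_children,
    dequeued_requests,
    canceled_requests,
    dequeued_cost,
    canceled_cost,
    busy_periods
FROM system.scheduler;
```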

View File

@ -1,187 +0,0 @@
---
slug: /en/operations/utilities/clickhouse-copier
sidebar_position: 59
sidebar_label: clickhouse-copier
---
# clickhouse-copier
Copies data from the tables in one cluster to tables in another (or the same) cluster.
:::note
To get a consistent copy, the data in the source tables and partitions should not change during the entire process.
:::
You can run multiple `clickhouse-copier` instances on different servers to perform the same job. ClickHouse Keeper, or ZooKeeper, is used for syncing the processes.
After starting, `clickhouse-copier`:
- Connects to ClickHouse Keeper and receives:
- Copying jobs.
- The state of the copying jobs.
- It performs the jobs.
Each running process chooses the “closest” shard of the source cluster and copies the data into the destination cluster, resharding the data if necessary.
`clickhouse-copier` tracks the changes in ClickHouse Keeper and applies them on the fly.
To reduce network traffic, we recommend running `clickhouse-copier` on the same server where the source data is located.
## Running Clickhouse-copier {#running-clickhouse-copier}
The utility should be run manually:
``` bash
$ clickhouse-copier --daemon --config keeper.xml --task-path /task/path --base-dir /path/to/dir
```
Parameters:
- `daemon` — Starts `clickhouse-copier` in daemon mode.
- `config` — The path to the `keeper.xml` file with the parameters for the connection to ClickHouse Keeper.
- `task-path` — The path to the ClickHouse Keeper node. This node is used for syncing `clickhouse-copier` processes and storing tasks. Tasks are stored in `$task-path/description`.
- `task-file` — Optional path to file with task configuration for initial upload to ClickHouse Keeper.
- `task-upload-force` — Force upload `task-file` even if node already exists. Default is false.
- `base-dir` — The path to logs and auxiliary files. When it starts, `clickhouse-copier` creates `clickhouse-copier_YYYYMMHHSS_<PID>` subdirectories in `$base-dir`. If this parameter is omitted, the directories are created in the directory where `clickhouse-copier` was launched.
## Format of keeper.xml {#format-of-zookeeper-xml}
``` xml
<clickhouse>
<logger>
<level>trace</level>
<size>100M</size>
<count>3</count>
</logger>
<zookeeper>
<node index="1">
<host>127.0.0.1</host>
<port>2181</port>
</node>
</zookeeper>
</clickhouse>
```
## Configuration of Copying Tasks {#configuration-of-copying-tasks}
``` xml
<clickhouse>
<!-- Configuration of clusters as in an ordinary server config -->
<remote_servers>
<source_cluster>
<!--
source cluster & destination clusters accept exactly the same
parameters as parameters for the usual Distributed table
see https://clickhouse.com/docs/en/engines/table-engines/special/distributed/
-->
<shard>
<internal_replication>false</internal_replication>
<replica>
<host>127.0.0.1</host>
<port>9000</port>
<!--
<user>default</user>
<password>default</password>
<secure>1</secure>
-->
</replica>
</shard>
...
</source_cluster>
<destination_cluster>
...
</destination_cluster>
</remote_servers>
<!-- How many simultaneously active workers are possible. If you run more workers superfluous workers will sleep. -->
<max_workers>2</max_workers>
<!-- Setting used to fetch (pull) data from source cluster tables -->
<settings_pull>
<readonly>1</readonly>
</settings_pull>
<!-- Setting used to insert (push) data to destination cluster tables -->
<settings_push>
<readonly>0</readonly>
</settings_push>
<!-- Common setting for fetch (pull) and insert (push) operations. Also, copier process context uses it.
They are overlaid by <settings_pull/> and <settings_push/> respectively. -->
<settings>
<connect_timeout>3</connect_timeout>
<!-- Sync insert is set forcibly, leave it here just in case. -->
<distributed_foreground_insert>1</distributed_foreground_insert>
</settings>
<!-- Copying tasks description.
You could specify several table task in the same task description (in the same ZooKeeper node), they will be performed
sequentially.
-->
<tables>
<!-- A table task, copies one table. -->
<table_hits>
<!-- Source cluster name (from <remote_servers/> section) and tables in it that should be copied -->
<cluster_pull>source_cluster</cluster_pull>
<database_pull>test</database_pull>
<table_pull>hits</table_pull>
<!-- Destination cluster name and tables in which the data should be inserted -->
<cluster_push>destination_cluster</cluster_push>
<database_push>test</database_push>
<table_push>hits2</table_push>
<!-- Engine of destination tables.
If destination tables have not be created, workers create them using columns definition from source tables and engine
definition from here.
NOTE: If the first worker starts insert data and detects that destination partition is not empty then the partition will
be dropped and refilled, take it into account if you already have some data in destination tables. You could directly
specify partitions that should be copied in <enabled_partitions/>, they should be in quoted format like partition column of
system.parts table.
-->
<engine>
ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hits2', '{replica}')
PARTITION BY toMonday(date)
ORDER BY (CounterID, EventDate)
</engine>
<!-- Sharding key used to insert data to destination cluster -->
<sharding_key>jumpConsistentHash(intHash64(UserID), 2)</sharding_key>
<!-- Optional expression that filter data while pull them from source servers -->
<where_condition>CounterID != 0</where_condition>
<!-- This section specifies partitions that should be copied, other partition will be ignored.
Partition names should have the same format as
partition column of system.parts table (i.e. a quoted text).
Since partition key of source and destination cluster could be different,
these partition names specify destination partitions.
NOTE: In spite of this section is optional (if it is not specified, all partitions will be copied),
it is strictly recommended to specify them explicitly.
If you already have some ready partitions on destination cluster they
will be removed at the start of the copying since they will be interpeted
as unfinished data from the previous copying!!!
-->
<enabled_partitions>
<partition>'2018-02-26'</partition>
<partition>'2018-03-05'</partition>
...
</enabled_partitions>
</table_hits>
<!-- Next table to copy. It is not copied until previous table is copying. -->
<table_visits>
...
</table_visits>
...
</tables>
</clickhouse>
```
`clickhouse-copier` tracks the changes in `/task/path/description` and applies them on the fly. For instance, if you change the value of `max_workers`, the number of processes running tasks will also change.

View File

@ -2,13 +2,11 @@
slug: /en/operations/utilities/
sidebar_position: 56
sidebar_label: List of tools and utilities
pagination_next: 'en/operations/utilities/clickhouse-copier'
---
# List of tools and utilities
- [clickhouse-local](../../operations/utilities/clickhouse-local.md) — Allows running SQL queries on data without starting the ClickHouse server, similar to how `awk` does this.
- [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) — Copies (and reshards) data from one cluster to another cluster.
- [clickhouse-benchmark](../../operations/utilities/clickhouse-benchmark.md) — Loads server with the custom queries and settings.
- [clickhouse-format](../../operations/utilities/clickhouse-format.md) — Enables formatting input queries.
- [ClickHouse obfuscator](../../operations/utilities/clickhouse-obfuscator.md) — Obfuscates data.

View File

@ -16,7 +16,9 @@ ClickHouse also supports:
## NULL Processing
During aggregation, all `NULL`s are skipped. If the aggregation has several parameters it will ignore any row in which one or more of the parameters are NULL.
During aggregation, all `NULL` arguments are skipped. If the aggregation has several arguments it will ignore any row in which one or more of them are NULL.
There is an exception to this rule, which are the functions [`first_value`](../../sql-reference/aggregate-functions/reference/first_value.md), [`last_value`](../../sql-reference/aggregate-functions/reference/last_value.md) and their aliases when followed by the modifier `RESPECT NULLS`: `FIRST_VALUE(b) RESPECT NULLS`.
**Examples:**
@ -85,3 +87,50 @@ FROM t_null_big;
│ [2,2,3] │ [2,NULL,2,3,NULL] │
└───────────────┴───────────────────────────────────────┘
```
Note that aggregations skip NULL values only when the columns are used as arguments to the aggregate function. For example, [`count`](../../sql-reference/aggregate-functions/reference/count.md) without parameters (`count()`) or with constant ones (`count(1)`) will count all rows in the block (independently of the value of the GROUP BY column, as it is not an argument), while `count(column)` will only return the number of rows where `column` is not NULL.
```sql
SELECT
v,
count(1),
count(v)
FROM
(
SELECT if(number < 10, NULL, number % 3) AS v
FROM numbers(15)
)
GROUP BY v
┌────v─┬─count()─┬─count(v)─┐
│ ᴺᵁᴸᴸ │ 10 │ 0 │
│ 0 │ 1 │ 1 │
│ 1 │ 2 │ 2 │
│ 2 │ 2 │ 2 │
└──────┴─────────┴──────────┘
```
And here is an example of `first_value` with `RESPECT NULLS`, where we can see that NULL inputs are respected and the function returns the first value read, whether it is NULL or not:
```sql
SELECT
col || '_' || ((col + 1) * 5 - 1) as range,
first_value(odd_or_null) as first,
first_value(odd_or_null) IGNORE NULLS as first_ignore_null,
first_value(odd_or_null) RESPECT NULLS as first_respect_nulls
FROM
(
SELECT
intDiv(number, 5) AS col,
if(number % 2 == 0, NULL, number) as odd_or_null
FROM numbers(15)
)
GROUP BY col
ORDER BY col
┌─range─┬─first─┬─first_ignore_null─┬─first_respect_nulls─┐
│ 0_4 │ 1 │ 1 │ ᴺᵁᴸᴸ │
│ 1_9 │ 5 │ 5 │ 5 │
│ 2_14 │ 11 │ 11 │ ᴺᵁᴸᴸ │
└───────┴───────┴───────────────────┴─────────────────────┘
```

View File

@ -1,16 +1,99 @@
---
slug: /en/sql-reference/aggregate-functions/reference/varpop
title: "varPop"
slug: "/en/sql-reference/aggregate-functions/reference/varpop"
sidebar_position: 32
---
# varPop(x)
This page covers the `varPop` and `varPopStable` functions available in ClickHouse.
Calculates the amount `Σ((x - x̅)^2) / n`, where `n` is the sample size and `x̅`is the average value of `x`.
## varPop
In other words, dispersion for a set of values. Returns `Float64`.
Calculates the population variance of a data set: `Σ((x - x̅)^2) / n`, where `n` is the size of the data set and `x̅` is the average value of `x`.
Alias: `VAR_POP`.
**Syntax**
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varPopStable` function. It works slower but provides a lower computational error.
:::
```sql
varPop(x)
```
**Parameters**
- `x`: A column of numeric values for which to compute the population variance.
**Returned value**
Returns a `Float64` value.
**Implementation details**
This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varPopStable` function](#varPopStable).
**Example**
```sql
DROP TABLE IF EXISTS test_data;
CREATE TABLE test_data
(
x Int32,
y Int32
)
ENGINE = Memory;
INSERT INTO test_data VALUES (1, 2), (2, 3), (3, 5), (4, 6), (5, 8);
SELECT
    varPop(x) AS var_pop
FROM test_data;
```
```response
2
```
## varPopStable
Calculates the population variance of a data set using a stable, numerically accurate algorithm. This function is designed to provide reliable results even with large datasets or values that might cause numerical instability in other implementations.
**Syntax**
```sql
varPopStable(x)
```
**Parameters**
- `x`: A column of numeric values for which to compute the population variance.
**Returned value**
Returns a `Float64` value.
**Implementation details**
Unlike [`varPop()`](#varPop), this function uses a stable, numerically accurate algorithm to calculate the population variance to avoid issues like catastrophic cancellation or loss of precision. This function also handles `NaN` and `Inf` values correctly, excluding them from calculations.
**Example**
Query:
```sql
DROP TABLE IF EXISTS test_data;
CREATE TABLE test_data
(
x Int32,
y Int32
)
ENGINE = Memory;
INSERT INTO test_data VALUES (2, 2), (4, 9), (6, 5), (8, 6), (10, 8);
SELECT
    varPopStable(x) AS var_pop_stable
FROM test_data;
```
```response
8
```

View File

@ -1,18 +1,128 @@
---
title: "varSamp"
slug: /en/sql-reference/aggregate-functions/reference/varsamp
sidebar_position: 33
---
# varSamp
This page contains information on the `varSamp` and `varSampStable` ClickHouse functions.
Calculates the amount `Σ((x - x̅)^2) / (n - 1)`, where `n` is the sample size and `x̅`is the average value of `x`.
## varSamp
It represents an unbiased estimate of the variance of a random variable if passed values from its sample.
Calculates the sample variance of a data set.
Returns `Float64`. When `n <= 1`, returns `+∞`.
**Syntax**
Alias: `VAR_SAMP`.
```sql
varSamp(expr)
```
:::note
This function uses a numerically unstable algorithm. If you need [numerical stability](https://en.wikipedia.org/wiki/Numerical_stability) in calculations, use the `varSampStable` function. It works slower but provides a lower computational error.
:::
**Parameters**
- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../../syntax#syntax-expressions)
**Returned value**
Returns a Float64 value representing the sample variance of the input data set.
**Implementation details**
The `varSamp()` function calculates the sample variance using the following formula:
```plaintext
∑(x - mean(x))^2 / (n - 1)
```
Where:
- `x` is each individual data point in the data set.
- `mean(x)` is the arithmetic mean of the data set.
- `n` is the number of data points in the data set.
The function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPop()` function](./varpop#varpop) instead.
This function uses a numerically unstable algorithm. If you need numerical stability in calculations, use the slower but more stable [`varSampStable` function](#varSampStable).
**Example**
Query:
```sql
CREATE TABLE example_table
(
id UInt64,
value Float64
)
ENGINE = MergeTree
ORDER BY id;
INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7);
SELECT varSamp(value) FROM example_table;
```
Response:
```response
0.8650000000000091
```
## varSampStable
Calculates the sample variance of a data set using a numerically stable algorithm.
**Syntax**
```sql
varSampStable(expr)
```
**Parameters**
- `expr`: An expression representing the data set for which you want to calculate the sample variance. [Expression](../../syntax#syntax-expressions)
**Returned value**
The `varSampStable()` function returns a Float64 value representing the sample variance of the input data set.
**Implementation details**
The `varSampStable()` function calculates the sample variance using the same formula as the [`varSamp()`](#varsamp) function:
```plaintext
∑(x - mean(x))^2 / (n - 1)
```
Where:
- `x` is each individual data point in the data set.
- `mean(x)` is the arithmetic mean of the data set.
- `n` is the number of data points in the data set.
The difference between `varSampStable()` and `varSamp()` is that `varSampStable()` is designed to provide a more deterministic and stable result when dealing with floating-point arithmetic. It uses an algorithm that minimizes the accumulation of rounding errors, which can be particularly important when dealing with large data sets or data with a wide range of values.
Like `varSamp()`, the `varSampStable()` function assumes that the input data set represents a sample from a larger population. If you want to calculate the variance of the entire population (when you have the complete data set), you should use the [`varPopStable()` function](./varpop#varpopstable) instead.
**Example**
Query:
```sql
CREATE TABLE example_table
(
id UInt64,
value Float64
)
ENGINE = MergeTree
ORDER BY id;
INSERT INTO example_table VALUES (1, 10.5), (2, 12.3), (3, 9.8), (4, 11.2), (5, 10.7);
SELECT varSampStable(value) FROM example_table;
```
Response:
```response
0.865
```
This query calculates the sample variance of the `value` column in the `example_table` using the `varSampStable()` function. The result shows that the sample variance of the values `[10.5, 12.3, 9.8, 11.2, 10.7]` is approximately 0.865, which may differ slightly from the result of `varSamp()` due to the more precise handling of floating-point arithmetic.
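To see how the two estimators compare on the same input, both can be run side by side (a sketch reusing the `example_table` defined above; the size of the difference depends on the data):
```sql
-- Numerically unstable vs. stable sample variance of the same column
SELECT
    varSamp(value) AS var_samp,
    varSampStable(value) AS var_samp_stable
FROM example_table;
```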

View File

@ -433,3 +433,292 @@ Result:
│ [0,1,2,3,4,5,6,7] │
└───────────────────┘
```
## mortonEncode
Calculates the Morton encoding (ZCurve) for a list of unsigned integers.
The function has two modes of operation:
- Simple
- Expanded
### Simple mode
Accepts up to 8 unsigned integers as arguments and produces a UInt64 code.
**Syntax**
```sql
mortonEncode(args)
```
**Parameters**
- `args`: up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type.
**Returned value**
- A UInt64 code
Type: [UInt64](../../sql-reference/data-types/int-uint.md)
**Example**
Query:
```sql
SELECT mortonEncode(1, 2, 3);
```
Result:
```response
53
```
### Expanded mode
Accepts a range mask ([tuple](../../sql-reference/data-types/tuple.md)) as a first argument and up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) as other arguments.
Each number in the mask configures the amount of range expansion:<br/>
1 - no expansion<br/>
2 - 2x expansion<br/>
3 - 3x expansion<br/>
...<br/>
Up to 8x expansion.<br/>
**Syntax**
```sql
mortonEncode(range_mask, args)
```
**Parameters**
- `range_mask`: a [tuple](../../sql-reference/data-types/tuple.md) of expansion factors (one per argument), each in the range 1-8.
- `args`: up to 8 [unsigned integers](../../sql-reference/data-types/int-uint.md) or columns of the aforementioned type.
Note: when using columns for `args` the provided `range_mask` tuple should still be a constant.
**Returned value**
- A UInt64 code
Type: [UInt64](../../sql-reference/data-types/int-uint.md)
**Example**
Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality).
For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF).
Query:
```sql
SELECT mortonEncode((1,2), 1024, 16);
```
Result:
```response
1572864
```
Note: tuple size must be equal to the number of the other arguments.
**Example**
Morton encoding for one argument is always the argument itself:
Query:
```sql
SELECT mortonEncode(1);
```
Result:
```response
1
```
**Example**
It is also possible to expand one argument too:
Query:
```sql
SELECT mortonEncode(tuple(2), 128);
```
Result:
```response
32768
```
**Example**
You can also use column names in the function.
Query:
First create the table and insert some data.
```sql
create table morton_numbers(
n1 UInt32,
n2 UInt32,
n3 UInt16,
n4 UInt16,
n5 UInt8,
n6 UInt8,
n7 UInt8,
n8 UInt8
)
Engine=MergeTree()
ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
insert into morton_numbers (*) values(1,2,3,4,5,6,7,8);
```
Use column names instead of constants as function arguments to `mortonEncode`
Query:
```sql
SELECT mortonEncode(n1, n2, n3, n4, n5, n6, n7, n8) FROM morton_numbers;
```
Result:
```response
2155374165
```
**Implementation details**
Please note that you can fit only so many bits of information into Morton code as [UInt64](../../sql-reference/data-types/int-uint.md) has. Two arguments will have a range of maximum 2^32 (64/2) each, three arguments a range of max 2^21 (64/3) each and so on. All overflow will be clamped to zero.
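As an illustration of this bit budget (a sketch; with two arguments each one occupies 32 of the 64 result bits, so interleaving two all-ones 32-bit values sets every bit of the result):
```sql
-- 0xFFFFFFFF interleaved with 0xFFFFFFFF fills all 64 bits of the UInt64 code
SELECT mortonEncode(0xFFFFFFFF, 0xFFFFFFFF) AS all_bits_set;
```
```response
18446744073709551615
```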
## mortonDecode
Decodes a Morton encoding (ZCurve) into the corresponding unsigned integer tuple.
As with the `mortonEncode` function, this function has two modes of operation:
- Simple
- Expanded
### Simple mode
Accepts a resulting tuple size as the first argument and the code as the second argument.
**Syntax**
```sql
mortonDecode(tuple_size, code)
```
**Parameters**
- `tuple_size`: integer value no more than 8.
- `code`: [UInt64](../../sql-reference/data-types/int-uint.md) code.
**Returned value**
- [tuple](../../sql-reference/data-types/tuple.md) of the specified size.
Type: [Tuple](../../sql-reference/data-types/tuple.md) of [UInt64](../../sql-reference/data-types/int-uint.md) values.
**Example**
Query:
```sql
SELECT mortonDecode(3, 53);
```
Result:
```response
["1","2","3"]
```
### Expanded mode
Accepts a range mask (tuple) as a first argument and the code as the second argument.
Each number in the mask configures the amount of range shrink:<br/>
1 - no shrink<br/>
2 - 2x shrink<br/>
3 - 3x shrink<br/>
...<br/>
Up to 8x shrink.<br/>
Range expansion can be beneficial when you need a similar distribution for arguments with wildly different ranges (or cardinality).
For example: 'IP Address' (0...FFFFFFFF) and 'Country code' (0...FF).
As with the encode function, this is limited to 8 numbers at most.
**Example**
Query:
```sql
SELECT mortonDecode(1, 1);
```
Result:
```response
["1"]
```
**Example**
It is also possible to shrink one argument:
Query:
```sql
SELECT mortonDecode(tuple(2), 32768);
```
Result:
```response
["128"]
```
**Example**
You can also use column names in the function.
First create the table and insert some data.
Query:
```sql
create table morton_numbers(
n1 UInt32,
n2 UInt32,
n3 UInt16,
n4 UInt16,
n5 UInt8,
n6 UInt8,
n7 UInt8,
n8 UInt8
)
Engine=MergeTree()
ORDER BY n1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';
insert into morton_numbers (*) values(1,2,3,4,5,6,7,8);
```
Use column names instead of constants as function arguments to `mortonDecode`
Query:
```sql
select untuple(mortonDecode(8, mortonEncode(n1, n2, n3, n4, n5, n6, n7, n8))) from morton_numbers;
```
Result:
```response
1 2 3 4 5 6 7 8
```

View File

@ -10,6 +10,8 @@ sidebar_label: Nullable
Returns whether the argument is [NULL](../../sql-reference/syntax.md#null).
See also operator [`IS NULL`](../operators/index.md#is_null).
``` sql
isNull(x)
```
@ -54,6 +56,8 @@ Result:
Returns whether the argument is not [NULL](../../sql-reference/syntax.md#null-literal).
See also operator [`IS NOT NULL`](../operators/index.md#is_not_null).
``` sql
isNotNull(x)
```
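For a quick sanity check, the function form and the operator form can be compared directly (a minimal sketch):
```sql
-- isNotNull(x) mirrors the IS NOT NULL operator; both return 0 for NULL
SELECT isNotNull(NULL) AS func_form, NULL IS NOT NULL AS operator_form;
```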

View File

@ -4,6 +4,67 @@ sidebar_label: Polygons
title: "Functions for Working with Polygons"
---
## WKT
Returns a WKT (Well Known Text) geometric object from various [Geo Data Types](../../data-types/geo.md). Supported WKT objects are:
- POINT
- POLYGON
- MULTIPOLYGON
**Syntax**
```sql
WKT(geo_data)
```
**Parameters**
`geo_data` can be one of the following [Geo Data Types](../../data-types/geo.md) or their underlying primitive types:
- [Point](../../data-types/geo.md#point)
- [Ring](../../data-types/geo.md#ring)
- [Polygon](../../data-types/geo.md#polygon)
- [MultiPolygon](../../data-types/geo.md#multipolygon)
**Returned value**
- WKT geometric object `POINT` is returned for a Point.
- WKT geometric object `POLYGON` is returned for a Polygon.
- WKT geometric object `MULTIPOLYGON` is returned for a MultiPolygon.
**Examples**
POINT from tuple:
```sql
SELECT wkt((0., 0.));
```
```response
POINT(0 0)
```
POLYGON from an array of tuples or an array of tuple arrays:
```sql
SELECT wkt([(0., 0.), (10., 0.), (10., 10.), (0., 10.)]);
```
```response
POLYGON((0 0,10 0,10 10,0 10))
```
MULTIPOLYGON from an array of multi-dimensional tuple arrays:
```sql
SELECT wkt([[[(0., 0.), (10., 0.), (10., 10.), (0., 10.)], [(4., 4.), (5., 4.), (5., 5.), (4., 5.)]], [[(-10., -10.), (-10., -9.), (-9., 10.)]]]);
```
```response
MULTIPOLYGON(((0 0,10 0,10 10,0 10,0 0),(4 4,5 4,5 5,4 5,4 4)),((-10 -10,-10 -9,-9 10,-10 -10)))
```
## readWKTMultiPolygon
Converts a WKT (Well Known Text) MultiPolygon into a MultiPolygon type.
@ -53,6 +114,62 @@ String starting with `POLYGON`
Polygon
## readWKTPoint
The `readWKTPoint` function in ClickHouse parses a Well-Known Text (WKT) representation of a Point geometry and returns a point in the internal ClickHouse format.
### Syntax
```sql
readWKTPoint(wkt_string)
```
### Arguments
- `wkt_string`: The input WKT string representing a Point geometry.
### Returned value
The function returns a ClickHouse internal representation of the Point geometry.
### Example
```sql
SELECT readWKTPoint('POINT (1.2 3.4)');
```
```response
(1.2,3.4)
```
## readWKTRing
Parses a Well-Known Text (WKT) representation of a Polygon geometry and returns a ring (closed linestring) in the internal ClickHouse format.
### Syntax
```sql
readWKTRing(wkt_string)
```
### Arguments
- `wkt_string`: The input WKT string representing a Polygon geometry.
### Returned value
The function returns a ClickHouse internal representation of the ring (closed linestring) geometry.
### Example
```sql
SELECT readWKTRing('LINESTRING (1 1, 2 2, 3 3, 1 1)');
```
```response
[(1,1),(2,2),(3,3),(1,1)]
```
## polygonsWithinSpherical
Returns true or false depending on whether or not one polygon lies completely inside another polygon. Reference https://www.boost.org/doc/libs/1_62_0/libs/geometry/doc/html/geometry/reference/algorithms/within/within_2.html

View File

@ -5,80 +5,372 @@ sidebar_label: JSON
---
There are two sets of functions to parse JSON.
- `visitParam*` (`simpleJSON*`) is made to parse a special very limited subset of a JSON, but these functions are extremely fast.
- `simpleJSON*` (`visitParam*`) is made to parse a special very limited subset of a JSON, but these functions are extremely fast.
- `JSONExtract*` is made to parse normal JSON.
# visitParam functions
# simpleJSON/visitParam functions
ClickHouse has special functions for working with simplified JSON. All these JSON functions are based on strong assumptions about what the JSON can be, but they try to do as little as possible to get the job done.
The following assumptions are made:
1. The field name (function argument) must be a constant.
2. The field name is somehow canonically encoded in JSON. For example: `visitParamHas('{"abc":"def"}', 'abc') = 1`, but `visitParamHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0`
2. The field name is somehow canonically encoded in JSON. For example: `simpleJSONHas('{"abc":"def"}', 'abc') = 1`, but `simpleJSONHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') = 0`
3. Fields are searched for on any nesting level, indiscriminately. If there are multiple matching fields, the first occurrence is used.
4. The JSON does not have space characters outside of string literals.
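Assumption 2 can be illustrated directly with the literals used above; the first call returns `1`, the second `0`, because the escaped key does not match the literal field name byte-for-byte (a minimal sketch):
```sql
-- \u0061\u0062\u0063 decodes to "abc", but simpleJSON* compares the raw bytes only
SELECT
    simpleJSONHas('{"abc":"def"}', 'abc') AS literal_key,
    simpleJSONHas('{"\\u0061\\u0062\\u0063":"def"}', 'abc') AS escaped_key;
```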
## visitParamHas(params, name)
## simpleJSONHas
Checks whether there is a field with the `name` name.
Checks whether there is a field named `field_name`. The result is `UInt8`.
Alias: `simpleJSONHas`.
**Syntax**
## visitParamExtractUInt(params, name)
Parses UInt64 from the value of the field named `name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns 0.
Alias: `simpleJSONExtractUInt`.
## visitParamExtractInt(params, name)
The same as for Int64.
Alias: `simpleJSONExtractInt`.
## visitParamExtractFloat(params, name)
The same as for Float64.
Alias: `simpleJSONExtractFloat`.
## visitParamExtractBool(params, name)
Parses a true/false value. The result is UInt8.
Alias: `simpleJSONExtractBool`.
## visitParamExtractRaw(params, name)
Returns the value of a field, including separators.
Alias: `simpleJSONExtractRaw`.
Examples:
``` sql
visitParamExtractRaw('{"abc":"\\n\\u0000"}', 'abc') = '"\\n\\u0000"';
visitParamExtractRaw('{"abc":{"def":[1,2,3]}}', 'abc') = '{"def":[1,2,3]}';
```sql
simpleJSONHas(json, field_name)
```
## visitParamExtractString(params, name)
**Parameters**
Parses the string in double quotes. The value is unescaped. If unescaping failed, it returns an empty string.
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
Alias: `simpleJSONExtractString`.
**Returned value**
Examples:
It returns `1` if the field exists, `0` otherwise.
``` sql
visitParamExtractString('{"abc":"\\n\\u0000"}', 'abc') = '\n\0';
visitParamExtractString('{"abc":"\\u263a"}', 'abc') = '☺';
visitParamExtractString('{"abc":"\\u263"}', 'abc') = '';
visitParamExtractString('{"abc":"hello}', 'abc') = '';
**Example**
Query:
```sql
CREATE TABLE jsons
(
`json` String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"true","qux":1}');
SELECT simpleJSONHas(json, 'foo') FROM jsons;
SELECT simpleJSONHas(json, 'bar') FROM jsons;
```
```response
1
0
```
## simpleJSONExtractUInt
Parses `UInt64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`.
**Syntax**
```sql
simpleJSONExtractUInt(json, field_name)
```
**Parameters**
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
**Returned value**
It returns the number parsed from the field if the field exists and contains a number, `0` otherwise.
**Example**
Query:
```sql
CREATE TABLE jsons
(
`json` String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"4e3"}');
INSERT INTO jsons VALUES ('{"foo":3.4}');
INSERT INTO jsons VALUES ('{"foo":5}');
INSERT INTO jsons VALUES ('{"foo":"not1number"}');
INSERT INTO jsons VALUES ('{"baz":2}');
SELECT simpleJSONExtractUInt(json, 'foo') FROM jsons ORDER BY json;
```
```response
0
4
0
3
5
```
## simpleJSONExtractInt
Parses `Int64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`.
**Syntax**
```sql
simpleJSONExtractInt(json, field_name)
```
**Parameters**
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
**Returned value**
It returns the number parsed from the field if the field exists and contains a number, `0` otherwise.
**Example**
Query:
```sql
CREATE TABLE jsons
(
`json` String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"-4e3"}');
INSERT INTO jsons VALUES ('{"foo":-3.4}');
INSERT INTO jsons VALUES ('{"foo":5}');
INSERT INTO jsons VALUES ('{"foo":"not1number"}');
INSERT INTO jsons VALUES ('{"baz":2}');
SELECT simpleJSONExtractInt(json, 'foo') FROM jsons ORDER BY json;
```
```response
0
-4
0
-3
5
```
## simpleJSONExtractFloat
Parses `Float64` from the value of the field named `field_name`. If this is a string field, it tries to parse a number from the beginning of the string. If the field does not exist, or it exists but does not contain a number, it returns `0`.
**Syntax**
```sql
simpleJSONExtractFloat(json, field_name)
```
**Parameters**
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
**Returned value**
It returns the number parsed from the field if the field exists and contains a number, `0` otherwise.
**Example**
Query:
```sql
CREATE TABLE jsons
(
`json` String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"-4e3"}');
INSERT INTO jsons VALUES ('{"foo":-3.4}');
INSERT INTO jsons VALUES ('{"foo":5}');
INSERT INTO jsons VALUES ('{"foo":"not1number"}');
INSERT INTO jsons VALUES ('{"baz":2}');
SELECT simpleJSONExtractFloat(json, 'foo') FROM jsons ORDER BY json;
```
```response
0
-4000
0
-3.4
5
```
## simpleJSONExtractBool
Parses a true/false value from the value of the field named `field_name`. The result is `UInt8`.
**Syntax**
```sql
simpleJSONExtractBool(json, field_name)
```
**Parameters**
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
**Returned value**
It returns `1` if the value of the field is `true`, `0` otherwise. This means this function will return `0` including (and not only) in the following cases:
- If the field doesn't exist.
- If the field contains `true` as a string, e.g.: `{"field":"true"}`.
- If the field contains `1` as a numerical value.
**Example**
Query:
```sql
CREATE TABLE jsons
(
`json` String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":false,"bar":true}');
INSERT INTO jsons VALUES ('{"foo":"true","qux":1}');
SELECT simpleJSONExtractBool(json, 'bar') FROM jsons ORDER BY json;
SELECT simpleJSONExtractBool(json, 'foo') FROM jsons ORDER BY json;
```
```response
0
1
0
0
```
## simpleJSONExtractRaw
Returns the value of the field named `field_name` as a `String`, including separators.
**Syntax**
```sql
simpleJSONExtractRaw(json, field_name)
```
**Parameters**
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
**Returned value**
It returns the value of the field as a [`String`](../../sql-reference/data-types/string.md#string), including separators if the field exists, or an empty `String` otherwise.
**Example**
Query:
```sql
CREATE TABLE jsons
(
`json` String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"-4e3"}');
INSERT INTO jsons VALUES ('{"foo":-3.4}');
INSERT INTO jsons VALUES ('{"foo":5}');
INSERT INTO jsons VALUES ('{"foo":{"def":[1,2,3]}}');
INSERT INTO jsons VALUES ('{"baz":2}');
SELECT simpleJSONExtractRaw(json, 'foo') FROM jsons ORDER BY json;
```
```response
"-4e3"
-3.4
5
{"def":[1,2,3]}
```
## simpleJSONExtractString
Parses `String` in double quotes from the value of the field named `field_name`.
**Syntax**
```sql
simpleJSONExtractString(json, field_name)
```
**Parameters**
- `json`: The JSON in which the field is searched for. [String](../../sql-reference/data-types/string.md#string)
- `field_name`: The name of the field to search for. [String literal](../syntax#string)
**Returned value**
It returns the value of a field as a [`String`](../../sql-reference/data-types/string.md#string), including separators. The value is unescaped. It returns an empty `String` if the field doesn't contain a double-quoted string, if unescaping fails, or if the field doesn't exist.
**Implementation details**
There is currently no support for code points in the format `\uXXXX\uYYYY` that are not from the basic multilingual plane (they are converted to CESU-8 instead of UTF-8).
**Example**
Query:
```sql
CREATE TABLE jsons
(
`json` String
)
ENGINE = Memory;
INSERT INTO jsons VALUES ('{"foo":"\\n\\u0000"}');
INSERT INTO jsons VALUES ('{"foo":"\\u263"}');
INSERT INTO jsons VALUES ('{"foo":"\\u263a"}');
INSERT INTO jsons VALUES ('{"foo":"hello}');
SELECT simpleJSONExtractString(json, 'foo') FROM jsons ORDER BY json;
```
```response
\n\0

☺

```
## visitParamHas
This function is [an alias of `simpleJSONHas`](./json-functions#simplejsonhas).
## visitParamExtractUInt
This function is [an alias of `simpleJSONExtractUInt`](./json-functions#simplejsonextractuint).
## visitParamExtractInt
This function is [an alias of `simpleJSONExtractInt`](./json-functions#simplejsonextractint).
## visitParamExtractFloat
This function is [an alias of `simpleJSONExtractFloat`](./json-functions#simplejsonextractfloat).
## visitParamExtractBool
This function is [an alias of `simpleJSONExtractBool`](./json-functions#simplejsonextractbool).
## visitParamExtractRaw
This function is [an alias of `simpleJSONExtractRaw`](./json-functions#simplejsonextractraw).
## visitParamExtractString
This function is [an alias of `simpleJSONExtractString`](./json-functions#simplejsonextractstring).
# JSONExtract functions
The following functions are based on [simdjson](https://github.com/lemire/simdjson) designed for more complex JSON parsing requirements.

View File

@ -299,6 +299,18 @@ sin(x)
Type: [Float*](../../sql-reference/data-types/float.md).
**Example**
Query:
```sql
SELECT sin(1.23);
```
```response
0.9424888019316975
```
## cos
Returns the cosine of the argument.

File diff suppressed because it is too large Load Diff

View File

@ -588,8 +588,41 @@ Result:
## substringUTF8
Like `substring` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
Returns the substring of a string `s` which starts at the specified index `offset`, counted in Unicode code points. Counting starts from `1`. If `offset` is `0`, an empty string is returned. If `offset` is negative, the substring starts `offset` code points from the end of the string rather than from the beginning. An optional argument `length` specifies the maximum number of code points the returned substring may have.
Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
**Syntax**
```sql
substringUTF8(s, offset[, length])
```
**Arguments**
- `s`: The string to calculate a substring from. [String](../../sql-reference/data-types/string.md), [FixedString](../../sql-reference/data-types/fixedstring.md) or [Enum](../../sql-reference/data-types/enum.md)
- `offset`: The starting position of the substring in `s` . [(U)Int*](../../sql-reference/data-types/int-uint.md).
- `length`: The maximum length of the substring. [(U)Int*](../../sql-reference/data-types/int-uint.md). Optional.
**Returned value**
A substring of `s` of at most `length` code points, starting at index `offset`.
**Implementation details**
Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
**Example**
```sql
SELECT 'Täglich grüßt das Murmeltier.' AS str,
substringUTF8(str, 9),
substringUTF8(str, 9, 5)
```
```response
Täglich grüßt das Murmeltier. grüßt das Murmeltier. grüßt
```
## substringIndex
@ -624,7 +657,39 @@ Result:
## substringIndexUTF8
Like `substringIndex` but for Unicode code points. Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
Returns the substring of `s` before `count` occurrences of the delimiter `delim`, specifically for Unicode code points.
Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
**Syntax**
```sql
substringIndexUTF8(s, delim, count)
```
**Arguments**
- `s`: The string to extract substring from. [String](../../sql-reference/data-types/string.md).
- `delim`: The character to split. [String](../../sql-reference/data-types/string.md).
- `count`: The number of occurrences of the delimiter to count before extracting the substring. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. [UInt or Int](../data-types/int-uint.md)
**Returned value**
A substring [String](../../sql-reference/data-types/string.md) of `s` before `count` occurrences of `delim`.
**Implementation details**
Assumes that the string contains valid UTF-8 encoded text. If this assumption is violated, no exception is thrown and the result is undefined.
**Example**
```sql
SELECT substringIndexUTF8('www.straßen-in-europa.de', '.', 2)
```
```response
www.straßen-in-europa
```
## appendTrailingCharIfAbsent

View File

@ -30,7 +30,6 @@ position(haystack, needle[, start_pos])
Alias:
- `position(needle IN haystack)`
- `locate(haystack, needle[, start_pos])`.
**Arguments**
@ -49,7 +48,7 @@ If substring `needle` is empty, these rules apply:
- if `start_pos >= 1` and `start_pos <= length(haystack) + 1`: return `start_pos`
- otherwise: return `0`
The same rules also apply to functions `positionCaseInsensitive`, `positionUTF8` and `positionCaseInsensitiveUTF8`
The same rules also apply to functions `locate`, `positionCaseInsensitive`, `positionUTF8` and `positionCaseInsensitiveUTF8`.
Type: `Integer`.
@ -114,6 +113,21 @@ SELECT
└─────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┴────────────────────────┘
```
## locate
Like [position](#position) but with arguments `haystack` and `needle` switched.
The behavior of this function depends on the ClickHouse version:
- in versions < v24.3, `locate` was an alias of function `position` and accepted arguments `(haystack, needle[, start_pos])`.
- in versions >= v24.3, `locate` is an individual function (for better compatibility with MySQL) and accepts arguments `(needle, haystack[, start_pos])`. The previous behavior
can be restored using the setting [function_locate_has_mysql_compatible_argument_order = false](../../operations/settings/settings.md#function-locate-has-mysql-compatible-argument-order).
**Syntax**
``` sql
locate(needle, haystack[, start_pos])
```
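A short example under the MySQL-compatible argument order (a sketch; assumes a server version >= 24.3 with the default setting):
```sql
-- The needle comes first: this returns 3, the 1-based position of the first match
SELECT locate('ca', 'abcabc');
```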
## positionCaseInsensitive
Like [position](#position) but searches case-insensitively.

View File

@ -353,7 +353,7 @@ For efficiency, the `and` and `or` functions accept any number of arguments. The
ClickHouse supports the `IS NULL` and `IS NOT NULL` operators.
### IS NULL
### IS NULL {#is_null}
- For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NULL` operator returns:
- `1`, if the value is `NULL`.
@ -374,7 +374,7 @@ SELECT x+100 FROM t_null WHERE y IS NULL
└──────────────┘
```
### IS NOT NULL
### IS NOT NULL {#is_not_null}
- For [Nullable](../../sql-reference/data-types/nullable.md) type values, the `IS NOT NULL` operator returns:
- `0`, if the value is `NULL`.

View File

@ -335,7 +335,7 @@ The `ALTER` query lets you create and delete separate elements (columns) in nest
There is no support for deleting columns in the primary key or the sampling key (columns that are used in the `ENGINE` expression). Changing the type for columns that are included in the primary key is only possible if this change does not cause the data to be modified (for example, you are allowed to add values to an Enum or to change a type from `DateTime` to `UInt32`).
If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](/docs/en/sql-reference/statements/insert-into.md/#inserting-the-results-of-select) query, then switch the tables using the [RENAME](/docs/en/sql-reference/statements/rename.md/#rename-table) query and delete the old table. You can use the [clickhouse-copier](/docs/en/operations/utilities/clickhouse-copier.md) as an alternative to the `INSERT SELECT` query.
If the `ALTER` query is not sufficient to make the table changes you need, you can create a new table, copy the data to it using the [INSERT SELECT](/docs/en/sql-reference/statements/insert-into.md/#inserting-the-results-of-select) query, then switch the tables using the [RENAME](/docs/en/sql-reference/statements/rename.md/#rename-table) query and delete the old table.
The `ALTER` query blocks all reads and writes for the table. In other words, if a long `SELECT` is running at the time of the `ALTER` query, the `ALTER` query will wait for it to complete. At the same time, all new queries to the same table will wait while this `ALTER` is running.

View File

@ -350,6 +350,7 @@ ALTER TABLE mt DELETE IN PARTITION ID '2' WHERE p = 2;
You can specify the partition expression in `ALTER ... PARTITION` queries in different ways:
- As a value from the `partition` column of the `system.parts` table. For example, `ALTER TABLE visits DETACH PARTITION 201901`.
- Using the keyword `ALL`. It can be used only with DROP/DETACH/ATTACH. For example, `ALTER TABLE visits ATTACH PARTITION ALL`.
- As a tuple of expressions or constants that matches (in types) the table partitioning keys tuple. In the case of a single element partitioning key, the expression should be wrapped in the `tuple (...)` function. For example, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`.
- Using the partition ID. Partition ID is a string identifier of the partition (human-readable, if possible) that is used as the name of the partition in the file system and in ZooKeeper. The partition ID must be specified in the `PARTITION ID` clause, in single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`.
- In the [ALTER ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) query, to specify the name of a part, use string literal with a value from the `name` column of the [system.detached_parts](/docs/en/operations/system-tables/detached_parts.md/#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`.

View File

@ -5,7 +5,7 @@ sidebar_label: cluster
title: "cluster, clusterAllReplicas"
---
Allows to access all shards in an existing cluster which configured in `remote_servers` section without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. One replica of each shard is queried.
Allows to access all shards (configured in the `remote_servers` section) of a cluster without creating a [Distributed](../../engines/table-engines/special/distributed.md) table. Only one replica of each shard is queried.
`clusterAllReplicas` function — same as `cluster`, but all replicas are queried. Each replica in a cluster is used as a separate shard/connection.
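For example (a sketch; the cluster name `default` is an assumption, substitute a cluster from your `remote_servers` section):
```sql
-- One row per shard (a single replica each) vs. one row per replica
SELECT count() FROM cluster('default', system.one);
SELECT count() FROM clusterAllReplicas('default', system.one);
```
The first query returns the number of shards, the second the total number of replicas in the cluster.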

View File

@ -6,6 +6,11 @@ sidebar_label: jdbc
# jdbc
:::note
clickhouse-jdbc-bridge contains experimental codes and is no longer supported. It may contain reliability issues and security vulnerabilities. Use it at your own risk.
ClickHouse recommends using the built-in table functions in ClickHouse, which provide a better alternative for ad-hoc querying scenarios (Postgres, MySQL, MongoDB, etc).
:::
`jdbc(datasource, schema, table)` - returns a table that is connected via a JDBC driver.
This table function requires a separate [clickhouse-jdbc-bridge](https://github.com/ClickHouse/clickhouse-jdbc-bridge) program to be running.

View File

@ -585,10 +585,6 @@ ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand());
INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
```
:::danger Attention
This approach is not suitable for sharding large tables. There is a separate tool, [clickhouse-copier](../operations/utilities/clickhouse-copier.md), that is specifically designed to reshard arbitrarily large tables.
:::
As you would expect, computationally heavy queries run three times faster when they are executed on three servers instead of one.
In this case, we used a cluster with three shards, each containing a single replica.

View File

@ -24,10 +24,6 @@ sidebar_label: "Резервное копирование данных"
Some local filesystems provide snapshot functionality (for example, [ZFS](https://en.wikipedia.org/wiki/ZFS)), but they might not be the best choice for serving live queries. A possible solution is to create additional replicas with this kind of filesystem and exclude them from the [Distributed](../engines/table-engines/special/distributed.md) tables that are used for `SELECT` queries. Snapshots on such replicas will be out of reach of any queries that modify data. As a bonus, these replicas might have special hardware configurations with more disks attached per server, which would be cost-effective.
## clickhouse-copier {#clickhouse-copier}
[clickhouse-copier](utilities/clickhouse-copier.md) is a versatile tool that was initially created to reshard petabyte-sized tables. It can also be used for backup and restore purposes because it reliably copies data between ClickHouse tables and clusters.
For smaller volumes of data, a simple `INSERT INTO ... SELECT ...` to remote tables might work as well.
## Manipulations with Partitions {#manipuliatsii-s-partitsiiami}

View File

@ -1,183 +0,0 @@
---
slug: /ru/operations/utilities/clickhouse-copier
sidebar_position: 59
sidebar_label: clickhouse-copier
---
# clickhouse-copier {#clickhouse-copier}
Copies data from the tables in one cluster to tables in another (or the same) cluster.
You can run multiple `clickhouse-copier` instances on different servers to perform the same job. ZooKeeper is used for syncing the processes.
After starting, `clickhouse-copier`:
- Connects to ZooKeeper and receives:
- Copying jobs.
- The state of the copying jobs.
- Performs the jobs.
Each running process chooses the "closest" shard of the source cluster and copies the data into the destination cluster, resharding the data if necessary.
`clickhouse-copier` tracks the changes in ZooKeeper and applies them on the fly.
To reduce network traffic, we recommend running `clickhouse-copier` on the same server where the source data is located.
## Running Clickhouse-copier {#zapusk-clickhouse-copier}
The utility should be run manually as follows:
``` bash
$ clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-dir /path/to/dir
```
Startup parameters:
- `daemon` - starts `clickhouse-copier` in daemon mode.
- `config` - the path to the `zookeeper.xml` file with the parameters for the connection to ZooKeeper.
- `task-path` - the path to the ZooKeeper node. This node is used for syncing `clickhouse-copier` processes and for storing tasks. Tasks are stored in `$task-path/description`.
- `task-file` - optional path to a file with the task configuration for the initial upload to ZooKeeper.
- `task-upload-force` - upload `task-file` to ZooKeeper even if it has already been uploaded.
- `base-dir` - the path to logs and auxiliary files. On startup, `clickhouse-copier` creates `clickhouse-copier_YYYYMMHHSS_<PID>` subdirectories in `$base-dir`. If this parameter is omitted, the directories are created in the directory where `clickhouse-copier` was launched.
## Format of Zookeeper.xml {#format-zookeeper-xml}
``` xml
<clickhouse>
<logger>
<level>trace</level>
<size>100M</size>
<count>3</count>
</logger>
<zookeeper>
<node index="1">
<host>127.0.0.1</host>
<port>2181</port>
</node>
</zookeeper>
</clickhouse>
```
## Configuration of Copying Tasks {#konfiguratsiia-zadanii-na-kopirovanie}
``` xml
<clickhouse>
<!-- Configuration of clusters as in an ordinary server config -->
<remote_servers>
<source_cluster>
<!--
source cluster & destination clusters accept exactly the same
parameters as parameters for the usual Distributed table
see https://clickhouse.com/docs/ru/engines/table-engines/special/distributed/
-->
<shard>
<internal_replication>false</internal_replication>
<replica>
<host>127.0.0.1</host>
<port>9000</port>
<!--
<user>default</user>
<password>default</password>
<secure>1</secure>
-->
</replica>
</shard>
...
</source_cluster>
<destination_cluster>
...
</destination_cluster>
</remote_servers>
<!-- How many simultaneously active workers are possible. If you run more workers superfluous workers will sleep. -->
<max_workers>2</max_workers>
<!-- Setting used to fetch (pull) data from source cluster tables -->
<settings_pull>
<readonly>1</readonly>
</settings_pull>
<!-- Setting used to insert (push) data to destination cluster tables -->
<settings_push>
<readonly>0</readonly>
</settings_push>
<!-- Common setting for fetch (pull) and insert (push) operations. Also, copier process context uses it.
They are overlaid by <settings_pull/> and <settings_push/> respectively. -->
<settings>
<connect_timeout>3</connect_timeout>
<!-- Sync insert is set forcibly, leave it here just in case. -->
<distributed_foreground_insert>1</distributed_foreground_insert>
</settings>
<!-- Copying tasks description.
You could specify several table task in the same task description (in the same ZooKeeper node), they will be performed
sequentially.
-->
<tables>
<!-- A table task, copies one table. -->
<table_hits>
<!-- Source cluster name (from <remote_servers/> section) and tables in it that should be copied -->
<cluster_pull>source_cluster</cluster_pull>
<database_pull>test</database_pull>
<table_pull>hits</table_pull>
<!-- Destination cluster name and tables in which the data should be inserted -->
<cluster_push>destination_cluster</cluster_push>
<database_push>test</database_push>
<table_push>hits2</table_push>
<!-- Engine of the destination tables.
If the destination tables have not been created, workers create them using the column definitions from the source tables and the engine
definition from here.
NOTE: If the first worker starts inserting data and detects that the destination partition is not empty, then the partition will
be dropped and refilled; take this into account if you already have some data in the destination tables. You can directly
specify the partitions that should be copied in <enabled_partitions/>; they should be in quoted format, like the partition column of
the system.parts table.
-->
<engine>
ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hits2', '{replica}')
PARTITION BY toMonday(date)
ORDER BY (CounterID, EventDate)
</engine>
<!-- Sharding key used to insert data to destination cluster -->
<sharding_key>jumpConsistentHash(intHash64(UserID), 2)</sharding_key>
<!-- Optional expression that filters data while pulling it from the source servers -->
<where_condition>CounterID != 0</where_condition>
<!-- This section specifies the partitions that should be copied; other partitions will be ignored.
Partition names should have the same format as the
partition column of the system.parts table (i.e. quoted text).
Since the partition keys of the source and destination clusters could be different,
these partition names specify destination partitions.
NOTE: Although this section is optional (if it is not specified, all partitions will be copied),
it is strongly recommended to specify the partitions explicitly.
If you already have some partitions ready on the destination cluster, they
will be removed at the start of the copying, since they will be interpreted
as unfinished data from the previous copying!!!
-->
<enabled_partitions>
<partition>'2018-02-26'</partition>
<partition>'2018-03-05'</partition>
...
</enabled_partitions>
</table_hits>
<!-- Next table to copy. It is not copied until the previous table has finished copying. -->
<table_visits>
...
</table_visits>
...
</tables>
</clickhouse>
```
`clickhouse-copier` tracks changes in `/task/path/description` and applies them on the fly. For example, if you change the value of `max_workers`, the number of processes running tasks also changes.

View File

@ -7,7 +7,6 @@ sidebar_position: 56
# ClickHouse utilities {#utility-clickhouse}
- [clickhouse-local](clickhouse-local.md) - allows running SQL queries on data without stopping the ClickHouse server, similar to the `awk` utility.
- [clickhouse-copier](clickhouse-copier.md) - copies (and re-shards) data from one cluster to another.
- [clickhouse-benchmark](../../operations/utilities/clickhouse-benchmark.md) - establishes a connection to a ClickHouse server and runs the specified queries in a loop.
- [clickhouse-format](../../operations/utilities/clickhouse-format.md) - formats input queries.
- [ClickHouse obfuscator](../../operations/utilities/clickhouse-obfuscator.md) - obfuscates data.

View File

@ -94,7 +94,7 @@ RENAME COLUMN [IF EXISTS] name to new_name
Renames the column `name` to `new_name`. If the `IF EXISTS` clause is specified, the query does not return an error if the column `name` does not exist. Since renaming does not touch the physical data of the column, the query completes almost instantly.
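For illustration, a minimal sketch with hypothetical table and column names:
``` bash
clickhouse-client --query "ALTER TABLE visits RENAME COLUMN IF EXISTS old_browser TO browser"
```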
**NOTE**: Columns that are part of the primary key or the sorting key (defined with `ORDER BY` or `PRIMARY KEY`) cannot be renamed. Trying to rename these columns produces `SQL Error [524]`.
Example:
@ -254,7 +254,7 @@ SELECT groupArray(x), groupArray(s) FROM tmp;
It is not possible to delete columns that are part of the primary key or the sampling key (in general, columns that are used in the `ENGINE` expression). Changing the type of a column that is part of the primary key is possible only if the change does not alter the data (for example, adding a value to an Enum or changing the type from `DateTime` to `UInt32` is allowed).
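For example, a hedged sketch of an allowed change (hypothetical table, assuming `browser` was previously `Enum8('Chrome' = 1, 'Firefox' = 2)`; the Enum only gains a new value, so the stored data does not change):
``` bash
clickhouse-client --query "ALTER TABLE visits MODIFY COLUMN browser Enum8('Chrome' = 1, 'Firefox' = 2, 'Safari' = 3)"
```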
If the capabilities of the `ALTER` query are not enough for the table change you need, you can create a new table, copy the data into it using the [INSERT SELECT](../insert-into.md#inserting-the-results-of-select) query, swap the tables using the [RENAME](../rename.md#rename-table) query, and delete the old table. As an alternative to the `INSERT SELECT` query, you can use the [clickhouse-copier](../../../sql-reference/statements/alter/index.md) tool.
If the capabilities of the `ALTER` query are not enough for the table change you need, you can create a new table, copy the data into it using the [INSERT SELECT](../insert-into.md#inserting-the-results-of-select) query, swap the tables using the [RENAME](../rename.md#rename-table) query, and delete the old table.
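A minimal sketch of this workaround, assuming a hypothetical `test.visits` table (adjust the structure of the new table to whatever change you actually need):
``` bash
clickhouse-client --multiquery <<'SQL'
-- Create the new table; here it simply copies the structure and engine of the old one,
-- in practice you would declare the changed structure you need.
CREATE TABLE test.visits_new AS test.visits;
INSERT INTO test.visits_new SELECT * FROM test.visits;
RENAME TABLE test.visits TO test.visits_old, test.visits_new TO test.visits;
DROP TABLE test.visits_old;
SQL
```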
The `ALTER` query blocks all reads and writes for the table. In other words, if a long `SELECT` was running at the time of the `ALTER` query, the `ALTER` query first waits for it to complete. During that time, all new queries to the same table wait until this `ALTER` finishes.

View File

@ -582,8 +582,6 @@ ENGINE = Distributed(perftest_3shards_1replicas, tutorial, hits_local, rand());
INSERT INTO tutorial.hits_all SELECT * FROM tutorial.hits_v1;
```
!!! warning "Note:"
This approach is not suitable for sharding large tables. There is a separate tool, [clickhouse-copier](../operations/utilities/clickhouse-copier.md), that can re-shard arbitrarily large tables.
As you would expect, computationally heavy queries run N times faster when they use 3 servers instead of one.

View File

@ -24,12 +24,6 @@ sidebar_label: "\u6570\u636E\u5907\u4EFD"
Some local filesystems provide snapshot functionality (for example, [ZFS](https://en.wikipedia.org/wiki/ZFS)), but they may not be the best choice for serving live queries. A possible solution is to create additional replicas with such a filesystem and exclude them from the [Distributed](../engines/table-engines/special/distributed.md) tables used for `SELECT` queries. Snapshots on such replicas are out of reach of any queries that modify data. As a bonus, these replicas may have special hardware configurations with more disks attached per server, which would be cost-effective.
## clickhouse-copier {#clickhouse-copier}
[clickhouse-copier](utilities/clickhouse-copier.md) is a versatile tool that was initially created to re-shard petabyte-sized tables. Because it can reliably copy data between ClickHouse tables and clusters, it can also be used for backup and restore purposes.
For smaller volumes of data, a simple `INSERT INTO ... SELECT ...` into a remote table may also work.
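As a hedged sketch of that approach (the host, credentials, and table names are hypothetical):
``` bash
clickhouse-client --query "INSERT INTO FUNCTION remote('backup-host:9000', 'db', 'table', 'user', 'password') SELECT * FROM db.table"
```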
## Manipulations with parts {#manipulations-with-parts}
ClickHouse allows using the `ALTER TABLE ... FREEZE PARTITION ...` query to create a local copy of table partitions. This is implemented using hardlinks into the `/var/lib/clickhouse/shadow/` folder, so it usually does not consume extra disk space for old data. The created file copies are not handled by the ClickHouse server, so you can just leave them there: you will have a simple backup that does not require any additional external system, but it is still prone to hardware issues. For this reason, it is better to copy them remotely to another location and then remove the local copies. Distributed filesystems and object stores are still good options for this, but normal attached file servers with a large enough capacity can work as well (in this case the transfer happens via the network filesystem or perhaps [rsync](https://en.wikipedia.org/wiki/Rsync)).
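For instance, a hedged sketch (the table name and partition value are hypothetical and must match your partition key format):
``` bash
clickhouse-client --query "ALTER TABLE test.hits FREEZE PARTITION '2024-03-04'"
```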

View File

@ -1,172 +0,0 @@
---
slug: /zh/operations/utilities/clickhouse-copier
---
# clickhouse-copier {#clickhouse-copier}
Copies data from tables in one cluster to tables in another (or the same) cluster.
You can run multiple `clickhouse-copier` instances on different servers to perform the same job. ZooKeeper is used to synchronize the processes.
After starting, `clickhouse-copier`:
- Connects to ZooKeeper and receives:
- copying jobs;
- the state of the copying jobs.
- It performs the jobs.
Each running process chooses the "closest" shard of the source cluster and copies the data to the destination cluster, re-sharding the data if necessary.
`clickhouse-copier` tracks changes in ZooKeeper and applies them on the fly.
To reduce network traffic, we recommend running `clickhouse-copier` on the same server where the source data is located.
## Running clickhouse-copier {#running-clickhouse-copier}
The utility should be run manually:
``` bash
clickhouse-copier --daemon --config zookeeper.xml --task-path /task/path --base-dir /path/to/dir
```
Parameters:
- `daemon` - starts `clickhouse-copier` in daemon mode.
- `config` - path to the `zookeeper.xml` file with ZooKeeper connection parameters.
- `task-path` - path to a ZooKeeper node. This node is used for synchronizing `clickhouse-copier` processes and for storing tasks. Tasks are stored in `$task-path/description`.
- `task-file` - optional path to a file with the task configuration for the initial upload to ZooKeeper.
- `task-upload-force` - force uploading of `task-file` even if the node already exists.
- `base-dir` - path for logs and auxiliary files. On startup, `clickhouse-copier` creates `clickhouse-copier_YYYYMMHHSS_<PID>` subdirectories in `$base-dir`. If the parameter is omitted, the directories are created in the directory where `clickhouse-copier` was launched.
## Format of zookeeper.xml {#format-of-zookeeper-xml}
``` xml
<clickhouse>
<logger>
<level>trace</level>
<size>100M</size>
<count>3</count>
</logger>
<zookeeper>
<node index="1">
<host>127.0.0.1</host>
<port>2181</port>
</node>
</zookeeper>
</clickhouse>
```
## Configuration of copying tasks {#configuration-of-copying-tasks}
``` xml
<clickhouse>
<!-- Configuration of clusters as in an ordinary server config -->
<remote_servers>
<source_cluster>
<shard>
<internal_replication>false</internal_replication>
<replica>
<host>127.0.0.1</host>
<port>9000</port>
</replica>
</shard>
...
</source_cluster>
<destination_cluster>
...
</destination_cluster>
</remote_servers>
<!-- How many simultaneously active workers are possible. If you run more workers, the superfluous ones will sleep. -->
<max_workers>2</max_workers>
<!-- Setting used to fetch (pull) data from source cluster tables -->
<settings_pull>
<readonly>1</readonly>
</settings_pull>
<!-- Setting used to insert (push) data to destination cluster tables -->
<settings_push>
<readonly>0</readonly>
</settings_push>
<!-- Common settings for fetch (pull) and insert (push) operations. The copier process context also uses them.
They are overlaid by <settings_pull/> and <settings_push/> respectively. -->
<settings>
<connect_timeout>3</connect_timeout>
<!-- Sync insert is set forcibly, leave it here just in case. -->
<distributed_foreground_insert>1</distributed_foreground_insert>
</settings>
<!-- Description of the copying tasks.
You can specify several table tasks in the same task description (in the same ZooKeeper node); they will be performed
sequentially.
-->
<tables>
<!-- A table task, copies one table. -->
<table_hits>
<!-- Source cluster name (from <remote_servers/> section) and tables in it that should be copied -->
<cluster_pull>source_cluster</cluster_pull>
<database_pull>test</database_pull>
<table_pull>hits</table_pull>
<!-- Destination cluster name and tables in which the data should be inserted -->
<cluster_push>destination_cluster</cluster_push>
<database_push>test</database_push>
<table_push>hits2</table_push>
<!-- Engine of the destination tables.
If the destination tables have not been created, workers create them using the column definitions from the source tables and the engine
definition from here.
NOTE: If the first worker starts inserting data and detects that the destination partition is not empty, then the partition will
be dropped and refilled; take this into account if you already have some data in the destination tables. You can directly
specify the partitions that should be copied in <enabled_partitions/>; they should be in quoted format, like the partition column of
the system.parts table.
-->
<engine>
ENGINE=ReplicatedMergeTree('/clickhouse/tables/{cluster}/{shard}/hits2', '{replica}')
PARTITION BY toMonday(date)
ORDER BY (CounterID, EventDate)
</engine>
<!-- Sharding key used to insert data to destination cluster -->
<sharding_key>jumpConsistentHash(intHash64(UserID), 2)</sharding_key>
<!-- Optional expression that filters data while pulling it from the source servers -->
<where_condition>CounterID != 0</where_condition>
<!-- This section specifies the partitions that should be copied; other partitions will be ignored.
Partition names should have the same format as the
partition column of the system.parts table (i.e. quoted text).
Since the partition keys of the source and destination clusters could be different,
these partition names specify destination partitions.
NOTE: Although this section is optional (if it is not specified, all partitions will be copied),
it is strongly recommended to specify the partitions explicitly.
If you already have some partitions ready on the destination cluster, they
will be removed at the start of the copying, since they will be interpreted
as unfinished data from the previous copying!!!
-->
<enabled_partitions>
<partition>'2018-02-26'</partition>
<partition>'2018-03-05'</partition>
...
</enabled_partitions>
</table_hits>
<!-- Next table to copy. It is not copied until the previous table has finished copying. -->
<table_visits>
...
</table_visits>
...
</tables>
</clickhouse>
```
`clickhouse-copier` tracks changes to `/task/path/description` and applies them on the fly. For example, if you change the value of `max_workers`, the number of processes running tasks also changes.

View File

@ -4,5 +4,4 @@ slug: /zh/operations/utilities/
# Utilities {#clickhouse-utility}
- [Local queries](clickhouse-local.md) - performs queries on data without stopping the ClickHouse server (similar to the `awk` command).
- [Cross-cluster copying](clickhouse-copier.md) - copies data between different clusters.
- [Performance testing](clickhouse-benchmark.md) - connects to a ClickHouse server and runs performance tests.

View File

@ -150,7 +150,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String)
Deleting columns that are part of the primary key or the sampling key (in general, columns used in the `ENGINE` expression) is not supported. Changing the type of a column included in the primary key is possible only if the change does not alter the data (for example, adding a value to an Enum or changing the type from `DateTime` to `UInt32`).
If the `ALTER` query is not enough to make the table changes you need, you can create a new table, copy the data into it with [INSERT SELECT](../../sql-reference/statements/insert-into.md#inserting-the-results-of-select), rename the new table to the original name with [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename), and delete the old table. You can use [clickhouse-copier](../../operations/utilities/clickhouse-copier.md) as an alternative to `INSERT SELECT`.
If the `ALTER` query is not enough to make the table changes you need, you can create a new table, copy the data into it with [INSERT SELECT](../../sql-reference/statements/insert-into.md#inserting-the-results-of-select), rename the new table to the original name with [RENAME](../../sql-reference/statements/misc.md#misc_operations-rename), and delete the old table.
The `ALTER` query blocks all reads and writes for the table. In other words, if a long `SELECT` is running at the time of the `ALTER` query, the `ALTER` query waits for it to complete. At the same time, all new queries to the same table wait while this `ALTER` is running.

View File

@ -50,8 +50,6 @@ contents:
dst: /etc/init.d/clickhouse-server
- src: clickhouse-server.service
dst: /lib/systemd/system/clickhouse-server.service
- src: root/usr/bin/clickhouse-copier
dst: /usr/bin/clickhouse-copier
- src: root/usr/bin/clickhouse-server
dst: /usr/bin/clickhouse-server
# clickhouse-keeper part

View File

@ -122,7 +122,6 @@ add_subdirectory (local)
add_subdirectory (benchmark)
add_subdirectory (extract-from-config)
add_subdirectory (compressor)
add_subdirectory (copier)
add_subdirectory (format)
add_subdirectory (obfuscator)
add_subdirectory (install)
@ -200,7 +199,6 @@ clickhouse_program_install(clickhouse-server server)
clickhouse_program_install(clickhouse-client client chc)
clickhouse_program_install(clickhouse-local local chl ch)
clickhouse_program_install(clickhouse-benchmark benchmark)
clickhouse_program_install(clickhouse-copier copier)
clickhouse_program_install(clickhouse-extract-from-config extract-from-config)
clickhouse_program_install(clickhouse-compressor compressor)
clickhouse_program_install(clickhouse-format format)

View File

@ -34,6 +34,7 @@
#include <Common/StudentTTest.h>
#include <Common/CurrentMetrics.h>
#include <Common/ErrorCodes.h>
#include <Core/BaseSettingsProgramOptions.h>
/** A tool for evaluating ClickHouse performance.
@ -623,7 +624,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
;
Settings settings;
settings.addProgramOptions(desc);
addProgramOptions(settings, desc);
boost::program_options::variables_map options;
boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options);

View File

@ -50,6 +50,7 @@
#include <Functions/registerFunctions.h>
#include <AggregateFunctions/registerAggregateFunctions.h>
#include <Formats/registerFormats.h>
#include <Formats/FormatFactory.h>
namespace fs = std::filesystem;
using namespace std::literals;
@ -932,7 +933,7 @@ void Client::addOptions(OptionsDescription & options_description)
("config,c", po::value<std::string>(), "config-file path (another shorthand)")
("connection", po::value<std::string>(), "connection to use (from the client config), by default connection name is hostname")
("secure,s", "Use TLS connection")
("no-secure,s", "Don't use TLS connection")
("no-secure", "Don't use TLS connection")
("user,u", po::value<std::string>()->default_value("default"), "user")
("password", po::value<std::string>(), "password")
("ask-password", "ask-password")
@ -1137,6 +1138,13 @@ void Client::processOptions(const OptionsDescription & options_description,
}
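/// Returns true if stdout is redirected to a regular file; used below to pick a default output format based on the file name.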
static bool checkIfStdoutIsRegularFile()
{
struct stat file_stat;
return fstat(STDOUT_FILENO, &file_stat) == 0 && S_ISREG(file_stat.st_mode);
}
void Client::processConfig()
{
if (!queries.empty() && config().has("queries-file"))
@ -1173,7 +1181,14 @@ void Client::processConfig()
pager = config().getString("pager", "");
is_default_format = !config().has("vertical") && !config().has("format");
if (config().has("vertical"))
if (is_default_format && checkIfStdoutIsRegularFile())
{
is_default_format = false;
std::optional<String> format_from_file_name;
format_from_file_name = FormatFactory::instance().tryGetFormatFromFileDescriptor(STDOUT_FILENO);
format = format_from_file_name ? *format_from_file_name : "TabSeparated";
}
else if (config().has("vertical"))
format = config().getString("format", "Vertical");
else
format = config().getString("format", is_interactive ? "PrettyCompact" : "TabSeparated");
@ -1377,8 +1392,8 @@ void Client::readArguments(
}
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wmissing-declarations"
#pragma clang diagnostic ignored "-Wunused-function"
#pragma clang diagnostic ignored "-Wmissing-declarations"
int mainEntryClickHouseClient(int argc, char ** argv)
{

View File

@ -143,7 +143,7 @@ int mainEntryClickHouseCompressor(int argc, char ** argv)
ParserCodec codec_parser;
std::string codecs_line = boost::algorithm::join(codecs, ",");
auto ast = parseQuery(codec_parser, "(" + codecs_line + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH);
auto ast = parseQuery(codec_parser, "(" + codecs_line + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);
codec = CompressionCodecFactory::instance().get(ast, nullptr);
}
else

View File

@ -1,15 +0,0 @@
#pragma once
#include <base/types.h>
#include <Poco/Util/AbstractConfiguration.h>
#include <utility>
namespace DB
{
using ConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;
using DatabaseAndTableName = std::pair<String, String>;
using ListOfDatabasesAndTableNames = std::vector<DatabaseAndTableName>;
}

View File

@ -1,28 +0,0 @@
set(CLICKHOUSE_COPIER_SOURCES
"${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopierApp.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/ClusterCopier.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/Internals.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/ShardPartition.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/ShardPartitionPiece.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/StatusAccumulator.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/TaskCluster.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/TaskShard.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/TaskTable.cpp")
set (CLICKHOUSE_COPIER_LINK
PRIVATE
clickhouse_common_zookeeper
clickhouse_common_config
clickhouse_parsers
clickhouse_functions
clickhouse_table_functions
clickhouse_aggregate_functions
string_utils
PUBLIC
daemon
)
set(CLICKHOUSE_COPIER_INCLUDE SYSTEM PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
clickhouse_program_add(copier)

File diff suppressed because it is too large

View File

@ -1,240 +0,0 @@
#pragma once
#include "Aliases.h"
#include "Internals.h"
#include "TaskCluster.h"
#include "TaskShard.h"
#include "TaskTable.h"
#include "ShardPartition.h"
#include "ShardPartitionPiece.h"
#include "ZooKeeperStaff.h"
namespace DB
{
class ClusterCopier : WithMutableContext
{
public:
ClusterCopier(const String & task_path_,
const String & host_id_,
const String & proxy_database_name_,
ContextMutablePtr context_,
LoggerRawPtr log_)
: WithMutableContext(context_),
task_zookeeper_path(task_path_),
host_id(host_id_),
working_database_name(proxy_database_name_),
log(log_) {}
void init();
template <typename T>
decltype(auto) retry(T && func, UInt64 max_tries = 100);
void discoverShardPartitions(const ConnectionTimeouts & timeouts, const TaskShardPtr & task_shard);
/// Compute the set of partitions, assuming the set of partitions does not change during processing
void discoverTablePartitions(const ConnectionTimeouts & timeouts, TaskTable & task_table, UInt64 num_threads = 0);
void uploadTaskDescription(const std::string & task_path, const std::string & task_file, bool force);
void reloadTaskDescription();
void updateConfigIfNeeded();
void process(const ConnectionTimeouts & timeouts);
/// Disables the DROP PARTITION commands that are used to clear data after errors
void setSafeMode(bool is_safe_mode_ = true)
{
is_safe_mode = is_safe_mode_;
}
void setCopyFaultProbability(double copy_fault_probability_)
{
copy_fault_probability = copy_fault_probability_;
}
void setMoveFaultProbability(double move_fault_probability_)
{
move_fault_probability = move_fault_probability_;
}
void setExperimentalUseSampleOffset(bool value)
{
experimental_use_sample_offset = value;
}
void setMaxTableTries(UInt64 tries)
{
max_table_tries = tries;
}
void setMaxShardPartitionTries(UInt64 tries)
{
max_shard_partition_tries = tries;
}
void setMaxShardPartitionPieceTriesForAlter(UInt64 tries)
{
max_shard_partition_piece_tries_for_alter = tries;
}
void setRetryDelayMs(std::chrono::milliseconds ms)
{
retry_delay_ms = ms;
}
protected:
String getWorkersPath() const
{
return task_cluster->task_zookeeper_path + "/task_active_workers";
}
String getWorkersPathVersion() const
{
return getWorkersPath() + "_version";
}
String getCurrentWorkerNodePath() const
{
return getWorkersPath() + "/" + host_id;
}
zkutil::EphemeralNodeHolder::Ptr createTaskWorkerNodeAndWaitIfNeed(
const zkutil::ZooKeeperPtr & zookeeper,
const String & description,
bool unprioritized);
/*
* Checks that partition piece or some other entity is clean.
* The only requirement is that you pass is_dirty_flag_path and is_dirty_cleaned_path to the function,
* and that is_dirty_flag_path is a parent of is_dirty_cleaned_path.
* */
static bool checkPartitionPieceIsClean(
const zkutil::ZooKeeperPtr & zookeeper,
const CleanStateClock & clean_state_clock,
const String & task_status_path);
bool checkAllPiecesInPartitionAreDone(const TaskTable & task_table, const String & partition_name, const TasksShard & shards_with_partition);
/** Checks that the whole partition of a table was copied. We should do it carefully due to dirty lock.
* State of some task could change during the processing.
* We have to ensure that all shards have the finished state and there is no dirty flag.
* Moreover, we have to check status twice and check zxid, because state can change during the checking.
*/
/* The same as the function above.
* Assume that we don't know on which shards a certain piece of the partition resides.
* We'll check them all (that is, the shards that contain the whole partition),
* and shards that don't have the certain piece MUST mark that piece as is_done = true.
* */
bool checkPartitionPieceIsDone(const TaskTable & task_table, const String & partition_name,
size_t piece_number, const TasksShard & shards_with_partition);
/* After successful insertion into the helping tables, it will move all pieces to the destination table. */
TaskStatus tryMoveAllPiecesToDestinationTable(const TaskTable & task_table, const String & partition_name);
/// Removes MATERIALIZED and ALIAS columns from create table query
static ASTPtr removeAliasMaterializedAndTTLColumnsFromCreateQuery(const ASTPtr & query_ast, bool allow_to_copy_alias_and_materialized_columns);
bool tryDropPartitionPiece(ShardPartition & task_partition, size_t current_piece_number,
const zkutil::ZooKeeperPtr & zookeeper, const CleanStateClock & clean_state_clock);
bool tryProcessTable(const ConnectionTimeouts & timeouts, TaskTable & task_table);
TaskStatus tryCreateDestinationTable(const ConnectionTimeouts & timeouts, TaskTable & task_table);
/// Job for copying partition from particular shard.
TaskStatus tryProcessPartitionTask(const ConnectionTimeouts & timeouts,
ShardPartition & task_partition,
bool is_unprioritized_task);
TaskStatus iterateThroughAllPiecesInPartition(const ConnectionTimeouts & timeouts,
ShardPartition & task_partition,
bool is_unprioritized_task);
TaskStatus processPartitionPieceTaskImpl(const ConnectionTimeouts & timeouts,
ShardPartition & task_partition,
size_t current_piece_number,
bool is_unprioritized_task);
void dropAndCreateLocalTable(const ASTPtr & create_ast);
void dropLocalTableIfExists(const DatabaseAndTableName & table_name) const;
void dropHelpingTables(const TaskTable & task_table);
void dropHelpingTablesByPieceNumber(const TaskTable & task_table, size_t current_piece_number);
/// Used to reduce disk space usage.
/// After all pieces have been successfully moved to the original destination
/// table, we can get rid of the partition pieces (partitions in the helping tables).
void dropParticularPartitionPieceFromAllHelpingTables(const TaskTable & task_table, const String & partition_name);
String getRemoteCreateTable(const DatabaseAndTableName & table, Connection & connection, const Settings & settings);
ASTPtr getCreateTableForPullShard(const ConnectionTimeouts & timeouts, TaskShard & task_shard);
/// If it is implicitly asked to create split Distributed table for certain piece on current shard, we will do it.
void createShardInternalTables(const ConnectionTimeouts & timeouts, TaskShard & task_shard, bool create_split = true);
std::set<String> getShardPartitions(const ConnectionTimeouts & timeouts, TaskShard & task_shard);
bool checkShardHasPartition(const ConnectionTimeouts & timeouts, TaskShard & task_shard, const String & partition_quoted_name);
bool checkPresentPartitionPiecesOnCurrentShard(const ConnectionTimeouts & timeouts,
TaskShard & task_shard, const String & partition_quoted_name, size_t current_piece_number);
/*
* This enum is used in the executeQueryOnCluster function.
* You can execute a query on each shard (it does not matter on which replica of a shard it is executed)
* or you can execute a query on every replica of every shard.
* The first mode is useful for INSERT queries.
* */
enum ClusterExecutionMode
{
ON_EACH_SHARD,
ON_EACH_NODE
};
/** Executes a simple query (without output streams, for example DDL queries) on each shard of the cluster.
* Returns the number of shards for which at least one replica executed the query successfully.
*/
UInt64 executeQueryOnCluster(
const ClusterPtr & cluster,
const String & query,
const Settings & current_settings,
ClusterExecutionMode execution_mode = ClusterExecutionMode::ON_EACH_SHARD) const;
private:
String task_zookeeper_path;
String task_description_path;
String host_id;
String working_database_name;
/// Auto update config stuff
UInt64 task_description_current_version = 1;
std::atomic<UInt64> task_description_version{1};
Coordination::WatchCallback task_description_watch_callback;
/// ZooKeeper session used to set the callback
zkutil::ZooKeeperPtr task_description_watch_zookeeper;
ConfigurationPtr task_cluster_initial_config;
ConfigurationPtr task_cluster_current_config;
std::unique_ptr<TaskCluster> task_cluster;
bool is_safe_mode = false;
double copy_fault_probability = 0.0;
double move_fault_probability = 0.0;
bool experimental_use_sample_offset{false};
LoggerRawPtr log;
UInt64 max_table_tries = 3;
UInt64 max_shard_partition_tries = 3;
UInt64 max_shard_partition_piece_tries_for_alter = 10;
std::chrono::milliseconds retry_delay_ms{1000};
};
}

View File

@ -1,252 +0,0 @@
#include "ClusterCopierApp.h"
#include <Common/ZooKeeper/ZooKeeper.h>
#include <Common/StatusFile.h>
#include <Common/TerminalSize.h>
#include <Databases/registerDatabases.h>
#include <IO/ConnectionTimeouts.h>
#include <Interpreters/registerInterpreters.h>
#include <Formats/registerFormats.h>
#include <Common/scope_guard_safe.h>
#include <unistd.h>
#include <filesystem>
namespace fs = std::filesystem;
namespace DB
{
/// ClusterCopierApp
void ClusterCopierApp::initialize(Poco::Util::Application & self)
{
is_help = config().has("help");
if (is_help)
return;
config_xml_path = config().getString("config-file");
task_path = config().getString("task-path");
log_level = config().getString("log-level", "info");
is_safe_mode = config().has("safe-mode");
is_status_mode = config().has("status");
if (config().has("copy-fault-probability"))
copy_fault_probability = std::max(std::min(config().getDouble("copy-fault-probability"), 1.0), 0.0);
if (config().has("move-fault-probability"))
move_fault_probability = std::max(std::min(config().getDouble("move-fault-probability"), 1.0), 0.0);
base_dir = (config().has("base-dir")) ? config().getString("base-dir") : fs::current_path().string();
max_table_tries = std::max<size_t>(config().getUInt("max-table-tries", 3), 1);
max_shard_partition_tries = std::max<size_t>(config().getUInt("max-shard-partition-tries", 3), 1);
max_shard_partition_piece_tries_for_alter = std::max<size_t>(config().getUInt("max-shard-partition-piece-tries-for-alter", 10), 1);
retry_delay_ms = std::chrono::milliseconds(std::max<size_t>(config().getUInt("retry-delay-ms", 1000), 100));
if (config().has("experimental-use-sample-offset"))
experimental_use_sample_offset = config().getBool("experimental-use-sample-offset");
// process_id is '<hostname>#<start_timestamp>_<pid>'
time_t timestamp = Poco::Timestamp().epochTime();
auto curr_pid = Poco::Process::id();
process_id = std::to_string(DateLUT::serverTimezoneInstance().toNumYYYYMMDDhhmmss(timestamp)) + "_" + std::to_string(curr_pid);
host_id = escapeForFileName(getFQDNOrHostName()) + '#' + process_id;
process_path = fs::weakly_canonical(fs::path(base_dir) / ("clickhouse-copier_" + process_id));
fs::create_directories(process_path);
/// Override variables for BaseDaemon
if (config().has("log-level"))
config().setString("logger.level", config().getString("log-level"));
if (config().has("base-dir") || !config().has("logger.log"))
config().setString("logger.log", fs::path(process_path) / "log.log");
if (config().has("base-dir") || !config().has("logger.errorlog"))
config().setString("logger.errorlog", fs::path(process_path) / "log.err.log");
Base::initialize(self);
}
void ClusterCopierApp::handleHelp(const std::string &, const std::string &)
{
uint16_t terminal_width = 0;
if (isatty(STDIN_FILENO))
terminal_width = getTerminalWidth();
Poco::Util::HelpFormatter help_formatter(options());
if (terminal_width)
help_formatter.setWidth(terminal_width);
help_formatter.setCommand(commandName());
help_formatter.setHeader("Copies tables from one cluster to another");
help_formatter.setUsage("--config-file <config-file> --task-path <task-path>");
help_formatter.format(std::cerr);
help_formatter.setFooter("See also: https://clickhouse.com/docs/en/operations/utilities/clickhouse-copier/");
stopOptionsProcessing();
}
void ClusterCopierApp::defineOptions(Poco::Util::OptionSet & options)
{
Base::defineOptions(options);
options.addOption(Poco::Util::Option("task-path", "", "path to task in ZooKeeper")
.argument("task-path").binding("task-path"));
options.addOption(Poco::Util::Option("task-file", "", "path to task file for uploading in ZooKeeper to task-path")
.argument("task-file").binding("task-file"));
options.addOption(Poco::Util::Option("task-upload-force", "", "Force upload task-file even node already exists. Default is false.")
.argument("task-upload-force").binding("task-upload-force"));
options.addOption(Poco::Util::Option("safe-mode", "", "disables ALTER DROP PARTITION in case of errors")
.binding("safe-mode"));
options.addOption(Poco::Util::Option("copy-fault-probability", "", "the copying fails with specified probability (used to test partition state recovering)")
.argument("copy-fault-probability").binding("copy-fault-probability"));
options.addOption(Poco::Util::Option("move-fault-probability", "", "the moving fails with specified probability (used to test partition state recovering)")
.argument("move-fault-probability").binding("move-fault-probability"));
options.addOption(Poco::Util::Option("log-level", "", "sets log level")
.argument("log-level").binding("log-level"));
options.addOption(Poco::Util::Option("base-dir", "", "base directory for copiers, consecutive copier launches will populate /base-dir/launch_id/* directories")
.argument("base-dir").binding("base-dir"));
options.addOption(Poco::Util::Option("experimental-use-sample-offset", "", "Use SAMPLE OFFSET query instead of cityHash64(PRIMARY KEY) % n == k")
.argument("experimental-use-sample-offset").binding("experimental-use-sample-offset"));
options.addOption(Poco::Util::Option("status", "", "Get for status for current execution").binding("status"));
options.addOption(Poco::Util::Option("max-table-tries", "", "Number of tries for the copy table task")
.argument("max-table-tries").binding("max-table-tries"));
options.addOption(Poco::Util::Option("max-shard-partition-tries", "", "Number of tries for the copy one partition task")
.argument("max-shard-partition-tries").binding("max-shard-partition-tries"));
options.addOption(Poco::Util::Option("max-shard-partition-piece-tries-for-alter", "", "Number of tries for final ALTER ATTACH to destination table")
.argument("max-shard-partition-piece-tries-for-alter").binding("max-shard-partition-piece-tries-for-alter"));
options.addOption(Poco::Util::Option("retry-delay-ms", "", "Delay between task retries")
.argument("retry-delay-ms").binding("retry-delay-ms"));
using Me = std::decay_t<decltype(*this)>;
options.addOption(Poco::Util::Option("help", "", "produce this help message").binding("help")
.callback(Poco::Util::OptionCallback<Me>(this, &Me::handleHelp)));
}
void ClusterCopierApp::mainImpl()
{
/// Status command
{
if (is_status_mode)
{
SharedContextHolder shared_context = Context::createShared();
auto context = Context::createGlobal(shared_context.get());
context->makeGlobalContext();
SCOPE_EXIT_SAFE(context->shutdown());
auto zookeeper = context->getZooKeeper();
auto status_json = zookeeper->get(task_path + "/status");
LOG_INFO(&logger(), "{}", status_json);
std::cout << status_json << std::endl;
context->resetZooKeeper();
return;
}
}
StatusFile status_file(process_path + "/status", StatusFile::write_full_info);
ThreadStatus thread_status;
auto * log = &logger();
LOG_INFO(log, "Starting clickhouse-copier (id {}, host_id {}, path {}, revision {})", process_id, host_id, process_path, ClickHouseRevision::getVersionRevision());
SharedContextHolder shared_context = Context::createShared();
auto context = Context::createGlobal(shared_context.get());
context->makeGlobalContext();
SCOPE_EXIT_SAFE(context->shutdown());
context->setConfig(loaded_config.configuration);
context->setApplicationType(Context::ApplicationType::LOCAL);
context->setPath(process_path + "/");
registerInterpreters();
registerFunctions();
registerAggregateFunctions();
registerTableFunctions();
registerDatabases();
registerStorages();
registerDictionaries();
registerDisks(/* global_skip_access_check= */ true);
registerFormats();
static const std::string default_database = "_local";
DatabaseCatalog::instance().attachDatabase(default_database, std::make_shared<DatabaseMemory>(default_database, context));
context->setCurrentDatabase(default_database);
/// Disable query logging, since:
/// - there are bits that are not allowed for the global context, like adding factories info (for the query_log);
/// - and anyway it is useless for the copier.
context->setSetting("log_queries", false);
auto local_context = Context::createCopy(context);
/// Initialize query scope just in case.
CurrentThread::QueryScope query_scope(local_context);
auto copier = std::make_unique<ClusterCopier>(
task_path, host_id, default_database, local_context, log);
copier->setSafeMode(is_safe_mode);
copier->setCopyFaultProbability(copy_fault_probability);
copier->setMoveFaultProbability(move_fault_probability);
copier->setMaxTableTries(max_table_tries);
copier->setMaxShardPartitionTries(max_shard_partition_tries);
copier->setMaxShardPartitionPieceTriesForAlter(max_shard_partition_piece_tries_for_alter);
copier->setRetryDelayMs(retry_delay_ms);
copier->setExperimentalUseSampleOffset(experimental_use_sample_offset);
auto task_file = config().getString("task-file", "");
if (!task_file.empty())
copier->uploadTaskDescription(task_path, task_file, config().getBool("task-upload-force", false));
zkutil::validateZooKeeperConfig(config());
copier->init();
copier->process(ConnectionTimeouts::getTCPTimeoutsWithoutFailover(context->getSettingsRef()));
/// Reset ZooKeeper before removing ClusterCopier.
/// Otherwise zookeeper watch can call callback which use already removed ClusterCopier object.
context->resetZooKeeper();
}
int ClusterCopierApp::main(const std::vector<std::string> &)
{
if (is_help)
return 0;
try
{
mainImpl();
}
catch (...)
{
tryLogCurrentException(&Poco::Logger::root(), __PRETTY_FUNCTION__);
auto code = getCurrentExceptionCode();
return (code) ? code : -1;
}
return 0;
}
}
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wmissing-declarations"
int mainEntryClickHouseClusterCopier(int argc, char ** argv)
{
try
{
DB::ClusterCopierApp app;
return app.run(argc, argv);
}
catch (...)
{
std::cerr << DB::getCurrentExceptionMessage(true) << "\n";
auto code = DB::getCurrentExceptionCode();
return (code) ? code : -1;
}
}

View File

@ -1,99 +0,0 @@
#pragma once
#include <Poco/Util/ServerApplication.h>
#include <Daemon/BaseDaemon.h>
#include "ClusterCopier.h"
/* clickhouse cluster copier util
* Copies table data from one cluster to new tables of another (possibly the same) cluster in a distributed, fault-tolerant manner.
*
* See overview in the docs: docs/en/utils/clickhouse-copier.md
*
* Implementation details:
*
* cluster-copier workers pull each partition of each shard of the source cluster and push it to the destination cluster through
* a Distributed table (to perform data resharding). So, a worker job is a partition of a source shard.
* A job has three states: Active, Finished and Abandoned. Abandoned means that a worker died and did not finish the job.
*
* If an error occurred during the copying (a worker failed or did not finish the INSERT), then the whole partition (on
* all destination servers) should be dropped and refilled. So, the copying entity is a partition of all destination shards.
* If a failure is detected, a special /is_dirty node is created in ZooKeeper, signalling that other workers copying the same partition
* should stop, after which a refilling procedure should start.
*
* ZooKeeper task node has the following structure:
* /task/path_root - path passed in --task-path parameter
* /description - contains user-defined XML config of the task
* /task_active_workers - contains ephemeral nodes of all currently active workers, used to implement max_workers limitation
* /server_fqdn#PID_timestamp - cluster-copier worker ID
* ...
* /tables - directory with table tasks
* /cluster.db.table1 - directory of table_hits task
* /partition1 - directory for partition1
* /shards - directory for source cluster shards
* /1 - worker job for the first shard of partition1 of table test.hits
* Contains info about current status (Active or Finished) and worker ID.
* /2
* ...
* /partition_active_workers
* /1 - for each job in /shards a corresponding ephemeral node created in /partition_active_workers
* It is used to detect Abandoned jobs (if there is Active node in /shards and there is no node in
* /partition_active_workers).
* Also, it is used to track active workers in the partition (when we need to refill the partition we do
* not DROP PARTITION while there are active workers)
* /2
* ...
* /is_dirty - the node is set if some worker detected that an error occurred (the INSERT failed or an Abandoned node was
* detected). If the node appears, workers in this partition should stop and start the cleaning and refilling
* procedure for the partition.
* During this procedure a single 'cleaner' worker is selected. The worker waits for all partition
* workers to stop, removes the /shards node, executes DROP PARTITION on each destination node and removes the /is_dirty node.
* /cleaner - an ephemeral node used to select the 'cleaner' worker. Contains the ID of the worker.
* /cluster.db.table2
* ...
*/
namespace DB
{
class ClusterCopierApp : public BaseDaemon
{
public:
void initialize(Poco::Util::Application & self) override;
void handleHelp(const std::string &, const std::string &);
void defineOptions(Poco::Util::OptionSet & options) override;
int main(const std::vector<std::string> &) override;
private:
using Base = BaseDaemon;
void mainImpl();
std::string config_xml_path;
std::string task_path;
std::string log_level = "info";
bool is_safe_mode = false;
bool is_status_mode = false;
double copy_fault_probability = 0.0;
double move_fault_probability = 0.0;
bool is_help = false;
UInt64 max_table_tries = 3;
UInt64 max_shard_partition_tries = 3;
UInt64 max_shard_partition_piece_tries_for_alter = 10;
std::chrono::milliseconds retry_delay_ms{1000};
bool experimental_use_sample_offset{false};
std::string base_dir;
std::string process_path;
std::string process_id;
std::string host_id;
};
}

Some files were not shown because too many files have changed in this diff.