Merge branch 'master' into fix-regexp-recompilation-in-parser

2024-11-24 00:22:29 +00:00 · 2024-03-21 02:21:09 +01:00 · 2024-03-21 02:21:09 +01:00 · 99ed82d561
commit 99ed82d561
parent cfab620f49 97b7daef84
264 changed files with 10527 additions and 2629 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -61,11 +61,16 @@ if (ENABLE_CHECK_HEAVY_BUILDS)
    # set CPU time limit to 1000 seconds
    set (RLIMIT_CPU 1000)

-    # -fsanitize=memory and address are too heavy
+    # Sanitizers are too heavy
    if (SANITIZE OR SANITIZE_COVERAGE OR WITH_COVERAGE)
       set (RLIMIT_DATA 10000000000) # 10G
    endif()

+    # Building some files for RISCV64 might currently be too slow. TODO: Improve compilation times per file
+    if (ARCH_RISCV64)
+        set (RLIMIT_CPU 1800)
+    endif()
+
    set (CMAKE_CXX_COMPILER_LAUNCHER prlimit --as=${RLIMIT_AS} --data=${RLIMIT_DATA} --cpu=${RLIMIT_CPU} ${CMAKE_CXX_COMPILER_LAUNCHER})
 endif ()

--- a/base/base/CMakeLists.txt
+++ b/base/base/CMakeLists.txt
@ -20,6 +20,7 @@ set (SRCS
    getPageSize.cpp
    getThreadId.cpp
    int8_to_string.cpp
+    itoa.cpp
    JSON.cpp
    mremap.cpp
    phdr_cache.cpp
--- a/base/base/IPv4andIPv6.h
+++ b/base/base/IPv4andIPv6.h
@ -1,8 +1,7 @@
 #pragma once

-#include <base/strong_typedef.h>
 #include <base/extended_types.h>
-#include <Common/formatIPv6.h>
+#include <base/strong_typedef.h>
 #include <Common/memcmpSmall.h>

 namespace DB
@ -62,7 +61,8 @@ namespace std
    {
        size_t operator()(const DB::IPv6 & x) const
        {
-            return std::hash<std::string_view>{}(std::string_view(reinterpret_cast<const char*>(&x.toUnderType()), IPV6_BINARY_LENGTH));
+            return std::hash<std::string_view>{}(
+                std::string_view(reinterpret_cast<const char *>(&x.toUnderType()), sizeof(DB::IPv6::UnderlyingType)));
        }
    };

--- a/base/base/itoa.cpp
+++ b/base/base/itoa.cpp
@ -0,0 +1,503 @@
+// Based on https://github.com/amdn/itoa and combined with our optimizations
+//
+//=== itoa.cpp - Fast integer to ascii conversion                 --*- C++ -*-//
+//
+// The MIT License (MIT)
+// Copyright (c) 2016 Arturo Martin-de-Nicolas
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+//     The above copyright notice and this permission notice shall be included
+//     in all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+//===----------------------------------------------------------------------===//
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <type_traits>
+#include <base/defines.h>
+#include <base/extended_types.h>
+#include <base/itoa.h>
+
+namespace
+{
+template <typename T>
+ALWAYS_INLINE inline constexpr T pow10(size_t x)
+{
+    return x ? 10 * pow10<T>(x - 1) : 1;
+}
+
+// Division by a power of 10 is implemented using a multiplicative inverse.
+// This strength reduction is also done by optimizing compilers, but
+// presently the fastest results are produced by using the values
+// for the multiplication and the shift as given by the algorithm
+// described by Agner Fog in "Optimizing Subroutines in Assembly Language"
+//
+// http://www.agner.org/optimize/optimizing_assembly.pdf
+//
+// "Integer division by a constant (all processors)
+// A floating point number can be divided by a constant by multiplying
+// with the reciprocal. If we want to do the same with integers, we have
+// to scale the reciprocal by 2^n and then shift the product to the right
+// by n. There are various algorithms for finding a suitable value of n
+// and compensating for rounding errors. The algorithm described below
+// was invented by Terje Mathisen, Norway, and not published elsewhere."
+
+/// Division by constant is performed by:
+/// 1. Adding 1 if needed;
+/// 2. Multiplying by another constant;
+/// 3. Shifting right by another constant.
+template <typename UInt, bool add_, UInt multiplier_, unsigned shift_>
+struct Division
+{
+    static constexpr bool add{add_};
+    static constexpr UInt multiplier{multiplier_};
+    static constexpr unsigned shift{shift_};
+};
+
+/// Select a type with appropriate number of bytes from the list of types.
+/// First parameter is the number of bytes requested. Then goes a list of types with 1, 2, 4, ... number of bytes.
+/// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t.
+template <size_t N, typename T, typename... Ts>
+struct SelectType
+{
+    using Result = typename SelectType<N / 2, Ts...>::Result;
+};
+
+template <typename T, typename... Ts>
+struct SelectType<1, T, Ts...>
+{
+    using Result = T;
+};
+
+
+/// Division by 10^N where N is the size of the type.
+template <size_t N>
+using DivisionBy10PowN = typename SelectType<
+    N,
+    Division<uint8_t, false, 205U, 11>, /// divide by 10
+    Division<uint16_t, true, 41943U, 22>, /// divide by 100
+    Division<uint32_t, false, 3518437209U, 45>, /// divide by 10000
+    Division<uint64_t, false, 12379400392853802749ULL, 90> /// divide by 100000000
+    >::Result;
+
+template <size_t N>
+using UnsignedOfSize = typename SelectType<N, uint8_t, uint16_t, uint32_t, uint64_t, __uint128_t>::Result;
+
+/// Holds the result of dividing an unsigned N-byte variable by 10^N: a quotient and a remainder.
+template <size_t N>
+struct QuotientAndRemainder
+{
+    UnsignedOfSize<N> quotient; // quotient with fewer than 2*N decimal digits
+    UnsignedOfSize<N / 2> remainder; // remainder with at most N decimal digits
+};
+
+template <size_t N>
+QuotientAndRemainder<N> inline split(UnsignedOfSize<N> value)
+{
+    constexpr DivisionBy10PowN<N> division;
+
+    UnsignedOfSize<N> quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift;
+    UnsignedOfSize<N / 2> remainder = static_cast<UnsignedOfSize<N / 2>>(value - quotient * pow10<UnsignedOfSize<N / 2>>(N));
+
+    return {quotient, remainder};
+}
+
+ALWAYS_INLINE inline char * outDigit(char * p, uint8_t value)
+{
+    *p = '0' + value;
+    ++p;
+    return p;
+}
+
+// Using a lookup table to convert binary numbers from 0 to 99
+// into ascii characters as described by Andrei Alexandrescu in
+// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/
+
+const char digits[201] = "00010203040506070809"
+                         "10111213141516171819"
+                         "20212223242526272829"
+                         "30313233343536373839"
+                         "40414243444546474849"
+                         "50515253545556575859"
+                         "60616263646566676869"
+                         "70717273747576777879"
+                         "80818283848586878889"
+                         "90919293949596979899";
+
+ALWAYS_INLINE inline char * outTwoDigits(char * p, uint8_t value)
+{
+    memcpy(p, &digits[value * 2], 2);
+    p += 2;
+    return p;
+}
+
+namespace convert
+{
+template <typename UInt, size_t N = sizeof(UInt)>
+char * head(char * p, UInt u);
+template <typename UInt, size_t N = sizeof(UInt)>
+char * tail(char * p, UInt u);
+
+//===----------------------------------------------------------===//
+//     head: find most significant digit, skip leading zeros
+//===----------------------------------------------------------===//
+
+// "x" contains quotient and remainder after division by 10^N
+// quotient is less than 10^N
+template <size_t N>
+ALWAYS_INLINE inline char * head(char * p, QuotientAndRemainder<N> x)
+{
+    p = head(p, UnsignedOfSize<N / 2>(x.quotient));
+    p = tail(p, x.remainder);
+    return p;
+}
+
+// "u" is less than 10^(2*N)
+template <typename UInt, size_t N>
+ALWAYS_INLINE inline char * head(char * p, UInt u)
+{
+    return u < pow10<UnsignedOfSize<N>>(N) ? head(p, UnsignedOfSize<N / 2>(u)) : head<N>(p, split<N>(u));
+}
+
+// recursion base case, selected when "u" is one byte
+template <>
+ALWAYS_INLINE inline char * head<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
+{
+    return u < 10 ? outDigit(p, u) : outTwoDigits(p, u);
+}
+
+//===----------------------------------------------------------===//
+//     tail: produce all digits including leading zeros
+//===----------------------------------------------------------===//
+
+// recursive step, "u" is less than 10^(2*N)
+template <typename UInt, size_t N>
+ALWAYS_INLINE inline char * tail(char * p, UInt u)
+{
+    QuotientAndRemainder<N> x = split<N>(u);
+    p = tail(p, UnsignedOfSize<N / 2>(x.quotient));
+    p = tail(p, x.remainder);
+    return p;
+}
+
+// recursion base case, selected when "u" is one byte
+template <>
+ALWAYS_INLINE inline char * tail<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
+{
+    return outTwoDigits(p, u);
+}
+
+//===----------------------------------------------------------===//
+// large values are >= 10^(2*N)
+// where x contains quotient and remainder after division by 10^N
+//===----------------------------------------------------------===//
+template <size_t N>
+ALWAYS_INLINE inline char * large(char * p, QuotientAndRemainder<N> x)
+{
+    QuotientAndRemainder<N> y = split<N>(x.quotient);
+    p = head(p, UnsignedOfSize<N / 2>(y.quotient));
+    p = tail(p, y.remainder);
+    p = tail(p, x.remainder);
+    return p;
+}
+
+//===----------------------------------------------------------===//
+// handle values of "u" that might be >= 10^(2*N)
+// where N is the size of "u" in bytes
+//===----------------------------------------------------------===//
+template <typename UInt, size_t N = sizeof(UInt)>
+ALWAYS_INLINE inline char * uitoa(char * p, UInt u)
+{
+    if (u < pow10<UnsignedOfSize<N>>(N))
+        return head(p, UnsignedOfSize<N / 2>(u));
+    QuotientAndRemainder<N> x = split<N>(u);
+
+    return u < pow10<UnsignedOfSize<N>>(2 * N) ? head<N>(p, x) : large<N>(p, x);
+}
+
+// selected when "u" is one byte
+template <>
+ALWAYS_INLINE inline char * uitoa<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
+{
+    if (u < 10)
+        return outDigit(p, u);
+    else if (u < 100)
+        return outTwoDigits(p, u);
+    else
+    {
+        p = outDigit(p, u / 100);
+        p = outTwoDigits(p, u % 100);
+        return p;
+    }
+}
+
+//===----------------------------------------------------------===//
+//     handle unsigned and signed integral operands
+//===----------------------------------------------------------===//
+
+// itoa: handle unsigned integral operands (selected by SFINAE)
+template <typename U, std::enable_if_t<!std::is_signed_v<U> && std::is_integral_v<U>> * = nullptr>
+ALWAYS_INLINE inline char * itoa(U u, char * p)
+{
+    return convert::uitoa(p, u);
+}
+
+// itoa: handle signed integral operands (selected by SFINAE)
+template <typename I, size_t N = sizeof(I), std::enable_if_t<std::is_signed_v<I> && std::is_integral_v<I>> * = nullptr>
+ALWAYS_INLINE inline char * itoa(I i, char * p)
+{
+    // Need "mask" to be filled with a copy of the sign bit.
+    // If "i" is a negative value, then the result of "operator >>"
+    // is implementation-defined, though usually it is an arithmetic
+    // right shift that replicates the sign bit.
+    // Use a conditional expression to be portable,
+    // a good optimizing compiler generates an arithmetic right shift
+    // and avoids the conditional branch.
+    UnsignedOfSize<N> mask = i < 0 ? ~UnsignedOfSize<N>(0) : 0;
+    // Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize<N>.
+    // Cannot use std::abs() because the result is undefined
+    // in 2's complement systems for the most-negative value.
+    // Want to avoid conditional branch for performance reasons since
+    // CPU branch prediction will be ineffective when negative values
+    // occur randomly.
+    // Let "u" be "i" cast to unsigned type UnsignedOfSize<N>.
+    // Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative.
+    // This yields the absolute value with the desired type without
+    // using a conditional branch and without invoking undefined or
+    // implementation defined behavior:
+    UnsignedOfSize<N> u = ((2 * UnsignedOfSize<N>(i)) & ~mask) - UnsignedOfSize<N>(i);
+    // Unconditionally store a minus sign when producing digits
+    // in a forward direction and increment the pointer only if
+    // the value is in fact negative.
+    // This avoids a conditional branch and is safe because we will
+    // always produce at least one digit and it will overwrite the
+    // minus sign when the value is not negative.
+    *p = '-';
+    p += (mask & 1);
+    p = convert::uitoa(p, u);
+    return p;
+}
+}
+
+const uint64_t max_multiple_of_hundred_that_fits_in_64_bits = 1'00'00'00'00'00'00'00'00'00ull;
+const int max_multiple_of_hundred_blocks = 9;
+static_assert(max_multiple_of_hundred_that_fits_in_64_bits % 100 == 0);
+
+ALWAYS_INLINE inline char * writeUIntText(UInt128 _x, char * p)
+{
+    /// If the highest 64-bit item is zero, we can print just the lowest item as u64
+    if (_x.items[UInt128::_impl::little(1)] == 0)
+        return convert::itoa(_x.items[UInt128::_impl::little(0)], p);
+
+    /// Doing operations using __int128 is faster and we already rely on this feature
+    using T = unsigned __int128;
+    T x = (T(_x.items[UInt128::_impl::little(1)]) << 64) + T(_x.items[UInt128::_impl::little(0)]);
+
+    /// We accumulate blocks of 2 digits (least significant first) until the number is small enough to be printed as u64.
+    /// To do this we could do: x / 100, x % 100
+    /// But that would mean many iterations on wide integers, so instead we divide by a much larger power of 100
+    /// (100^9 == 10^18) and then extract the two-digit blocks from the remainder (as u64).
+    /// Once x fits in u64 we stop, print it at the front with the fast method, then append the blocks in reverse order.
+    static const T large_divisor = max_multiple_of_hundred_that_fits_in_64_bits;
+    static const T largest_uint64 = std::numeric_limits<uint64_t>::max();
+    uint8_t two_values[20] = {0}; // 39 max characters / 2, rounded up
+
+    int current_block = 0;
+    while (x > largest_uint64)
+    {
+        uint64_t u64_remainder = uint64_t(x % large_divisor);
+        x /= large_divisor;
+
+        int pos = current_block;
+        while (u64_remainder)
+        {
+            two_values[pos] = uint8_t(u64_remainder % 100);
+            pos++;
+            u64_remainder /= 100;
+        }
+        current_block += max_multiple_of_hundred_blocks;
+    }
+
+    char * highest_part_print = convert::itoa(uint64_t(x), p);
+    for (int i = 0; i < current_block; i++)
+    {
+        outTwoDigits(highest_part_print, two_values[current_block - 1 - i]);
+        highest_part_print += 2;
+    }
+
+    return highest_part_print;
+}
+
+ALWAYS_INLINE inline char * writeUIntText(UInt256 _x, char * p)
+{
+    /// If the two highest 64-bit items are zero, print it as a UInt128, which is much faster
+    if (_x.items[UInt256::_impl::little(3)] == 0 && _x.items[UInt256::_impl::little(2)] == 0)
+        return writeUIntText(UInt128{_x.items[UInt256::_impl::little(0)], _x.items[UInt256::_impl::little(1)]}, p);
+
+    /// If available (x86) we convert from our custom class to _BitInt(256), which has better support in the compiler
+    /// and produces better code; otherwise we keep operating on UInt256 directly
+    using T =
+#if defined(__x86_64__)
+#    pragma clang diagnostic push
+#    pragma clang diagnostic ignored "-Wbit-int-extension"
+        unsigned _BitInt(256)
+#    pragma clang diagnostic pop
+#else
+        UInt256
+#endif
+        ;
+
+#if defined(__x86_64__)
+    T x = (T(_x.items[UInt256::_impl::little(3)]) << 192) + (T(_x.items[UInt256::_impl::little(2)]) << 128)
+        + (T(_x.items[UInt256::_impl::little(1)]) << 64) + T(_x.items[UInt256::_impl::little(0)]);
+#else
+    T x = _x;
+#endif
+
+    /// Same approach as writeUIntText(UInt128), except that here we stop dividing as soon as x fits in a u128
+    /// and let that function print the leading part; the two-digit blocks are then appended in reverse order
+    uint8_t two_values[39] = {0}; // 78 max characters / 2
+    int current_pos = 0;
+
+    static const T large_divisor = max_multiple_of_hundred_that_fits_in_64_bits;
+    static const T largest_uint128 = T(std::numeric_limits<uint64_t>::max()) << 64 | T(std::numeric_limits<uint64_t>::max());
+
+    while (x > largest_uint128)
+    {
+        uint64_t u64_remainder = uint64_t(x % large_divisor);
+        x /= large_divisor;
+
+        int pos = current_pos;
+        while (u64_remainder)
+        {
+            two_values[pos] = uint8_t(u64_remainder % 100);
+            pos++;
+            u64_remainder /= 100;
+        }
+        current_pos += max_multiple_of_hundred_blocks;
+    }
+
+#if defined(__x86_64__)
+    UInt128 pending{uint64_t(x), uint64_t(x >> 64)};
+#else
+    UInt128 pending{x.items[UInt256::_impl::little(0)], x.items[UInt256::_impl::little(1)]};
+#endif
+
+    char * highest_part_print = writeUIntText(pending, p);
+    for (int i = 0; i < current_pos; i++)
+    {
+        outTwoDigits(highest_part_print, two_values[current_pos - 1 - i]);
+        highest_part_print += 2;
+    }
+
+    return highest_part_print;
+}
+
+ALWAYS_INLINE inline char * writeLeadingMinus(char * pos)
+{
+    *pos = '-';
+    return pos + 1;
+}
+
+template <typename T>
+ALWAYS_INLINE inline char * writeSIntText(T x, char * pos)
+{
+    static_assert(std::is_same_v<T, Int128> || std::is_same_v<T, Int256>);
+
+    using UnsignedT = make_unsigned_t<T>;
+    static constexpr T min_int = UnsignedT(1) << (sizeof(T) * 8 - 1);
+
+    if (unlikely(x == min_int))
+    {
+        if constexpr (std::is_same_v<T, Int128>)
+        {
+            const char * res = "-170141183460469231731687303715884105728";
+            memcpy(pos, res, strlen(res));
+            return pos + strlen(res);
+        }
+        else if constexpr (std::is_same_v<T, Int256>)
+        {
+            const char * res = "-57896044618658097711785492504343953926634992332820282019728792003956564819968";
+            memcpy(pos, res, strlen(res));
+            return pos + strlen(res);
+        }
+    }
+
+    if (x < 0)
+    {
+        x = -x;
+        pos = writeLeadingMinus(pos);
+    }
+    return writeUIntText(UnsignedT(x), pos);
+}
+}
+
+char * itoa(UInt8 i, char * p)
+{
+    return convert::itoa(uint8_t(i), p);
+}
+
+char * itoa(Int8 i, char * p)
+{
+    return convert::itoa(int8_t(i), p);
+}
+
+char * itoa(UInt128 i, char * p)
+{
+    return writeUIntText(i, p);
+}
+
+char * itoa(Int128 i, char * p)
+{
+    return writeSIntText(i, p);
+}
+
+char * itoa(UInt256 i, char * p)
+{
+    return writeUIntText(i, p);
+}
+
+char * itoa(Int256 i, char * p)
+{
+    return writeSIntText(i, p);
+}
+
+#define DEFAULT_ITOA(T) \
+    char * itoa(T i, char * p) \
+    { \
+        return convert::itoa(i, p); \
+    }
+
+#define FOR_MISSING_INTEGER_TYPES(M) \
+    M(uint8_t) \
+    M(UInt16) \
+    M(UInt32) \
+    M(UInt64) \
+    M(int8_t) \
+    M(Int16) \
+    M(Int32) \
+    M(Int64)
+
+FOR_MISSING_INTEGER_TYPES(DEFAULT_ITOA)
+
+#if defined(OS_DARWIN)
+DEFAULT_ITOA(unsigned long)
+DEFAULT_ITOA(long)
+#endif
+
+#undef FOR_MISSING_INTEGER_TYPES
+#undef DEFAULT_ITOA
--- a/base/base/itoa.h
+++ b/base/base/itoa.h
@ -1,446 +1,30 @@
 #pragma once

-// Based on https://github.com/amdn/itoa and combined with our optimizations
-//
-//=== itoa.h - Fast integer to ascii conversion                   --*- C++ -*-//
-//
-// The MIT License (MIT)
-// Copyright (c) 2016 Arturo Martin-de-Nicolas
-//
-// Permission is hereby granted, free of charge, to any person obtaining a copy
-// of this software and associated documentation files (the "Software"), to deal
-// in the Software without restriction, including without limitation the rights
-// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the Software is
-// furnished to do so, subject to the following conditions:
-//
-//     The above copyright notice and this permission notice shall be included
-//     in all copies or substantial portions of the Software.
-//
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-// SOFTWARE.
-//===----------------------------------------------------------------------===//
-
-#include <cstdint>
-#include <cstddef>
-#include <cstring>
-#include <type_traits>
 #include <base/extended_types.h>

+#define FOR_INTEGER_TYPES(M) \
+    M(uint8_t) \
+    M(UInt8) \
+    M(UInt16) \
+    M(UInt32) \
+    M(UInt64) \
+    M(UInt128) \
+    M(UInt256) \
+    M(int8_t) \
+    M(Int8) \
+    M(Int16) \
+    M(Int32) \
+    M(Int64) \
+    M(Int128) \
+    M(Int256)

-template <typename T>
-inline int digits10(T x)
-{
-    if (x < 10ULL)
-        return 1;
-    if (x < 100ULL)
-        return 2;
-    if (x < 1000ULL)
-        return 3;
+#define INSTANTIATION(T) char * itoa(T i, char * p);
+FOR_INTEGER_TYPES(INSTANTIATION)

-    if (x < 1000000000000ULL)
-    {
-        if (x < 100000000ULL)
-        {
-            if (x < 1000000ULL)
-            {
-                if (x < 10000ULL)
-                    return 4;
-                else
-                    return 5 + (x >= 100000ULL);
-            }
+#if defined(OS_DARWIN)
+INSTANTIATION(unsigned long)
+INSTANTIATION(long)
+#endif

-            return 7 + (x >= 10000000ULL);
-        }
-
-        if (x < 10000000000ULL)
-            return 9 + (x >= 1000000000ULL);
-
-        return 11 + (x >= 100000000000ULL);
-    }
-
-    return 12 + digits10(x / 1000000000000ULL);
-}
-
-
-namespace impl
-{
-
-template <typename T>
-static constexpr T pow10(size_t x)
-{
-    return x ? 10 * pow10<T>(x - 1) : 1;
-}
-
-// Division by a power of 10 is implemented using a multiplicative inverse.
-// This strength reduction is also done by optimizing compilers, but
-// presently the fastest results are produced by using the values
-// for the multiplication and the shift as given by the algorithm
-// described by Agner Fog in "Optimizing Subroutines in Assembly Language"
-//
-// http://www.agner.org/optimize/optimizing_assembly.pdf
-//
-// "Integer division by a constant (all processors)
-// A floating point number can be divided by a constant by multiplying
-// with the reciprocal. If we want to do the same with integers, we have
-// to scale the reciprocal by 2n and then shift the product to the right
-// by n. There are various algorithms for finding a suitable value of n
-// and compensating for rounding errors. The algorithm described below
-// was invented by Terje Mathisen, Norway, and not published elsewhere."
-
-/// Division by constant is performed by:
-/// 1. Adding 1 if needed;
-/// 2. Multiplying by another constant;
-/// 3. Shifting right by another constant.
-template <typename UInt, bool add_, UInt multiplier_, unsigned shift_>
-struct Division
-{
-    static constexpr bool add{add_};
-    static constexpr UInt multiplier{multiplier_};
-    static constexpr unsigned shift{shift_};
-};
-
-/// Select a type with appropriate number of bytes from the list of types.
-/// First parameter is the number of bytes requested. Then goes a list of types with 1, 2, 4, ... number of bytes.
-/// Example: SelectType<4, uint8_t, uint16_t, uint32_t, uint64_t> will select uint32_t.
-template <size_t N, typename T, typename... Ts>
-struct SelectType
-{
-    using Result = typename SelectType<N / 2, Ts...>::Result;
-};
-
-template <typename T, typename... Ts>
-struct SelectType<1, T, Ts...>
-{
-    using Result = T;
-};
-
-
-/// Division by 10^N where N is the size of the type.
-template <size_t N>
-using DivisionBy10PowN = typename SelectType
-<
-    N,
-    Division<uint8_t, false, 205U, 11>,                           /// divide by 10
-    Division<uint16_t, true, 41943U, 22>,                         /// divide by 100
-    Division<uint32_t, false, 3518437209U, 45>,                   /// divide by 10000
-    Division<uint64_t, false, 12379400392853802749ULL, 90>        /// divide by 100000000
->::Result;
-
-template <size_t N>
-using UnsignedOfSize = typename SelectType
-<
-    N,
-    uint8_t,
-    uint16_t,
-    uint32_t,
-    uint64_t,
-    __uint128_t
->::Result;
-
-/// Holds the result of dividing an unsigned N-byte variable by 10^N resulting in
-template <size_t N>
-struct QuotientAndRemainder
-{
-    UnsignedOfSize<N> quotient; // quotient with fewer than 2*N decimal digits
-    UnsignedOfSize<N / 2> remainder; // remainder with at most N decimal digits
-};
-
-template <size_t N>
-QuotientAndRemainder<N> static inline split(UnsignedOfSize<N> value)
-{
-    constexpr DivisionBy10PowN<N> division;
-
-    UnsignedOfSize<N> quotient = (division.multiplier * (UnsignedOfSize<2 * N>(value) + division.add)) >> division.shift;
-    UnsignedOfSize<N / 2> remainder = static_cast<UnsignedOfSize<N / 2>>(value - quotient * pow10<UnsignedOfSize<N / 2>>(N));
-
-    return {quotient, remainder};
-}
-
-
-static inline char * outDigit(char * p, uint8_t value)
-{
-    *p = '0' + value;
-    ++p;
-    return p;
-}
-
-// Using a lookup table to convert binary numbers from 0 to 99
-// into ascii characters as described by Andrei Alexandrescu in
-// https://www.facebook.com/notes/facebook-engineering/three-optimization-tips-for-c/10151361643253920/
-
-static const char digits[201] = "00010203040506070809"
-                                "10111213141516171819"
-                                "20212223242526272829"
-                                "30313233343536373839"
-                                "40414243444546474849"
-                                "50515253545556575859"
-                                "60616263646566676869"
-                                "70717273747576777879"
-                                "80818283848586878889"
-                                "90919293949596979899";
-
-static inline char * outTwoDigits(char * p, uint8_t value)
-{
-    memcpy(p, &digits[value * 2], 2);
-    p += 2;
-    return p;
-}
-
-
-namespace convert
-{
-    template <typename UInt, size_t N = sizeof(UInt)> static char * head(char * p, UInt u);
-    template <typename UInt, size_t N = sizeof(UInt)> static char * tail(char * p, UInt u);
-
-    //===----------------------------------------------------------===//
-    //     head: find most significant digit, skip leading zeros
-    //===----------------------------------------------------------===//
-
-    // "x" contains quotient and remainder after division by 10^N
-    // quotient is less than 10^N
-    template <size_t N>
-    static inline char * head(char * p, QuotientAndRemainder<N> x)
-    {
-        p = head(p, UnsignedOfSize<N / 2>(x.quotient));
-        p = tail(p, x.remainder);
-        return p;
-    }
-
-    // "u" is less than 10^2*N
-    template <typename UInt, size_t N>
-    static inline char * head(char * p, UInt u)
-    {
-        return u < pow10<UnsignedOfSize<N>>(N)
-            ? head(p, UnsignedOfSize<N / 2>(u))
-            : head<N>(p, split<N>(u));
-    }
-
-    // recursion base case, selected when "u" is one byte
-    template <>
-    inline char * head<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
-    {
-        return u < 10
-            ? outDigit(p, u)
-            : outTwoDigits(p, u);
-    }
-
-    //===----------------------------------------------------------===//
-    //     tail: produce all digits including leading zeros
-    //===----------------------------------------------------------===//
-
-    // recursive step, "u" is less than 10^2*N
-    template <typename UInt, size_t N>
-    static inline char * tail(char * p, UInt u)
-    {
-        QuotientAndRemainder<N> x = split<N>(u);
-        p = tail(p, UnsignedOfSize<N / 2>(x.quotient));
-        p = tail(p, x.remainder);
-        return p;
-    }
-
-    // recursion base case, selected when "u" is one byte
-    template <>
-    inline char * tail<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
-    {
-        return outTwoDigits(p, u);
-    }
-
-    //===----------------------------------------------------------===//
-    // large values are >= 10^2*N
-    // where x contains quotient and remainder after division by 10^N
-    //===----------------------------------------------------------===//
-
-    template <size_t N>
-    static inline char * large(char * p, QuotientAndRemainder<N> x)
-    {
-        QuotientAndRemainder<N> y = split<N>(x.quotient);
-        p = head(p, UnsignedOfSize<N / 2>(y.quotient));
-        p = tail(p, y.remainder);
-        p = tail(p, x.remainder);
-        return p;
-    }
-
-    //===----------------------------------------------------------===//
-    // handle values of "u" that might be >= 10^2*N
-    // where N is the size of "u" in bytes
-    //===----------------------------------------------------------===//
-
-    template <typename UInt, size_t N = sizeof(UInt)>
-    static inline char * uitoa(char * p, UInt u)
-    {
-        if (u < pow10<UnsignedOfSize<N>>(N))
-            return head(p, UnsignedOfSize<N / 2>(u));
-        QuotientAndRemainder<N> x = split<N>(u);
-
-        return u < pow10<UnsignedOfSize<N>>(2 * N)
-            ? head<N>(p, x)
-            : large<N>(p, x);
-    }
-
-    // selected when "u" is one byte
-    template <>
-    inline char * uitoa<UnsignedOfSize<1>, 1>(char * p, UnsignedOfSize<1> u)
-    {
-        if (u < 10)
-            return outDigit(p, u);
-        else if (u < 100)
-            return outTwoDigits(p, u);
-        else
-        {
-            p = outDigit(p, u / 100);
-            p = outTwoDigits(p, u % 100);
-            return p;
-        }
-    }
-
-    //===----------------------------------------------------------===//
-    //     handle unsigned and signed integral operands
-    //===----------------------------------------------------------===//
-
-    // itoa: handle unsigned integral operands (selected by SFINAE)
-    template <typename U, std::enable_if_t<!std::is_signed_v<U> && std::is_integral_v<U>> * = nullptr>
-    static inline char * itoa(U u, char * p)
-    {
-        return convert::uitoa(p, u);
-    }
-
-    // itoa: handle signed integral operands (selected by SFINAE)
-    template <typename I, size_t N = sizeof(I), std::enable_if_t<std::is_signed_v<I> && std::is_integral_v<I>> * = nullptr>
-    static inline char * itoa(I i, char * p)
-    {
-        // Need "mask" to be filled with a copy of the sign bit.
-        // If "i" is a negative value, then the result of "operator >>"
-        // is implementation-defined, though usually it is an arithmetic
-        // right shift that replicates the sign bit.
-        // Use a conditional expression to be portable,
-        // a good optimizing compiler generates an arithmetic right shift
-        // and avoids the conditional branch.
-        UnsignedOfSize<N> mask = i < 0 ? ~UnsignedOfSize<N>(0) : 0;
-        // Now get the absolute value of "i" and cast to unsigned type UnsignedOfSize<N>.
-        // Cannot use std::abs() because the result is undefined
-        // in 2's complement systems for the most-negative value.
-        // Want to avoid conditional branch for performance reasons since
-        // CPU branch prediction will be ineffective when negative values
-        // occur randomly.
-        // Let "u" be "i" cast to unsigned type UnsignedOfSize<N>.
-        // Subtract "u" from 2*u if "i" is positive or 0 if "i" is negative.
-        // This yields the absolute value with the desired type without
-        // using a conditional branch and without invoking undefined or
-        // implementation defined behavior:
-        UnsignedOfSize<N> u = ((2 * UnsignedOfSize<N>(i)) & ~mask) - UnsignedOfSize<N>(i);
-        // Unconditionally store a minus sign when producing digits
-        // in a forward direction and increment the pointer only if
-        // the value is in fact negative.
-        // This avoids a conditional branch and is safe because we will
-        // always produce at least one digit and it will overwrite the
-        // minus sign when the value is not negative.
-        *p = '-';
-        p += (mask & 1);
-        p = convert::uitoa(p, u);
-        return p;
-    }
-}
-
-
-template <typename T>
-static inline char * writeUIntText(T x, char * p)
-{
-    static_assert(is_unsigned_v<T>);
-
-    int len = digits10(x);
-    auto * pp = p + len;
-    while (x >= 100)
-    {
-        const auto i = x % 100;
-        x /= 100;
-        pp -= 2;
-        outTwoDigits(pp, i);
-    }
-    if (x < 10)
-        *p = '0' + x;
-    else
-        outTwoDigits(p, x);
-    return p + len;
-}
-
-static inline char * writeLeadingMinus(char * pos)
-{
-    *pos = '-';
-    return pos + 1;
-}
-
-template <typename T>
-static inline char * writeSIntText(T x, char * pos)
-{
-    static_assert(std::is_same_v<T, Int128> || std::is_same_v<T, Int256>);
-
-    using UnsignedT = make_unsigned_t<T>;
-    static constexpr T min_int = UnsignedT(1) << (sizeof(T) * 8 - 1);
-
-    if (unlikely(x == min_int))
-    {
-        if constexpr (std::is_same_v<T, Int128>)
-        {
-            const char * res = "-170141183460469231731687303715884105728";
-            memcpy(pos, res, strlen(res));
-            return pos + strlen(res);
-        }
-        else if constexpr (std::is_same_v<T, Int256>)
-        {
-            const char * res = "-57896044618658097711785492504343953926634992332820282019728792003956564819968";
-            memcpy(pos, res, strlen(res));
-            return pos + strlen(res);
-        }
-    }
-
-    if (x < 0)
-    {
-        x = -x;
-        pos = writeLeadingMinus(pos);
-    }
-    return writeUIntText(UnsignedT(x), pos);
-}
-
-}
-
-template <typename I>
-char * itoa(I i, char * p)
-{
-    return impl::convert::itoa(i, p);
-}
-
-template <>
-inline char * itoa(char8_t i, char * p)
-{
-    return impl::convert::itoa(uint8_t(i), p);
-}
-
-template <>
-inline char * itoa(UInt128 i, char * p)
-{
-    return impl::writeUIntText(i, p);
-}
-
-template <>
-inline char * itoa(Int128 i, char * p)
-{
-    return impl::writeSIntText(i, p);
-}
-
-template <>
-inline char * itoa(UInt256 i, char * p)
-{
-    return impl::writeUIntText(i, p);
-}
-
-template <>
-inline char * itoa(Int256 i, char * p)
-{
-    return impl::writeSIntText(i, p);
-}
+#undef FOR_INTEGER_TYPES
+#undef INSTANTIATION
--- a/docker/test/fuzzer/run-fuzzer.sh
+++ b/docker/test/fuzzer/run-fuzzer.sh
@ -173,16 +173,15 @@ function fuzz

    mkdir -p /var/run/clickhouse-server

-    # NOTE: we use process substitution here to preserve keep $! as a pid of clickhouse-server
-    # server.log -> CH logs
-    # stderr.log -> Process logs (sanitizer)
+    # server.log -> All server logs, including sanitizer
+    # stderr.log -> Process logs (sanitizer) only
    clickhouse-server \
        --config-file db/config.xml \
        --pid-file /var/run/clickhouse-server/clickhouse-server.pid \
        --  --path db \
            --logger.console=0 \
-            --logger.log=server.log > stderr.log 2>&1 &
-    server_pid=$!
+            --logger.log=server.log 2>&1 | tee -a stderr.log >> server.log 2>&1 &
+    server_pid=$(pidof clickhouse-server)

    kill -0 $server_pid

@ -310,7 +309,7 @@ quit
    if [ "$server_died" == 1 ]
    then
        # The server has died.
-        if ! rg --text -o 'Received signal.*|Logical error.*|Assertion.*failed|Failed assertion.*|.*runtime error: .*|.*is located.*|(SUMMARY|ERROR): [a-zA-Z]+Sanitizer:.*|.*_LIBCPP_ASSERT.*|.*Child process was terminated by signal 9.*' server.log stderr.log > description.txt
+        if ! rg --text -o 'Received signal.*|Logical error.*|Assertion.*failed|Failed assertion.*|.*runtime error: .*|.*is located.*|(SUMMARY|ERROR): [a-zA-Z]+Sanitizer:.*|.*_LIBCPP_ASSERT.*|.*Child process was terminated by signal 9.*' server.log > description.txt
        then
            echo "Lost connection to server. See the logs." > description.txt
        fi
--- a/docker/test/integration/runner/Dockerfile
+++ b/docker/test/integration/runner/Dockerfile
@ -126,7 +126,6 @@ RUN set -x \

 COPY modprobe.sh /usr/local/bin/modprobe
 COPY dockerd-entrypoint.sh /usr/local/bin/
-COPY compose/ /compose/
 COPY misc/ /misc/


--- a/docker/test/stateless/run.sh
+++ b/docker/test/stateless/run.sh
@ -51,10 +51,7 @@ fi
 config_logs_export_cluster /etc/clickhouse-server/config.d/system_logs_export.yaml

 if [[ -n "$BUGFIX_VALIDATE_CHECK" ]] && [[ "$BUGFIX_VALIDATE_CHECK" -eq 1 ]]; then
-    sudo cat /etc/clickhouse-server/config.d/zookeeper.xml \
-    | sed "/<use_compression>1<\/use_compression>/d" \
-    > /etc/clickhouse-server/config.d/zookeeper.xml.tmp
-    sudo mv /etc/clickhouse-server/config.d/zookeeper.xml.tmp /etc/clickhouse-server/config.d/zookeeper.xml
+    sudo sed -i "/<use_compression>1<\/use_compression>/d" /etc/clickhouse-server/config.d/zookeeper.xml

    # it contains some new settings, but we can safely remove it
    rm /etc/clickhouse-server/config.d/handlers.yaml
@ -62,23 +59,14 @@ if [[ -n "$BUGFIX_VALIDATE_CHECK" ]] && [[ "$BUGFIX_VALIDATE_CHECK" -eq 1 ]]; th
    rm /etc/clickhouse-server/config.d/zero_copy_destructive_operations.xml

    #todo: remove these after 24.3 released.
-    sudo cat /etc/clickhouse-server/config.d/azure_storage_conf.xml \
-      | sed "s|<object_storage_type>azure|<object_storage_type>azure_blob_storage|" \
-      > /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp
-    sudo mv /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp /etc/clickhouse-server/config.d/azure_storage_conf.xml
+    sudo sed -i "s|<object_storage_type>azure<|<object_storage_type>azure_blob_storage<|" /etc/clickhouse-server/config.d/azure_storage_conf.xml

    #todo: remove these after 24.3 released.
-    sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
-      | sed "s|<object_storage_type>local|<object_storage_type>local_blob_storage|" \
-      > /etc/clickhouse-server/config.d/storage_conf.xml.tmp
-    sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
+    sudo sed -i "s|<object_storage_type>local<|<object_storage_type>local_blob_storage<|" /etc/clickhouse-server/config.d/storage_conf.xml

    function remove_keeper_config()
    {
-        sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
-          | sed "/<$1>$2<\/$1>/d" \
-          > /etc/clickhouse-server/config.d/keeper_port.xml.tmp
-        sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
+        sudo sed -i "/<$1>$2<\/$1>/d" /etc/clickhouse-server/config.d/keeper_port.xml
    }
    # commit_logs_cache_size_threshold setting doesn't exist on some older versions
    remove_keeper_config "commit_logs_cache_size_threshold" "[[:digit:]]\+"
@ -113,25 +101,13 @@ else
 fi

 if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
-    sudo cat /etc/clickhouse-server1/config.d/filesystem_caches_path.xml \
-    | sed "s|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches/</filesystem_caches_path>|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches_1/</filesystem_caches_path>|" \
-    > /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp
-    mv /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server1/config.d/filesystem_caches_path.xml
+    sudo sed -i "s|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches/</filesystem_caches_path>|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches_1/</filesystem_caches_path>|" /etc/clickhouse-server1/config.d/filesystem_caches_path.xml

-    sudo cat /etc/clickhouse-server2/config.d/filesystem_caches_path.xml \
-    | sed "s|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches/</filesystem_caches_path>|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches_2/</filesystem_caches_path>|" \
-    > /etc/clickhouse-server2/config.d/filesystem_caches_path.xml.tmp
-    mv /etc/clickhouse-server2/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server2/config.d/filesystem_caches_path.xml
+    sudo sed -i "s|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches/</filesystem_caches_path>|<filesystem_caches_path>/var/lib/clickhouse/filesystem_caches_2/</filesystem_caches_path>|" /etc/clickhouse-server2/config.d/filesystem_caches_path.xml

-    sudo cat /etc/clickhouse-server1/config.d/filesystem_caches_path.xml \
-    | sed "s|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches/</custom_cached_disks_base_directory>|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches_1/</custom_cached_disks_base_directory>|" \
-    > /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp
-    mv /etc/clickhouse-server1/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server1/config.d/filesystem_caches_path.xml
+    sudo sed -i "s|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches/</custom_cached_disks_base_directory>|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches_1/</custom_cached_disks_base_directory>|" /etc/clickhouse-server1/config.d/filesystem_caches_path.xml

-    sudo cat /etc/clickhouse-server2/config.d/filesystem_caches_path.xml \
-    | sed "s|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches/</custom_cached_disks_base_directory>|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches_2/</custom_cached_disks_base_directory>|" \
-    > /etc/clickhouse-server2/config.d/filesystem_caches_path.xml.tmp
-    mv /etc/clickhouse-server2/config.d/filesystem_caches_path.xml.tmp /etc/clickhouse-server2/config.d/filesystem_caches_path.xml
+    sudo sed -i "s|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches/</custom_cached_disks_base_directory>|<custom_cached_disks_base_directory replace=\"replace\">/var/lib/clickhouse/filesystem_caches_2/</custom_cached_disks_base_directory>|" /etc/clickhouse-server2/config.d/filesystem_caches_path.xml

    mkdir -p /var/run/clickhouse-server1
    sudo chown clickhouse:clickhouse /var/run/clickhouse-server1
--- a/docker/test/upgrade/run.sh
+++ b/docker/test/upgrade/run.sh
@ -67,10 +67,7 @@ configure

 function remove_keeper_config()
 {
-  sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
-    | sed "/<$1>$2<\/$1>/d" \
-    > /etc/clickhouse-server/config.d/keeper_port.xml.tmp
-  sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
+  sudo sed -i "/<$1>$2<\/$1>/d" /etc/clickhouse-server/config.d/keeper_port.xml
 }

 # async_replication setting doesn't exist on some older versions
@ -80,16 +77,10 @@ remove_keeper_config "async_replication" "1"
 remove_keeper_config "create_if_not_exists" "[01]"

 #todo: remove these after 24.3 released.
-sudo cat /etc/clickhouse-server/config.d/azure_storage_conf.xml \
-  | sed "s|<object_storage_type>azure|<object_storage_type>azure_blob_storage|" \
-  > /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp
-sudo mv /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp /etc/clickhouse-server/config.d/azure_storage_conf.xml
+sudo sed -i "s|<object_storage_type>azure<|<object_storage_type>azure_blob_storage<|" /etc/clickhouse-server/config.d/azure_storage_conf.xml

 #todo: remove these after 24.3 released.
-sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
-  | sed "s|<object_storage_type>local|<object_storage_type>local_blob_storage|" \
-  > /etc/clickhouse-server/config.d/storage_conf.xml.tmp
-sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
+sudo sed -i "s|<object_storage_type>local<|<object_storage_type>local_blob_storage<|" /etc/clickhouse-server/config.d/storage_conf.xml

 # latest_logs_cache_size_threshold setting doesn't exist on some older versions
 remove_keeper_config "latest_logs_cache_size_threshold" "[[:digit:]]\+"
@ -120,22 +111,13 @@ export ZOOKEEPER_FAULT_INJECTION=0
 configure

 # force_sync=false doesn't work correctly on some older versions
-sudo cat /etc/clickhouse-server/config.d/keeper_port.xml \
-  | sed "s|<force_sync>false</force_sync>|<force_sync>true</force_sync>|" \
-  > /etc/clickhouse-server/config.d/keeper_port.xml.tmp
-sudo mv /etc/clickhouse-server/config.d/keeper_port.xml.tmp /etc/clickhouse-server/config.d/keeper_port.xml
+sudo sed -i "s|<force_sync>false</force_sync>|<force_sync>true</force_sync>|" /etc/clickhouse-server/config.d/keeper_port.xml

 #todo: remove these after 24.3 released.
-sudo cat /etc/clickhouse-server/config.d/azure_storage_conf.xml \
-  | sed "s|<object_storage_type>azure|<object_storage_type>azure_blob_storage|" \
-  > /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp
-sudo mv /etc/clickhouse-server/config.d/azure_storage_conf.xml.tmp /etc/clickhouse-server/config.d/azure_storage_conf.xml
+sudo sed -i "s|<object_storage_type>azure<|<object_storage_type>azure_blob_storage<|" /etc/clickhouse-server/config.d/azure_storage_conf.xml

 #todo: remove these after 24.3 released.
-sudo cat /etc/clickhouse-server/config.d/storage_conf.xml \
-  | sed "s|<object_storage_type>local|<object_storage_type>local_blob_storage|" \
-  > /etc/clickhouse-server/config.d/storage_conf.xml.tmp
-sudo mv /etc/clickhouse-server/config.d/storage_conf.xml.tmp /etc/clickhouse-server/config.d/storage_conf.xml
+sudo sed -i "s|<object_storage_type>local<|<object_storage_type>local_blob_storage<|" /etc/clickhouse-server/config.d/storage_conf.xml

 # async_replication setting doesn't exist on some older versions
 remove_keeper_config "async_replication" "1"
@ -150,10 +132,7 @@ remove_keeper_config "latest_logs_cache_size_threshold" "[[:digit:]]\+"
 remove_keeper_config "commit_logs_cache_size_threshold" "[[:digit:]]\+"

 # But we still need default disk because some tables loaded only into it
-sudo cat /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml \
-  | sed "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" \
-  > /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp
-mv /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml.tmp /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
+sudo sed -i "s|<main><disk>s3</disk></main>|<main><disk>s3</disk></main><default><disk>default</disk></default>|" /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
 sudo chown clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml
 sudo chgrp clickhouse /etc/clickhouse-server/config.d/s3_storage_policy_by_default.xml

@ -256,10 +235,7 @@ then
 fi

 # Just in case previous version left some garbage in zk
-sudo cat /etc/clickhouse-server/config.d/lost_forever_check.xml \
-  | sed "s|>1<|>0<|g" \
-  > /etc/clickhouse-server/config.d/lost_forever_check.xml.tmp
-sudo mv /etc/clickhouse-server/config.d/lost_forever_check.xml.tmp /etc/clickhouse-server/config.d/lost_forever_check.xml
+sudo sed -i "s|>1<|>0<|g" /etc/clickhouse-server/config.d/lost_forever_check.xml
 rm /etc/clickhouse-server/config.d/filesystem_caches_path.xml

 start 500
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@ -933,9 +933,9 @@ Hard limit is configured via system tools

 ## database_atomic_delay_before_drop_table_sec {#database_atomic_delay_before_drop_table_sec}

-The delay before a table data is dropped in seconds. If the `DROP TABLE` query has a `SYNC` modifier, this setting is ignored.
+Sets the delay, in seconds, before table data is removed. If the `DROP TABLE` query has the `SYNC` modifier, this setting is ignored.

-Default value: `480` (8 minutes).
+Default value: `480` (8 minutes).

 ## database_catalog_unused_dir_hide_timeout_sec {#database_catalog_unused_dir_hide_timeout_sec}

--- a/docs/en/sql-reference/statements/undrop.md
+++ b/docs/en/sql-reference/statements/undrop.md
@ -13,6 +13,13 @@ a system table called `system.dropped_tables`.

 If you have a materialized view without a `TO` clause associated with the dropped table, then you will also have to UNDROP the inner table of that view.

+:::note
+UNDROP TABLE is experimental. To use it, add this setting:
+```sql
+set allow_experimental_undrop_table_query = 1;
+```
+:::
+
 :::tip
 Also see [DROP TABLE](/docs/en/sql-reference/statements/drop.md)
 :::
@ -25,53 +32,60 @@ UNDROP TABLE [db.]name [UUID '<uuid>'] [ON CLUSTER cluster]

 **Example**

+``` sql
+set allow_experimental_undrop_table_query = 1;
+```
+
 ```sql
-CREATE TABLE tab
+CREATE TABLE undropMe
 (
    `id` UInt8
 )
 ENGINE = MergeTree
-ORDER BY id;
-
-DROP TABLE tab;
-
-SELECT *
-FROM system.dropped_tables
-FORMAT Vertical;
+ORDER BY id
 ```

+```sql
+DROP TABLE undropMe
+```
+```sql
+SELECT *
+FROM system.dropped_tables
+FORMAT Vertical
+```
 ```response
 Row 1:
 ──────
 index:                 0
 database:              default
-table:                 tab
+table:                 undropMe
 uuid:                  aa696a1a-1d70-4e60-a841-4c80827706cc
 engine:                MergeTree
-metadata_dropped_path: /var/lib/clickhouse/metadata_dropped/default.tab.aa696a1a-1d70-4e60-a841-4c80827706cc.sql
+metadata_dropped_path: /var/lib/clickhouse/metadata_dropped/default.undropMe.aa696a1a-1d70-4e60-a841-4c80827706cc.sql
 table_dropped_time:    2023-04-05 14:12:12

 1 row in set. Elapsed: 0.001 sec. 
 ```
-
 ```sql
-UNDROP TABLE tab;
-
+UNDROP TABLE undropMe
+```
+```response
+Ok.
+```
+```sql
 SELECT *
 FROM system.dropped_tables
-FORMAT Vertical;
-
+FORMAT Vertical
+```
 ```response
 Ok.

 0 rows in set. Elapsed: 0.001 sec. 
 ```
-
 ```sql
-DESCRIBE TABLE tab
-FORMAT Vertical;
+DESCRIBE TABLE undropMe
+FORMAT Vertical
 ```
-
 ```response
 Row 1:
 ──────
--- a/programs/benchmark/Benchmark.cpp
+++ b/programs/benchmark/Benchmark.cpp
@ -34,6 +34,7 @@
 #include <Common/StudentTTest.h>
 #include <Common/CurrentMetrics.h>
 #include <Common/ErrorCodes.h>
+#include <Core/BaseSettingsProgramOptions.h>


 /** A tool for evaluating ClickHouse performance.
@ -623,7 +624,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
        ;

        Settings settings;
-        settings.addProgramOptions(desc);
+        addProgramOptions(settings, desc);

        boost::program_options::variables_map options;
        boost::program_options::store(boost::program_options::parse_command_line(argc, argv, desc), options);
--- a/programs/format/Format.cpp
+++ b/programs/format/Format.cpp
@ -17,6 +17,7 @@
 #include <Common/ErrorCodes.h>
 #include <Common/StringUtils/StringUtils.h>
 #include <Common/TerminalSize.h>
+#include <Core/BaseSettingsProgramOptions.h>

 #include <Interpreters/Context.h>
 #include <Functions/FunctionFactory.h>
@ -102,7 +103,7 @@ int mainEntryClickHouseFormat(int argc, char ** argv)
        {
            std::string_view name = field.getName();
            if (name == "max_parser_depth" || name == "max_query_size")
-                cmd_settings.addProgramOption(desc, name, field);
+                addProgramOption(cmd_settings, desc, name, field);
        }

        boost::program_options::variables_map options;
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@ -36,6 +36,7 @@
 #include <Common/getNumberOfPhysicalCPUCores.h>
 #include <Common/getExecutablePath.h>
 #include <Common/ProfileEvents.h>
+#include <Common/Scheduler/IResourceManager.h>
 #include <Common/ThreadProfileEvents.h>
 #include <Common/ThreadStatus.h>
 #include <Common/getMappedArea.h>
--- a/src/Access/Common/QuotaDefs.cpp
+++ b/src/Access/Common/QuotaDefs.cpp
@ -2,6 +2,7 @@
 #include <Common/Exception.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
+#include <base/range.h>

 #include <boost/algorithm/string/case_conv.hpp>
 #include <boost/algorithm/string/classification.hpp>
--- a/src/Access/IAccessStorage.h
+++ b/src/Access/IAccessStorage.h
@ -13,6 +13,8 @@
 #include <optional>
 #include <vector>

+#include <boost/noncopyable.hpp>
+

 namespace Poco { class Logger; }
 namespace Poco::Net { class IPAddress; }
--- a/src/Access/User.cpp
+++ b/src/Access/User.cpp
@ -1,4 +1,5 @@
 #include <Access/User.h>
+#include <Common/StringUtils/StringUtils.h>
 #include <Core/Protocol.h>
 #include <base/insertAtEnd.h>

--- a/src/AggregateFunctions/AggregateFunctionRetention.cpp
+++ b/src/AggregateFunctions/AggregateFunctionRetention.cpp
@ -1,16 +1,16 @@
 #include <AggregateFunctions/AggregateFunctionFactory.h>
+#include <AggregateFunctions/IAggregateFunction.h>
 #include <AggregateFunctions/FactoryHelpers.h>
-
-#include <unordered_set>
 #include <Columns/ColumnArray.h>
 #include <Common/assert_cast.h>
 #include <DataTypes/DataTypesNumber.h>
 #include <DataTypes/DataTypeArray.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
-#include <bitset>
+#include <base/range.h>

-#include <AggregateFunctions/IAggregateFunction.h>
+#include <bitset>
+#include <unordered_set>


 namespace DB
--- a/src/AggregateFunctions/AggregateFunctionSequenceMatch.cpp
+++ b/src/AggregateFunctions/AggregateFunctionSequenceMatch.cpp
@ -10,6 +10,8 @@
 #include <Common/assert_cast.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
+#include <base/range.h>
+
 #include <bitset>
 #include <stack>

--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@ -48,6 +48,7 @@
 #include <Interpreters/SelectQueryOptions.h>
 #include <Interpreters/Set.h>
 #include <Interpreters/Context.h>
+#include <Interpreters/DatabaseCatalog.h>
 #include <Interpreters/ExternalDictionariesLoader.h>
 #include <Interpreters/InterpreterSelectQueryAnalyzer.h>

@ -776,6 +777,7 @@ struct IdentifierResolveScope
    std::unordered_map<QueryTreeNodePtr, TableExpressionData> table_expression_node_to_data;

    QueryTreeNodePtrWithHashSet nullable_group_by_keys;
+    QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> nullable_join_columns;

    /// Use identifier lookup to result cache
    bool use_identifier_lookup_to_result_cache = true;
@ -1276,7 +1278,11 @@ private:
        return {};
    }

-    static void convertJoinedColumnTypeToNullIfNeeded(QueryTreeNodePtr & resolved_identifier, const JoinKind & join_kind, std::optional<JoinTableSide> resolved_side)
+    static QueryTreeNodePtr convertJoinedColumnTypeToNullIfNeeded(
+        const QueryTreeNodePtr & resolved_identifier,
+        const JoinKind & join_kind,
+        std::optional<JoinTableSide> resolved_side,
+        IdentifierResolveScope & scope)
    {
        if (resolved_identifier->getNodeType() == QueryTreeNodeType::COLUMN &&
            JoinCommon::canBecomeNullable(resolved_identifier->getResultType()) &&
@ -1284,9 +1290,20 @@ private:
            (isLeft(join_kind) && resolved_side && *resolved_side == JoinTableSide::Right) ||
            (isRight(join_kind) && resolved_side && *resolved_side == JoinTableSide::Left)))
        {
-            auto & resolved_column = resolved_identifier->as<ColumnNode &>();
-            resolved_column.setColumnType(makeNullableOrLowCardinalityNullable(resolved_column.getColumnType()));
+            auto nullable_resolved_identifier = resolved_identifier->clone();
+            auto & resolved_column = nullable_resolved_identifier->as<ColumnNode &>();
+            auto new_result_type = makeNullableOrLowCardinalityNullable(resolved_column.getColumnType());
+            resolved_column.setColumnType(new_result_type);
+            if (resolved_column.hasExpression())
+            {
+                auto & resolved_expression = resolved_column.getExpression();
+                if (!resolved_expression->getResultType()->equals(*new_result_type))
+                    resolved_expression = buildCastFunction(resolved_expression, new_result_type, scope.context, true);
+            }
+            scope.nullable_join_columns[nullable_resolved_identifier] = resolved_identifier;
+            return nullable_resolved_identifier;
        }
+        return nullptr;
    }

    /// Resolve identifier functions
@ -3258,6 +3275,32 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromTableExpression(const Id
    return {};
 }

+/** Checks whether an identifier resolved from a JOIN side is a constant NULL rather than a column
+  * (the call site describes this as a missed Object(Nullable('json')) subcolumn replaced by ConstantNode(NULL)).
+  *
+  * Either argument may be a null pointer. The branches are mutually exclusive and checked in this order:
+  *  - both nodes are constants: the left node is returned only if both values are "NULL";
+  *  - otherwise, the left node is a constant: it is returned if its value is "NULL";
+  *  - otherwise, the right node is a constant: it is returned if its value is "NULL".
+  * In every other case an empty pointer is returned.
+  */
+QueryTreeNodePtr checkIsMissedObjectJSONSubcolumn(const QueryTreeNodePtr & left_resolved_identifier,
+                                                  const QueryTreeNodePtr & right_resolved_identifier)
+{
+    /// Both sides resolved to constants: require both to be NULL.
+    if (left_resolved_identifier && right_resolved_identifier && left_resolved_identifier->getNodeType() == QueryTreeNodeType::CONSTANT
+        && right_resolved_identifier->getNodeType() == QueryTreeNodeType::CONSTANT)
+    {
+        auto & left_resolved_column = left_resolved_identifier->as<ConstantNode &>();
+        auto & right_resolved_column = right_resolved_identifier->as<ConstantNode &>();
+        if (left_resolved_column.getValueStringRepresentation() == "NULL" && right_resolved_column.getValueStringRepresentation() == "NULL")
+            return left_resolved_identifier;
+    }
+    /// Only the left side is a constant (the right side is absent or not a constant).
+    else if (left_resolved_identifier && left_resolved_identifier->getNodeType() == QueryTreeNodeType::CONSTANT)
+    {
+        auto & left_resolved_column = left_resolved_identifier->as<ConstantNode &>();
+        if (left_resolved_column.getValueStringRepresentation() == "NULL")
+            return left_resolved_identifier;
+    }
+    /// Only the right side is a constant.
+    else if (right_resolved_identifier && right_resolved_identifier->getNodeType() == QueryTreeNodeType::CONSTANT)
+    {
+        auto & right_resolved_column = right_resolved_identifier->as<ConstantNode &>();
+        if (right_resolved_column.getValueStringRepresentation() == "NULL")
+            return right_resolved_identifier;
+    }
+    /// Not a NULL constant on either side: the caller continues with regular resolution.
+    return {};
+}
+
 QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLookup & identifier_lookup,
    const QueryTreeNodePtr & table_expression_node,
    IdentifierResolveScope & scope)
@ -3358,28 +3401,8 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo

    /// If columns from left or right table were missed Object(Nullable('json')) subcolumns, they will be replaced
    /// to ConstantNode(NULL), which can't be cast to ColumnNode, so we resolve it here.
-    if (left_resolved_identifier && right_resolved_identifier && left_resolved_identifier->getNodeType() == QueryTreeNodeType::CONSTANT
-        && right_resolved_identifier->getNodeType() == QueryTreeNodeType::CONSTANT)
-    {
-        auto & left_resolved_column = left_resolved_identifier->as<ConstantNode &>();
-        auto & right_resolved_column = right_resolved_identifier->as<ConstantNode &>();
-        if (left_resolved_column.getValueStringRepresentation() == "NULL" && right_resolved_column.getValueStringRepresentation() == "NULL")
-            return left_resolved_identifier;
-    }
-    else if (left_resolved_identifier && left_resolved_identifier->getNodeType() == QueryTreeNodeType::CONSTANT)
-    {
-        resolved_side = JoinTableSide::Left;
-        auto & left_resolved_column = left_resolved_identifier->as<ConstantNode &>();
-        if (left_resolved_column.getValueStringRepresentation() == "NULL")
-            return left_resolved_identifier;
-    }
-    else if (right_resolved_identifier && right_resolved_identifier->getNodeType() == QueryTreeNodeType::CONSTANT)
-    {
-        resolved_side = JoinTableSide::Right;
-        auto & right_resolved_column = right_resolved_identifier->as<ConstantNode &>();
-        if (right_resolved_column.getValueStringRepresentation() == "NULL")
-            return right_resolved_identifier;
-    }
+    if (auto missed_subcolumn_identifier = checkIsMissedObjectJSONSubcolumn(left_resolved_identifier, right_resolved_identifier))
+        return missed_subcolumn_identifier;

    if (left_resolved_identifier && right_resolved_identifier)
    {
@ -3521,8 +3544,9 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveIdentifierFromJoin(const IdentifierLoo

    if (scope.join_use_nulls)
    {
-        resolved_identifier = resolved_identifier->clone();
-        convertJoinedColumnTypeToNullIfNeeded(resolved_identifier, join_kind, resolved_side);
+        auto nullable_resolved_identifier = convertJoinedColumnTypeToNullIfNeeded(resolved_identifier, join_kind, resolved_side, scope);
+        if (nullable_resolved_identifier)
+            resolved_identifier = nullable_resolved_identifier;
    }

    return resolved_identifier;
@ -4402,6 +4426,28 @@ QueryAnalyzer::QueryTreeNodesWithNames QueryAnalyzer::resolveUnqualifiedMatcher(
                    const auto & join_using_column_nodes_list = join_using_column_node.getExpressionOrThrow()->as<ListNode &>();
                    const auto & join_using_column_nodes = join_using_column_nodes_list.getNodes();

+                    /** If column doesn't exist in the table, then do not match column from USING clause.
+                      * Example: SELECT a + 1 AS id, * FROM (SELECT 1 AS a) AS t1 JOIN (SELECT 2 AS id) AS t2 USING (id);
+                      * In this case `id` is not present in the left table expression,
+                      * so asterisk should return `id` from the right table expression.
+                      */
+                    auto is_column_from_parent_scope = [&scope](const QueryTreeNodePtr & using_node_from_table)
+                    {
+                        const auto & using_column_from_table = using_node_from_table->as<ColumnNode &>();
+                        auto table_expression_data_it = scope.table_expression_node_to_data.find(using_column_from_table.getColumnSource());
+                        if (table_expression_data_it != scope.table_expression_node_to_data.end())
+                        {
+                            const auto & table_expression_data = table_expression_data_it->second;
+                            const auto & column_name = using_column_from_table.getColumnName();
+                            return !table_expression_data.column_name_to_column_node.contains(column_name);
+                        }
+                        return false;
+                    };
+
+                    if (is_column_from_parent_scope(join_using_column_nodes.at(0)) ||
+                        is_column_from_parent_scope(join_using_column_nodes.at(1)))
+                        continue;
+
                    QueryTreeNodePtr matched_column_node;

                    if (isRight(join_node->getKind()))
@ -4523,7 +4569,15 @@ ProjectionNames QueryAnalyzer::resolveMatcher(QueryTreeNodePtr & matcher_node, I
            for (auto & [node, node_name] : matched_expression_nodes_with_names)
            {
                auto join_identifier_side = getColumnSideFromJoinTree(node, *nearest_scope_join_node);
-                convertJoinedColumnTypeToNullIfNeeded(node, nearest_scope_join_node->getKind(), join_identifier_side);
+                auto projection_name_it = node_to_projection_name.find(node);
+                auto nullable_node = convertJoinedColumnTypeToNullIfNeeded(node, nearest_scope_join_node->getKind(), join_identifier_side, scope);
+                if (nullable_node)
+                {
+                    node = nullable_node;
+                    /// Set the same projection name for new nullable node
+                    if (projection_name_it != node_to_projection_name.end())
+                        node_to_projection_name.emplace(node, projection_name_it->second);
+                }
            }
        }
    }
@ -7304,8 +7358,64 @@ void QueryAnalyzer::resolveJoin(QueryTreeNodePtr & join_node, IdentifierResolveS

            join_using_identifiers.insert(identifier_full_name);

+            const auto & settings = scope.context->getSettingsRef();
+
+            /** While resolving JOIN USING identifier, try to resolve identifier from parent subquery projection.
+              * Example: SELECT a + 1 AS b FROM (SELECT 1 AS a) t1 JOIN (SELECT 2 AS b) USING b
+              * In this case `b` is not in the left table expression, but it is in the parent subquery projection.
+              */
+            auto try_resolve_identifier_from_query_projection = [this](const String & identifier_full_name_,
+                                                                       const QueryTreeNodePtr & left_table_expression,
+                                                                       const IdentifierResolveScope & scope_) -> QueryTreeNodePtr
+            {
+                const QueryNode * query_node = scope_.scope_node ? scope_.scope_node->as<QueryNode>() : nullptr;
+                if (!query_node)
+                    return nullptr;
+
+                const auto & projection_list = query_node->getProjection();
+                for (const auto & projection_node : projection_list.getNodes())
+                {
+                    if (projection_node->hasAlias() && identifier_full_name_ == projection_node->getAlias())
+                    {
+                        auto left_subquery = std::make_shared<QueryNode>(query_node->getMutableContext());
+                        left_subquery->getProjection().getNodes().push_back(projection_node->clone());
+                        left_subquery->getJoinTree() = left_table_expression;
+
+                        IdentifierResolveScope left_subquery_scope(left_subquery, nullptr /*parent_scope*/);
+                        resolveQuery(left_subquery, left_subquery_scope);
+
+                        const auto & resolved_nodes = left_subquery->getProjection().getNodes();
+                        if (resolved_nodes.size() == 1)
+                        {
+                            /// Create ColumnNode with expression from parent projection
+                            return std::make_shared<ColumnNode>(
+                                NameAndTypePair{identifier_full_name_, resolved_nodes.front()->getResultType()},
+                                resolved_nodes.front(), left_table_expression);
+                        }
+                    }
+                }
+                return nullptr;
+            };
+
+            QueryTreeNodePtr result_left_table_expression = nullptr;
+            /** With `analyzer_compatibility_join_using_top_level_identifier` alias in projection has higher priority than column from left table.
+              * But if aliased expression cannot be resolved from left table, we get UNKNOWN_IDENTIFIER error,
+              * despite the fact that column from USING could be resolved from left table.
+              * It's compatibility with a default behavior for old analyzer.
+              */
+            if (settings.analyzer_compatibility_join_using_top_level_identifier)
+                result_left_table_expression = try_resolve_identifier_from_query_projection(identifier_full_name, join_node_typed.getLeftTableExpression(), scope);
+
            IdentifierLookup identifier_lookup{identifier_node->getIdentifier(), IdentifierLookupContext::EXPRESSION};
-            auto result_left_table_expression = tryResolveIdentifierFromJoinTreeNode(identifier_lookup, join_node_typed.getLeftTableExpression(), scope);
+            if (!result_left_table_expression)
+                result_left_table_expression = tryResolveIdentifierFromJoinTreeNode(identifier_lookup, join_node_typed.getLeftTableExpression(), scope);
+
+            /// Here we may try to resolve identifier from projection in case it's not resolved from left table expression
+            /// and analyzer_compatibility_join_using_top_level_identifier is disabled.
+            /// For now we do not do this, because not all corner cases are clear.
+            /// if (!settings.analyzer_compatibility_join_using_top_level_identifier && !result_left_table_expression)
+            ///     result_left_table_expression = try_resolve_identifier_from_query_projection(identifier_full_name, join_node_typed.getLeftTableExpression(), scope);
+
            if (!result_left_table_expression)
                throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER,
                    "JOIN {} using identifier '{}' cannot be resolved from left table expression. In scope {}",
@ -7448,6 +7558,29 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node,
    scope.table_expressions_in_resolve_process.erase(join_tree_node.get());
 }

+/** Query tree visitor that swaps every node found in `replacement_map` for its mapped node
+  * and calls `rerunFunctionResolve` on every function node it visits.
+  * Both the map and the context are stored as references, so they must outlive the visitor.
+  */
+class ReplaceColumnsVisitor : public InDepthQueryTreeVisitor<ReplaceColumnsVisitor>
+{
+public:
+    explicit ReplaceColumnsVisitor(const QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> & replacement_map_, const ContextPtr & context_)
+        : replacement_map(replacement_map_)
+        , context(context_)
+    {}
+
+    /// Replaces `node` in place when it has an entry in the map, then re-runs function resolution
+    /// if the (possibly replaced) node is a function.
+    void visitImpl(QueryTreeNodePtr & node)
+    {
+        if (auto it = replacement_map.find(node); it != replacement_map.end())
+            node = it->second;
+        if (auto * function_node = node->as<FunctionNode>())
+            rerunFunctionResolve(function_node, context);
+    }
+
+    /// NOTE(review): presumably requests bottom-up traversal, so that arguments are replaced
+    /// before their parent function is re-resolved - confirm against InDepthQueryTreeVisitor.
+    bool shouldTraverseTopToBottom() const { return false; }
+
+private:
+    const QueryTreeNodePtrWithHashMap<QueryTreeNodePtr> & replacement_map;
+    const ContextPtr & context;
+};
+
 /** Resolve query.
  * This function modifies query node during resolve. It is caller responsibility to clone query node before resolve
  * if it is needed for later use.
@ -7635,21 +7768,23 @@ void QueryAnalyzer::resolveQuery(const QueryTreeNodePtr & query_node, Identifier
                scope.scope_node->formatASTForErrorMessage());
    }

-    if (query_node_typed.getPrewhere())
+    if (auto & prewhere_node = query_node_typed.getPrewhere())
    {
-        /** Expression in PREWHERE with JOIN should not be modified by join_use_nulls.
-          * Example: SELECT * FROM t1 JOIN t2 USING (id) PREWHERE a = 1
-          * Column `a` should be resolved from table and should not change its type to Nullable.
-          */
-        bool join_use_nulls = scope.join_use_nulls;
-        bool use_identifier_lookup_to_result_cache = scope.use_identifier_lookup_to_result_cache;
-        scope.join_use_nulls = false;
-        scope.use_identifier_lookup_to_result_cache = false;
+        resolveExpressionNode(prewhere_node, scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);

-        resolveExpressionNode(query_node_typed.getPrewhere(), scope, false /*allow_lambda_expression*/, false /*allow_table_expression*/);
-
-        scope.join_use_nulls = join_use_nulls;
-        scope.use_identifier_lookup_to_result_cache = use_identifier_lookup_to_result_cache;
+        if (scope.join_use_nulls)
+        {
+            /** Expression in PREWHERE with JOIN should not be modified by join_use_nulls.
+              * Example: SELECT * FROM t1 JOIN t2 USING (id) PREWHERE a = 1
+              * Column `a` should be resolved from table and should not change its type to Nullable.
+              * More complicated example when column is somewhere inside an expression:
+              * SELECT a + 1 as b FROM t1 JOIN t2 USING (id) PREWHERE b = 1
+              * expression `a + 1 as b` in projection and in PREWHERE should have different `a`.
+              */
+            prewhere_node = prewhere_node->clone();
+            ReplaceColumnsVisitor replace_visitor(scope.nullable_join_columns, scope.context);
+            replace_visitor.visit(prewhere_node);
+        }
    }

    if (query_node_typed.getWhere())
--- a/src/Backups/BackupEntriesCollector.cpp
+++ b/src/Backups/BackupEntriesCollector.cpp
@ -6,6 +6,7 @@
 #include <Backups/DDLAdjustingForBackupVisitor.h>
 #include <Databases/IDatabase.h>
 #include <Interpreters/Context.h>
+#include <Interpreters/DatabaseCatalog.h>
 #include <Parsers/ASTCreateQuery.h>
 #include <Parsers/formatAST.h>
 #include <Storages/IStorage.h>
--- a/src/Backups/BackupIO_Default.h
+++ b/src/Backups/BackupIO_Default.h
@ -1,6 +1,7 @@
 #pragma once

 #include <Backups/BackupIO.h>
+#include <Common/Logger.h>
 #include <IO/ReadSettings.h>
 #include <IO/WriteSettings.h>

--- a/src/Backups/BackupIO_Disk.h
+++ b/src/Backups/BackupIO_Disk.h
@ -1,7 +1,9 @@
 #pragma once

 #include <Backups/BackupIO_Default.h>
+#include <Common/Logger.h>
 #include <Disks/DiskType.h>
+
 #include <filesystem>


--- a/src/Backups/BackupIO_S3.h
+++ b/src/Backups/BackupIO_S3.h
@ -4,6 +4,7 @@

 #if USE_AWS_S3
 #include <Backups/BackupIO_Default.h>
+#include <Common/Logger.h>
 #include <Disks/DiskType.h>
 #include <IO/S3Common.h>
 #include <Storages/StorageS3Settings.h>
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@ -9,6 +9,7 @@
 #include <base/safeExit.h>
 #include <base/scope_guard.h>
 #include <Core/Block.h>
+#include <Core/BaseSettingsProgramOptions.h>
 #include <Core/Protocol.h>
 #include <Common/DateLUT.h>
 #include <Common/MemoryTracker.h>
@ -2714,9 +2715,9 @@ private:
 void ClientBase::parseAndCheckOptions(OptionsDescription & options_description, po::variables_map & options, Arguments & arguments)
 {
    if (allow_repeated_settings)
-        cmd_settings.addProgramOptionsAsMultitokens(options_description.main_description.value());
+        addProgramOptionsAsMultitokens(cmd_settings, options_description.main_description.value());
    else
-        cmd_settings.addProgramOptions(options_description.main_description.value());
+        addProgramOptions(cmd_settings, options_description.main_description.value());

    if (allow_merge_tree_settings)
    {
@ -2737,9 +2738,9 @@ void ClientBase::parseAndCheckOptions(OptionsDescription & options_description,
                    return;

                if (allow_repeated_settings)
-                    cmd_merge_tree_settings.addProgramOptionAsMultitoken(main_options, name, setting);
+                    addProgramOptionAsMultitoken(cmd_merge_tree_settings, main_options, name, setting);
                else
-                    cmd_merge_tree_settings.addProgramOption(main_options, name, setting);
+                    addProgramOption(cmd_merge_tree_settings, main_options, name, setting);
            };

            const auto & setting_name = setting.getName();
--- a/src/Client/LocalConnection.cpp
+++ b/src/Client/LocalConnection.cpp
@ -1,13 +1,14 @@
 #include "LocalConnection.h"
+#include <Core/Protocol.h>
+#include <Interpreters/DatabaseCatalog.h>
 #include <Interpreters/executeQuery.h>
 #include <Processors/Executors/CompletedPipelineExecutor.h>
 #include <Processors/Executors/PullingAsyncPipelineExecutor.h>
-#include <Processors/Executors/PushingPipelineExecutor.h>
 #include <Processors/Executors/PushingAsyncPipelineExecutor.h>
+#include <Processors/Executors/PushingPipelineExecutor.h>
 #include <Storages/IStorage.h>
 #include <Common/ConcurrentBoundedQueue.h>
 #include <Common/CurrentThread.h>
-#include <Core/Protocol.h>


 namespace DB
--- a/src/Common/HTTPConnectionPool.cpp
+++ b/src/Common/HTTPConnectionPool.cpp
@ -10,14 +10,15 @@
 #include <Common/MemoryTrackerSwitcher.h>
 #include <Common/SipHash.h>

-#include <Poco/Net/HTTPClientSession.h>
-#include <Poco/Net/HTTPStream.h>
-#include <Poco/Net/HTTPFixedLengthStream.h>
 #include <Poco/Net/HTTPChunkedStream.h>
+#include <Poco/Net/HTTPClientSession.h>
+#include <Poco/Net/HTTPFixedLengthStream.h>
+#include <Poco/Net/HTTPRequest.h>
+#include <Poco/Net/HTTPResponse.h>
+#include <Poco/Net/HTTPStream.h>
 #include <Poco/Timespan.h>

-#include <Poco/Net/HTTPResponse.h>
-#include <Poco/Net/HTTPRequest.h>
+#include <queue>

 #include "config.h"

--- a/src/Common/IntervalKind.cpp
+++ b/src/Common/IntervalKind.cpp
@ -1,6 +1,8 @@
 #include <Common/IntervalKind.h>
 #include <Common/Exception.h>

+#include <base/EnumReflection.h>
+

 namespace DB
 {
@ -10,6 +12,11 @@ namespace ErrorCodes
    extern const int BAD_ARGUMENTS;
 }

+/// Returns the name of the stored `kind` enumerator as produced by `magic_enum::enum_name`.
+std::string_view IntervalKind::toString() const
+{
+    return magic_enum::enum_name(kind);
+}
+
 Int64 IntervalKind::toAvgNanoseconds() const
 {
    static constexpr Int64 NANOSECONDS_PER_MICROSECOND = 1000;
--- a/src/Common/IntervalKind.h
+++ b/src/Common/IntervalKind.h
@ -1,7 +1,6 @@
 #pragma once

 #include <base/types.h>
-#include <base/EnumReflection.h>

 namespace DB
 {
@ -27,7 +26,7 @@ struct IntervalKind
    IntervalKind(Kind kind_ = Kind::Second) : kind(kind_) {} /// NOLINT
    operator Kind() const { return kind; } /// NOLINT

-    constexpr std::string_view toString() const { return magic_enum::enum_name(kind); }
+    std::string_view toString() const;

    /// Returns number of nanoseconds in one interval.
    /// For `Month`, `Quarter` and `Year` the function returns an average number of nanoseconds.
--- a/src/Common/Scheduler/IResourceManager.h
+++ b/src/Common/Scheduler/IResourceManager.h
@ -12,6 +12,9 @@
 namespace DB
 {

+class ISchedulerNode;
+using SchedulerNodePtr = std::shared_ptr<ISchedulerNode>;
+
 /*
 * Instance of derived class holds everything required for resource consumption,
 * including resources currently registered at `SchedulerRoot`. This is required to avoid
--- a/src/Common/Scheduler/ResouceLink.cpp
+++ b/src/Common/Scheduler/ResouceLink.cpp
@ -0,0 +1,25 @@
+#include <Common/Scheduler/ISchedulerQueue.h>
+#include <Common/Scheduler/ResourceLink.h>
+#include <Common/Scheduler/ResourceRequest.h>
+
+namespace DB
+{
+/// Forwards the estimated and the real cost to `queue->adjustBudget()`.
+/// Does nothing when the link has no queue.
+void ResourceLink::adjust(ResourceCost estimated_cost, ResourceCost real_cost) const
+{
+    if (queue)
+        queue->adjustBudget(estimated_cost, real_cost);
+}
+
+/// Forwards `cost` to `queue->consumeBudget()`.
+/// Does nothing when the link has no queue.
+void ResourceLink::consumed(ResourceCost cost) const
+{
+    if (queue)
+        queue->consumeBudget(cost);
+}
+
+/// Forwards `cost` to `queue->accumulateBudget()`.
+/// Does nothing when the link has no queue.
+void ResourceLink::accumulate(ResourceCost cost) const
+{
+    if (queue)
+        queue->accumulateBudget(cost);
+}
+}
+
--- a/src/Common/Scheduler/ResourceGuard.h
+++ b/src/Common/Scheduler/ResourceGuard.h
@ -2,9 +2,10 @@

 #include <base/types.h>

+#include <Common/Scheduler/ISchedulerConstraint.h>
+#include <Common/Scheduler/ISchedulerQueue.h>
 #include <Common/Scheduler/ResourceRequest.h>
 #include <Common/Scheduler/ResourceLink.h>
-#include <Common/Scheduler/ISchedulerConstraint.h>

 #include <condition_variable>
 #include <mutex>
--- a/src/Common/Scheduler/ResourceLink.h
+++ b/src/Common/Scheduler/ResourceLink.h
@ -2,12 +2,10 @@

 #include <base/types.h>

-#include <Common/Scheduler/ResourceRequest.h>
-#include <Common/Scheduler/ISchedulerQueue.h>
-
-
 namespace DB
 {
+class ISchedulerQueue;
+using ResourceCost = Int64;

 /*
 * Everything required for resource consumption. Connection to a specific resource queue.
@ -17,23 +15,11 @@ struct ResourceLink
    ISchedulerQueue * queue = nullptr;
    bool operator==(const ResourceLink &) const = default;

-    void adjust(ResourceCost estimated_cost, ResourceCost real_cost) const
-    {
-        if (queue)
-            queue->adjustBudget(estimated_cost, real_cost);
-    }
+    void adjust(ResourceCost estimated_cost, ResourceCost real_cost) const;

-    void consumed(ResourceCost cost) const
-    {
-        if (queue)
-            queue->consumeBudget(cost);
-    }
+    void consumed(ResourceCost cost) const;

-    void accumulate(ResourceCost cost) const
-    {
-        if (queue)
-            queue->accumulateBudget(cost);
-    }
+    void accumulate(ResourceCost cost) const;
 };

 }
--- a/src/Common/SymbolIndex.cpp
+++ b/src/Common/SymbolIndex.cpp
@ -2,6 +2,8 @@

 #include <Common/SymbolIndex.h>
 #include <Common/MemorySanitizer.h>
+#include <base/hex.h>
+#include <base/sort.h>

 #include <algorithm>
 #include <optional>
@ -11,8 +13,6 @@

 #include <filesystem>

-#include <base/sort.h>
-
 /**

 ELF object can contain three different places with symbol names and addresses:
--- a/src/Common/ThreadStatus.h
+++ b/src/Common/ThreadStatus.h
@ -1,12 +1,12 @@
 #pragma once

-#include <Core/SettingsEnums.h>
-#include <Interpreters/Context_fwd.h>
+#include <Core/LogsLevel.h>
 #include <IO/Progress.h>
+#include <Interpreters/Context_fwd.h>
+#include <base/StringRef.h>
 #include <Common/MemoryTracker.h>
 #include <Common/ProfileEvents.h>
 #include <Common/Stopwatch.h>
-#include <base/StringRef.h>

 #include <boost/noncopyable.hpp>

--- a/src/Common/formatIPv6.h
+++ b/src/Common/formatIPv6.h
@ -1,13 +1,12 @@
 #pragma once

-#include <base/types.h>
-#include <cstring>
 #include <algorithm>
+#include <cstring>
 #include <type_traits>
 #include <utility>
-#include <base/range.h>
-#include <base/unaligned.h>
 #include <base/hex.h>
+#include <base/types.h>
+#include <base/unaligned.h>
 #include <Common/StringUtils/StringUtils.h>

 constexpr size_t IPV4_BINARY_LENGTH = 4;
--- a/src/Common/tests/gtest_proxy_configuration_resolver_provider.cpp
+++ b/src/Common/tests/gtest_proxy_configuration_resolver_provider.cpp
@ -4,6 +4,8 @@
 #include <Common/tests/gtest_global_context.h>
 #include <Common/tests/gtest_helper_functions.h>

+#include <Poco/Util/MapConfiguration.h>
+
 using ConfigurationPtr = Poco::AutoPtr<Poco::Util::AbstractConfiguration>;

 class ProxyConfigurationResolverProviderTests : public ::testing::Test
--- a/src/Compression/CompressionFactory.cpp
+++ b/src/Compression/CompressionFactory.cpp
@ -7,6 +7,8 @@
 #include <Poco/String.h>
 #include <IO/ReadBuffer.h>
 #include <Parsers/queryToString.h>
+#include <Parsers/parseQuery.h>
+#include <Parsers/ExpressionElementParsers.h>
 #include <Compression/CompressionCodecMultiple.h>
 #include <Compression/CompressionCodecNone.h>
 #include <IO/WriteHelpers.h>
@ -44,6 +46,12 @@ CompressionCodecPtr CompressionCodecFactory::get(const String & family_name, std
    }
 }

+/// Builds a codec from its textual description, e.g. `LZ4` or `ZSTD(3)`.
+/// The text is upper-cased and wrapped in parentheses before being parsed with `ParserCodec`;
+/// the resulting AST is then resolved without a column type (`nullptr`).
+CompressionCodecPtr CompressionCodecFactory::get(const String & compression_codec) const
+{
+    ParserCodec codec_parser;
+    auto ast = parseQuery(codec_parser, "(" + Poco::toUpper(compression_codec) + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS);
+    return CompressionCodecFactory::instance().get(ast, nullptr);
+}

 CompressionCodecPtr CompressionCodecFactory::get(
    const ASTPtr & ast, const IDataType * column_type, CompressionCodecPtr current_default, bool only_generic) const
--- a/src/Compression/CompressionFactory.h
+++ b/src/Compression/CompressionFactory.h
@ -68,6 +68,9 @@ public:
    /// For backward compatibility with config settings
    CompressionCodecPtr get(const String & family_name, std::optional<int> level) const;

+    /// Get codec by name with optional params. Example: LZ4, ZSTD(3)
+    CompressionCodecPtr get(const String & compression_codec) const;
+
    /// Register codec with parameters and column type
    void registerCompressionCodecWithType(const String & family_name, std::optional<uint8_t> byte_code, CreatorWithType creator);
    /// Register codec with parameters
--- a/src/Core/BaseSettings.h
+++ b/src/Core/BaseSettings.h
@ -7,7 +7,6 @@
 #include <base/range.h>
 #include <boost/blank.hpp>
 #include <unordered_map>
-#include <boost/program_options/options_description.hpp>


 namespace boost::program_options
@ -129,18 +128,6 @@ public:
        std::conditional_t<Traits::allow_custom_settings, const CustomSettingMap::mapped_type*, boost::blank> custom_setting;
    };

-    /// Adds program options to set the settings from a command line.
-    /// (Don't forget to call notify() on the `variables_map` after parsing it!)
-    void addProgramOptions(boost::program_options::options_description & options);
-
-    /// Adds program options as to set the settings from a command line.
-    /// Allows to set one setting multiple times, the last value will be used.
-    /// (Don't forget to call notify() on the `variables_map` after parsing it!)
-    void addProgramOptionsAsMultitokens(boost::program_options::options_description & options);
-
-    void addProgramOption(boost::program_options::options_description & options, std::string_view name, const SettingFieldRef & field);
-    void addProgramOptionAsMultitoken(boost::program_options::options_description & options, std::string_view name, const SettingFieldRef & field);
-
    enum SkipFlags
    {
        SKIP_NONE = 0,
@ -561,57 +548,6 @@ String BaseSettings<TTraits>::toString() const
    return out.str();
 }

-template <typename TTraits>
-void BaseSettings<TTraits>::addProgramOptions(boost::program_options::options_description & options)
-{
-    const auto & settings_to_aliases = TTraits::settingsToAliases();
-    for (const auto & field : all())
-    {
-        std::string_view name = field.getName();
-        addProgramOption(options, name, field);
-
-        if (auto it = settings_to_aliases.find(name); it != settings_to_aliases.end())
-        {
-            for (const auto alias : it->second)
-                addProgramOption(options, alias, field);
-        }
-    }
-}
-
-template <typename TTraits>
-void BaseSettings<TTraits>::addProgramOptionsAsMultitokens(boost::program_options::options_description & options)
-{
-    const auto & settings_to_aliases = TTraits::settingsToAliases();
-    for (const auto & field : all())
-    {
-        std::string_view name = field.getName();
-        addProgramOptionAsMultitoken(options, name, field);
-
-        if (auto it = settings_to_aliases.find(name); it != settings_to_aliases.end())
-        {
-            for (const auto alias : it->second)
-                addProgramOptionAsMultitoken(options, alias, field);
-        }
-    }
-}
-
-
-template <typename TTraits>
-void BaseSettings<TTraits>::addProgramOption(boost::program_options::options_description & options, std::string_view name, const SettingFieldRef & field)
-{
-    auto on_program_option = boost::function1<void, const std::string &>([this, name](const std::string & value) { set(name, value); });
-    options.add(boost::shared_ptr<boost::program_options::option_description>(new boost::program_options::option_description(
-        name.data(), boost::program_options::value<std::string>()->composing()->notifier(on_program_option), field.getDescription())));
-}
-
-template <typename TTraits>
-void BaseSettings<TTraits>::addProgramOptionAsMultitoken(boost::program_options::options_description & options, std::string_view name, const SettingFieldRef & field)
-{
-    auto on_program_option = boost::function1<void, const Strings &>([this, name](const Strings & values) { set(name, values.back()); });
-    options.add(boost::shared_ptr<boost::program_options::option_description>(new boost::program_options::option_description(
-        name.data(), boost::program_options::value<Strings>()->multitoken()->composing()->notifier(on_program_option), field.getDescription())));
-}
-
 template <typename TTraits>
 bool operator==(const BaseSettings<TTraits> & left, const BaseSettings<TTraits> & right)
 {
--- a/src/Core/BaseSettingsProgramOptions.h
+++ b/src/Core/BaseSettingsProgramOptions.h
@ -0,0 +1,60 @@
+#pragma once
+
+#include <Core/Settings.h>
+#include <Core/Types_fwd.h>
+
+#include <boost/program_options.hpp>
+
+namespace DB
+{
+
+/// Registers a single multitoken option `name` in `options` whose notifier writes into `cmd_settings`.
+/// The option may be given several values; only the last one (`values.back()`) is applied.
+/// The notifier captures `cmd_settings` by reference, so the settings object must stay alive
+/// until the notifier has been invoked.
+template <typename T>
+void addProgramOptionAsMultitoken(T &cmd_settings, boost::program_options::options_description & options, std::string_view name, const typename T::SettingFieldRef & field)
+{
+    auto on_program_option = boost::function1<void, const Strings &>([&cmd_settings, name](const Strings & values) { cmd_settings.set(name, values.back()); });
+    /// NOTE(review): `name.data()` is passed without a length, so `name` presumably has to be null-terminated - confirm.
+    options.add(boost::shared_ptr<boost::program_options::option_description>(new boost::program_options::option_description(
+            name.data(), boost::program_options::value<Strings>()->multitoken()->composing()->notifier(on_program_option), field.getDescription())));
+}
+
+/// Adds program options to set the settings from a command line.
+/// Allows to set one setting multiple times, the last value will be used.
+/// (Don't forget to call notify() on the `variables_map` after parsing it!)
+/// Every setting of `cmd_settings` is registered under its own name and under each alias listed
+/// for it in `T::Traits::settingsToAliases()`; aliases reuse the field of the setting they refer to.
+template <typename T>
+void addProgramOptionsAsMultitokens(T &cmd_settings, boost::program_options::options_description & options)
+{
+    const auto & settings_to_aliases = T::Traits::settingsToAliases();
+    for (const auto & field : cmd_settings.all())
+    {
+        std::string_view name = field.getName();
+        addProgramOptionAsMultitoken(cmd_settings, options, name, field);
+
+        if (auto it = settings_to_aliases.find(name); it != settings_to_aliases.end())
+            for (const auto alias : it->second)
+                addProgramOptionAsMultitoken(cmd_settings, options, alias, field);
+    }
+}
+
+/// Adds a single program option `name` that sets the corresponding setting in `cmd_settings` from a command line.
+/// (Don't forget to call notify() on the `variables_map` after parsing it!)
+/// The notifier captures `cmd_settings` by reference, so the settings object must stay alive
+/// until the notifier has been invoked.
+template <typename T>
+void addProgramOption(T &cmd_settings, boost::program_options::options_description & options, std::string_view name, const typename T::SettingFieldRef  & field)
+{
+    auto on_program_option = boost::function1<void, const std::string &>([&cmd_settings, name](const std::string & value) { cmd_settings.set(name, value); });
+    /// NOTE(review): `name.data()` is passed without a length, so `name` presumably has to be null-terminated - confirm.
+    options.add(boost::shared_ptr<boost::program_options::option_description>(new boost::program_options::option_description(
+            name.data(), boost::program_options::value<std::string>()->composing()->notifier(on_program_option), field.getDescription())));
+}
+
+/// Adds program options to set the settings from a command line.
+/// (Don't forget to call notify() on the `variables_map` after parsing it!)
+/// Every setting of `cmd_settings` is registered under its own name and under each alias listed
+/// for it in `T::Traits::settingsToAliases()`; aliases reuse the field of the setting they refer to.
+template <typename T>
+void addProgramOptions(T &cmd_settings, boost::program_options::options_description & options)
+{
+    const auto & settings_to_aliases = T::Traits::settingsToAliases();
+    for (const auto & field : cmd_settings.all())
+    {
+        std::string_view name = field.getName();
+        addProgramOption(cmd_settings, options, name, field);
+
+        if (auto it = settings_to_aliases.find(name); it != settings_to_aliases.end())
+            for (const auto alias : it->second)
+                addProgramOption(cmd_settings, options, alias, field);
+    }
+}
+
+
+}
--- a/src/Core/LogsLevel.h
+++ b/src/Core/LogsLevel.h
@ -0,0 +1,16 @@
+#pragma once
+
+namespace DB
+{
+/// Logging levels. `none` is 0 and each following enumerator takes the next integer value.
+/// NOTE(review): presumably ordered from least to most verbose - confirm against the logger.
+enum class LogsLevel
+{
+    none = 0, /// Disable
+    fatal,
+    error,
+    warning,
+    information,
+    debug,
+    trace,
+    test,
+};
+}
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@ -114,6 +114,7 @@ class IColumn;
    M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \
    M(String, s3queue_default_zookeeper_path, "/clickhouse/s3queue/", "Default zookeeper path prefix for S3Queue engine", 0) \
    M(Bool, s3queue_enable_logging_to_s3queue_log, false, "Enable writing to system.s3queue_log. The value can be overwritten per table with table settings", 0) \
+    M(Bool, s3queue_allow_experimental_sharded_mode, false, "Enable experimental sharded mode of S3Queue table engine. It is experimental because it will be rewritten", 0) \
    M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \
    M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables", 0) \
    M(Bool, hdfs_create_new_file_on_insert, false, "Enables or disables creating a new file on each insert in hdfs engine tables", 0) \
@ -372,6 +373,7 @@ class IColumn;
    M(Bool, opentelemetry_trace_processors, false, "Collect OpenTelemetry spans for processors.", 0) \
    M(Bool, prefer_column_name_to_alias, false, "Prefer using column names instead of aliases if possible.", 0) \
    M(Bool, allow_experimental_analyzer, false, "Allow experimental analyzer", 0) \
+    M(Bool, analyzer_compatibility_join_using_top_level_identifier, false, "Force to resolve identifier in JOIN USING from projection (for example, in `SELECT a + 1 AS b FROM t1 JOIN t2 USING (b)` join will be performed by `t1.a + 1 = t2.b`, rather then `t1.b = t2.b`).", 0) \
    M(Bool, prefer_global_in_and_join, false, "If enabled, all IN/JOIN operators will be rewritten as GLOBAL IN/JOIN. It's useful when the to-be-joined tables are only available on the initiator and we need to always scatter their data on-the-fly during distributed processing with the GLOBAL keyword. It's also useful to reduce the need to access the external sources joining external tables.", 0) \
    M(Bool, enable_vertical_final, true, "If enable, remove duplicated rows during FINAL by marking rows as deleted and filtering them later instead of merging rows", 0) \
    \
@ -450,7 +452,7 @@ class IColumn;
    \
    M(Bool, compatibility_ignore_collation_in_create_table, true, "Compatibility ignore collation in create table", 0) \
    \
-    M(String, temporary_files_codec, "LZ4", "Set compression codec for temporary files (sort and join on disk). I.e. LZ4, NONE.", 0) \
+    M(String, temporary_files_codec, "LZ4", "Set compression codec for temporary files produced by (JOINs, external GROUP BY, external ORDER BY). I.e. LZ4, NONE.", 0) \
    \
    M(UInt64, max_rows_to_transfer, 0, "Maximum size (in rows) of the transmitted external table obtained when the GLOBAL IN/JOIN section is executed.", 0) \
    M(UInt64, max_bytes_to_transfer, 0, "Maximum size (in uncompressed bytes) of the transmitted external table obtained when the GLOBAL IN/JOIN section is executed.", 0) \
--- a/src/Core/SettingsChangesHistory.h
+++ b/src/Core/SettingsChangesHistory.h
@ -97,9 +97,11 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
              {"function_locate_has_mysql_compatible_argument_order", false, true, "Increase compatibility with MySQL's locate function."},
              {"filesystem_cache_reserve_space_wait_lock_timeout_milliseconds", 1000, 1000, "Wait time to lock cache for sapce reservation in filesystem cache"},
              {"max_parser_backtracks", 0, 1000000, "Limiting the complexity of parsing"},
+              {"analyzer_compatibility_join_using_top_level_identifier", false, false, "Force to resolve identifier in JOIN USING from projection"},
              {"keeper_max_retries", 10, 10, "Max retries for general keeper operations"},
              {"keeper_retry_initial_backoff_ms", 100, 100, "Initial backoff timeout for general keeper operations"},
              {"keeper_retry_max_backoff_ms", 5000, 5000, "Max backoff timeout for general keeper operations"},
+              {"s3queue_allow_experimental_sharded_mode", false, false, "Enable experimental sharded mode of S3Queue table engine. It is experimental because it will be rewritten"},
              }},
    {"24.2", {{"allow_suspicious_variant_types", true, false, "Don't allow creating Variant type with suspicious variants by default"},
              {"validate_experimental_and_suspicious_types_inside_nested_types", false, true, "Validate usage of experimental and suspicious types inside nested types"},
@ -109,7 +111,7 @@ static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> sett
              {"query_plan_optimize_prewhere", true, true, "Allow to push down filter to PREWHERE expression for supported storages"},
              {"async_insert_max_data_size", 1000000, 10485760, "The previous value appeared to be too small."},
              {"async_insert_poll_timeout_ms", 10, 10, "Timeout in milliseconds for polling data from asynchronous insert queue"},
-              {"async_insert_use_adaptive_busy_timeout", true, true, "Use adaptive asynchronous insert timeout"},
+              {"async_insert_use_adaptive_busy_timeout", false, true, "Use adaptive asynchronous insert timeout"},
              {"async_insert_busy_timeout_min_ms", 50, 50, "The minimum value of the asynchronous insert timeout in milliseconds; it also serves as the initial value, which may be increased later by the adaptive algorithm"},
              {"async_insert_busy_timeout_max_ms", 200, 200, "The minimum value of the asynchronous insert timeout in milliseconds; async_insert_busy_timeout_ms is aliased to async_insert_busy_timeout_max_ms"},
              {"async_insert_busy_timeout_increase_rate", 0.2, 0.2, "The exponential growth rate at which the adaptive asynchronous insert timeout increases"},
--- a/src/Core/SettingsEnums.h
+++ b/src/Core/SettingsEnums.h
@ -1,12 +1,13 @@
 #pragma once

-#include <Core/SettingsFields.h>
 #include <Core/Joins.h>
-#include <QueryPipeline/SizeLimits.h>
+#include <Core/LogsLevel.h>
+#include <Core/SettingsFields.h>
 #include <Formats/FormatSettings.h>
 #include <IO/ReadSettings.h>
-#include <Common/ShellCommandSettings.h>
 #include <Parsers/ASTSQLSecurity.h>
+#include <QueryPipeline/SizeLimits.h>
+#include <Common/ShellCommandSettings.h>


 namespace DB
@ -90,18 +91,6 @@ DECLARE_SETTING_ENUM_WITH_RENAME(IntervalOutputFormat, FormatSettings::IntervalO

 DECLARE_SETTING_ENUM_WITH_RENAME(ParquetVersion, FormatSettings::ParquetVersion)

-enum class LogsLevel
-{
-    none = 0,    /// Disable
-    fatal,
-    error,
-    warning,
-    information,
-    debug,
-    trace,
-    test,
-};
-
 DECLARE_SETTING_ENUM(LogsLevel)


--- a/src/Daemon/SentryWriter.cpp
+++ b/src/Daemon/SentryWriter.cpp
@ -12,6 +12,7 @@
 #include <Common/StackTrace.h>
 #include <Common/getNumberOfPhysicalCPUCores.h>
 #include <Core/ServerUUID.h>
+#include <IO/WriteHelpers.h>

 #include "config.h"
 #include <Common/config_version.h>
--- a/src/DataTypes/DataTypeDomainBool.cpp
+++ b/src/DataTypes/DataTypeDomainBool.cpp
@ -1,6 +1,7 @@
-#include <DataTypes/Serializations/SerializationBool.h>
-#include <DataTypes/DataTypeFactory.h>
 #include <DataTypes/DataTypeCustom.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/IDataType.h>
+#include <DataTypes/Serializations/SerializationBool.h>

 namespace DB
 {
--- a/src/DataTypes/DataTypeFactory.h
+++ b/src/DataTypes/DataTypeFactory.h
@ -1,6 +1,5 @@
 #pragma once

-#include <DataTypes/IDataType.h>
 #include <Parsers/IAST_fwd.h>
 #include <Common/IFactoryWithAliases.h>
 #include <DataTypes/DataTypeCustom.h>
--- a/src/DataTypes/DataTypeTuple.cpp
+++ b/src/DataTypes/DataTypeTuple.cpp
@ -346,7 +346,7 @@ SerializationPtr DataTypeTuple::getSerialization(const SerializationInfo & info)
    return std::make_shared<SerializationTuple>(std::move(serializations), have_explicit_names);
 }

-MutableSerializationInfoPtr DataTypeTuple::createSerializationInfo(const SerializationInfo::Settings & settings) const
+MutableSerializationInfoPtr DataTypeTuple::createSerializationInfo(const SerializationInfoSettings & settings) const
 {
    MutableSerializationInfos infos;
    infos.reserve(elems.size());
--- a/src/DataTypes/DataTypeTuple.h
+++ b/src/DataTypes/DataTypeTuple.h
@ -58,7 +58,7 @@ public:

    SerializationPtr doGetDefaultSerialization() const override;
    SerializationPtr getSerialization(const SerializationInfo & info) const override;
-    MutableSerializationInfoPtr createSerializationInfo(const SerializationInfo::Settings & settings) const override;
+    MutableSerializationInfoPtr createSerializationInfo(const SerializationInfoSettings & settings) const override;
    SerializationInfoPtr getSerializationInfo(const IColumn & column) const override;

    const DataTypePtr & getElement(size_t i) const { return elems[i]; }
--- a/src/DataTypes/IDataType.cpp
+++ b/src/DataTypes/IDataType.cpp
@ -202,7 +202,7 @@ void IDataType::setCustomization(DataTypeCustomDescPtr custom_desc_) const
        custom_serialization = std::move(custom_desc_->serialization);
 }

-MutableSerializationInfoPtr IDataType::createSerializationInfo(const SerializationInfo::Settings & settings) const
+MutableSerializationInfoPtr IDataType::createSerializationInfo(const SerializationInfoSettings & settings) const
 {
    return std::make_shared<SerializationInfo>(ISerialization::Kind::DEFAULT, settings);
 }
--- a/src/DataTypes/IDataType.h
+++ b/src/DataTypes/IDataType.h
@ -7,8 +7,6 @@
 #include <Common/COW.h>
 #include <DataTypes/DataTypeCustom.h>
 #include <DataTypes/Serializations/ISerialization.h>
-#include <DataTypes/Serializations/SerializationInfo.h>
-

 namespace DB
 {
@ -38,6 +36,11 @@ struct DataTypeWithConstInfo

 using DataTypesWithConstInfo = std::vector<DataTypeWithConstInfo>;

+class SerializationInfo;
+using SerializationInfoPtr = std::shared_ptr<const SerializationInfo>;
+using MutableSerializationInfoPtr = std::shared_ptr<SerializationInfo>;
+struct SerializationInfoSettings;
+
 /** Properties of data type.
  *
  * Contains methods for getting serialization instances.
@ -117,7 +120,7 @@ public:

    Names getSubcolumnNames() const;

-    virtual MutableSerializationInfoPtr createSerializationInfo(const SerializationInfo::Settings & settings) const;
+    virtual MutableSerializationInfoPtr createSerializationInfo(const SerializationInfoSettings & settings) const;
    virtual SerializationInfoPtr getSerializationInfo(const IColumn & column) const;

    /// TODO: support more types.
--- a/src/DataTypes/ObjectUtils.h
+++ b/src/DataTypes/ObjectUtils.h
@ -13,6 +13,7 @@ namespace DB

 struct StorageSnapshot;
 using StorageSnapshotPtr = std::shared_ptr<StorageSnapshot>;
+class ColumnsDescription;

 /// Returns number of dimensions in Array type. 0 if type is not array.
 size_t getNumberOfDimensions(const IDataType & type);
--- a/src/DataTypes/Serializations/SerializationInfo.h
+++ b/src/DataTypes/Serializations/SerializationInfo.h
@ -2,6 +2,8 @@

 #include <Core/Types_fwd.h>
 #include <DataTypes/Serializations/ISerialization.h>
+#include <DataTypes/Serializations/SerializationInfoSettings.h>
+
 #include <Poco/JSON/Object.h>


@ -28,6 +30,8 @@ constexpr auto SERIALIZATION_INFO_VERSION = 0;
 class SerializationInfo
 {
 public:
+    using Settings = SerializationInfoSettings;
+
    struct Data
    {
        size_t num_rows = 0;
@ -38,16 +42,8 @@ public:
        void addDefaults(size_t length);
    };

-    struct Settings
-    {
-        const double ratio_of_defaults_for_sparse = 1.0;
-        const bool choose_kind = false;
-
-        bool isAlwaysDefault() const { return ratio_of_defaults_for_sparse >= 1.0; }
-    };
-
-    SerializationInfo(ISerialization::Kind kind_, const Settings & settings_);
-    SerializationInfo(ISerialization::Kind kind_, const Settings & settings_, const Data & data_);
+    SerializationInfo(ISerialization::Kind kind_, const SerializationInfoSettings & settings_);
+    SerializationInfo(ISerialization::Kind kind_, const SerializationInfoSettings & settings_, const Data & data_);

    virtual ~SerializationInfo() = default;

@ -64,7 +60,7 @@ public:
    virtual std::shared_ptr<SerializationInfo> createWithType(
        const IDataType & old_type,
        const IDataType & new_type,
-        const Settings & new_settings) const;
+        const SerializationInfoSettings & new_settings) const;

    virtual void serialializeKindBinary(WriteBuffer & out) const;
    virtual void deserializeFromKindsBinary(ReadBuffer & in);
@ -73,14 +69,14 @@ public:
    virtual void fromJSON(const Poco::JSON::Object & object);

    void setKind(ISerialization::Kind kind_) { kind = kind_; }
-    const Settings & getSettings() const { return settings; }
+    const SerializationInfoSettings & getSettings() const { return settings; }
    const Data & getData() const { return data; }
    ISerialization::Kind getKind() const { return kind; }

-    static ISerialization::Kind chooseKind(const Data & data, const Settings & settings);
+    static ISerialization::Kind chooseKind(const Data & data, const SerializationInfoSettings & settings);

 protected:
-    const Settings settings;
+    const SerializationInfoSettings settings;

    ISerialization::Kind kind;
    Data data;
@ -96,7 +92,7 @@ using MutableSerializationInfos = std::vector<MutableSerializationInfoPtr>;
 class SerializationInfoByName : public std::map<String, MutableSerializationInfoPtr>
 {
 public:
-    using Settings = SerializationInfo::Settings;
+    using Settings = SerializationInfoSettings;

    SerializationInfoByName() = default;
    SerializationInfoByName(const NamesAndTypesList & columns, const Settings & settings);
--- a/src/DataTypes/Serializations/SerializationInfoSettings.h
+++ b/src/DataTypes/Serializations/SerializationInfoSettings.h
@ -0,0 +1,14 @@
+#pragma once
+
+namespace DB
+{
+
+struct SerializationInfoSettings /// Settings of SerializationInfo; SerializationInfo::Settings is an alias of this struct.
+{
+    const double ratio_of_defaults_for_sparse = 1.0; /// With the default of 1.0, isAlwaysDefault() returns true.
+    const bool choose_kind = false; /// NOTE(review): presumably gates SerializationInfo::chooseKind() - confirm at the use sites.
+
+    bool isAlwaysDefault() const { return ratio_of_defaults_for_sparse >= 1.0; } /// True when the ratio is at least 1.0.
+};
+
+}
--- a/src/DataTypes/tests/gtest_DataType_deserializeAsText.cpp
+++ b/src/DataTypes/tests/gtest_DataType_deserializeAsText.cpp
@ -1,6 +1,7 @@
 #include <Columns/IColumn.h>
 #include <Core/Field.h>
 #include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/IDataType.h>
 #include <Formats/FormatSettings.h>
 #include <IO/ReadBuffer.h>

--- a/src/Databases/DatabaseAtomic.cpp
+++ b/src/Databases/DatabaseAtomic.cpp
@ -11,6 +11,7 @@
 #include <Common/filesystemHelpers.h>
 #include <Storages/StorageMaterializedView.h>
 #include <Interpreters/Context.h>
+#include <Interpreters/DatabaseCatalog.h>
 #include <Interpreters/ExternalDictionariesLoader.h>
 #include <filesystem>
 #include <Interpreters/DDLTask.h>
--- a/src/Databases/DatabaseMemory.cpp
+++ b/src/Databases/DatabaseMemory.cpp
@ -6,6 +6,7 @@
 #include <Databases/DDLDependencyVisitor.h>
 #include <Databases/DDLLoadingDependencyVisitor.h>
 #include <Interpreters/Context.h>
+#include <Interpreters/DatabaseCatalog.h>
 #include <Parsers/ASTCreateQuery.h>
 #include <Parsers/ASTFunction.h>
 #include <Parsers/formatAST.h>
--- a/src/Databases/DatabaseOnDisk.cpp
+++ b/src/Databases/DatabaseOnDisk.cpp
@ -5,8 +5,9 @@
 #include <IO/WriteBufferFromFile.h>
 #include <IO/WriteHelpers.h>
 #include <Interpreters/Context.h>
-#include <Interpreters/InterpreterCreateQuery.h>
 #include <Interpreters/ApplyWithSubqueryVisitor.h>
+#include <Interpreters/DatabaseCatalog.h>
+#include <Interpreters/InterpreterCreateQuery.h>
 #include <Parsers/ASTCreateQuery.h>
 #include <Parsers/ASTFunction.h>
 #include <Parsers/ParserCreateQuery.h>
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@ -20,6 +20,7 @@
 #include <Databases/TablesDependencyGraph.h>
 #include <Interpreters/Cluster.h>
 #include <Interpreters/Context.h>
+#include <Interpreters/DatabaseCatalog.h>
 #include <Interpreters/DDLTask.h>
 #include <Interpreters/evaluateConstantExpression.h>
 #include <Interpreters/executeDDLQueryOnCluster.h>
--- a/src/Databases/DatabasesCommon.cpp
+++ b/src/Databases/DatabasesCommon.cpp
@ -1,6 +1,7 @@
 #include <Databases/DatabasesCommon.h>
 #include <Interpreters/InterpreterCreateQuery.h>
 #include <Interpreters/Context.h>
+#include <Interpreters/DatabaseCatalog.h>
 #include <Parsers/ASTCreateQuery.h>
 #include <Parsers/ASTSelectWithUnionQuery.h>
 #include <Parsers/ParserCreateQuery.h>
--- a/src/Databases/IDatabase.cpp
+++ b/src/Databases/IDatabase.cpp
@ -1,11 +1,12 @@
 #include <memory>
 #include <Databases/IDatabase.h>
-#include <Storages/IStorage.h>
-#include <Parsers/ASTCreateQuery.h>
-#include <Common/quoteString.h>
 #include <Interpreters/DatabaseCatalog.h>
-#include <Common/NamePrompter.h>
+#include <Interpreters/TableNameHints.h>
+#include <Parsers/ASTCreateQuery.h>
+#include <Storages/IStorage.h>
 #include <Common/CurrentMetrics.h>
+#include <Common/NamePrompter.h>
+#include <Common/quoteString.h>


 namespace CurrentMetrics
--- a/src/Disks/ObjectStorages/DiskObjectStorage.cpp
+++ b/src/Disks/ObjectStorages/DiskObjectStorage.cpp
@ -9,6 +9,7 @@
 #include <Common/logger_useful.h>
 #include <Common/filesystemHelpers.h>
 #include <Common/CurrentMetrics.h>
+#include <Common/Scheduler/IResourceManager.h>
 #include <Disks/ObjectStorages/DiskObjectStorageRemoteMetadataRestoreHelper.h>
 #include <Disks/ObjectStorages/DiskObjectStorageTransaction.h>
 #include <Disks/FakeDiskTransaction.h>
--- a/src/Formats/FormatFactory.h
+++ b/src/Formats/FormatFactory.h
@ -23,6 +23,7 @@ namespace DB
 class Block;
 struct Settings;
 struct FormatFactorySettings;
+struct ReadSettings;

 class ReadBuffer;
 class WriteBuffer;
--- a/src/Formats/FormatSchemaInfo.h
+++ b/src/Formats/FormatSchemaInfo.h
@ -8,6 +8,7 @@
 namespace DB
 {
 class Context;
+class Block;

 /// Extracts information about where the format schema file is from passed context and keep it.
 class FormatSchemaInfo
--- a/src/Formats/ReadSchemaUtils.cpp
+++ b/src/Formats/ReadSchemaUtils.cpp
@ -3,6 +3,7 @@
 #include <IO/PeekableReadBuffer.h>
 #include <IO/WithFileSize.h>
 #include <Interpreters/Context.h>
+#include <Interpreters/DatabaseCatalog.h>
 #include <Processors/Formats/ISchemaReader.h>
 #include <Storages/IStorage.h>
 #include <Common/assert_cast.h>
--- a/src/Formats/SchemaInferenceUtils.h
+++ b/src/Formats/SchemaInferenceUtils.h
@ -3,9 +3,15 @@
 #include <DataTypes/IDataType.h>
 #include <IO/ReadBuffer.h>

+#include <vector>
+
 namespace DB
 {

+class Block;
+class NamesAndTypesList;
+using NamesAndTypesLists = std::vector<NamesAndTypesList>;
+
 /// Struct with some additional information about inferred types for JSON formats.
 struct JSONInferenceInfo
 {
--- a/src/Functions/CMakeLists.txt
+++ b/src/Functions/CMakeLists.txt
@ -37,6 +37,7 @@ list (APPEND PUBLIC_LIBS
        clickhouse_dictionaries_embedded
        clickhouse_parsers
        ch_contrib::consistent_hashing
+        common
        dbms
        ch_contrib::metrohash
        ch_contrib::murmurhash
--- a/src/Functions/DateTimeTransforms.h
+++ b/src/Functions/DateTimeTransforms.h
@ -11,6 +11,7 @@
 #include <Columns/ColumnsNumber.h>
 #include <Columns/ColumnVector.h>
 #include <Columns/ColumnDecimal.h>
+#include <Formats/FormatSettings.h>
 #include <Functions/FunctionHelpers.h>
 #include <Functions/IFunction.h>
 #include <Functions/extractTimeZoneFromFunctionArguments.h>
--- a/src/Functions/FunctionJoinGet.cpp
+++ b/src/Functions/FunctionJoinGet.cpp
@ -1,12 +1,13 @@
 #include <Columns/ColumnString.h>
+#include <Core/Block.h>
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionHelpers.h>
-#include <Interpreters/Context.h>
-#include <Interpreters/HashJoin.h>
 #include <Functions/IFunction.h>
+#include <Interpreters/Context.h>
+#include <Interpreters/DatabaseCatalog.h>
+#include <Interpreters/HashJoin.h>
 #include <Storages/StorageJoin.h>
 #include <Storages/TableLockHolder.h>
-#include <Core/Block.h>


 namespace DB
--- a/src/Functions/FunctionsStringHashFixedString.cpp
+++ b/src/Functions/FunctionsStringHashFixedString.cpp
@ -274,7 +274,7 @@ public:
            const typename ColumnIPv6::Container & data = col_from_ip->getData();
            const auto size = col_from_ip->size();
            auto & chars_to = col_to->getChars();
-            const auto length = IPV6_BINARY_LENGTH;
+            const auto length = sizeof(IPv6::UnderlyingType);
            chars_to.resize(size * Impl::length);
            for (size_t i = 0; i < size; ++i)
            {
--- a/src/Functions/appendTrailingCharIfAbsent.cpp
+++ b/src/Functions/appendTrailingCharIfAbsent.cpp
@ -1,9 +1,10 @@
 #include <Columns/ColumnString.h>
-#include <Common/assert_cast.h>
 #include <DataTypes/DataTypeString.h>
 #include <Functions/FunctionFactory.h>
 #include <Functions/FunctionHelpers.h>
 #include <Functions/IFunction.h>
+#include <base/range.h>
+#include <Common/assert_cast.h>


 namespace DB
--- a/src/Functions/array/arrayDistance.cpp
+++ b/src/Functions/array/arrayDistance.cpp
@ -18,11 +18,11 @@ namespace DB
 {
 namespace ErrorCodes
 {
+    extern const int ARGUMENT_OUT_OF_BOUND;
    extern const int ILLEGAL_COLUMN;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int LOGICAL_ERROR;
    extern const int SIZES_OF_ARRAYS_DONT_MATCH;
-    extern const int ARGUMENT_OUT_OF_BOUND;
 }

 struct L1Distance
@ -357,7 +357,7 @@ public:
                throw Exception(
                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                    "Arguments of function {} has nested type {}. "
-                    "Support: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.",
+                    "Supported types: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.",
                    getName(),
                    common_type->getName());
        }
@ -379,17 +379,17 @@ public:
    }


-#define SUPPORTED_TYPES(action) \
-    action(UInt8)   \
-    action(UInt16)  \
-    action(UInt32)  \
-    action(UInt64)  \
-    action(Int8)    \
-    action(Int16)   \
-    action(Int32)   \
-    action(Int64)   \
-    action(Float32) \
-    action(Float64)
+#define SUPPORTED_TYPES(ACTION) \
+    ACTION(UInt8)   \
+    ACTION(UInt16)  \
+    ACTION(UInt32)  \
+    ACTION(UInt64)  \
+    ACTION(Int8)    \
+    ACTION(Int16)   \
+    ACTION(Int32)   \
+    ACTION(Int64)   \
+    ACTION(Float32) \
+    ACTION(Float64) /// Expands ACTION once per listed type; passed ON_TYPE below to generate the `case TypeIndex::...` labels.


 private:
@ -398,12 +398,11 @@ private:
    {
        DataTypePtr type_x = typeid_cast<const DataTypeArray *>(arguments[0].type.get())->getNestedType();

-        /// Dynamic disaptch based on the 1st argument type
        switch (type_x->getTypeId())
        {
        #define ON_TYPE(type) \
            case TypeIndex::type: \
-                return executeWithFirstType<ResultType, type>(arguments, input_rows_count); \
+                return executeWithResultTypeAndLeftType<ResultType, type>(arguments, input_rows_count); \
                break;

            SUPPORTED_TYPES(ON_TYPE)
@ -413,23 +412,22 @@ private:
                throw Exception(
                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                    "Arguments of function {} has nested type {}. "
-                    "Support: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.",
+                    "Supported types: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.",
                    getName(),
                    type_x->getName());
        }
    }

-    template <typename ResultType, typename FirstArgType>
-    ColumnPtr executeWithFirstType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const
+    template <typename ResultType, typename LeftType>
+    ColumnPtr executeWithResultTypeAndLeftType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const
    {
        DataTypePtr type_y = typeid_cast<const DataTypeArray *>(arguments[1].type.get())->getNestedType();

-        /// Dynamic disaptch based on the 2nd argument type
        switch (type_y->getTypeId())
        {
        #define ON_TYPE(type) \
            case TypeIndex::type: \
-                return executeWithTypes<ResultType, FirstArgType, type>(arguments[0].column, arguments[1].column, input_rows_count, arguments); \
+                return executeWithResultTypeAndLeftTypeAndRightType<ResultType, LeftType, type>(arguments[0].column, arguments[1].column, input_rows_count, arguments); \
                break;

            SUPPORTED_TYPES(ON_TYPE)
@ -439,59 +437,43 @@ private:
                throw Exception(
                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                    "Arguments of function {} has nested type {}. "
-                    "Support: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.",
+                    "Supported types: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.",
                    getName(),
                    type_y->getName());
        }
    }

-    template <typename ResultType, typename FirstArgType, typename SecondArgType>
-    ColumnPtr executeWithTypes(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count, const ColumnsWithTypeAndName & arguments) const
+    template <typename ResultType, typename LeftType, typename RightType>
+    ColumnPtr executeWithResultTypeAndLeftTypeAndRightType(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count, const ColumnsWithTypeAndName & arguments) const
    {
        if (typeid_cast<const ColumnConst *>(col_x.get()))
        {
-            return executeWithTypesFirstArgConst<ResultType, FirstArgType, SecondArgType>(col_x, col_y, input_rows_count, arguments);
+            return executeWithLeftArgConst<ResultType, LeftType, RightType>(col_x, col_y, input_rows_count, arguments);
        }
        else if (typeid_cast<const ColumnConst *>(col_y.get()))
        {
-            return executeWithTypesFirstArgConst<ResultType, SecondArgType, FirstArgType>(col_y, col_x, input_rows_count, arguments);
+            return executeWithLeftArgConst<ResultType, RightType, LeftType>(col_y, col_x, input_rows_count, arguments);
        }

-        col_x = col_x->convertToFullColumnIfConst();
-        col_y = col_y->convertToFullColumnIfConst();
-
        const auto & array_x = *assert_cast<const ColumnArray *>(col_x.get());
        const auto & array_y = *assert_cast<const ColumnArray *>(col_y.get());

-        const auto & data_x = typeid_cast<const ColumnVector<FirstArgType> &>(array_x.getData()).getData();
-        const auto & data_y = typeid_cast<const ColumnVector<SecondArgType> &>(array_y.getData()).getData();
+        const auto & data_x = typeid_cast<const ColumnVector<LeftType> &>(array_x.getData()).getData();
+        const auto & data_y = typeid_cast<const ColumnVector<RightType> &>(array_y.getData()).getData();

        const auto & offsets_x = array_x.getOffsets();
-        const auto & offsets_y = array_y.getOffsets();

-        /// Check that arrays in both columns are the sames size
-        for (size_t row = 0; row < offsets_x.size(); ++row)
-        {
-            if (offsets_x[row] != offsets_y[row]) [[unlikely]]
-            {
-                ColumnArray::Offset prev_offset = row > 0 ? offsets_x[row] : 0;
-                throw Exception(
-                    ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
-                    "Arguments of function {} have different array sizes: {} and {}",
-                    getName(),
-                    offsets_x[row] - prev_offset,
-                    offsets_y[row] - prev_offset);
-            }
-        }
+        if (!array_x.hasEqualOffsets(array_y))
+            throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Array arguments for function {} must have equal sizes", getName());

        const typename Kernel::ConstParams kernel_params = initConstParams(arguments);

-        auto result = ColumnVector<ResultType>::create(input_rows_count);
-        auto & result_data = result->getData();
+        auto col_res = ColumnVector<ResultType>::create(input_rows_count);
+        auto & result_data = col_res->getData();

-        /// Do the actual computation
        ColumnArray::Offset prev = 0;
        size_t row = 0;
+
        for (auto off : offsets_x)
        {
            /// Process chunks in vectorized manner
@ -517,12 +499,12 @@ private:
            result_data[row] = Kernel::finalize(state, kernel_params);
            row++;
        }
-        return result;
+        return col_res;
    }

    /// Special case when the 1st parameter is Const
-    template <typename ResultType, typename FirstArgType, typename SecondArgType>
-    ColumnPtr executeWithTypesFirstArgConst(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count, const ColumnsWithTypeAndName & arguments) const
+    template <typename ResultType, typename LeftType, typename RightType>
+    ColumnPtr executeWithLeftArgConst(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count, const ColumnsWithTypeAndName & arguments) const
    {
        col_x = assert_cast<const ColumnConst *>(col_x.get())->getDataColumnPtr();
        col_y = col_y->convertToFullColumnIfConst();
@ -530,26 +512,25 @@ private:
        const auto & array_x = *assert_cast<const ColumnArray *>(col_x.get());
        const auto & array_y = *assert_cast<const ColumnArray *>(col_y.get());

-        const auto & data_x = typeid_cast<const ColumnVector<FirstArgType> &>(array_x.getData()).getData();
-        const auto & data_y = typeid_cast<const ColumnVector<SecondArgType> &>(array_y.getData()).getData();
+        const auto & data_x = typeid_cast<const ColumnVector<LeftType> &>(array_x.getData()).getData();
+        const auto & data_y = typeid_cast<const ColumnVector<RightType> &>(array_y.getData()).getData();

        const auto & offsets_x = array_x.getOffsets();
        const auto & offsets_y = array_y.getOffsets();

-        /// Check that arrays in both columns are the sames size
        ColumnArray::Offset prev_offset = 0;
-        for (size_t row : collections::range(0, offsets_y.size()))
+        for (auto offset_y : offsets_y)
        {
-            if (offsets_x[0] != offsets_y[row] - prev_offset) [[unlikely]]
+            if (offsets_x[0] != offset_y - prev_offset) [[unlikely]]
            {
                throw Exception(
                    ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
                    "Arguments of function {} have different array sizes: {} and {}",
                    getName(),
                    offsets_x[0],
-                    offsets_y[row] - prev_offset);
+                    offset_y - prev_offset);
            }
-            prev_offset = offsets_y[row];
+            prev_offset = offset_y;
        }

        const typename Kernel::ConstParams kernel_params = initConstParams(arguments);
@ -557,7 +538,6 @@ private:
        auto result = ColumnVector<ResultType>::create(input_rows_count);
        auto & result_data = result->getData();

-        /// Do the actual computation
        size_t prev = 0;
        size_t row = 0;

@ -574,7 +554,7 @@ private:
            /// - the two most common metrics L2 and cosine distance,
            /// - the most powerful SIMD instruction set (AVX-512F).
 #if USE_MULTITARGET_CODE
-            if constexpr (std::is_same_v<ResultType, FirstArgType> && std::is_same_v<ResultType, SecondArgType>) /// ResultType is Float32 or Float64
+            if constexpr (std::is_same_v<ResultType, LeftType> && std::is_same_v<ResultType, RightType>) /// ResultType is Float32 or Float64
            {
                if constexpr (std::is_same_v<Kernel, L2Distance>
                           || std::is_same_v<Kernel, CosineDistance>)
--- a/src/Functions/array/arrayDotProduct.cpp
+++ b/src/Functions/array/arrayDotProduct.cpp
@ -18,10 +18,9 @@ namespace DB

 namespace ErrorCodes
 {
-    extern const int BAD_ARGUMENTS;
-    extern const int ILLEGAL_COLUMN;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int LOGICAL_ERROR;
+    extern const int SIZES_OF_ARRAYS_DONT_MATCH;
 }


@ -141,6 +140,7 @@ public:
    static FunctionPtr create(ContextPtr) { return std::make_shared<FunctionArrayScalarProduct>(); }
    size_t getNumberOfArguments() const override { return 2; }
    bool isSuitableForShortCircuitArgumentsExecution(const DataTypesWithConstInfo & /*arguments*/) const override { return true; }
+    bool useDefaultImplementationForConstants() const override { return true; }

    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
    {
@ -163,26 +163,29 @@ public:
        return Kernel::getReturnType(nested_types[0], nested_types[1]);
    }

-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override
+#define SUPPORTED_TYPES(ACTION) \
+    ACTION(UInt8) \
+    ACTION(UInt16) \
+    ACTION(UInt32) \
+    ACTION(UInt64) \
+    ACTION(Int8) \
+    ACTION(Int16) \
+    ACTION(Int32) \
+    ACTION(Int64) \
+    ACTION(Float32) \
+    ACTION(Float64) /// Expands ACTION once per listed type; keep in sync with the types named in the ILLEGAL_TYPE_OF_ARGUMENT messages below.
+
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
    {
        switch (result_type->getTypeId())
        {
-        #define SUPPORTED_TYPE(type) \
+        #define ON_TYPE(type) \
            case TypeIndex::type: \
-                return executeWithResultType<type>(arguments); \
+                return executeWithResultType<type>(arguments, input_rows_count); \
                break;

-            SUPPORTED_TYPE(UInt8)
-            SUPPORTED_TYPE(UInt16)
-            SUPPORTED_TYPE(UInt32)
-            SUPPORTED_TYPE(UInt64)
-            SUPPORTED_TYPE(Int8)
-            SUPPORTED_TYPE(Int16)
-            SUPPORTED_TYPE(Int32)
-            SUPPORTED_TYPE(Int64)
-            SUPPORTED_TYPE(Float32)
-            SUPPORTED_TYPE(Float64)
-        #undef SUPPORTED_TYPE
+            SUPPORTED_TYPES(ON_TYPE)
+        #undef ON_TYPE

            default:
                throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected result type {}", result_type->getName());
@ -191,90 +194,150 @@ public:

 private:
    template <typename ResultType>
-    ColumnPtr executeWithResultType(const ColumnsWithTypeAndName & arguments) const
+    ColumnPtr executeWithResultType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const
    {
-        ColumnPtr res;
-        if (!((res = executeWithResultTypeAndLeft<ResultType, UInt8>(arguments))
-            || (res = executeWithResultTypeAndLeft<ResultType, UInt16>(arguments))
-            || (res = executeWithResultTypeAndLeft<ResultType, UInt32>(arguments))
-            || (res = executeWithResultTypeAndLeft<ResultType, UInt64>(arguments))
-            || (res = executeWithResultTypeAndLeft<ResultType, Int8>(arguments))
-            || (res = executeWithResultTypeAndLeft<ResultType, Int16>(arguments))
-            || (res = executeWithResultTypeAndLeft<ResultType, Int32>(arguments))
-            || (res = executeWithResultTypeAndLeft<ResultType, Int64>(arguments))
-            || (res = executeWithResultTypeAndLeft<ResultType, Float32>(arguments))
-            || (res = executeWithResultTypeAndLeft<ResultType, Float64>(arguments))))
-            throw Exception(ErrorCodes::ILLEGAL_COLUMN,
-                "Illegal column {} of first argument of function {}", arguments[0].column->getName(), getName());
+        DataTypePtr type_x = typeid_cast<const DataTypeArray *>(arguments[0].type.get())->getNestedType();

-        return res;
+        switch (type_x->getTypeId())
+        {
+#define ON_TYPE(type) \
+            case TypeIndex::type: \
+                return executeWithResultTypeAndLeftType<ResultType, type>(arguments, input_rows_count); \
+                break;
+
+            SUPPORTED_TYPES(ON_TYPE)
+#undef ON_TYPE
+
+            default:
+                throw Exception(
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                    "Arguments of function {} has nested type {}. "
+                    "Supported types: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.",
+                    getName(),
+                    type_x->getName());
+        }
    }

    template <typename ResultType, typename LeftType>
-    ColumnPtr executeWithResultTypeAndLeft(const ColumnsWithTypeAndName & arguments) const
+    ColumnPtr executeWithResultTypeAndLeftType(const ColumnsWithTypeAndName & arguments, size_t input_rows_count) const
    {
-        ColumnPtr res;
-        if (   (res = executeWithResultTypeAndLeftAndRight<ResultType, LeftType, UInt8>(arguments))
-            || (res = executeWithResultTypeAndLeftAndRight<ResultType, LeftType, UInt16>(arguments))
-            || (res = executeWithResultTypeAndLeftAndRight<ResultType, LeftType, UInt32>(arguments))
-            || (res = executeWithResultTypeAndLeftAndRight<ResultType, LeftType, UInt64>(arguments))
-            || (res = executeWithResultTypeAndLeftAndRight<ResultType, LeftType, Int8>(arguments))
-            || (res = executeWithResultTypeAndLeftAndRight<ResultType, LeftType, Int16>(arguments))
-            || (res = executeWithResultTypeAndLeftAndRight<ResultType, LeftType, Int32>(arguments))
-            || (res = executeWithResultTypeAndLeftAndRight<ResultType, LeftType, Int64>(arguments))
-            || (res = executeWithResultTypeAndLeftAndRight<ResultType, LeftType, Float32>(arguments))
-            || (res = executeWithResultTypeAndLeftAndRight<ResultType, LeftType, Float64>(arguments)))
-            return res;
+        DataTypePtr type_y = typeid_cast<const DataTypeArray *>(arguments[1].type.get())->getNestedType();

-       return nullptr;
+        switch (type_y->getTypeId())
+        {
+        #define ON_TYPE(type) \
+            case TypeIndex::type: \
+                return executeWithResultTypeAndLeftTypeAndRightType<ResultType, LeftType, type>(arguments[0].column, arguments[1].column, input_rows_count); \
+                break;
+
+            SUPPORTED_TYPES(ON_TYPE)
+        #undef ON_TYPE
+
+            default:
+                throw Exception(
+                    ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
+                    "Arguments of function {} has nested type {}. "
+                    "Supported types: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64, Float32, Float64.",
+                    getName(),
+                    type_y->getName());
+        }
    }

    template <typename ResultType, typename LeftType, typename RightType>
-    ColumnPtr executeWithResultTypeAndLeftAndRight(const ColumnsWithTypeAndName & arguments) const
+    ColumnPtr executeWithResultTypeAndLeftTypeAndRightType(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count) const
    {
-        ColumnPtr col_left = arguments[0].column->convertToFullColumnIfConst();
-        ColumnPtr col_right = arguments[1].column->convertToFullColumnIfConst();
-        if (!col_left || !col_right)
-            return nullptr;
+        if (typeid_cast<const ColumnConst *>(col_x.get()))
+        {
+            return executeWithLeftArgConst<ResultType, LeftType, RightType>(col_x, col_y, input_rows_count);
+        }
+        else if (typeid_cast<const ColumnConst *>(col_y.get()))
+        {
+            return executeWithLeftArgConst<ResultType, RightType, LeftType>(col_y, col_x, input_rows_count);
+        }

-        const ColumnArray * col_arr_left = checkAndGetColumn<ColumnArray>(col_left.get());
-        const ColumnArray * cokl_arr_right = checkAndGetColumn<ColumnArray>(col_right.get());
-        if (!col_arr_left || !cokl_arr_right)
-            return nullptr;
+        const auto & array_x = *assert_cast<const ColumnArray *>(col_x.get());
+        const auto & array_y = *assert_cast<const ColumnArray *>(col_y.get());

-        const ColumnVector<LeftType> * col_arr_nested_left = checkAndGetColumn<ColumnVector<LeftType>>(col_arr_left->getData());
-        const ColumnVector<RightType> * col_arr_nested_right = checkAndGetColumn<ColumnVector<RightType>>(cokl_arr_right->getData());
-        if (!col_arr_nested_left || !col_arr_nested_right)
-            return nullptr;
+        const auto & data_x = typeid_cast<const ColumnVector<LeftType> &>(array_x.getData()).getData();
+        const auto & data_y = typeid_cast<const ColumnVector<RightType> &>(array_y.getData()).getData();

-        if (!col_arr_left->hasEqualOffsets(*cokl_arr_right))
-            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Array arguments for function {} must have equal sizes", getName());
+        const auto & offsets_x = array_x.getOffsets();

-        auto col_res = ColumnVector<ResultType>::create();
+        if (!array_x.hasEqualOffsets(array_y))
+            throw Exception(ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH, "Array arguments for function {} must have equal sizes", getName());

-        vector(
-            col_arr_nested_left->getData(),
-            col_arr_nested_right->getData(),
-            col_arr_left->getOffsets(),
-            col_res->getData());
+        auto col_res = ColumnVector<ResultType>::create(input_rows_count);
+        auto & result_data = col_res->getData();
+
+        ColumnArray::Offset current_offset = 0;
+        for (size_t row = 0; row < input_rows_count; ++row)
+        {
+            const size_t array_size = offsets_x[row] - current_offset;
+
+            size_t i = 0;
+
+            /// Process chunks in vectorized manner
+            static constexpr size_t VEC_SIZE = 4;
+            typename Kernel::template State<ResultType> states[VEC_SIZE];
+            for (; i + VEC_SIZE < array_size; i += VEC_SIZE)
+            {
+                for (size_t j = 0; j < VEC_SIZE; ++j)
+                    Kernel::template accumulate<ResultType>(states[j], static_cast<ResultType>(data_x[current_offset + i + j]), static_cast<ResultType>(data_y[current_offset + i + j]));
+            }
+
+            typename Kernel::template State<ResultType> state;
+            for (const auto & other_state : states)
+                Kernel::template combine<ResultType>(state, other_state);
+
+            /// Process the tail
+            for (; i < array_size; ++i)
+                Kernel::template accumulate<ResultType>(state, static_cast<ResultType>(data_x[current_offset + i]), static_cast<ResultType>(data_y[current_offset + i]));
+
+            result_data[row] = Kernel::template finalize<ResultType>(state);
+
+            current_offset = offsets_x[row];
+        }

        return col_res;
    }

    template <typename ResultType, typename LeftType, typename RightType>
-    static void vector(
-        const PaddedPODArray<LeftType> & left,
-        const PaddedPODArray<RightType> & right,
-        const ColumnArray::Offsets & offsets,
-        PaddedPODArray<ResultType> & result)
+    ColumnPtr executeWithLeftArgConst(ColumnPtr col_x, ColumnPtr col_y, size_t input_rows_count) const
    {
-        size_t size = offsets.size();
-        result.resize(size);
+        col_x = assert_cast<const ColumnConst *>(col_x.get())->getDataColumnPtr();
+        col_y = col_y->convertToFullColumnIfConst();
+
+        const auto & array_x = *assert_cast<const ColumnArray *>(col_x.get());
+        const auto & array_y = *assert_cast<const ColumnArray *>(col_y.get());
+
+        const auto & data_x = typeid_cast<const ColumnVector<LeftType> &>(array_x.getData()).getData();
+        const auto & data_y = typeid_cast<const ColumnVector<RightType> &>(array_y.getData()).getData();
+
+        const auto & offsets_x = array_x.getOffsets();
+        const auto & offsets_y = array_y.getOffsets();
+
+        ColumnArray::Offset prev_offset = 0;
+        for (auto offset_y : offsets_y)
+        {
+            if (offsets_x[0] != offset_y - prev_offset) [[unlikely]]
+            {
+                throw Exception(
+                    ErrorCodes::SIZES_OF_ARRAYS_DONT_MATCH,
+                    "Arguments of function {} have different array sizes: {} and {}",
+                    getName(),
+                    offsets_x[0],
+                    offset_y - prev_offset);
+            }
+            prev_offset = offset_y;
+        }
+
+        auto col_res = ColumnVector<ResultType>::create(input_rows_count);
+        auto & result = col_res->getData();

        ColumnArray::Offset current_offset = 0;
-        for (size_t row = 0; row < size; ++row)
+        for (size_t row = 0; row < input_rows_count; ++row)
        {
-            size_t array_size = offsets[row] - current_offset;
+            const size_t array_size = offsets_x[0];

            typename Kernel::template State<ResultType> state;
            size_t i = 0;
@ -283,13 +346,14 @@ private:
            /// To avoid combinatorial explosion of SIMD kernels, focus on
            /// - the two most common input/output types (Float32 x Float32) --> Float32 and (Float64 x Float64) --> Float64 instead of 10 x
            ///   10 input types x 8 output types,
+            /// - const/non-const inputs instead of non-const/non-const inputs
            /// - the most powerful SIMD instruction set (AVX-512F).
 #if USE_MULTITARGET_CODE
            if constexpr ((std::is_same_v<ResultType, Float32> || std::is_same_v<ResultType, Float64>)
                            && std::is_same_v<ResultType, LeftType> && std::is_same_v<LeftType, RightType>)
            {
                if (isArchSupported(TargetArch::AVX512F))
-                    Kernel::template accumulateCombine<ResultType>(&left[current_offset], &right[current_offset], array_size, i, state);
+                    Kernel::template accumulateCombine<ResultType>(&data_x[0], &data_y[current_offset], array_size, i, state);
            }
 #else
            /// Process chunks in vectorized manner
@ -298,7 +362,7 @@ private:
            for (; i + VEC_SIZE < array_size; i += VEC_SIZE)
            {
                for (size_t j = 0; j < VEC_SIZE; ++j)
-                    Kernel::template accumulate<ResultType>(states[j], static_cast<ResultType>(left[i + j]), static_cast<ResultType>(right[i + j]));
+                    Kernel::template accumulate<ResultType>(states[j], static_cast<ResultType>(data_x[i + j]), static_cast<ResultType>(data_y[current_offset + i + j]));
            }

            for (const auto & other_state : states)
@ -307,13 +371,14 @@ private:

            /// Process the tail
            for (; i < array_size; ++i)
-                Kernel::template accumulate<ResultType>(state, static_cast<ResultType>(left[i]), static_cast<ResultType>(right[i]));
+                Kernel::template accumulate<ResultType>(state, static_cast<ResultType>(data_x[i]), static_cast<ResultType>(data_y[current_offset + i]));

-            /// ResultType res = Kernel::template finalize<ResultType>(state);
            result[row] = Kernel::template finalize<ResultType>(state);

-            current_offset = offsets[row];
+            current_offset = offsets_y[row];
        }
+
+        return col_res;
    }
 };

--- a/src/Functions/array/arrayIntersect.cpp
+++ b/src/Functions/array/arrayIntersect.cpp
@ -20,6 +20,7 @@
 #include <Columns/ColumnTuple.h>
 #include <Common/HashTable/ClearableHashMap.h>
 #include <Common/assert_cast.h>
+#include <base/range.h>
 #include <base/TypeLists.h>
 #include <Interpreters/castColumn.h>

--- a/src/Functions/array/arrayNorm.cpp
+++ b/src/Functions/array/arrayNorm.cpp
@ -175,8 +175,7 @@ public:
        }
    }

-    ColumnPtr
-    executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
    {
        DataTypePtr type = typeid_cast<const DataTypeArray *>(arguments[0].type.get())->getNestedType();
        ColumnPtr column = arguments[0].column->convertToFullColumnIfConst();
--- a/src/Functions/array/arraySort.cpp
+++ b/src/Functions/array/arraySort.cpp
@ -46,7 +46,10 @@ ColumnPtr ArraySortImpl<positive, is_partial>::execute(
                    ErrorCodes::LOGICAL_ERROR,
                    "Expected fixed arguments to get the limit for partial array sort"
                );
-            return fixed_arguments[0].column.get()->getUInt(0);
+
+            /// During dryRun the input column might be empty
+            if (!fixed_arguments[0].column->empty())
+                return fixed_arguments[0].column->getUInt(0);
        }
        return 0;
    }();
--- a/src/Functions/countDigits.cpp
+++ b/src/Functions/countDigits.cpp
@ -20,6 +20,40 @@ namespace ErrorCodes
 namespace
 {

+/// Returns how many decimal digits are needed to write `x` (zero counts as one digit).
+/// Values of up to twelve digits are resolved with a handful of comparisons; longer values
+/// account for their twelve low digits and recurse on the remaining high part.
+template <typename T>
+int digits10(T x)
+{
+    /// One to three digits.
+    if (x < 1000ULL)
+    {
+        if (x < 10ULL)
+            return 1;
+        return x < 100ULL ? 2 : 3;
+    }
+
+    /// Thirteen digits or more.
+    if (x >= 1000000000000ULL)
+        return 12 + digits10(x / 1000000000000ULL);
+
+    /// Nine to twelve digits.
+    if (x >= 100000000ULL)
+    {
+        if (x >= 10000000000ULL)
+            return x >= 100000000000ULL ? 12 : 11;
+        return x >= 1000000000ULL ? 10 : 9;
+    }
+
+    /// Seven or eight digits.
+    if (x >= 1000000ULL)
+        return x >= 10000000ULL ? 8 : 7;
+
+    /// Four to six digits.
+    if (x < 10000ULL)
+        return 4;
+    return x >= 100000ULL ? 6 : 5;
+}
+
 /// Returns number of decimal digits you need to represent the value.
 /// For Decimal values takes in account their scales: calculates result over underlying int type which is (value * scale).
 /// countDigits(42) = 2, countDigits(42.000) = 5, countDigits(0.04200) = 4.
--- a/src/Functions/dateDiff.cpp
+++ b/src/Functions/dateDiff.cpp
@ -6,7 +6,7 @@
 #include <Columns/ColumnsDateTime.h>
 #include <Columns/ColumnsNumber.h>
 #include <Columns/ColumnDecimal.h>
-
+#include <Formats/FormatSettings.h>
 #include <Functions/IFunction.h>
 #include <Functions/FunctionHelpers.h>
 #include <Functions/FunctionFactory.h>
--- a/src/Functions/date_trunc.cpp
+++ b/src/Functions/date_trunc.cpp
@ -1,9 +1,10 @@
 #include <Columns/ColumnConst.h>
-#include <Columns/ColumnsNumber.h>
 #include <Columns/ColumnString.h>
+#include <Columns/ColumnsNumber.h>
 #include <DataTypes/DataTypeDate.h>
 #include <DataTypes/DataTypeDateTime.h>
 #include <DataTypes/DataTypeInterval.h>
+#include <Formats/FormatSettings.h>
 #include <Functions/DateTimeTransforms.h>
 #include <Functions/FunctionFactory.h>

--- a/src/Functions/hasColumnInTable.cpp
+++ b/src/Functions/hasColumnInTable.cpp
@ -7,6 +7,7 @@
 #include <Storages/IStorage.h>
 #include <Interpreters/Cluster.h>
 #include <Interpreters/Context.h>
+#include <Interpreters/DatabaseCatalog.h>
 #include <Storages/getStructureOfRemoteTable.h>


--- a/src/Functions/keyvaluepair/ArgumentExtractor.h
+++ b/src/Functions/keyvaluepair/ArgumentExtractor.h
@ -4,6 +4,7 @@
 #include <Columns/ColumnsNumber.h>
 #include <Core/ColumnsWithTypeAndName.h>

+#include <list>
 #include <optional>

 namespace DB
--- a/src/Functions/tupleConcat.cpp
+++ b/src/Functions/tupleConcat.cpp
@ -4,6 +4,8 @@
 #include <Functions/FunctionHelpers.h>
 #include <Functions/IFunction.h>

+#include <base/range.h>
+
 namespace DB
 {
 namespace ErrorCodes
--- a/src/IO/CachedInMemoryReadBufferFromFile.h
+++ b/src/IO/CachedInMemoryReadBufferFromFile.h
@ -1,7 +1,8 @@
 #pragma once

-#include <Common/PageCache.h>
 #include <IO/ReadBufferFromFileBase.h>
+#include <IO/ReadSettings.h>
+#include <Common/PageCache.h>

 namespace DB
 {
--- a/src/IO/ReadHelpers.h
+++ b/src/IO/ReadHelpers.h
@ -154,9 +154,12 @@ inline void readIPv6Binary(IPv6 & ip, ReadBuffer & buf)
    size_t size = 0;
    readVarUInt(size, buf);

-    if (size != IPV6_BINARY_LENGTH)
-        throw Exception(ErrorCodes::SIZE_OF_FIXED_STRING_DOESNT_MATCH,
-                        "Size of the string {} doesn't match size of binary IPv6 {}", size, IPV6_BINARY_LENGTH);
+    if (size != sizeof(IPv6::UnderlyingType))
+        throw Exception(
+            ErrorCodes::SIZE_OF_FIXED_STRING_DOESNT_MATCH,
+            "Size of the string {} doesn't match size of binary IPv6 {}",
+            size,
+            sizeof(IPv6::UnderlyingType));

    buf.readStrict(reinterpret_cast<char*>(&ip.toUnderType()), size);
 }
--- a/src/IO/S3/copyS3File.h
+++ b/src/IO/S3/copyS3File.h
@ -14,6 +14,7 @@

 namespace DB
 {
+struct ReadSettings;
 class SeekableReadBuffer;

 using CreateReadBuffer = std::function<std::unique_ptr<SeekableReadBuffer>()>;
--- a/src/IO/examples/read_buffer_from_hdfs.cpp
+++ b/src/IO/examples/read_buffer_from_hdfs.cpp
@ -6,6 +6,8 @@
 #include <base/types.h>
 #include <Common/Config/ConfigProcessor.h>

+#include <Poco/Util/MapConfiguration.h>
+
 using namespace DB;

 int main()
--- a/src/Interpreters/ActionLocksManager.cpp
+++ b/src/Interpreters/ActionLocksManager.cpp
@ -1,5 +1,6 @@
 #include "ActionLocksManager.h"
 #include <Interpreters/Context.h>
+#include <Interpreters/DatabaseCatalog.h>
 #include <Databases/IDatabase.h>
 #include <Storages/IStorage.h>

--- a/src/Interpreters/ActionsVisitor.cpp
+++ b/src/Interpreters/ActionsVisitor.cpp
@ -44,6 +44,7 @@
 #include <Interpreters/ExpressionActions.h>
 #include <Interpreters/misc.h>
 #include <Interpreters/ActionsVisitor.h>
+#include <Interpreters/DatabaseCatalog.h>
 #include <Interpreters/Set.h>
 #include <Interpreters/evaluateConstantExpression.h>
 #include <Interpreters/convertFieldToType.h>
--- a/src/Interpreters/ActionsVisitor.h
+++ b/src/Interpreters/ActionsVisitor.h
@ -1,13 +1,14 @@
 #pragma once

 #include <string_view>
+#include <Core/ColumnNumbers.h>
+#include <Core/ColumnWithTypeAndName.h>
 #include <Core/NamesAndTypes.h>
 #include <Interpreters/Context_fwd.h>
 #include <Interpreters/InDepthNodeVisitor.h>
 #include <Interpreters/PreparedSets.h>
 #include <Parsers/IAST.h>
-#include <Core/ColumnNumbers.h>
-#include <Core/ColumnWithTypeAndName.h>
+#include <QueryPipeline/SizeLimits.h>

 namespace DB
 {
--- a/src/Interpreters/AsynchronousInsertQueue.cpp
+++ b/src/Interpreters/AsynchronousInsertQueue.cpp
@ -11,6 +11,7 @@
 #include <IO/copyData.h>
 #include <Interpreters/AsynchronousInsertLog.h>
 #include <Interpreters/Context.h>
+#include <Interpreters/DatabaseCatalog.h>
 #include <Interpreters/InterpreterInsertQuery.h>
 #include <Interpreters/ProcessList.h>
 #include <Interpreters/executeQuery.h>
--- a/src/Interpreters/CollectJoinOnKeysVisitor.h
+++ b/src/Interpreters/CollectJoinOnKeysVisitor.h
@ -1,11 +1,12 @@
 #pragma once

+#include <Core/Joins.h>
 #include <Core/Names.h>
+#include <Interpreters/Aliases.h>
+#include <Interpreters/DatabaseAndTableWithAlias.h>
+#include <Interpreters/InDepthNodeVisitor.h>
 #include <Parsers/ASTFunction.h>
 #include <Parsers/queryToString.h>
-#include <Interpreters/InDepthNodeVisitor.h>
-#include <Interpreters/DatabaseAndTableWithAlias.h>
-#include <Interpreters/Aliases.h>


 namespace DB
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@ -1073,7 +1073,9 @@ void Context::setTemporaryStoragePath(const String & path, size_t max_size)
        setupTmpPath(shared->log, disk->getPath());
    }

-    shared->root_temp_data_on_disk = std::make_shared<TemporaryDataOnDiskScope>(volume, max_size);
+    TemporaryDataOnDiskSettings temporary_data_on_disk_settings;
+    temporary_data_on_disk_settings.max_size_on_disk = max_size;
+    shared->root_temp_data_on_disk = std::make_shared<TemporaryDataOnDiskScope>(std::move(volume), std::move(temporary_data_on_disk_settings));
 }

 void Context::setTemporaryStoragePolicy(const String & policy_name, size_t max_size)
@ -1093,7 +1095,7 @@ void Context::setTemporaryStoragePolicy(const String & policy_name, size_t max_s
    VolumePtr volume = tmp_policy->getVolume(0);

    if (volume->getDisks().empty())
-         throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "No disks volume for temporary files");
+        throw Exception(ErrorCodes::NO_ELEMENTS_IN_CONFIG, "No disks volume for temporary files");

    for (const auto & disk : volume->getDisks())
    {
@ -1119,7 +1121,9 @@ void Context::setTemporaryStoragePolicy(const String & policy_name, size_t max_s
    if (shared->root_temp_data_on_disk)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Temporary storage is already set");

-    shared->root_temp_data_on_disk = std::make_shared<TemporaryDataOnDiskScope>(volume, max_size);
+    TemporaryDataOnDiskSettings temporary_data_on_disk_settings;
+    temporary_data_on_disk_settings.max_size_on_disk = max_size;
+    shared->root_temp_data_on_disk = std::make_shared<TemporaryDataOnDiskScope>(std::move(volume), std::move(temporary_data_on_disk_settings));
 }

 void Context::setTemporaryStorageInCache(const String & cache_disk_name, size_t max_size)
@ -1140,7 +1144,10 @@ void Context::setTemporaryStorageInCache(const String & cache_disk_name, size_t

    shared->tmp_path = file_cache->getBasePath();
    VolumePtr volume = createLocalSingleDiskVolume(shared->tmp_path, shared->getConfigRefWithLock(lock));
-    shared->root_temp_data_on_disk = std::make_shared<TemporaryDataOnDiskScope>(volume, file_cache.get(), max_size);
+
+    TemporaryDataOnDiskSettings temporary_data_on_disk_settings;
+    temporary_data_on_disk_settings.max_size_on_disk = max_size;
+    shared->root_temp_data_on_disk = std::make_shared<TemporaryDataOnDiskScope>(std::move(volume), file_cache.get(), std::move(temporary_data_on_disk_settings));
 }

 void Context::setFlagsPath(const String & path)
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@ -19,9 +19,8 @@
 #include <Disks/IO/getThreadPoolReader.h>
 #include <Interpreters/ClientInfo.h>
 #include <Interpreters/Context_fwd.h>
-#include <Interpreters/DatabaseCatalog.h>
+#include <Interpreters/StorageID.h>
 #include <Interpreters/MergeTreeTransactionHolder.h>
-#include <Common/Scheduler/IResourceManager.h>
 #include <Parsers/IAST_fwd.h>
 #include <Server/HTTP/HTTPContext.h>
 #include <Storages/ColumnsDescription.h>
@ -149,6 +148,18 @@ template <class Queue>
 class MergeTreeBackgroundExecutor;
 class AsyncLoader;

+struct TemporaryTableHolder;
+using TemporaryTablesMapping = std::map<String, std::shared_ptr<TemporaryTableHolder>>;
+
+class LoadTask;
+using LoadTaskPtr = std::shared_ptr<LoadTask>;
+using LoadTaskPtrs = std::vector<LoadTaskPtr>;
+
+class IClassifier;
+using ClassifierPtr = std::shared_ptr<IClassifier>;
+class IResourceManager;
+using ResourceManagerPtr = std::shared_ptr<IResourceManager>;
+
 /// Scheduling policy can be changed using `background_merges_mutations_scheduling_policy` config option.
 /// By default concurrent merges are scheduled using "round_robin" to ensure fair and starvation-free operation.
 /// Previously in heavily overloaded shards big merges could possibly be starved by smaller
--- a/src/Interpreters/DDLTask.cpp
+++ b/src/Interpreters/DDLTask.cpp
@ -2,6 +2,8 @@
 #include <base/sort.h>
 #include <Common/DNSResolver.h>
 #include <Common/isLocalAddress.h>
+#include <Databases/DatabaseReplicated.h>
+#include <Interpreters/DatabaseCatalog.h>
 #include <IO/WriteHelpers.h>
 #include <IO/ReadHelpers.h>
 #include <IO/Operators.h>
@ -14,7 +16,6 @@
 #include <Parsers/parseQuery.h>
 #include <Parsers/queryToString.h>
 #include <Parsers/ASTQueryWithTableAndOutput.h>
-#include <Databases/DatabaseReplicated.h>


 namespace DB
--- a/src/Interpreters/DatabaseCatalog.cpp
+++ b/src/Interpreters/DatabaseCatalog.cpp
@ -2,6 +2,7 @@
 #include <mutex>
 #include <Interpreters/DatabaseCatalog.h>
 #include <Interpreters/Context.h>
+#include <Interpreters/TableNameHints.h>
 #include <Interpreters/loadMetadata.h>
 #include <Interpreters/executeQuery.h>
 #include <Interpreters/InterpreterCreateQuery.h>
@ -1142,7 +1143,7 @@ void DatabaseCatalog::dequeueDroppedTableCleanup(StorageID table_id)
    TableMarkedAsDropped dropped_table;
    {
        std::lock_guard lock(tables_marked_dropped_mutex);
-        auto latest_drop_time = std::numeric_limits<time_t>::min();
+        time_t latest_drop_time = std::numeric_limits<time_t>::min();
        auto it_dropped_table = tables_marked_dropped.end();
        for (auto it = tables_marked_dropped.begin(); it != tables_marked_dropped.end(); ++it)
        {
@ -1167,7 +1168,7 @@ void DatabaseCatalog::dequeueDroppedTableCleanup(StorageID table_id)
        }
        if (it_dropped_table == tables_marked_dropped.end())
            throw Exception(ErrorCodes::UNKNOWN_TABLE,
-                "Table {} is being dropped, has been dropped, or the database engine does not support UNDROP",
+                "The drop task of table {} is in progress, has been dropped or the database engine doesn't support it",
                table_id.getNameForLogs());
        latest_metadata_dropped_path = it_dropped_table->metadata_path;
        String table_metadata_path = getPathForMetadata(it_dropped_table->table_id);
@ -1705,4 +1706,43 @@ DDLGuard::~DDLGuard()
    releaseTableLock();
 }

+/// Suggests a (database name, table name) pair that resembles `table_name`.
+/// The tables of this object's own database are tried first; only when none of them is
+/// similar enough is the decision delegated to getExtendedHintForTable().
+std::pair<String, String> TableNameHints::getHintForTable(const String & table_name) const
+{
+    auto candidates = this->getHints(table_name, getAllRegisteredNames());
+
+    /// The first candidate is the one reported, paired with the own database's name.
+    if (!candidates.empty())
+        return std::make_pair(database->getDatabaseName(), candidates[0]);
+
+    return getExtendedHintForTable(table_name);
+}
+
+/// Looks for a table resembling `table_name` in every database known to the DatabaseCatalog
+/// except this object's own one.
+/// Returns the (database name, table name) pair of the first match, or a pair of empty
+/// strings when no database yields a hint.
+std::pair<String, String> TableNameHints::getExtendedHintForTable(const String & table_name) const
+{
+    /// load all available databases from the DatabaseCatalog instance
+    auto & database_catalog = DatabaseCatalog::instance();
+    auto all_databases = database_catalog.getDatabases();
+
+    for (const auto & [db_name, db] : all_databases)
+    {
+        /// this case should be covered already by getHintForTable.
+        /// `database` may be null (getAllRegisteredNames() tolerates that, and a null database is
+        /// exactly what sends getHintForTable() here), so it must be checked before dereferencing.
+        if (database && db_name == database->getDatabaseName())
+            continue;
+
+        TableNameHints hints(db, context);
+        auto results = hints.getHints(table_name);
+
+        /// if the results are not empty, return the first instance of the table_name
+        /// and the corresponding database_name that was found.
+        if (!results.empty())
+            return std::make_pair(db_name, results[0]);
+    }
+    return {};
+}
+
+/// Collects the names of all tables reported by the database's tables iterator.
+/// Yields an empty list when no database is attached.
+Names TableNameHints::getAllRegisteredNames() const
+{
+    if (!database)
+        return {};
+
+    Names table_names;
+    auto table_it = database->getTablesIterator(context);
+    while (table_it->isValid())
+    {
+        table_names.emplace_back(table_it->name());
+        table_it->next();
+    }
+    return table_names;
+}
 }
--- a/src/Interpreters/DatabaseCatalog.h
+++ b/src/Interpreters/DatabaseCatalog.h
@ -1,13 +1,11 @@
 #pragma once

 #include <Core/UUID.h>
-#include <Databases/IDatabase.h>
 #include <Databases/TablesDependencyGraph.h>
 #include <Interpreters/Context_fwd.h>
 #include <Interpreters/StorageID.h>
 #include <Parsers/IAST_fwd.h>
 #include <Storages/IStorage_fwd.h>
-#include <Common/NamePrompter.h>
 #include <Common/SharedMutex.h>

 #include <boost/noncopyable.hpp>
@ -365,68 +363,6 @@ private:
    static constexpr time_t DBMS_DEFAULT_DISK_RELOAD_PERIOD_SEC = 5;
 };

-class TableNameHints : public IHints<>
-{
-public:
-    TableNameHints(ConstDatabasePtr database_, ContextPtr context_)
-        : context(context_),
-        database(database_)
-    {
-    }
-
-    /// getHintForTable tries to get a hint for the provided table_name in the provided
-    /// database. If the results are empty, it goes for extended hints for the table
-    /// with getExtendedHintForTable which looks for the table name in every database that's
-    /// available in the database catalog. It finally returns a single hint which is the database
-    /// name and table_name pair which is similar to the table_name provided. Perhaps something to
-    /// consider is should we return more than one pair of hint?
-    std::pair<String, String> getHintForTable(const String & table_name) const
-    {
-        auto results = this->getHints(table_name, getAllRegisteredNames());
-        if (results.empty())
-            return getExtendedHintForTable(table_name);
-        return std::make_pair(database->getDatabaseName(), results[0]);
-    }
-
-    /// getExtendedHintsForTable tries to get hint for the given table_name across all
-    /// the databases that are available in the database catalog.
-    std::pair<String, String> getExtendedHintForTable(const String & table_name) const
-    {
-        /// load all available databases from the DatabaseCatalog instance
-        auto & database_catalog = DatabaseCatalog::instance();
-        auto all_databases = database_catalog.getDatabases();
-
-        for (const auto & [db_name, db] : all_databases)
-        {
-            /// this case should be covered already by getHintForTable
-            if (db_name == database->getDatabaseName())
-                continue;
-
-            TableNameHints hints(db, context);
-            auto results = hints.getHints(table_name);
-
-            /// if the results are not empty, return the first instance of the table_name
-            /// and the corresponding database_name that was found.
-            if (!results.empty())
-                return std::make_pair(db_name, results[0]);
-        }
-        return {};
-    }
-
-    Names getAllRegisteredNames() const override
-    {
-        Names result;
-        if (database)
-            for (auto table_it = database->getTablesIterator(context); table_it->isValid(); table_it->next())
-                result.emplace_back(table_it->name());
-        return result;
-    }
-
-private:
-    ContextPtr context;
-    ConstDatabasePtr database;
-};
-

 /// This class is useful when creating a table or database.
 /// Usually we create IStorage/IDatabase object first and then add it to IDatabase/DatabaseCatalog.
--- a/src/Interpreters/IKeyValueEntity.h
+++ b/src/Interpreters/IKeyValueEntity.h
@ -1,5 +1,6 @@
 #pragma once

+#include <Core/Block.h>
 #include <Core/Names.h>
 #include <Processors/Chunk.h>

--- a/src/Interpreters/InJoinSubqueriesPreprocessor.cpp
+++ b/src/Interpreters/InJoinSubqueriesPreprocessor.cpp
@ -1,6 +1,7 @@
 #include <Interpreters/InJoinSubqueriesPreprocessor.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/DatabaseAndTableWithAlias.h>
+#include <Interpreters/DatabaseCatalog.h>
 #include <Interpreters/IdentifierSemantic.h>
 #include <Interpreters/InDepthNodeVisitor.h>
 #include <Storages/StorageDistributed.h>
--- a/src/Interpreters/InterpreterCheckQuery.cpp
+++ b/src/Interpreters/InterpreterCheckQuery.cpp
@ -16,6 +16,7 @@
 #include <DataTypes/DataTypeString.h>

 #include <Interpreters/Context.h>
+#include <Interpreters/DatabaseCatalog.h>
 #include <Interpreters/ProcessList.h>

 #include <Parsers/ASTCheckQuery.h>
--- a/Show More
+++ b/Show More