Merge branch 'master' into vdimir/fix_race_in_remove_objects

2024-11-22 07:31:57 +00:00 · 2023-07-11 11:22:04 +02:00 · 2023-07-11 11:22:04 +02:00 · ab6356f9bc
commit ab6356f9bc
parent bd5f393515 2ab41ed665
399 changed files with 4994 additions and 3251 deletions
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@ -75,51 +75,6 @@ jobs:
  Codebrowser:
    needs: [DockerHubPush]
    uses: ./.github/workflows/woboq.yml
-  BuilderCoverity:
-    needs: DockerHubPush
-    runs-on: [self-hosted, builder]
-    steps:
-      - name: Set envs
-        run: |
-          cat >> "$GITHUB_ENV" << 'EOF'
-          BUILD_NAME=coverity
-          CACHES_PATH=${{runner.temp}}/../ccaches
-          IMAGES_PATH=${{runner.temp}}/images_path
-          REPO_COPY=${{runner.temp}}/build_check/ClickHouse
-          TEMP_PATH=${{runner.temp}}/build_check
-          EOF
-          echo "COVERITY_TOKEN=${{ secrets.COVERITY_TOKEN }}" >> "$GITHUB_ENV"
-      - name: Download changed images
-        uses: actions/download-artifact@v3
-        with:
-          name: changed_images
-          path: ${{ env.IMAGES_PATH }}
-      - name: Check out repository code
-        uses: ClickHouse/checkout@v1
-        with:
-          clear-repository: true
-          submodules: true
-      - name: Build
-        run: |
-          sudo rm -fr "$TEMP_PATH"
-          mkdir -p "$TEMP_PATH"
-          cp -r "$GITHUB_WORKSPACE" "$TEMP_PATH"
-          cd "$REPO_COPY/tests/ci" && python3 build_check.py "$BUILD_NAME"
-      - name: Upload Coverity Analysis
-        if: ${{ success() || failure() }}
-        run: |
-          curl --form token="${COVERITY_TOKEN}" \
-            --form email='security+coverity@clickhouse.com' \
-            --form file="@$TEMP_PATH/$BUILD_NAME/coverity-scan.tar.gz" \
-            --form version="${GITHUB_REF#refs/heads/}-${GITHUB_SHA::6}" \
-            --form description="Nighly Scan: $(date +'%Y-%m-%dT%H:%M:%S')" \
-            https://scan.coverity.com/builds?project=ClickHouse%2FClickHouse
-      - name: Cleanup
-        if: always()
-        run: |
-          docker ps --quiet | xargs --no-run-if-empty docker kill ||:
-          docker ps --all --quiet | xargs --no-run-if-empty docker rm -f ||:
-          sudo rm -fr "$TEMP_PATH" "$CACHES_PATH"
  SonarCloud:
    runs-on: [self-hosted, builder]
    env:
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -87,7 +87,6 @@ if (ENABLE_FUZZING)
    set (ENABLE_CLICKHOUSE_ODBC_BRIDGE OFF)
    set (ENABLE_LIBRARIES 0)
    set (ENABLE_SSL 1)
-    set (USE_UNWIND ON)
    set (ENABLE_EMBEDDED_COMPILER 0)
    set (ENABLE_EXAMPLES 0)
    set (ENABLE_UTILS 0)
@ -344,9 +343,9 @@ if (COMPILER_CLANG)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fdiagnostics-absolute-paths")
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fdiagnostics-absolute-paths")

-    if (NOT ENABLE_TESTS AND NOT SANITIZE)
+    if (NOT ENABLE_TESTS AND NOT SANITIZE AND OS_LINUX)
        # https://clang.llvm.org/docs/ThinLTO.html
-        # Applies to clang only.
+        # Applies to clang and linux only.
        # Disabled when building with tests or sanitizers.
        option(ENABLE_THINLTO "Clang-specific link time optimization" ON)
    endif()
--- a/base/base/getThreadId.cpp
+++ b/base/base/getThreadId.cpp
@ -15,25 +15,34 @@


 static thread_local uint64_t current_tid = 0;
+
+static void setCurrentThreadId()
+{
+#if defined(OS_ANDROID)
+    current_tid = gettid();
+#elif defined(OS_LINUX)
+    current_tid = static_cast<uint64_t>(syscall(SYS_gettid)); /// This call is always successful. - man gettid
+#elif defined(OS_FREEBSD)
+    current_tid = pthread_getthreadid_np();
+#elif defined(OS_SUNOS)
+    // On Solaris-derived systems, this returns the ID of the LWP, analogous
+    // to a thread.
+    current_tid = static_cast<uint64_t>(pthread_self());
+#else
+    if (0 != pthread_threadid_np(nullptr, &current_tid))
+        throw std::logic_error("pthread_threadid_np returned error");
+#endif
+}
+
 uint64_t getThreadId()
 {
    if (!current_tid)
-    {
-#if defined(OS_ANDROID)
-        current_tid = gettid();
-#elif defined(OS_LINUX)
-        current_tid = static_cast<uint64_t>(syscall(SYS_gettid)); /// This call is always successful. - man gettid
-#elif defined(OS_FREEBSD)
-        current_tid = pthread_getthreadid_np();
-#elif defined(OS_SUNOS)
-        // On Solaris-derived systems, this returns the ID of the LWP, analogous
-        // to a thread.
-        current_tid = static_cast<uint64_t>(pthread_self());
-#else
-        if (0 != pthread_threadid_np(nullptr, &current_tid))
-            throw std::logic_error("pthread_threadid_np returned error");
-#endif
-    }
+        setCurrentThreadId();

    return current_tid;
 }
+
+void updateCurrentThreadIdAfterFork()
+{
+    setCurrentThreadId();
+}
--- a/base/base/getThreadId.h
+++ b/base/base/getThreadId.h
@ -3,3 +3,5 @@

 /// Obtain thread id from OS. The value is cached in thread local variable.
 uint64_t getThreadId();
+
+void updateCurrentThreadIdAfterFork();
--- a/cmake/darwin/default_libs.cmake
+++ b/cmake/darwin/default_libs.cmake
@ -15,6 +15,7 @@ set(CMAKE_OSX_DEPLOYMENT_TARGET 10.15)
 set(THREADS_PREFER_PTHREAD_FLAG ON)
 find_package(Threads REQUIRED)

+include (cmake/unwind.cmake)
 include (cmake/cxx.cmake)
 link_libraries(global-group)

--- a/cmake/limit_jobs.cmake
+++ b/cmake/limit_jobs.cmake
@ -18,6 +18,9 @@ if (NOT PARALLEL_COMPILE_JOBS AND TOTAL_PHYSICAL_MEMORY AND MAX_COMPILER_MEMORY)
    if (NOT PARALLEL_COMPILE_JOBS)
        set (PARALLEL_COMPILE_JOBS 1)
    endif ()
+    if (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES)
+        set (PARALLEL_COMPILE_JOBS_LESS TRUE)
+    endif()
 endif ()

 if (PARALLEL_COMPILE_JOBS AND (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_COMPILE_JOBS LESS NUMBER_OF_LOGICAL_CORES))
@ -33,6 +36,9 @@ if (NOT PARALLEL_LINK_JOBS AND TOTAL_PHYSICAL_MEMORY AND MAX_LINKER_MEMORY)
    if (NOT PARALLEL_LINK_JOBS)
        set (PARALLEL_LINK_JOBS 1)
    endif ()
+    if (NOT NUMBER_OF_LOGICAL_CORES OR PARALLEL_LINK_JOBS LESS NUMBER_OF_LOGICAL_CORES)
+        set (PARALLEL_LINK_JOBS_LESS TRUE)
+    endif()
 endif ()

 # ThinLTO provides its own parallel linking
@ -56,4 +62,10 @@ if (PARALLEL_COMPILE_JOBS OR PARALLEL_LINK_JOBS)
    message(STATUS
        "${CMAKE_CURRENT_SOURCE_DIR}: Have ${TOTAL_PHYSICAL_MEMORY} megabytes of memory.
        Limiting concurrent linkers jobs to ${PARALLEL_LINK_JOBS} and compiler jobs to ${PARALLEL_COMPILE_JOBS} (system has ${NUMBER_OF_LOGICAL_CORES} logical cores)")
+    if (PARALLEL_COMPILE_JOBS_LESS)
+        message(WARNING "The autocalculated compile jobs limit (${PARALLEL_COMPILE_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_COMPILE_JOBS to override.")
+    endif()
+    if (PARALLEL_LINK_JOBS_LESS)
+        message(WARNING "The autocalculated link jobs limit (${PARALLEL_LINK_JOBS}) underutilizes CPU cores (${NUMBER_OF_LOGICAL_CORES}). Set PARALLEL_LINK_JOBS to override.")
+    endif()
 endif ()
--- a/cmake/target.cmake
+++ b/cmake/target.cmake
@ -40,7 +40,6 @@ if (CMAKE_CROSSCOMPILING)
            set (OPENSSL_NO_ASM ON CACHE INTERNAL "")
            set (ENABLE_JEMALLOC ON CACHE INTERNAL "")
            set (ENABLE_PARQUET OFF CACHE INTERNAL "")
-            set (USE_UNWIND OFF CACHE INTERNAL "")
            set (ENABLE_GRPC OFF CACHE INTERNAL "")
            set (ENABLE_HDFS OFF CACHE INTERNAL "")
            set (ENABLE_MYSQL OFF CACHE INTERNAL "")
--- a/cmake/unwind.cmake
+++ b/cmake/unwind.cmake
@ -1,13 +1 @@
-option (USE_UNWIND "Enable libunwind (better stacktraces)" ${ENABLE_LIBRARIES})
-
-if (USE_UNWIND)
-    add_subdirectory(contrib/libunwind-cmake)
-    set (UNWIND_LIBRARIES unwind)
-    set (EXCEPTION_HANDLING_LIBRARY ${UNWIND_LIBRARIES})
-
-    message (STATUS "Using libunwind: ${UNWIND_LIBRARIES}")
-else ()
-    set (EXCEPTION_HANDLING_LIBRARY gcc_eh)
-endif ()
-
-message (STATUS "Using exception handler: ${EXCEPTION_HANDLING_LIBRARY}")
+add_subdirectory(contrib/libunwind-cmake)
--- a/contrib/jemalloc-cmake/CMakeLists.txt
+++ b/contrib/jemalloc-cmake/CMakeLists.txt
@ -170,16 +170,13 @@ endif ()

 target_compile_definitions(_jemalloc PRIVATE -DJEMALLOC_PROF=1)

-if (USE_UNWIND)
-    # jemalloc provides support for two different libunwind flavors: the original HP libunwind and the one coming with gcc / g++ / libstdc++.
-    # The latter is identified by `JEMALLOC_PROF_LIBGCC` and uses `_Unwind_Backtrace` method instead of `unw_backtrace`.
-    # At the time ClickHouse uses LLVM libunwind which follows libgcc's way of backtracing.
-
-    # ClickHouse has to provide `unw_backtrace` method by the means of [commit 8e2b31e](https://github.com/ClickHouse/libunwind/commit/8e2b31e766dd502f6df74909e04a7dbdf5182eb1).
-
-    target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBGCC=1)
-    target_link_libraries (_jemalloc PRIVATE unwind)
-endif ()
+# jemalloc provides support for two different libunwind flavors: the original HP libunwind and the one coming with gcc / g++ / libstdc++.
+# The latter is identified by `JEMALLOC_PROF_LIBGCC` and uses `_Unwind_Backtrace` method instead of `unw_backtrace`.
+# At the time ClickHouse uses LLVM libunwind which follows libgcc's way of backtracing.
+#
+# ClickHouse has to provide `unw_backtrace` method by the means of [commit 8e2b31e](https://github.com/ClickHouse/libunwind/commit/8e2b31e766dd502f6df74909e04a7dbdf5182eb1).
+target_compile_definitions (_jemalloc PRIVATE -DJEMALLOC_PROF_LIBGCC=1)
+target_link_libraries (_jemalloc PRIVATE unwind)

 # for RTLD_NEXT
 target_compile_options(_jemalloc PRIVATE -D_GNU_SOURCE)
--- a/contrib/libcxx-cmake/CMakeLists.txt
+++ b/contrib/libcxx-cmake/CMakeLists.txt
@ -61,9 +61,7 @@ target_include_directories(cxx SYSTEM BEFORE PUBLIC  $<$<COMPILE_LANGUAGE:CXX>:$
 target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI)

 # Enable capturing stack traces for all exceptions.
-if (USE_UNWIND)
-    target_compile_definitions(cxx PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1)
-endif ()
+target_compile_definitions(cxx PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1)

 if (USE_MUSL)
    target_compile_definitions(cxx PUBLIC -D_LIBCPP_HAS_MUSL_LIBC=1)
--- a/contrib/libcxxabi-cmake/CMakeLists.txt
+++ b/contrib/libcxxabi-cmake/CMakeLists.txt
@ -35,12 +35,10 @@ target_include_directories(cxxabi SYSTEM BEFORE
 )
 target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY)
 target_compile_options(cxxabi PRIVATE -nostdinc++ -fno-sanitize=undefined -Wno-macro-redefined) # If we don't disable UBSan, infinite recursion happens in dynamic_cast.
-target_link_libraries(cxxabi PUBLIC ${EXCEPTION_HANDLING_LIBRARY})
+target_link_libraries(cxxabi PUBLIC unwind)

 # Enable capturing stack traces for all exceptions.
-if (USE_UNWIND)
-    target_compile_definitions(cxxabi PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1)
-endif ()
+target_compile_definitions(cxxabi PUBLIC -DSTD_EXCEPTION_HAS_STACK_TRACE=1)

 install(
    TARGETS cxxabi
--- a/docker/keeper/Dockerfile
+++ b/docker/keeper/Dockerfile
@ -32,7 +32,7 @@ RUN arch=${TARGETARCH:-amd64} \
    esac

 ARG REPOSITORY="https://s3.amazonaws.com/clickhouse-builds/22.4/31c367d3cd3aefd316778601ff6565119fe36682/package_release"
-ARG VERSION="23.6.1.1524"
+ARG VERSION="23.6.2.18"
 ARG PACKAGES="clickhouse-keeper"

 # user/group precreated explicitly with fixed uid/gid on purpose.
--- a/docker/packager/binary/Dockerfile
+++ b/docker/packager/binary/Dockerfile
@ -49,8 +49,8 @@ ENV CARGO_HOME=/rust/cargo
 ENV PATH="/rust/cargo/bin:${PATH}"
 RUN curl https://sh.rustup.rs -sSf | bash -s -- -y && \
    chmod 777 -R /rust && \
-    rustup toolchain install nightly && \
-    rustup default nightly && \
+    rustup toolchain install nightly-2023-07-04 && \
+    rustup default nightly-2023-07-04 && \
    rustup component add rust-src && \
    rustup target add aarch64-unknown-linux-gnu && \
    rustup target add x86_64-apple-darwin && \
--- a/docker/server/Dockerfile.alpine
+++ b/docker/server/Dockerfile.alpine
@ -33,7 +33,7 @@ RUN arch=${TARGETARCH:-amd64} \
 # lts / testing / prestable / etc
 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="https://packages.clickhouse.com/tgz/${REPO_CHANNEL}"
-ARG VERSION="23.6.1.1524"
+ARG VERSION="23.6.2.18"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

 # user/group precreated explicitly with fixed uid/gid on purpose.
--- a/docker/server/Dockerfile.ubuntu
+++ b/docker/server/Dockerfile.ubuntu
@ -23,7 +23,7 @@ RUN sed -i "s|http://archive.ubuntu.com|${apt_archive}|g" /etc/apt/sources.list

 ARG REPO_CHANNEL="stable"
 ARG REPOSITORY="deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb ${REPO_CHANNEL} main"
-ARG VERSION="23.6.1.1524"
+ARG VERSION="23.6.2.18"
 ARG PACKAGES="clickhouse-client clickhouse-server clickhouse-common-static"

 # set non-empty deb_location_url url to create a docker image
--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@ -166,7 +166,6 @@ function run_cmake
        "-DENABLE_UTILS=0"
        "-DENABLE_EMBEDDED_COMPILER=0"
        "-DENABLE_THINLTO=0"
-        "-DUSE_UNWIND=1"
        "-DENABLE_NURAFT=1"
        "-DENABLE_SIMDJSON=1"
        "-DENABLE_JEMALLOC=1"
--- a/docker/test/fuzzer/run-fuzzer.sh
+++ b/docker/test/fuzzer/run-fuzzer.sh
@ -291,7 +291,7 @@ quit
    if [ "$server_died" == 1 ]
    then
        # The server has died.
-        if ! rg --text -o 'Received signal.*|Logical error.*|Assertion.*failed|Failed assertion.*|.*runtime error: .*|.*is located.*|(SUMMARY|ERROR): [a-zA-Z]+Sanitizer:.*|.*_LIBCPP_ASSERT.*' server.log > description.txt
+        if ! rg --text -o 'Received signal.*|Logical error.*|Assertion.*failed|Failed assertion.*|.*runtime error: .*|.*is located.*|(SUMMARY|ERROR): [a-zA-Z]+Sanitizer:.*|.*_LIBCPP_ASSERT.*|.*Child process was terminated by signal 9.*' server.log > description.txt
        then
            echo "Lost connection to server. See the logs." > description.txt
        fi
--- a/docker/test/sqllogic/run.sh
+++ b/docker/test/sqllogic/run.sh
@ -92,8 +92,8 @@ sudo clickhouse stop ||:

 for _ in $(seq 1 60); do if [[ $(wget --timeout=1 -q 'localhost:8123' -O-) == 'Ok.' ]]; then sleep 1 ; else break; fi ; done

-grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||:
-pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.gz &
+rg -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||:
+zstd < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.zst &

 # Compressed (FIXME: remove once only github actions will be left)
 rm /var/log/clickhouse-server/clickhouse-server.log
--- a/docker/test/stateless/Dockerfile
+++ b/docker/test/stateless/Dockerfile
@ -33,7 +33,6 @@ RUN apt-get update -y \
            qemu-user-static \
            sqlite3 \
            sudo \
-            telnet \
            tree \
            unixodbc \
            wget \
--- a/docker/test/stress/Dockerfile
+++ b/docker/test/stress/Dockerfile
@ -8,8 +8,6 @@ RUN apt-get update -y \
        apt-get install --yes --no-install-recommends \
            bash \
            tzdata \
-            fakeroot \
-            debhelper \
            parallel \
            expect \
            python3 \
@ -20,7 +18,6 @@ RUN apt-get update -y \
            sudo \
            openssl \
            netcat-openbsd \
-            telnet \
            brotli \
    && apt-get clean

--- a/docker/test/upgrade/Dockerfile
+++ b/docker/test/upgrade/Dockerfile
@ -8,8 +8,6 @@ RUN apt-get update -y \
        apt-get install --yes --no-install-recommends \
            bash \
            tzdata \
-            fakeroot \
-            debhelper \
            parallel \
            expect \
            python3 \
@ -20,7 +18,6 @@ RUN apt-get update -y \
            sudo \
            openssl \
            netcat-openbsd \
-            telnet \
            brotli \
    && apt-get clean

--- a/docker/test/util/Dockerfile
+++ b/docker/test/util/Dockerfile
@ -44,7 +44,6 @@ RUN apt-get update \
        clang-${LLVM_VERSION} \
        clang-tidy-${LLVM_VERSION} \
        cmake \
-        fakeroot \
        gdb \
        git \
        gperf \
--- a/docs/_includes/install/universal.sh
+++ b/docs/_includes/install/universal.sh
@ -33,6 +33,9 @@ then
    elif [ "${ARCH}" = "powerpc64le" -o "${ARCH}" = "ppc64le" ]
    then
        DIR="powerpc64le"
+    elif [ "${ARCH}" = "riscv64" ]
+    then
+        DIR="riscv64"
    fi
 elif [ "${OS}" = "FreeBSD" ]
 then
--- a/docs/changelogs/v22.8.20.11-lts.md
+++ b/docs/changelogs/v22.8.20.11-lts.md
@ -0,0 +1,20 @@
+---
+sidebar_position: 1
+sidebar_label: 2023
+---
+
+# 2023 Changelog
+
+### ClickHouse release v22.8.20.11-lts (c9ca79e24e8) FIXME as compared to v22.8.19.10-lts (989bc2fe8b0)
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+
+* Fix broken index analysis when binary operator contains a null constant argument [#50177](https://github.com/ClickHouse/ClickHouse/pull/50177) ([Amos Bird](https://github.com/amosbird)).
+* Fix incorrect constant folding [#50536](https://github.com/ClickHouse/ClickHouse/pull/50536) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix fuzzer failure in ActionsDAG [#51301](https://github.com/ClickHouse/ClickHouse/pull/51301) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix segfault in MathUnary [#51499](https://github.com/ClickHouse/ClickHouse/pull/51499) ([Ilya Yatsishin](https://github.com/qoega)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Decoupled commits from [#51180](https://github.com/ClickHouse/ClickHouse/issues/51180) for backports [#51561](https://github.com/ClickHouse/ClickHouse/pull/51561) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+
--- a/docs/changelogs/v23.6.2.18-stable.md
+++ b/docs/changelogs/v23.6.2.18-stable.md
@ -0,0 +1,25 @@
+---
+sidebar_position: 1
+sidebar_label: 2023
+---
+
+# 2023 Changelog
+
+### ClickHouse release v23.6.2.18-stable (89f39a7ccfe) FIXME as compared to v23.6.1.1524-stable (d1c7e13d088)
+
+#### Build/Testing/Packaging Improvement
+* Backported in [#51888](https://github.com/ClickHouse/ClickHouse/issues/51888): Update cargo dependencies. [#51721](https://github.com/ClickHouse/ClickHouse/pull/51721) ([Raúl Marín](https://github.com/Algunenano)).
+
+#### Bug Fix (user-visible misbehavior in an official stable release)
+
+* Fix reading from empty column in `parseSipHashKey` [#51804](https://github.com/ClickHouse/ClickHouse/pull/51804) ([Nikita Taranov](https://github.com/nickitat)).
+* Allow parametric UDFs [#51964](https://github.com/ClickHouse/ClickHouse/pull/51964) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+
+#### NOT FOR CHANGELOG / INSIGNIFICANT
+
+* Remove the usage of Analyzer setting in the client [#51578](https://github.com/ClickHouse/ClickHouse/pull/51578) ([Alexey Milovidov](https://github.com/alexey-milovidov)).
+* Fix 02116_tuple_element with Analyzer [#51669](https://github.com/ClickHouse/ClickHouse/pull/51669) ([Robert Schulze](https://github.com/rschu1ze)).
+* Fix SQLLogic docker images [#51719](https://github.com/ClickHouse/ClickHouse/pull/51719) ([Antonio Andelic](https://github.com/antonio2368)).
+* Fix source image for sqllogic [#51728](https://github.com/ClickHouse/ClickHouse/pull/51728) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+* Pin for docker-ce [#51743](https://github.com/ClickHouse/ClickHouse/pull/51743) ([Mikhail f. Shiryaev](https://github.com/Felixoid)).
+
--- a/docs/en/development/build-cross-riscv.md
+++ b/docs/en/development/build-cross-riscv.md
@ -23,7 +23,7 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"
 ``` bash
 cd ClickHouse
 mkdir build-riscv64
-CC=clang-16 CXX=clang++-16 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF  -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DUSE_UNWIND=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF
+CC=clang-16 CXX=clang++-16 cmake . -Bbuild-riscv64 -G Ninja -DCMAKE_TOOLCHAIN_FILE=cmake/linux/toolchain-riscv64.cmake -DGLIBC_COMPATIBILITY=OFF -DENABLE_LDAP=OFF  -DOPENSSL_NO_ASM=ON -DENABLE_JEMALLOC=ON -DENABLE_PARQUET=OFF -DENABLE_GRPC=OFF -DENABLE_HDFS=OFF -DENABLE_MYSQL=OFF
 ninja -C build-riscv64
 ```

--- a/docs/en/engines/table-engines/integrations/mongodb.md
+++ b/docs/en/engines/table-engines/integrations/mongodb.md
@ -33,6 +33,15 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name

 - `options` — MongoDB connection string options (optional parameter).

+:::tip
+If you are using the MongoDB Atlas cloud offering, please add these options:
+
+```
+'connectTimeoutMS=10000&ssl=true&authSource=admin'
+```
+
+:::
+
 ## Usage Example {#usage-example}

 Create a table in ClickHouse which allows to read data from MongoDB collection:
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@ -37,8 +37,8 @@ The [Merge](/docs/en/engines/table-engines/special/merge.md/#merge) engine does
 ``` sql
 CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 (
-    name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
-    name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
+    name1 [type1] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr1] [TTL expr1] [CODEC(codec1)] [[NOT] NULL|PRIMARY KEY],
+    name2 [type2] [DEFAULT|MATERIALIZED|ALIAS|EPHEMERAL expr2] [TTL expr2] [CODEC(codec2)] [[NOT] NULL|PRIMARY KEY],
    ...
    INDEX index_name1 expr1 TYPE type1(...) [GRANULARITY value1],
    INDEX index_name2 expr2 TYPE type2(...) [GRANULARITY value2],
@ -439,41 +439,41 @@ Syntax: `ngrambf_v1(n, size_of_bloom_filter_in_bytes, number_of_hash_functions,
 - `number_of_hash_functions` — The number of hash functions used in the Bloom filter.
 - `random_seed` — The seed for Bloom filter hash functions.

-Users can create [UDF](/docs/en/sql-reference/statements/create/function.md) to estimate the parameters set of `ngrambf_v1`. Query statements are as follows:  
+Users can create [UDF](/docs/en/sql-reference/statements/create/function.md) to estimate the parameters set of `ngrambf_v1`. Query statements are as follows:

 ```sql
-CREATE FUNCTION bfEstimateFunctions [ON CLUSTER cluster]   
-AS  
-(total_nubmer_of_all_grams, size_of_bloom_filter_in_bits) -> round((size_of_bloom_filter_in_bits / total_nubmer_of_all_grams) * log(2));   
-  
-CREATE FUNCTION bfEstimateBmSize [ON CLUSTER cluster]   
-AS  
-(total_nubmer_of_all_grams,  probability_of_false_positives) -> ceil((total_nubmer_of_all_grams * log(probability_of_false_positives)) / log(1 / pow(2, log(2))));  
-    
-CREATE FUNCTION bfEstimateFalsePositive [ON CLUSTER cluster]  
-AS   
-(total_nubmer_of_all_grams, number_of_hash_functions, size_of_bloom_filter_in_bytes) -> pow(1 - exp(-number_of_hash_functions/ (size_of_bloom_filter_in_bytes / total_nubmer_of_all_grams)), number_of_hash_functions);  
-  
-CREATE FUNCTION bfEstimateGramNumber [ON CLUSTER cluster]   
-AS  
+CREATE FUNCTION bfEstimateFunctions [ON CLUSTER cluster]
+AS
+(total_nubmer_of_all_grams, size_of_bloom_filter_in_bits) -> round((size_of_bloom_filter_in_bits / total_nubmer_of_all_grams) * log(2));
+
+CREATE FUNCTION bfEstimateBmSize [ON CLUSTER cluster]
+AS
+(total_nubmer_of_all_grams,  probability_of_false_positives) -> ceil((total_nubmer_of_all_grams * log(probability_of_false_positives)) / log(1 / pow(2, log(2))));
+
+CREATE FUNCTION bfEstimateFalsePositive [ON CLUSTER cluster]
+AS
+(total_nubmer_of_all_grams, number_of_hash_functions, size_of_bloom_filter_in_bytes) -> pow(1 - exp(-number_of_hash_functions/ (size_of_bloom_filter_in_bytes / total_nubmer_of_all_grams)), number_of_hash_functions);
+
+CREATE FUNCTION bfEstimateGramNumber [ON CLUSTER cluster]
+AS
 (number_of_hash_functions, probability_of_false_positives, size_of_bloom_filter_in_bytes) -> ceil(size_of_bloom_filter_in_bytes / (-number_of_hash_functions / log(1 - exp(log(probability_of_false_positives) / number_of_hash_functions))))

-```  
+```
 To use those functions,we need to specify two parameter at least.
-For example, if there 4300 ngrams in the granule and we expect false positives to be less than 0.0001. The other parameters can be estimated by executing following queries:   
-  
+For example, if there 4300 ngrams in the granule and we expect false positives to be less than 0.0001. The other parameters can be estimated by executing following queries:
+

 ```sql
 --- estimate number of bits in the filter
-SELECT bfEstimateBmSize(4300, 0.0001) / 8 as size_of_bloom_filter_in_bytes;  
+SELECT bfEstimateBmSize(4300, 0.0001) / 8 as size_of_bloom_filter_in_bytes;

 ┌─size_of_bloom_filter_in_bytes─┐
 │                         10304 │
 └───────────────────────────────┘
-  
+
 --- estimate number of hash functions
 SELECT bfEstimateFunctions(4300, bfEstimateBmSize(4300, 0.0001)) as number_of_hash_functions
-  
+
 ┌─number_of_hash_functions─┐
 │                       13 │
 └──────────────────────────┘
@ -991,7 +991,7 @@ use a local disk to cache data from a table stored at a URL. Neither the cache d
 nor the web storage is configured in the ClickHouse configuration files; both are
 configured in the CREATE/ATTACH query settings.

-In the settings highlighted below notice that the disk of `type=web` is nested within 
+In the settings highlighted below notice that the disk of `type=web` is nested within
 the disk of `type=cache`.

 ```sql
@ -1308,7 +1308,7 @@ configuration file.
 In this sample configuration:
 - the disk is of type `web`
 - the data is hosted at `http://nginx:80/test1/`
- a cache on local storage is used 
+- a cache on local storage is used

 ```xml
 <clickhouse>
--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@ -471,6 +471,7 @@ The CSV format supports the output of totals and extremes the same way as `TabSe
 - [input_format_csv_skip_trailing_empty_lines](/docs/en/operations/settings/settings-formats.md/#input_format_csv_skip_trailing_empty_lines) - skip trailing empty lines at the end of data. Default value - `false`.
 - [input_format_csv_trim_whitespaces](/docs/en/operations/settings/settings-formats.md/#input_format_csv_trim_whitespaces) - trim spaces and tabs in non-quoted CSV strings. Default value - `true`.
 - [input_format_csv_allow_whitespace_or_tab_as_delimiter](/docs/en/operations/settings/settings-formats.md/# input_format_csv_allow_whitespace_or_tab_as_delimiter) - Allow to use whitespace or tab as field delimiter in CSV strings. Default value - `false`.
+- [input_format_csv_allow_variable_number_of_columns](/docs/en/operations/settings/settings-formats.md/#input_format_csv_allow_variable_number_of_columns) - ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values. Default value - `false`.

 ## CSVWithNames {#csvwithnames}

--- a/docs/en/operations/settings/settings-formats.md
+++ b/docs/en/operations/settings/settings-formats.md
@ -931,6 +931,11 @@ Result
 ```text
 "  string  "
 ```
+### input_format_csv_allow_variable_number_of_columns {#input_format_csv_allow_variable_number_of_columns}
+
+Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values.
+
+Disabled by default.

 ### input_format_csv_allow_whitespace_or_tab_as_delimiter {#input_format_csv_allow_whitespace_or_tab_as_delimiter}

--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@ -17,7 +17,8 @@ Default value: 0.
 **Example**

 ``` sql
-insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd');
+INSERT INTO table_1 VALUES (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd');
+SELECT * FROM table_1;
 ```
 ```response
 ┌─x─┬─y────┐
@ -30,7 +31,7 @@ insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd');
 ```sql
 SELECT *
 FROM table_1
-SETTINGS additional_table_filters = (('table_1', 'x != 2'))
+SETTINGS additional_table_filters = {'table_1': 'x != 2'}
 ```
 ```response
 ┌─x─┬─y────┐
@ -50,7 +51,8 @@ Default value: `''`.
 **Example**

 ``` sql
-insert into table_1 values (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd');
+INSERT INTO table_1 VALUES (1, 'a'), (2, 'bb'), (3, 'ccc'), (4, 'dddd');
+SELECT * FROM table_1;
 ```
 ```response
 ┌─x─┬─y────┐
@ -3535,7 +3537,7 @@ Possible values:
 - Any positive integer.
 - 0 - Disabled (infinite timeout).

-Default value: 180.
+Default value: 30.

 ## http_receive_timeout {#http_receive_timeout}

@ -3546,7 +3548,7 @@ Possible values:
 - Any positive integer.
 - 0 - Disabled (infinite timeout).

-Default value: 180.
+Default value: 30.

 ## check_query_single_value_result {#check_query_single_value_result}

--- a/docs/en/operations/system-tables/jemalloc_bins.md
+++ b/docs/en/operations/system-tables/jemalloc_bins.md
@ -0,0 +1,45 @@
+---
+slug: /en/operations/system-tables/jemalloc_bins
+---
+# jemalloc_bins
+
+Contains information about memory allocations done via jemalloc allocator in different size classes (bins) aggregated from all arenas.
+These statistics might not be absolutely accurate because of thread local caching in jemalloc.
+
+Columns:
+
+- `index` (UInt64) — Index of the bin ordered by size
+- `large` (Bool) — True for large allocations and False for small
+- `size` (UInt64) — Size of allocations in this bin
+- `allocations` (UInt64) — Number of allocations
+- `deallocations` (UInt64) — Number of deallocations
+
+**Example**
+
+Find the sizes of allocations that contributed the most to the current overall memory usage.
+
+``` sql
+SELECT
+    *,
+    allocations - deallocations AS active_allocations,
+    size * active_allocations AS allocated_bytes
+FROM system.jemalloc_bins
+WHERE allocated_bytes > 0
+ORDER BY allocated_bytes DESC
+LIMIT 10
+```
+
+``` text
+┌─index─┬─large─┬─────size─┬──allocations─┬─deallocations─┬─active_allocations─┬─allocated_bytes─┐
+│    82 │     1 │ 50331648 │            1 │             0 │                  1 │        50331648 │
+│    10 │     0 │      192 │       512336 │        370710 │             141626 │        27192192 │
+│    69 │     1 │  5242880 │            6 │             2 │                  4 │        20971520 │
+│     3 │     0 │       48 │     16938224 │      16559484 │             378740 │        18179520 │
+│    28 │     0 │     4096 │       122924 │        119142 │               3782 │        15491072 │
+│    61 │     1 │  1310720 │        44569 │         44558 │                 11 │        14417920 │
+│    39 │     1 │    28672 │         1285 │           913 │                372 │        10665984 │
+│     4 │     0 │       64 │      2837225 │       2680568 │             156657 │        10026048 │
+│     6 │     0 │       96 │      2617803 │       2531435 │              86368 │         8291328 │
+│    36 │     1 │    16384 │        22431 │         21970 │                461 │         7553024 │
+└───────┴───────┴──────────┴──────────────┴───────────────┴────────────────────┴─────────────────┘
+```
--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@ -718,7 +718,7 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d

 ## age

-Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 second.
+Returns the `unit` component of the difference between `startdate` and `enddate`. The difference is calculated using a precision of 1 microsecond.
 E.g. the difference between `2021-12-29` and `2022-01-01` is 3 days for `day` unit, 0 months for `month` unit, 0 years for `year` unit.

 For an alternative to `age`, see function `date\_diff`.
@ -734,6 +734,8 @@ age('unit', startdate, enddate, [timezone])
 - `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
    Possible values:

+    - `microsecond` (possible abbreviations: `us`, `u`)
+    - `millisecond` (possible abbreviations: `ms`)
    - `second` (possible abbreviations: `ss`, `s`)
    - `minute` (possible abbreviations: `mi`, `n`)
    - `hour` (possible abbreviations: `hh`, `h`)
@ -809,6 +811,8 @@ Aliases: `dateDiff`, `DATE_DIFF`, `timestampDiff`, `timestamp_diff`, `TIMESTAMP_
 - `unit` — The type of interval for result. [String](../../sql-reference/data-types/string.md).
    Possible values:

+    - `microsecond` (possible abbreviations: `us`, `u`)
+    - `millisecond` (possible abbreviations: `ms`)
    - `second` (possible abbreviations: `ss`, `s`)
    - `minute` (possible abbreviations: `mi`, `n`)
    - `hour` (possible abbreviations: `hh`, `h`)
--- a/docs/en/sql-reference/functions/udf.md
+++ b/docs/en/sql-reference/functions/udf.md
@ -171,12 +171,13 @@ Result:
 └──────────────────────────────┘
 ```

-Executable user defined functions can take constant parameters configured in `command` setting (works only for user defined functions with `executable` type).
+Executable user defined functions can take constant parameters configured in `command` setting (works only for user defined functions with `executable` type). This also requires the `execute_direct` option to be enabled, which ensures that the parameters are not exposed to a shell argument expansion vulnerability.
 File `test_function_parameter_python.xml` (`/etc/clickhouse-server/test_function_parameter_python.xml` with default path settings).
 ```xml
 <functions>
    <function>
        <type>executable</type>
+        <execute_direct>true</execute_direct>
        <name>test_function_parameter_python</name>
        <return_type>String</return_type>
        <argument>
--- a/docs/en/sql-reference/statements/alter/sample-by.md
+++ b/docs/en/sql-reference/statements/alter/sample-by.md
@ -5,15 +5,28 @@ sidebar_label: SAMPLE BY
 title: "Manipulating Sampling-Key Expressions"
 ---

-Syntax:
+# Manipulating SAMPLE BY expression
+
+The following operations are available:
+
+## MODIFY

 ``` sql
 ALTER TABLE [db].name [ON CLUSTER cluster] MODIFY SAMPLE BY new_expression
 ```

-The command changes the [sampling key](../../../engines/table-engines/mergetree-family/mergetree.md) of the table to `new_expression` (an expression or a tuple of expressions).
+The command changes the [sampling key](../../../engines/table-engines/mergetree-family/mergetree.md) of the table to `new_expression` (an expression or a tuple of expressions). The primary key must contain the new sample key.

-The command is lightweight in the sense that it only changes metadata. The primary key must contain the new sample key.
+## REMOVE
+
+``` sql
+ALTER TABLE [db].name [ON CLUSTER cluster] REMOVE SAMPLE BY
+```
+
+The command removes the [sampling key](../../../engines/table-engines/mergetree-family/mergetree.md) of the table.
+
+
+The commands `MODIFY` and `REMOVE` are lightweight in the sense that they only change metadata or remove files.

 :::note    
 It only works for tables in the [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md) family (including [replicated](../../../engines/table-engines/mergetree-family/replication.md) tables).
--- a/docs/en/sql-reference/table-functions/mongodb.md
+++ b/docs/en/sql-reference/table-functions/mongodb.md
@ -30,6 +30,14 @@ mongodb(host:port, database, collection, user, password, structure [, options])

 - `options` - MongoDB connection string options (optional parameter).

+:::tip
+If you are using the MongoDB Atlas cloud offering, please add these options:
+
+```
+'connectTimeoutMS=10000&ssl=true&authSource=admin'
+```
+
+:::

 **Returned Value**

--- a/docs/ru/interfaces/formats.md
+++ b/docs/ru/interfaces/formats.md
@ -401,8 +401,8 @@ $ clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FOR
 - [output_format_csv_crlf_end_of_line](../operations/settings/settings.md#output_format_csv_crlf_end_of_line) - если установлено значение true, конец строки в формате вывода CSV будет `\r\n` вместо `\n`. Значение по умолчанию - `false`.
 - [input_format_csv_skip_first_lines](../operations/settings/settings.md#input_format_csv_skip_first_lines) - пропустить указанное количество строк в начале данных. Значение по умолчанию - `0`.
 - [input_format_csv_detect_header](../operations/settings/settings.md#input_format_csv_detect_header) - обнаружить заголовок с именами и типами в формате CSV. Значение по умолчанию - `true`.
- [input_format_csv_trim_whitespaces](../operations/settings/settings.md#input_format_csv_trim_whitespaces) - удалить пробелы и символы табуляции из строк без кавычек.
-Значение по умолчанию - `true`.
+- [input_format_csv_trim_whitespaces](../operations/settings/settings.md#input_format_csv_trim_whitespaces) - удалить пробелы и символы табуляции из строк без кавычек. Значение по умолчанию - `true`.
+- [input_format_csv_allow_variable_number_of_columns](../operations/settings/settings.md#input_format_csv_allow_variable_number_of_columns) - игнорировать дополнительные столбцы (если файл содержит больше столбцов, чем ожидается) и рассматривать отсутствующие поля в CSV в качестве значений по умолчанию. Значение по умолчанию - `false`.

 ## CSVWithNames {#csvwithnames}

--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@ -1686,7 +1686,7 @@ SELECT * FROM table_with_enum_column_for_csv_insert;
 ## input_format_csv_detect_header {#input_format_csv_detect_header}

 Обнаружить заголовок с именами и типами в формате CSV.
- 
+
 Значение по умолчанию - `true`.

 ## input_format_csv_skip_first_lines {#input_format_csv_skip_first_lines}
@ -1727,6 +1727,12 @@ echo '  string  ' | ./clickhouse local -q  "select * from table FORMAT CSV" --in
 "  string  "
 ```

+## input_format_csv_allow_variable_number_of_columns {#input_format_csv_allow_variable_number_of_columns}
+
+Игнорировать дополнительные столбцы (если файл содержит больше столбцов, чем ожидается) и рассматривать отсутствующие поля в CSV в качестве значений по умолчанию.
+
+Выключено по умолчанию.
+
 ## output_format_tsv_crlf_end_of_line {#settings-output-format-tsv-crlf-end-of-line}

 Использовать в качестве разделителя строк для TSV формата CRLF (DOC/Windows стиль) вместо LF (Unix стиль).
--- a/docs/ru/sql-reference/functions/date-time-functions.md
+++ b/docs/ru/sql-reference/functions/date-time-functions.md
@ -621,7 +621,7 @@ SELECT toDate('2016-12-27') AS date, toYearWeek(date) AS yearWeek0, toYearWeek(d

 ## age

-Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 секунду.
+Вычисляет компонент `unit` разницы между `startdate` и `enddate`. Разница вычисляется с точностью в 1 микросекунду.
 Например, разница между `2021-12-29` и `2022-01-01` 3 дня для единицы `day`, 0 месяцев для единицы `month`, 0 лет для единицы `year`.

 **Синтаксис**
@ -635,6 +635,8 @@ age('unit', startdate, enddate, [timezone])
 -   `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md).
    Возможные значения:

+    - `microsecond` (возможные сокращения: `us`, `u`)
+    - `millisecond` (возможные сокращения: `ms`)
    - `second` (возможные сокращения: `ss`, `s`)
    - `minute` (возможные сокращения: `mi`, `n`)
    - `hour` (возможные сокращения: `hh`, `h`)
@ -708,6 +710,8 @@ date_diff('unit', startdate, enddate, [timezone])
 -   `unit` — единица измерения времени, в которой будет выражено возвращаемое значение функции. [String](../../sql-reference/data-types/string.md).
    Возможные значения:

+    - `microsecond` (возможные сокращения: `us`, `u`)
+    - `millisecond` (возможные сокращения: `ms`)
    - `second` (возможные сокращения: `ss`, `s`)
    - `minute` (возможные сокращения: `mi`, `n`)
    - `hour` (возможные сокращения: `hh`, `h`)
--- a/docs/zh/development/build.md
+++ b/docs/zh/development/build.md
@ -3,13 +3,6 @@ slug: /zh/development/build
 ---
 # 如何构建 ClickHouse 发布包 {#ru-he-gou-jian-clickhouse-fa-bu-bao}

-## 安装 Git 和 Pbuilder {#an-zhuang-git-he-pbuilder}
-
-``` bash
-sudo apt-get update
-sudo apt-get install git pbuilder debhelper lsb-release fakeroot sudo debian-archive-keyring debian-keyring
-```
-
 ## 拉取 ClickHouse 源码 {#la-qu-clickhouse-yuan-ma}

 ``` bash
--- a/docs/zh/sql-reference/functions/date-time-functions.md
+++ b/docs/zh/sql-reference/functions/date-time-functions.md
@ -643,6 +643,8 @@ date_diff('unit', startdate, enddate, [timezone])
 -   `unit` — `value`对应的时间单位。类型为[String](../../sql-reference/data-types/string.md)。
    可能的值：

+    - `microsecond`
+    - `millisecond`
    - `second`
    - `minute`
    - `hour`
--- a/programs/diagnostics/internal/platform/data/file_test.go
+++ b/programs/diagnostics/internal/platform/data/file_test.go
@ -135,7 +135,7 @@ func TestConfigFileFrameCopy(t *testing.T) {
 		sizes := map[string]int64{
 			"users.xml":            int64(2017),
 			"default-password.xml": int64(188),
-			"config.xml":           int64(61662),
+			"config.xml":           int64(59506),
 			"server-include.xml":   int64(168),
 			"user-include.xml":     int64(559),
 		}
@ -189,7 +189,7 @@ func TestConfigFileFrameCopy(t *testing.T) {
 		sizes := map[string]int64{
 			"users.yaml":            int64(1023),
 			"default-password.yaml": int64(132),
-			"config.yaml":           int64(42512),
+			"config.yaml":           int64(41633),
 			"server-include.yaml":   int64(21),
 			"user-include.yaml":     int64(120),
 		}
--- a/programs/diagnostics/testdata/configs/xml/config.xml
+++ b/programs/diagnostics/testdata/configs/xml/config.xml
@ -649,73 +649,6 @@
                </replica>
            </shard>
        </test_shard_localhost>
-        <test_cluster_two_shards_localhost>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-        </test_cluster_two_shards_localhost>
-        <test_cluster_two_shards>
-            <shard>
-                <replica>
-                    <host>127.0.0.1</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-            <shard>
-                <replica>
-                    <host>127.0.0.2</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-        </test_cluster_two_shards>
-        <test_cluster_two_shards_internal_replication>
-            <shard>
-                <internal_replication>true</internal_replication>
-                <replica>
-                    <host>127.0.0.1</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-            <shard>
-                <internal_replication>true</internal_replication>
-                <replica>
-                    <host>127.0.0.2</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-        </test_cluster_two_shards_internal_replication>
-        <test_shard_localhost_secure>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>9440</port>
-                    <secure>1</secure>
-                </replica>
-            </shard>
-        </test_shard_localhost_secure>
-        <test_unavailable_shard>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>1</port>
-                </replica>
-            </shard>
-        </test_unavailable_shard>
    </remote_servers>

    <!-- The list of hosts allowed to use in URL-related storage engines and table functions.
--- a/programs/diagnostics/testdata/configs/yaml/config.yaml
+++ b/programs/diagnostics/testdata/configs/yaml/config.yaml
@ -547,46 +547,6 @@ remote_servers:
        port: 9000
        # Optional. Priority of the replica for load_balancing. Default: 1 (less value has more priority).
        # priority: 1
-  test_cluster_two_shards_localhost:
-    shard:
-      - replica:
-          host: localhost
-          port: 9000
-      - replica:
-          host: localhost
-          port: 9000
-  test_cluster_two_shards:
-    shard:
-      - replica:
-          host: 127.0.0.1
-          port: 9000
-      - replica:
-          host: 127.0.0.2
-          port: 9000
-  test_cluster_two_shards_internal_replication:
-    shard:
-      - internal_replication: true
-        replica:
-          host: 127.0.0.1
-          port: 9000
-      - internal_replication: true
-        replica:
-          host: 127.0.0.2
-          port: 9000
-  test_shard_localhost_secure:
-    shard:
-      replica:
-        host: localhost
-        port: 9440
-        secure: 1
-  test_unavailable_shard:
-    shard:
-      - replica:
-          host: localhost
-          port: 9000
-      - replica:
-          host: localhost
-          port: 1

 # The list of hosts allowed to use in URL-related storage engines and table functions.
 # If this section is not present in configuration, all hosts are allowed.
--- a/programs/diagnostics/testdata/configs/yandex_xml/config.xml
+++ b/programs/diagnostics/testdata/configs/yandex_xml/config.xml
@ -649,73 +649,6 @@
                </replica>
            </shard>
        </test_shard_localhost>
-        <test_cluster_two_shards_localhost>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-        </test_cluster_two_shards_localhost>
-        <test_cluster_two_shards>
-            <shard>
-                <replica>
-                    <host>127.0.0.1</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-            <shard>
-                <replica>
-                    <host>127.0.0.2</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-        </test_cluster_two_shards>
-        <test_cluster_two_shards_internal_replication>
-            <shard>
-                <internal_replication>true</internal_replication>
-                <replica>
-                    <host>127.0.0.1</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-            <shard>
-                <internal_replication>true</internal_replication>
-                <replica>
-                    <host>127.0.0.2</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-        </test_cluster_two_shards_internal_replication>
-        <test_shard_localhost_secure>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>9440</port>
-                    <secure>1</secure>
-                </replica>
-            </shard>
-        </test_shard_localhost_secure>
-        <test_unavailable_shard>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>1</port>
-                </replica>
-            </shard>
-        </test_unavailable_shard>
    </remote_servers>

    <!-- The list of hosts allowed to use in URL-related storage engines and table functions.
--- a/programs/disks/CommandCopy.cpp
+++ b/programs/disks/CommandCopy.cpp
@ -59,7 +59,7 @@ public:
        String relative_path_from = validatePathAndGetAsRelative(path_from);
        String relative_path_to = validatePathAndGetAsRelative(path_to);

-        disk_from->copy(relative_path_from, disk_to, relative_path_to);
+        disk_from->copyDirectoryContent(relative_path_from, disk_to, relative_path_to);
    }
 };
 }
--- a/programs/keeper-converter/KeeperConverter.cpp
+++ b/programs/keeper-converter/KeeperConverter.cpp
@ -42,7 +42,7 @@ int mainEntryClickHouseKeeperConverter(int argc, char ** argv)
    {
        auto keeper_context = std::make_shared<KeeperContext>(true);
        keeper_context->setDigestEnabled(true);
-        keeper_context->setSnapshotDisk(std::make_shared<DiskLocal>("Keeper-snapshots", options["output-dir"].as<std::string>(), 0));
+        keeper_context->setSnapshotDisk(std::make_shared<DiskLocal>("Keeper-snapshots", options["output-dir"].as<std::string>()));

        DB::KeeperStorage storage(/* tick_time_ms */ 500, /* superdigest */ "", keeper_context, /* initialize_system_nodes */ false);

--- a/programs/keeper/Keeper.cpp
+++ b/programs/keeper/Keeper.cpp
@ -485,7 +485,7 @@ try
            LOG_INFO(log, "Closed all listening sockets.");

        if (current_connections > 0)
-            current_connections = waitServersToFinish(*servers, config().getInt("shutdown_wait_unfinished", 5));
+            current_connections = waitServersToFinish(*servers, servers_lock, config().getInt("shutdown_wait_unfinished", 5));

        if (current_connections)
            LOG_INFO(log, "Closed connections to Keeper. But {} remain. Probably some users cannot finish their connections after context shutdown.", current_connections);
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@ -75,6 +75,15 @@ namespace ErrorCodes
    extern const int FILE_ALREADY_EXISTS;
 }

+void applySettingsOverridesForLocal(ContextMutablePtr context)
+{
+    Settings settings = context->getSettings();
+
+    settings.allow_introspection_functions = true;
+    settings.storage_file_read_method = LocalFSReadMethod::mmap;
+
+    context->setSettings(settings);
+}

 void LocalServer::processError(const String &) const
 {
@ -668,6 +677,12 @@ void LocalServer::processConfig()
    CompiledExpressionCacheFactory::instance().init(compiled_expression_cache_size, compiled_expression_cache_elements_size);
 #endif

+    /// NOTE: it is important to apply any overrides before
+    /// setDefaultProfiles() calls since it will copy current context (i.e.
+    /// there is separate context for Buffer tables).
+    applySettingsOverridesForLocal(global_context);
+    applyCmdOptions(global_context);
+
    /// Load global settings from default_profile and system_profile.
    global_context->setDefaultProfiles(config());

@ -682,7 +697,6 @@ void LocalServer::processConfig()
    std::string default_database = config().getString("default_database", "_local");
    DatabaseCatalog::instance().attachDatabase(default_database, createClickHouseLocalDatabaseOverlay(default_database, global_context));
    global_context->setCurrentDatabase(default_database);
-    applyCmdOptions(global_context);

    if (config().has("path"))
    {
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@ -1146,7 +1146,16 @@ try
            size_t merges_mutations_memory_usage_soft_limit = server_settings_.merges_mutations_memory_usage_soft_limit;

            size_t default_merges_mutations_server_memory_usage = static_cast<size_t>(memory_amount * server_settings_.merges_mutations_memory_usage_to_ram_ratio);
-            if (merges_mutations_memory_usage_soft_limit == 0 || merges_mutations_memory_usage_soft_limit > default_merges_mutations_server_memory_usage)
+            if (merges_mutations_memory_usage_soft_limit == 0)
+            {
+                merges_mutations_memory_usage_soft_limit = default_merges_mutations_server_memory_usage;
+                LOG_INFO(log, "Setting merges_mutations_memory_usage_soft_limit was set to {}"
+                    " ({} available * {:.2f} merges_mutations_memory_usage_to_ram_ratio)",
+                    formatReadableSizeWithBinarySuffix(merges_mutations_memory_usage_soft_limit),
+                    formatReadableSizeWithBinarySuffix(memory_amount),
+                    server_settings_.merges_mutations_memory_usage_to_ram_ratio);
+            }
+            else if (merges_mutations_memory_usage_soft_limit > default_merges_mutations_server_memory_usage)
            {
                merges_mutations_memory_usage_soft_limit = default_merges_mutations_server_memory_usage;
                LOG_WARNING(log, "Setting merges_mutations_memory_usage_soft_limit was set to {}"
@ -1523,7 +1532,7 @@ try
                LOG_INFO(log, "Closed all listening sockets.");

            if (current_connections > 0)
-                current_connections = waitServersToFinish(servers_to_start_before_tables, config().getInt("shutdown_wait_unfinished", 5));
+                current_connections = waitServersToFinish(servers_to_start_before_tables, servers_lock, config().getInt("shutdown_wait_unfinished", 5));

            if (current_connections)
                LOG_INFO(log, "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections after context shutdown.", current_connections);
@ -1581,6 +1590,15 @@ try
        /// After attaching system databases we can initialize system log.
        global_context->initializeSystemLogs();
        global_context->setSystemZooKeeperLogAfterInitializationIfNeeded();
+        /// Build loggers before tables startup to make log messages from tables
+        /// attach available in system.text_log
+        {
+            String level_str = config().getString("text_log.level", "");
+            int level = level_str.empty() ? INT_MAX : Poco::Logger::parseLevel(level_str);
+            setTextLog(global_context->getTextLog(), level);
+
+            buildLoggers(config(), logger());
+        }
        /// After the system database is created, attach virtual system tables (in addition to query_log and part_log)
        attachSystemTablesServer(global_context, *database_catalog.getSystemDatabase(), has_zookeeper);
        attachInformationSchema(global_context, *database_catalog.getDatabase(DatabaseCatalog::INFORMATION_SCHEMA));
@ -1609,7 +1627,7 @@ try

    /// Init trace collector only after trace_log system table was created
    /// Disable it if we collect test coverage information, because it will work extremely slow.
-#if USE_UNWIND && !WITH_COVERAGE
+#if !WITH_COVERAGE
    /// Profilers cannot work reliably with any other libunwind or without PHDR cache.
    if (hasPHDRCache())
    {
@ -1632,10 +1650,6 @@ try

    /// Describe multiple reasons when query profiler cannot work.

-#if !USE_UNWIND
-    LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they cannot work without bundled unwind (stack unwinding) library.");
-#endif
-
 #if WITH_COVERAGE
    LOG_INFO(log, "Query Profiler and TraceCollector are disabled because they work extremely slow with test coverage.");
 #endif
@ -1707,14 +1721,6 @@ try
        /// Must be done after initialization of `servers`, because async_metrics will access `servers` variable from its thread.
        async_metrics.start();

-        {
-            String level_str = config().getString("text_log.level", "");
-            int level = level_str.empty() ? INT_MAX : Poco::Logger::parseLevel(level_str);
-            setTextLog(global_context->getTextLog(), level);
-        }
-
-        buildLoggers(config(), logger());
-
        main_config_reloader->start();
        access_control.startPeriodicReloading();

@ -1827,7 +1833,7 @@ try
                global_context->getProcessList().killAllQueries();

            if (current_connections)
-                current_connections = waitServersToFinish(servers, config().getInt("shutdown_wait_unfinished", 5));
+                current_connections = waitServersToFinish(servers, servers_lock, config().getInt("shutdown_wait_unfinished", 5));

            if (current_connections)
                LOG_WARNING(log, "Closed connections. But {} remain."
--- a/programs/server/config.d/more_clusters.xml
+++ b/programs/server/config.d/more_clusters.xml
@ -1,49 +0,0 @@
-<clickhouse>
-    <remote_servers>
-
-        <![CDATA[
-            You can run additional servers simply as
-             ./clickhouse-server -- --path=9001 --tcp_port=9001
-        ]]>
-
-        <single_remote_shard_at_port_9001>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>9001</port>
-                </replica>
-            </shard>
-        </single_remote_shard_at_port_9001>
-
-        <two_remote_shards_at_port_9001_9002>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>9001</port>
-                </replica>
-            </shard>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>9002</port>
-                </replica>
-            </shard>
-        </two_remote_shards_at_port_9001_9002>
-
-        <two_shards_one_local_one_remote_at_port_9001>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>9001</port>
-                </replica>
-            </shard>
-        </two_shards_one_local_one_remote_at_port_9001>
-
-    </remote_servers>
-</clickhouse>
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@ -805,7 +805,7 @@
      -->
    <remote_servers>
        <!-- Test only shard config for testing distributed storage -->
-        <test_shard_localhost>
+        <default>
            <!-- Inter-server per-cluster secret for Distributed queries
                 default: no secret (no authentication will be performed)

@ -838,158 +838,11 @@
                    <port>9000</port>
                    <!-- Optional. Priority of the replica for load_balancing. Default: 1 (less value has more priority). -->
                    <!-- <priority>1</priority> -->
+                    <!-- Use SSL? Default: no -->
+                    <!-- <secure>0</secure> -->
                </replica>
            </shard>
-        </test_shard_localhost>
-        <test_cluster_one_shard_three_replicas_localhost>
-            <shard>
-                <internal_replication>false</internal_replication>
-                <replica>
-                    <host>127.0.0.1</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>127.0.0.2</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>127.0.0.3</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-            <!--shard>
-                <internal_replication>false</internal_replication>
-                <replica>
-                    <host>127.0.0.1</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>127.0.0.2</host>
-                    <port>9000</port>
-                </replica>
-                <replica>
-                    <host>127.0.0.3</host>
-                    <port>9000</port>
-                </replica>
-            </shard-->
-        </test_cluster_one_shard_three_replicas_localhost>
-	<parallel_replicas>
-		<shard>
-            <internal_replication>false</internal_replication>
-            <replica>
-                <host>127.0.0.1</host>
-                <port>9000</port>
-            </replica>
-            <replica>
-                <host>127.0.0.2</host>
-                <port>9000</port>
-            </replica>
-            <replica>
-                <host>127.0.0.3</host>
-                <port>9000</port>
-            </replica>
-            <replica>
-                <host>127.0.0.4</host>
-                <port>9000</port>
-            </replica>
-            <replica>
-                <host>127.0.0.5</host>
-                <port>9000</port>
-            </replica>
-            <replica>
-                <host>127.0.0.6</host>
-                <port>9000</port>
-            </replica>
-            <replica>
-                <host>127.0.0.7</host>
-                <port>9000</port>
-            </replica>
-            <replica>
-                <host>127.0.0.8</host>
-                <port>9000</port>
-            </replica>
-            <replica>
-                <host>127.0.0.9</host>
-                <port>9000</port>
-            </replica>
-            <replica>
-                <host>127.0.0.10</host>
-                <port>9000</port>
-            </replica>
-            <!-- Unavailable replica -->
-            <replica>
-                <host>127.0.0.11</host>
-                <port>1234</port>
-            </replica>
-        </shard>
-	</parallel_replicas>
-        <test_cluster_two_shards_localhost>
-             <shard>
-                 <replica>
-                     <host>localhost</host>
-                     <port>9000</port>
-                 </replica>
-             </shard>
-             <shard>
-                 <replica>
-                     <host>localhost</host>
-                     <port>9000</port>
-                 </replica>
-             </shard>
-        </test_cluster_two_shards_localhost>
-        <test_cluster_two_shards>
-            <shard>
-                <replica>
-                    <host>127.0.0.1</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-            <shard>
-                <replica>
-                    <host>127.0.0.2</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-        </test_cluster_two_shards>
-        <test_cluster_two_shards_internal_replication>
-            <shard>
-                <internal_replication>true</internal_replication>
-                <replica>
-                    <host>127.0.0.1</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-            <shard>
-                <internal_replication>true</internal_replication>
-                <replica>
-                    <host>127.0.0.2</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-        </test_cluster_two_shards_internal_replication>
-        <test_shard_localhost_secure>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>9440</port>
-                    <secure>1</secure>
-                </replica>
-            </shard>
-        </test_shard_localhost_secure>
-        <test_unavailable_shard>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>9000</port>
-                </replica>
-            </shard>
-            <shard>
-                <replica>
-                    <host>localhost</host>
-                    <port>1</port>
-                </replica>
-            </shard>
-        </test_unavailable_shard>
+        </default>
    </remote_servers>

    <!-- The list of hosts allowed to use in URL-related storage engines and table functions.
--- a/programs/server/config.yaml.example
+++ b/programs/server/config.yaml.example
@ -515,7 +515,7 @@ remap_executable: false
 # https://clickhouse.com/docs/en/operations/table_engines/distributed/
 remote_servers:
    # Test only shard config for testing distributed storage
-    test_shard_localhost:
+    default:
        # Inter-server per-cluster secret for Distributed queries
        # default: no secret (no authentication will be performed)

@ -546,46 +546,8 @@ remote_servers:
                port: 9000
                # Optional. Priority of the replica for load_balancing. Default: 1 (less value has more priority).
                # priority: 1
-    test_cluster_two_shards_localhost:
-        shard:
-            - replica:
-                  host: localhost
-                  port: 9000
-            - replica:
-                  host: localhost
-                  port: 9000
-    test_cluster_two_shards:
-        shard:
-            - replica:
-                  host: 127.0.0.1
-                  port: 9000
-            - replica:
-                  host: 127.0.0.2
-                  port: 9000
-    test_cluster_two_shards_internal_replication:
-        shard:
-            - internal_replication: true
-              replica:
-                  host: 127.0.0.1
-                  port: 9000
-            - internal_replication: true
-              replica:
-                  host: 127.0.0.2
-                  port: 9000
-    test_shard_localhost_secure:
-        shard:
-            replica:
-                host: localhost
-                port: 9440
-                secure: 1
-    test_unavailable_shard:
-        shard:
-            - replica:
-                  host: localhost
-                  port: 9000
-            - replica:
-                  host: localhost
-                  port: 1
+                # Use SSL? Default: no
+                # secure: 0

 # The list of hosts allowed to use in URL-related storage engines and table functions.
 # If this section is not present in configuration, all hosts are allowed.
--- a/src/Access/Common/AccessType.h
+++ b/src/Access/Common/AccessType.h
@ -158,7 +158,6 @@ enum class AccessType
    M(SYSTEM_DROP_CACHE, "DROP CACHE", GROUP, SYSTEM) \
    M(SYSTEM_RELOAD_CONFIG, "RELOAD CONFIG", GLOBAL, SYSTEM_RELOAD) \
    M(SYSTEM_RELOAD_USERS, "RELOAD USERS", GLOBAL, SYSTEM_RELOAD) \
-    M(SYSTEM_RELOAD_SYMBOLS, "RELOAD SYMBOLS", GLOBAL, SYSTEM_RELOAD) \
    M(SYSTEM_RELOAD_DICTIONARY, "SYSTEM RELOAD DICTIONARIES, RELOAD DICTIONARY, RELOAD DICTIONARIES", GLOBAL, SYSTEM_RELOAD) \
    M(SYSTEM_RELOAD_MODEL, "SYSTEM RELOAD MODELS, RELOAD MODEL, RELOAD MODELS", GLOBAL, SYSTEM_RELOAD) \
    M(SYSTEM_RELOAD_FUNCTION, "SYSTEM RELOAD FUNCTIONS, RELOAD FUNCTION, RELOAD FUNCTIONS", GLOBAL, SYSTEM_RELOAD) \
--- a/src/AggregateFunctions/AggregateFunctionMinMaxAny.h
+++ b/src/AggregateFunctions/AggregateFunctionMinMaxAny.h
@ -51,7 +51,8 @@ private:
    T value = T{};

 public:
-    static constexpr bool is_nullable = false;
+    static constexpr bool result_is_nullable = false;
+    static constexpr bool should_skip_null_arguments = true;
    static constexpr bool is_any = false;

    bool has() const
@ -501,7 +502,8 @@ private:
    char small_data[MAX_SMALL_STRING_SIZE]; /// Including the terminating zero.

 public:
-    static constexpr bool is_nullable = false;
+    static constexpr bool result_is_nullable = false;
+    static constexpr bool should_skip_null_arguments = true;
    static constexpr bool is_any = false;

    bool has() const
@ -769,7 +771,7 @@ static_assert(


 /// For any other value types.
-template <bool IS_NULLABLE = false>
+template <bool RESULT_IS_NULLABLE = false>
 struct SingleValueDataGeneric
 {
 private:
@ -779,12 +781,13 @@ private:
    bool has_value = false;

 public:
-    static constexpr bool is_nullable = IS_NULLABLE;
+    static constexpr bool result_is_nullable = RESULT_IS_NULLABLE;
+    static constexpr bool should_skip_null_arguments = !RESULT_IS_NULLABLE;
    static constexpr bool is_any = false;

    bool has() const
    {
-        if constexpr (is_nullable)
+        if constexpr (result_is_nullable)
            return has_value;
        return !value.isNull();
    }
@ -820,14 +823,14 @@ public:
    void change(const IColumn & column, size_t row_num, Arena *)
    {
        column.get(row_num, value);
-        if constexpr (is_nullable)
+        if constexpr (result_is_nullable)
            has_value = true;
    }

    void change(const Self & to, Arena *)
    {
        value = to.value;
-        if constexpr (is_nullable)
+        if constexpr (result_is_nullable)
            has_value = true;
    }

@ -844,7 +847,7 @@ public:

    bool changeFirstTime(const Self & to, Arena * arena)
    {
-        if (!has() && (is_nullable || to.has()))
+        if (!has() && (result_is_nullable || to.has()))
        {
            change(to, arena);
            return true;
@ -879,7 +882,7 @@ public:
        }
        else
        {
-            if constexpr (is_nullable)
+            if constexpr (result_is_nullable)
            {
                Field new_value;
                column.get(row_num, new_value);
@ -910,7 +913,7 @@ public:
    {
        if (!to.has())
            return false;
-        if constexpr (is_nullable)
+        if constexpr (result_is_nullable)
        {
            if (!has())
            {
@ -945,7 +948,7 @@ public:
        }
        else
        {
-            if constexpr (is_nullable)
+            if constexpr (result_is_nullable)
            {
                Field new_value;
                column.get(row_num, new_value);
@ -975,7 +978,7 @@ public:
    {
        if (!to.has())
            return false;
-        if constexpr (is_nullable)
+        if constexpr (result_is_nullable)
        {
            if (!value.isNull() && (to.value.isNull() || value < to.value))
            {
@ -1138,13 +1141,20 @@ struct AggregateFunctionAnyLastData : Data
 #endif
 };

+
+/** The aggregate function 'singleValueOrNull' is used to implement subquery operators,
+  * such as x = ALL (SELECT ...)
+  * It checks if there is only one unique non-NULL value in the data.
+  * If there is only one unique value - returns it.
+  * If there are zero or at least two distinct values - returns NULL.
+  */
 template <typename Data>
 struct AggregateFunctionSingleValueOrNullData : Data
 {
-    static constexpr bool is_nullable = true;
-
    using Self = AggregateFunctionSingleValueOrNullData;

+    static constexpr bool result_is_nullable = true;
+
    bool first_value = true;
    bool is_null = false;

@ -1166,7 +1176,7 @@ struct AggregateFunctionSingleValueOrNullData : Data
        if (!to.has())
            return;

-        if (first_value)
+        if (first_value && !to.first_value)
        {
            first_value = false;
            this->change(to, arena);
@ -1311,7 +1321,7 @@ public:

    static DataTypePtr createResultType(const DataTypePtr & type_)
    {
-        if constexpr (Data::is_nullable)
+        if constexpr (Data::result_is_nullable)
            return makeNullable(type_);
        return type_;
    }
@ -1431,13 +1441,13 @@ public:
    }

    AggregateFunctionPtr getOwnNullAdapter(
-        const AggregateFunctionPtr & nested_function,
+        const AggregateFunctionPtr & original_function,
        const DataTypes & /*arguments*/,
        const Array & /*params*/,
        const AggregateFunctionProperties & /*properties*/) const override
    {
-        if (Data::is_nullable)
-            return nested_function;
+        if (Data::result_is_nullable && !Data::should_skip_null_arguments)
+            return original_function;
        return nullptr;
    }

--- a/src/Analyzer/Passes/QueryAnalysisPass.cpp
+++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp
@ -116,7 +116,6 @@ namespace ErrorCodes
    extern const int UNKNOWN_TABLE;
    extern const int ILLEGAL_COLUMN;
    extern const int NUMBER_OF_COLUMNS_DOESNT_MATCH;
-    extern const int FUNCTION_CANNOT_HAVE_PARAMETERS;
 }

 /** Query analyzer implementation overview. Please check documentation in QueryAnalysisPass.h first.
@ -4897,11 +4896,6 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
                    lambda_expression_untyped->formatASTForErrorMessage(),
                    scope.scope_node->formatASTForErrorMessage());

-            if (!parameters.empty())
-            {
-                throw Exception(ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS, "Function {} is not parametric", function_node.formatASTForErrorMessage());
-            }
-
            auto lambda_expression_clone = lambda_expression_untyped->clone();

            IdentifierResolveScope lambda_scope(lambda_expression_clone, &scope /*parent_scope*/);
@ -5018,12 +5012,9 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
    }

    FunctionOverloadResolverPtr function = UserDefinedExecutableFunctionFactory::instance().tryGet(function_name, scope.context, parameters);
-    bool is_executable_udf = false;

    if (!function)
        function = FunctionFactory::instance().tryGet(function_name, scope.context);
-    else
-        is_executable_udf = true;

    if (!function)
    {
@ -5074,12 +5065,6 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi
        return result_projection_names;
    }

-    /// Executable UDFs may have parameters. They are checked in UserDefinedExecutableFunctionFactory.
-    if (!parameters.empty() && !is_executable_udf)
-    {
-        throw Exception(ErrorCodes::FUNCTION_CANNOT_HAVE_PARAMETERS, "Function {} is not parametric", function_name);
-    }
-
    /** For lambda arguments we need to initialize lambda argument types DataTypeFunction using `getLambdaArgumentTypes` function.
      * Then each lambda arguments are initialized with columns, where column source is lambda.
      * This information is important for later steps of query processing.
--- a/src/Backups/BackupIO_S3.cpp
+++ b/src/Backups/BackupIO_S3.cpp
@ -253,6 +253,7 @@ std::unique_ptr<WriteBuffer> BackupWriterS3::writeFile(const String & file_name)
 {
    return std::make_unique<WriteBufferFromS3>(
        client,
+        client, // already has long timeout
        s3_uri.bucket,
        fs::path(s3_uri.key) / file_name,
        DBMS_DEFAULT_BUFFER_SIZE,
--- a/src/Backups/tests/gtest_backup_entries.cpp
+++ b/src/Backups/tests/gtest_backup_entries.cpp
@ -24,7 +24,7 @@ protected:
        /// Make local disk.
        temp_dir = std::make_unique<Poco::TemporaryFile>();
        temp_dir->createDirectories();
-        local_disk = std::make_shared<DiskLocal>("local_disk", temp_dir->path() + "/", 0);
+        local_disk = std::make_shared<DiskLocal>("local_disk", temp_dir->path() + "/");

        /// Make encrypted disk.
        auto settings = std::make_unique<DiskEncryptedSettings>();
@ -38,7 +38,7 @@ protected:
        settings->current_key = key;
        settings->current_key_fingerprint = fingerprint;

-        encrypted_disk = std::make_shared<DiskEncrypted>("encrypted_disk", std::move(settings), true);
+        encrypted_disk = std::make_shared<DiskEncrypted>("encrypted_disk", std::move(settings));
    }

    void TearDown() override
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -206,11 +206,10 @@ add_library (clickhouse_new_delete STATIC Common/new_delete.cpp)
 target_link_libraries (clickhouse_new_delete PRIVATE clickhouse_common_io)
 if (TARGET ch_contrib::jemalloc)
    target_link_libraries (clickhouse_new_delete PRIVATE ch_contrib::jemalloc)
+    target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::jemalloc)
+    target_link_libraries (clickhouse_storages_system PRIVATE ch_contrib::jemalloc)
 endif()

-if (TARGET ch_contrib::jemalloc)
-    target_link_libraries (clickhouse_common_io PRIVATE ch_contrib::jemalloc)
-endif()
 target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::sparsehash)

 add_subdirectory(Access/Common)
--- a/src/Client/ClientBase.cpp
+++ b/src/Client/ClientBase.cpp
@ -575,9 +575,11 @@ try
                }

                auto flags = O_WRONLY | O_EXCL;
-                if (query_with_output->is_outfile_append)
+
+                auto file_exists = fs::exists(out_file);
+                if (file_exists && query_with_output->is_outfile_append)
                    flags |= O_APPEND;
-                else if (query_with_output->is_outfile_truncate)
+                else if (file_exists && query_with_output->is_outfile_truncate)
                    flags |= O_TRUNC;
                else
                    flags |= O_CREAT;
--- a/src/Columns/ColumnVector.h
+++ b/src/Columns/ColumnVector.h
@ -107,8 +107,8 @@ struct FloatCompareHelper
    }
 };

-template <class U> struct CompareHelper<Float32, U> : public FloatCompareHelper<Float32> {};
-template <class U> struct CompareHelper<Float64, U> : public FloatCompareHelper<Float64> {};
+template <typename U> struct CompareHelper<Float32, U> : public FloatCompareHelper<Float32> {};
+template <typename U> struct CompareHelper<Float64, U> : public FloatCompareHelper<Float64> {};


 /** A template for columns that use a simple array to store.
--- a/src/Common/Allocator.cpp
+++ b/src/Common/Allocator.cpp
@ -8,7 +8,7 @@
  * See also: https://gcc.gnu.org/legacy-ml/gcc-help/2017-12/msg00021.html
  */
 #ifdef NDEBUG
-    __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 64 * (1ULL << 20);
+    __attribute__((__weak__)) extern const size_t MMAP_THRESHOLD = 128 * (1ULL << 20);
 #else
    /**
      * In debug build, use small mmap threshold to reproduce more memory
--- a/src/Common/CurrentMetrics.cpp
+++ b/src/Common/CurrentMetrics.cpp
@ -2,7 +2,7 @@


 /// Available metrics. Add something here as you wish.
-#define APPLY_FOR_METRICS(M) \
+#define APPLY_FOR_BUILTIN_METRICS(M) \
    M(Query, "Number of executing queries") \
    M(Merge, "Number of executing background merges") \
    M(Move, "Number of currently executing moves") \
@ -93,8 +93,8 @@
    M(ThreadPoolFSReaderThreadsActive, "Number of threads in the thread pool for local_filesystem_read_method=threadpool running a task.") \
    M(BackupsIOThreads, "Number of threads in the BackupsIO thread pool.") \
    M(BackupsIOThreadsActive, "Number of threads in the BackupsIO thread pool running a task.") \
-    M(DiskObjectStorageAsyncThreads, "Number of threads in the async thread pool for DiskObjectStorage.") \
-    M(DiskObjectStorageAsyncThreadsActive, "Number of threads in the async thread pool for DiskObjectStorage running a task.") \
+    M(DiskObjectStorageAsyncThreads, "Obsolete metric, shows nothing.") \
+    M(DiskObjectStorageAsyncThreadsActive, "Obsolete metric, shows nothing.") \
    M(StorageHiveThreads, "Number of threads in the StorageHive thread pool.") \
    M(StorageHiveThreadsActive, "Number of threads in the StorageHive thread pool running a task.") \
    M(TablesLoaderThreads, "Number of threads in the tables loader thread pool.") \
@ -141,6 +141,8 @@
    M(MergeTreeOutdatedPartsLoaderThreadsActive, "Number of active threads in the threadpool for loading Outdated data parts.") \
    M(MergeTreePartsCleanerThreads, "Number of threads in the MergeTree parts cleaner thread pool.") \
    M(MergeTreePartsCleanerThreadsActive, "Number of threads in the MergeTree parts cleaner thread pool running a task.") \
+    M(IDiskCopierThreads, "Number of threads for copying data between disks of different types.") \
+    M(IDiskCopierThreadsActive, "Number of threads for copying data between disks of different types running a task.") \
    M(SystemReplicasThreads, "Number of threads in the system.replicas thread pool.") \
    M(SystemReplicasThreadsActive, "Number of threads in the system.replicas thread pool running a task.") \
    M(RestartReplicaThreads, "Number of threads in the RESTART REPLICA thread pool.") \
@ -200,7 +202,13 @@
    M(MergeTreeReadTaskRequestsSent, "The current number of callback requests in flight from the remote server back to the initiator server to choose the read task (for MergeTree tables). Measured on the remote server side.") \
    M(MergeTreeAllRangesAnnouncementsSent, "The current number of announcement being sent in flight from the remote server to the initiator server about the set of data parts (for MergeTree tables). Measured on the remote server side.") \
    M(CreatedTimersInQueryProfiler, "Number of Created thread local timers in QueryProfiler") \
-    M(ActiveTimersInQueryProfiler, "Number of Active thread local timers in QueryProfiler")
+    M(ActiveTimersInQueryProfiler, "Number of Active thread local timers in QueryProfiler") \
+
+#ifdef APPLY_FOR_EXTERNAL_METRICS
+    #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M) APPLY_FOR_EXTERNAL_METRICS(M)
+#else
+    #define APPLY_FOR_METRICS(M) APPLY_FOR_BUILTIN_METRICS(M)
+#endif

 namespace CurrentMetrics
 {
--- a/src/Common/ErrorCodes.cpp
+++ b/src/Common/ErrorCodes.cpp
@ -13,7 +13,7 @@
  * - system.errors table
  */

-#define APPLY_FOR_ERROR_CODES(M) \
+#define APPLY_FOR_BUILTIN_ERROR_CODES(M) \
    M(0, OK) \
    M(1, UNSUPPORTED_METHOD) \
    M(2, UNSUPPORTED_PARAMETER) \
@ -589,6 +589,12 @@
    M(1002, UNKNOWN_EXCEPTION) \
 /* See END */

+#ifdef APPLY_FOR_EXTERNAL_ERROR_CODES
+    #define APPLY_FOR_ERROR_CODES(M) APPLY_FOR_BUILTIN_ERROR_CODES(M) APPLY_FOR_EXTERNAL_ERROR_CODES(M)
+#else
+    #define APPLY_FOR_ERROR_CODES(M) APPLY_FOR_BUILTIN_ERROR_CODES(M)
+#endif
+
 namespace DB
 {
 namespace ErrorCodes
--- a/src/Common/Exception.cpp
+++ b/src/Common/Exception.cpp
@ -418,6 +418,18 @@ PreformattedMessage getCurrentExceptionMessageAndPattern(bool with_stacktrace, b
                << " (version " << VERSION_STRING << VERSION_OFFICIAL << ")";
        }
        catch (...) {}
+
+// #ifdef ABORT_ON_LOGICAL_ERROR
+//         try
+//         {
+//             throw;
+//         }
+//         catch (const std::logic_error &)
+//         {
+//             abortOnFailedAssertion(stream.str());
+//         }
+//         catch (...) {}
+// #endif
    }
    catch (...)
    {
--- a/src/Common/LoggingFormatStringHelpers.cpp
+++ b/src/Common/LoggingFormatStringHelpers.cpp
@ -1,3 +1,4 @@
+#include <Common/DateLUT.h>
 #include <Common/LoggingFormatStringHelpers.h>
 #include <Common/SipHash.h>
 #include <Common/thread_local_rng.h>
@ -74,3 +75,101 @@ void LogFrequencyLimiterIml::cleanup(time_t too_old_threshold_s)
    std::erase_if(logged_messages, [old](const auto & elem) { return elem.second.first < old; });
    last_cleanup = now;
 }
+
+
+std::mutex LogSeriesLimiter::mutex;
+time_t LogSeriesLimiter::last_cleanup = 0;
+
+/// Decides once, at construction, whether the series of messages logged through this
+/// instance is accepted or muted; the decision is stored in `accepted`.
+///  - allowed_count_ == 0: the series is muted and no record is kept.
+///  - interval_s_ == 0: the series is accepted and no record is kept.
+///  - otherwise a record per logger name counts the series: a series is accepted while fewer
+///    than allowed_count_ series were accepted since the record was (re)started, and the record
+///    is restarted once interval_s_ seconds have passed since it was started.
+LogSeriesLimiter::LogSeriesLimiter(Poco::Logger * logger_, size_t allowed_count_, time_t interval_s_)
+    : logger(logger_)
+{
+    if (allowed_count_ == 0)
+    {
+        accepted = false;
+        return;
+    }
+
+    if (interval_s_ == 0)
+    {
+        accepted = true;
+        return;
+    }
+
+    time_t now = time(nullptr);
+    /// NOTE(review): the hash is UInt128 while SeriesRecords is keyed by UInt64 - presumably it is narrowed implicitly; confirm.
+    UInt128 name_hash = sipHash128(logger->name().c_str(), logger->name().size());
+
+    std::lock_guard lock(mutex);
+
+    if (last_cleanup == 0)
+        last_cleanup = now;
+
+    auto & series_records = getSeriesRecords();
+
+    /// Drop stale records, but only when at least cleanup_delay_s seconds have passed since
+    /// the previous cleanup, so that the whole map is not scanned on every construction.
+    static const time_t cleanup_delay_s = 600;
+    if (last_cleanup + cleanup_delay_s <= now)
+    {
+        time_t old = now - cleanup_delay_s;
+        std::erase_if(series_records, [old](const auto & elem) { return get<0>(elem.second) < old; });
+        last_cleanup = now;
+    }
+
+    /// (Re)starts the record for this logger name: this series is accepted and counted as the first one.
+    auto register_as_first = [&] () TSA_REQUIRES(mutex)
+    {
+        assert(allowed_count_ > 0);
+        accepted = true;
+        series_records[name_hash] = std::make_tuple(now, 1, 1);
+    };
+
+    if (!series_records.contains(name_hash))
+    {
+        register_as_first();
+        return;
+    }
+
+    auto & [last_time, accepted_count, total_count] = series_records[name_hash];
+    if (last_time + interval_s_ <= now)
+    {
+        /// The interval is over: remember its statistics (appended to the next logged message)
+        /// before the record is overwritten by register_as_first().
+        debug_message = fmt::format(
+            " (LogSeriesLimiter: on interval from {} to {} accepted series {} / {} for the logger {} : {})",
+            DateLUT::instance().timeToString(last_time),
+            DateLUT::instance().timeToString(now),
+            accepted_count,
+            total_count,
+            logger->name(),
+            double(name_hash));
+
+        register_as_first();
+        return;
+    }
+
+    /// Still inside the interval: accept only while the quota is not exhausted, but count every series.
+    if (accepted_count < allowed_count_)
+    {
+        accepted = true;
+        ++accepted_count;
+    }
+    ++total_count;
+}
+
+/// Forwards `message` to the channel of the wrapped logger (if there is one).
+/// Messages without a format string are never filtered; any other message is dropped
+/// when the series is muted. The pending debug text, if any, is appended to the first
+/// forwarded message that has a format string and is then cleared.
+void LogSeriesLimiter::log(Poco::Message & message)
+{
+    const bool has_format_string = !message.getFormatString().empty();
+
+    if (has_format_string)
+    {
+        if (!accepted)
+            return;
+
+        if (!debug_message.empty())
+        {
+            message.appendText(debug_message);
+            debug_message.clear();
+        }
+    }
+
+    auto * channel = logger->getChannel();
+    if (channel)
+        channel->log(message);
+}
--- a/src/Common/LoggingFormatStringHelpers.h
+++ b/src/Common/LoggingFormatStringHelpers.h
@ -191,6 +191,41 @@ public:
    Poco::Logger * getLogger() { return logger; }
 };

+/// This wrapper helps to avoid too noisy log messages from similar objects.
+/// The decision is made once, when an instance of LogSeriesLimiter is created:
+/// all subsequent messages logged through this instance are either printed or muted all together.
+/// LogSeriesLimiter differs from LogFrequencyLimiterIml in that it accepts or mutes
+/// a whole series of log messages, while LogFrequencyLimiterIml works for each line independently.
+class LogSeriesLimiter
+{
+    /// Protects the shared records map (see getSeriesRecords); the constructor also updates last_cleanup under it.
+    static std::mutex mutex;
+    /// Time of the last cleanup of the records map, 0 until the first limited series is registered.
+    static time_t last_cleanup;
+
+    /// Hash(logger_name) -> (time_s when the record was (re)started, number of accepted series, total number of series)
+    using SeriesRecords = std::unordered_map<UInt64, std::tuple<time_t, size_t, size_t>>;
+
+    static SeriesRecords & getSeriesRecords() TSA_REQUIRES(mutex)
+    {
+        static SeriesRecords records;
+        return records;
+    }
+
+    Poco::Logger * logger = nullptr;
+    /// The decision made in the constructor: whether messages of this series are printed.
+    bool accepted = false;
+    /// Extra text appended by log() to a forwarded message and then cleared.
+    String debug_message;
+public:
+    /// allowed_count_ == 0 mutes the series, interval_s_ == 0 always accepts it.
+    LogSeriesLimiter(Poco::Logger * logger_, size_t allowed_count_, time_t interval_s_);
+
+    /// NOTE(review): the members below mirror the logger interface (is / getChannel / name / log),
+    /// presumably so that logging macros can use the limiter in place of a logger - confirm.
+    LogSeriesLimiter & operator -> () { return *this; }
+    bool is(Poco::Message::Priority priority) { return logger->is(priority); }
+    LogSeriesLimiter * getChannel() {return this; }
+    const String & name() const { return logger->name(); }
+
+    /// Forwards the message to the channel of the wrapped logger unless the series is muted.
+    void log(Poco::Message & message);
+
+    Poco::Logger * getLogger() { return logger; }
+};
+
 /// This wrapper is useful to save formatted message into a String before sending it to a logger
 class LogToStrImpl
 {
--- a/src/Common/MemoryTrackerSwitcher.h
+++ b/src/Common/MemoryTrackerSwitcher.h
@ -0,0 +1,42 @@
+#pragma once
+
+#include <Common/CurrentThread.h>
+#include <Common/MemoryTracker.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
+/// Scope guard that sets `new_tracker` as the parent of the current thread's memory tracker
+/// and resets the thread's untracked memory counter to zero. On destruction it flushes the
+/// untracked memory and restores the previous parent and the saved counter value.
+struct MemoryTrackerSwitcher
+{
+    /// Throws LOGICAL_ERROR if current_thread is not initialized.
+    explicit MemoryTrackerSwitcher(MemoryTracker * new_tracker)
+    {
+        if (!current_thread)
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "current_thread is not initialized");
+
+        auto * thread_tracker = CurrentThread::getMemoryTracker();
+        prev_untracked_memory = current_thread->untracked_memory;
+        prev_memory_tracker_parent = thread_tracker->getParent();
+
+        current_thread->untracked_memory = 0;
+        thread_tracker->setParent(new_tracker);
+    }
+
+    /// Not copyable: a copy would restore the saved parent and untracked memory a second time.
+    MemoryTrackerSwitcher(const MemoryTrackerSwitcher &) = delete;
+    MemoryTrackerSwitcher & operator=(const MemoryTrackerSwitcher &) = delete;
+
+    ~MemoryTrackerSwitcher()
+    {
+        /// Flush before the parent is switched back and the saved counter is restored.
+        CurrentThread::flushUntrackedMemory();
+        auto * thread_tracker = CurrentThread::getMemoryTracker();
+
+        current_thread->untracked_memory = prev_untracked_memory;
+        thread_tracker->setParent(prev_memory_tracker_parent);
+    }
+
+    /// State saved by the constructor and restored by the destructor.
+    MemoryTracker * prev_memory_tracker_parent = nullptr;
+    Int64 prev_untracked_memory = 0;
+};
+
+}
--- a/src/Common/PoolBase.h
+++ b/src/Common/PoolBase.h
@ -1,9 +1,11 @@
 #pragma once

-#include <mutex>
 #include <condition_variable>
-#include <Poco/Timespan.h>
+#include <mutex>
+#include <type_traits>
+#include <variant>
 #include <boost/noncopyable.hpp>
+#include <Poco/Timespan.h>

 #include <Common/logger_useful.h>
 #include <Common/Exception.h>
@ -15,14 +17,6 @@ namespace ProfileEvents
    extern const Event ConnectionPoolIsFullMicroseconds;
 }

-namespace DB
-{
-    namespace ErrorCodes
-    {
-        extern const int LOGICAL_ERROR;
-    }
-}
-
 /** A class from which you can inherit and get a pool of something. Used for database connection pools.
  * Descendant class must provide a method for creating a new object to place in the pool.
  */
@ -35,6 +29,22 @@ public:
    using ObjectPtr = std::shared_ptr<Object>;
    using Ptr = std::shared_ptr<PoolBase<TObject>>;

+    /// What get() does when there is no free object and the limit on the pool size is reached.
+    enum class BehaviourOnLimit
+    {
+        /**
+         * Default behaviour - when the limit on the pool size is reached, callers wait until an object is returned to the pool.
+         */
+        Wait,
+
+        /**
+         * If there are no free objects in the pool - allocate a new object, but do not store it in the pool.
+         * This behaviour is needed when we simply don't want to waste time waiting or if we cannot guarantee that a query could be processed using a fixed amount of connections.
+         * For example, when we read from a table on s3, one GetObject request corresponds to the whole FileSystemCache segment. These segments are shared between different
+         * reading tasks, so in the general case a connection could be taken from the pool by one task and returned back by another one. And these tasks are processed completely independently.
+         */
+        AllocateNewBypassingPool,
+    };
+
 private:

    /** The object with the flag, whether it is currently used. */
@ -89,37 +99,53 @@ public:
        Object & operator*() && = delete;
        const Object & operator*() const && = delete;

-        Object * operator->() &             { return &*data->data.object; }
-        const Object * operator->() const & { return &*data->data.object; }
-        Object & operator*() &              { return *data->data.object; }
-        const Object & operator*() const &  { return *data->data.object; }
+        Object * operator->() &             { return castToObjectPtr(); }
+        const Object * operator->() const & { return castToObjectPtr(); }
+        Object & operator*() &              { return *castToObjectPtr(); }
+        const Object & operator*() const &  { return *castToObjectPtr(); }

        /**
         * Expire an object to make it reallocated later.
         */
        void expire()
        {
-            data->data.is_expired = true;
+            if (data.index() == 1)
+                std::get<1>(data)->data.is_expired = true;
        }

-        bool isNull() const { return data == nullptr; }
-
-        PoolBase * getPool() const
-        {
-            if (!data)
-                throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Attempt to get pool from uninitialized entry");
-            return &data->data.pool;
-        }
+        bool isNull() const { return data.index() == 0 ? !std::get<0>(data) : !std::get<1>(data); }

    private:
-        std::shared_ptr<PoolEntryHelper> data;
+        /**
+         * Plain object will be stored instead of PoolEntryHelper if fallback was made in get() (see BehaviourOnLimit::AllocateNewBypassingPool).
+         */
+        std::variant<ObjectPtr, std::shared_ptr<PoolEntryHelper>> data;

-        explicit Entry(PooledObject & object) : data(std::make_shared<PoolEntryHelper>(object)) {}
+        explicit Entry(ObjectPtr && object) : data(std::move(object)) { }
+
+        explicit Entry(PooledObject & object) : data(std::make_shared<PoolEntryHelper>(object)) { }
+
+        /// Returns the raw pointer to the held object, whichever alternative of `data` is active:
+        /// the plain object pointer or the object referenced by the pool entry helper.
+        auto castToObjectPtr() const
+        {
+            if (const auto * plain_object = std::get_if<ObjectPtr>(&data))
+                return plain_object->get();
+
+            const auto & pooled_helper = std::get<std::shared_ptr<PoolEntryHelper>>(data);
+            return pooled_helper->data.object.get();
+        }
    };

    virtual ~PoolBase() = default;

-    /** Allocates the object. Wait for free object in pool for 'timeout'. With 'timeout' < 0, the timeout is infinite. */
+    /** Allocates the object.
+     *  If 'behaviour_on_limit' is Wait - wait for free object in pool for 'timeout'. With 'timeout' < 0, the timeout is infinite.
+     *  If 'behaviour_on_limit' is AllocateNewBypassingPool and there is no free object - a new object will be created but not stored in the pool.
+     */
    Entry get(Poco::Timespan::TimeDiff timeout)
    {
        std::unique_lock lock(mutex);
@ -150,6 +176,9 @@ public:
                return Entry(*items.back());
            }

+            if (behaviour_on_limit == BehaviourOnLimit::AllocateNewBypassingPool)
+                return Entry(allocObject());
+
            Stopwatch blocked;
            if (timeout < 0)
            {
@ -184,6 +213,8 @@ private:
    /** The maximum size of the pool. */
    unsigned max_items;

+    BehaviourOnLimit behaviour_on_limit;
+
    /** Pool. */
    Objects items;

@ -192,11 +223,10 @@ private:
    std::condition_variable available;

 protected:
-
    Poco::Logger * log;

-    PoolBase(unsigned max_items_, Poco::Logger * log_)
-       : max_items(max_items_), log(log_)
+    PoolBase(unsigned max_items_, Poco::Logger * log_, BehaviourOnLimit behaviour_on_limit_ = BehaviourOnLimit::Wait)
+        : max_items(max_items_), behaviour_on_limit(behaviour_on_limit_), log(log_)
    {
        items.reserve(max_items);
    }
--- a/src/Common/ProfileEvents.cpp
+++ b/src/Common/ProfileEvents.cpp
@ -4,7 +4,7 @@


 /// Available events. Add something here as you wish.
-#define APPLY_FOR_EVENTS(M) \
+#define APPLY_FOR_BUILTIN_EVENTS(M) \
    M(Query, "Number of queries to be interpreted and potentially executed. Does not include queries that failed to parse or were rejected due to AST size limits, quota limits or limits on the number of simultaneously running queries. May include internal queries initiated by ClickHouse itself. Does not count subqueries.") \
    M(SelectQuery, "Same as Query, but only for SELECT queries.") \
    M(InsertQuery, "Same as Query, but only for INSERT queries.") \
@ -368,6 +368,10 @@ The server successfully detected this situation and will download merged part fr
    M(ReadBufferFromS3InitMicroseconds, "Time spent initializing connection to S3.") \
    M(ReadBufferFromS3Bytes, "Bytes read from S3.") \
    M(ReadBufferFromS3RequestsErrors, "Number of exceptions while reading from S3.") \
+    M(ReadBufferFromS3ResetSessions, "Number of HTTP sessions that were reset in ReadBufferFromS3.") \
+    M(ReadBufferFromS3PreservedSessions, "Number of HTTP sessions that were preserved in ReadBufferFromS3.") \
+    \
+    M(ReadWriteBufferFromHTTPPreservedSessions, "Number of HTTP sessions that were preserved in ReadWriteBufferFromHTTP.") \
    \
    M(WriteBufferFromS3Microseconds, "Time spent on writing to S3.") \
    M(WriteBufferFromS3Bytes, "Bytes written to S3.") \
@ -536,6 +540,11 @@ The server successfully detected this situation and will download merged part fr
    M(LogError, "Number of log messages with level Error") \
    M(LogFatal, "Number of log messages with level Fatal") \

+#ifdef APPLY_FOR_EXTERNAL_EVENTS
+    #define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M) APPLY_FOR_EXTERNAL_EVENTS(M)
+#else
+    #define APPLY_FOR_EVENTS(M) APPLY_FOR_BUILTIN_EVENTS(M)
+#endif

 namespace ProfileEvents
 {
--- a/src/Common/QueryProfiler.cpp
+++ b/src/Common/QueryProfiler.cpp
@ -91,7 +91,7 @@ namespace ErrorCodes
    extern const int NOT_IMPLEMENTED;
 }

-#if USE_UNWIND
+#ifndef __APPLE__
 Timer::Timer()
    : log(&Poco::Logger::get("Timer"))
 {}
@ -120,6 +120,15 @@ void Timer::createIfNecessary(UInt64 thread_id, int clock_type, int pause_signal
                throw Exception(ErrorCodes::CANNOT_CREATE_TIMER, "Failed to create thread timer. The function "
                                "'timer_create' returned non-zero but didn't set errno. This is bug in your OS.");

+            /// For example, it cannot be created if the server is run under QEMU:
+            /// "Failed to create thread timer, errno: 11, strerror: Resource temporarily unavailable."
+
+            /// You could accidentally run the server under QEMU without being aware,
+            /// if you use Docker image for a different architecture,
+            /// and you have the "binfmt-misc" kernel module, and "qemu-user" tools.
+
+            /// Also, it cannot be created if the server has too many threads.
+
            throwFromErrno("Failed to create thread timer", ErrorCodes::CANNOT_CREATE_TIMER);
        }
        timer_id.emplace(local_timer_id);
@ -200,13 +209,13 @@ QueryProfilerBase<ProfilerImpl>::QueryProfilerBase(UInt64 thread_id, int clock_t
    UNUSED(pause_signal);

    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler disabled because they cannot work under sanitizers");
-#elif !USE_UNWIND
+#elif defined(__APPLE__)
    UNUSED(thread_id);
    UNUSED(clock_type);
    UNUSED(period);
    UNUSED(pause_signal);

-    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler cannot work with stock libunwind");
+    throw Exception(ErrorCodes::NOT_IMPLEMENTED, "QueryProfiler cannot work on OSX");
 #else
    /// Sanity check.
    if (!hasPHDRCache())
@ -255,7 +264,7 @@ QueryProfilerBase<ProfilerImpl>::~QueryProfilerBase()
 template <typename ProfilerImpl>
 void QueryProfilerBase<ProfilerImpl>::cleanup()
 {
-#if USE_UNWIND
+#ifndef __APPLE__
    timer.stop();
    signal_handler_disarmed = true;
 #endif
--- a/src/Common/QueryProfiler.h
+++ b/src/Common/QueryProfiler.h
@ -28,7 +28,7 @@ namespace DB
  * Note that signal handler implementation is defined by template parameter. See QueryProfilerReal and QueryProfilerCPU.
  */

-#if USE_UNWIND
+#ifndef __APPLE__
 class Timer
 {
 public:
@ -60,7 +60,7 @@ private:

    Poco::Logger * log;

-#if USE_UNWIND
+#ifndef __APPLE__
    inline static thread_local Timer timer = Timer();
 #endif

--- a/src/Common/StackTrace.cpp
+++ b/src/Common/StackTrace.cpp
@ -20,13 +20,10 @@
 #include <sstream>
 #include <unordered_map>
 #include <fmt/format.h>
+#include <libunwind.h>

 #include "config.h"

-#if USE_UNWIND
-#    include <libunwind.h>
-#endif
-
 namespace
 {
 /// Currently this variable is set up once on server startup.
@ -211,8 +208,7 @@ void StackTrace::symbolize(
    const StackTrace::FramePointers & frame_pointers, [[maybe_unused]] size_t offset, size_t size, StackTrace::Frames & frames)
 {
 #if defined(__ELF__) && !defined(OS_FREEBSD)
-    auto symbol_index_ptr = DB::SymbolIndex::instance();
-    const DB::SymbolIndex & symbol_index = *symbol_index_ptr;
+    const DB::SymbolIndex & symbol_index = DB::SymbolIndex::instance();
    std::unordered_map<std::string, DB::Dwarf> dwarfs;

    for (size_t i = 0; i < offset; ++i)
@ -287,12 +283,8 @@ StackTrace::StackTrace(const ucontext_t & signal_context)

 void StackTrace::tryCapture()
 {
-#if USE_UNWIND
    size = unw_backtrace(frame_pointers.data(), capacity);
    __msan_unpoison(frame_pointers.data(), size * sizeof(frame_pointers[0]));
-#else
-    size = 0;
-#endif
 }

 /// ClickHouse uses bundled libc++ so type names will be the same on every system thus it's safe to hardcode them
@ -348,8 +340,7 @@ toStringEveryLineImpl([[maybe_unused]] bool fatal, const StackTraceRefTriple & s
    using enum DB::Dwarf::LocationInfoMode;
    const auto mode = fatal ? FULL_WITH_INLINE : FAST;

-    auto symbol_index_ptr = DB::SymbolIndex::instance();
-    const DB::SymbolIndex & symbol_index = *symbol_index_ptr;
+    const DB::SymbolIndex & symbol_index = DB::SymbolIndex::instance();
    std::unordered_map<String, DB::Dwarf> dwarfs;

    for (size_t i = stack_trace.offset; i < stack_trace.size; ++i)
--- a/src/Common/StringSearcher.h
+++ b/src/Common/StringSearcher.h
@ -793,88 +793,6 @@ public:
    }
 };

-
-// Searches for needle surrounded by token-separators.
-// Separators are anything inside ASCII (0-128) and not alphanum.
-// Any value outside of basic ASCII (>=128) is considered a non-separator symbol, hence UTF-8 strings
-// should work just fine. But any Unicode whitespace is not considered a token separtor.
-template <typename StringSearcher>
-class TokenSearcher : public StringSearcherBase
-{
-    StringSearcher searcher;
-    size_t needle_size;
-
-public:
-
-    template <typename CharT>
-    requires (sizeof(CharT) == 1)
-    static bool isValidNeedle(const CharT * needle_, size_t needle_size_)
-    {
-        return std::none_of(needle_, needle_ + needle_size_, isTokenSeparator);
-    }
-
-    template <typename CharT>
-    requires (sizeof(CharT) == 1)
-    TokenSearcher(const CharT * needle_, size_t needle_size_)
-        : searcher(needle_, needle_size_)
-        , needle_size(needle_size_)
-    {
-        /// The caller is responsible for calling isValidNeedle()
-        chassert(isValidNeedle(needle_, needle_size_));
-    }
-
-    template <typename CharT>
-    requires (sizeof(CharT) == 1)
-    ALWAYS_INLINE bool compare(const CharT * haystack, const CharT * haystack_end, const CharT * pos) const
-    {
-        // use searcher only if pos is in the beginning of token and pos + searcher.needle_size is end of token.
-        if (isToken(haystack, haystack_end, pos))
-            return searcher.compare(haystack, haystack_end, pos);
-
-        return false;
-    }
-
-    template <typename CharT>
-    requires (sizeof(CharT) == 1)
-    const CharT * search(const CharT * haystack, const CharT * const haystack_end) const
-    {
-        // use searcher.search(), then verify that returned value is a token
-        // if it is not, skip it and re-run
-
-        const auto * pos = haystack;
-        while (pos < haystack_end)
-        {
-            pos = searcher.search(pos, haystack_end);
-            if (pos == haystack_end || isToken(haystack, haystack_end, pos))
-                return pos;
-
-            // assuming that heendle does not contain any token separators.
-            pos += needle_size;
-        }
-        return haystack_end;
-    }
-
-    template <typename CharT>
-    requires (sizeof(CharT) == 1)
-    const CharT * search(const CharT * haystack, size_t haystack_size) const
-    {
-        return search(haystack, haystack + haystack_size);
-    }
-
-    template <typename CharT>
-    requires (sizeof(CharT) == 1)
-    ALWAYS_INLINE bool isToken(const CharT * haystack, const CharT * const haystack_end, const CharT* p) const
-    {
-        return (p == haystack || isTokenSeparator(*(p - 1)))
-             && (p + needle_size >= haystack_end || isTokenSeparator(*(p + needle_size)));
-    }
-
-    ALWAYS_INLINE static bool isTokenSeparator(const uint8_t c)
-    {
-        return !(isAlphaNumericASCII(c) || !isASCII(c));
-    }
-};
-
 }

 using ASCIICaseSensitiveStringSearcher =   impl::StringSearcher<true, true>;
@ -882,9 +800,6 @@ using ASCIICaseInsensitiveStringSearcher = impl::StringSearcher<false, true>;
 using UTF8CaseSensitiveStringSearcher =    impl::StringSearcher<true, false>;
 using UTF8CaseInsensitiveStringSearcher =  impl::StringSearcher<false, false>;

-using ASCIICaseSensitiveTokenSearcher =    impl::TokenSearcher<ASCIICaseSensitiveStringSearcher>;
-using ASCIICaseInsensitiveTokenSearcher =  impl::TokenSearcher<ASCIICaseInsensitiveStringSearcher>;
-
 /// Use only with short haystacks where cheap initialization is required.
 template <bool CaseInsensitive>
 struct StdLibASCIIStringSearcher
@ -906,11 +821,11 @@ struct StdLibASCIIStringSearcher
        if constexpr (CaseInsensitive)
            return std::search(
                haystack_start, haystack_end, needle_start, needle_end,
-                [](char c1, char c2) {return std::toupper(c1) == std::toupper(c2);});
+                [](char c1, char c2) { return std::toupper(c1) == std::toupper(c2); });
        else
            return std::search(
                haystack_start, haystack_end, needle_start, needle_end,
-                [](char c1, char c2) {return c1 == c2;});
+                [](char c1, char c2) { return c1 == c2; });
    }

    template <typename CharT>
--- a/src/Common/SymbolIndex.cpp
+++ b/src/Common/SymbolIndex.cpp
@ -9,7 +9,6 @@

 #include <link.h>

-//#include <iostream>
 #include <filesystem>

 #include <base/sort.h>
@ -510,7 +509,7 @@ const T * find(const void * address, const std::vector<T> & vec)
 }


-void SymbolIndex::update()
+void SymbolIndex::load()
 {
    dl_iterate_phdr(collectSymbols, &data);

@ -550,24 +549,12 @@ String SymbolIndex::getBuildIDHex() const
    return build_id_hex;
 }

-MultiVersion<SymbolIndex> & SymbolIndex::instanceImpl()
+const SymbolIndex & SymbolIndex::instance()
 {
-    static MultiVersion<SymbolIndex> instance(std::unique_ptr<SymbolIndex>(new SymbolIndex));
+    static SymbolIndex instance;
    return instance;
 }

-MultiVersion<SymbolIndex>::Version SymbolIndex::instance()
-{
-    return instanceImpl().get();
-}
-
-void SymbolIndex::reload()
-{
-    instanceImpl().set(std::unique_ptr<SymbolIndex>(new SymbolIndex));
-    /// Also drop stacktrace cache.
-    StackTrace::dropCache();
-}
-
 }

 #endif
--- a/src/Common/SymbolIndex.h
+++ b/src/Common/SymbolIndex.h
@ -8,8 +8,6 @@
 #include <Common/Elf.h>
 #include <boost/noncopyable.hpp>

-#include <Common/MultiVersion.h>
-
 namespace DB
 {

@ -20,11 +18,10 @@ namespace DB
 class SymbolIndex : private boost::noncopyable
 {
 protected:
-    SymbolIndex() { update(); }
+    SymbolIndex() { load(); }

 public:
-    static MultiVersion<SymbolIndex>::Version instance();
-    static void reload();
+    static const SymbolIndex & instance();

    struct Symbol
    {
@ -90,8 +87,7 @@ public:
 private:
    Data data;

-    void update();
-    static MultiVersion<SymbolIndex> & instanceImpl();
+    void load();
 };

 }
--- a/src/Common/ThreadStatus.cpp
+++ b/src/Common/ThreadStatus.cpp
@ -199,13 +199,14 @@ ThreadStatus::~ThreadStatus()
    if (deleter)
        deleter();

+    chassert(!check_current_thread_on_destruction || current_thread == this);
+
    /// Only change current_thread if it's currently being used by this ThreadStatus
    /// For example, PushingToViews chain creates and deletes ThreadStatus instances while running in the main query thread
-    if (check_current_thread_on_destruction)
-    {
-        assert(current_thread == this);
+    if (current_thread == this)
        current_thread = nullptr;
-    }
+    else if (check_current_thread_on_destruction)
+        LOG_ERROR(log, "current_thread contains invalid address");
 }

 void ThreadStatus::updatePerformanceCounters()
--- a/src/Common/Volnitsky.h
+++ b/src/Common/Volnitsky.h
@ -730,9 +730,6 @@ using VolnitskyUTF8 = VolnitskyBase<true, false, UTF8CaseSensitiveStringSearcher
 using VolnitskyCaseInsensitive = VolnitskyBase<false, true, ASCIICaseInsensitiveStringSearcher>; /// ignores non-ASCII bytes
 using VolnitskyCaseInsensitiveUTF8 = VolnitskyBase<false, false, UTF8CaseInsensitiveStringSearcher>;

-using VolnitskyCaseSensitiveToken = VolnitskyBase<true, true, ASCIICaseSensitiveTokenSearcher>;
-using VolnitskyCaseInsensitiveToken = VolnitskyBase<false, true, ASCIICaseInsensitiveTokenSearcher>;
-
 using MultiVolnitsky = MultiVolnitskyBase<true, true, ASCIICaseSensitiveStringSearcher>;
 using MultiVolnitskyUTF8 = MultiVolnitskyBase<true, false, UTF8CaseSensitiveStringSearcher>;
 using MultiVolnitskyCaseInsensitive = MultiVolnitskyBase<false, true, ASCIICaseInsensitiveStringSearcher>;
--- a/src/Common/config.h.in
+++ b/src/Common/config.h.in
@ -9,7 +9,6 @@
 #cmakedefine01 USE_AWS_S3
 #cmakedefine01 USE_AZURE_BLOB_STORAGE
 #cmakedefine01 USE_BROTLI
-#cmakedefine01 USE_UNWIND
 #cmakedefine01 USE_CASSANDRA
 #cmakedefine01 USE_SENTRY
 #cmakedefine01 USE_GRPC
--- a/src/Common/examples/symbol_index.cpp
+++ b/src/Common/examples/symbol_index.cpp
@ -22,8 +22,7 @@ int main(int argc, char ** argv)
        return 1;
    }

-    auto symbol_index_ptr = SymbolIndex::instance();
-    const SymbolIndex & symbol_index = *symbol_index_ptr;
+    const SymbolIndex & symbol_index = SymbolIndex::instance();

    for (const auto & elem : symbol_index.symbols())
        std::cout << elem.name << ": " << elem.address_begin << " ... " << elem.address_end << "\n";
--- a/src/Common/getResource.cpp
+++ b/src/Common/getResource.cpp
@ -16,7 +16,7 @@ std::string_view getResource(std::string_view name)

 #if defined USE_MUSL
    /// If static linking is used, we cannot use dlsym and have to parse ELF symbol table by ourselves.
-    return DB::SymbolIndex::instance()->getResource(name_replaced);
+    return DB::SymbolIndex::instance().getResource(name_replaced);

 #else
    // In most `dlsym(3)` APIs, one passes the symbol name as it appears via
--- a/src/Common/logger_useful.h
+++ b/src/Common/logger_useful.h
@ -15,12 +15,15 @@ namespace Poco { class Logger; }
 #define LogToStr(x, y) std::make_unique<LogToStrImpl>(x, y)
 #define LogFrequencyLimiter(x, y) std::make_unique<LogFrequencyLimiterIml>(x, y)

+using LogSeriesLimiterPtr = std::shared_ptr<LogSeriesLimiter>;
+
 namespace
 {
    [[maybe_unused]] const ::Poco::Logger * getLogger(const ::Poco::Logger * logger) { return logger; }
    [[maybe_unused]] const ::Poco::Logger * getLogger(const std::atomic<::Poco::Logger *> & logger) { return logger.load(); }
    [[maybe_unused]] std::unique_ptr<LogToStrImpl> getLogger(std::unique_ptr<LogToStrImpl> && logger) { return logger; }
    [[maybe_unused]] std::unique_ptr<LogFrequencyLimiterIml> getLogger(std::unique_ptr<LogFrequencyLimiterIml> && logger) { return logger; }
+    [[maybe_unused]] LogSeriesLimiterPtr getLogger(LogSeriesLimiterPtr & logger) { return logger; }
 }

 #define LOG_IMPL_FIRST_ARG(X, ...) X
--- a/src/Coordination/KeeperContext.cpp
+++ b/src/Coordination/KeeperContext.cpp
@ -220,7 +220,7 @@ KeeperContext::Storage KeeperContext::getLogsPathFromConfig(const Poco::Util::Ab
        if (!fs::exists(path))
            fs::create_directories(path);

-        return std::make_shared<DiskLocal>("LocalLogDisk", path, 0);
+        return std::make_shared<DiskLocal>("LocalLogDisk", path);
    };

    /// the most specialized path
@ -246,7 +246,7 @@ KeeperContext::Storage KeeperContext::getSnapshotsPathFromConfig(const Poco::Uti
        if (!fs::exists(path))
            fs::create_directories(path);

-        return std::make_shared<DiskLocal>("LocalSnapshotDisk", path, 0);
+        return std::make_shared<DiskLocal>("LocalSnapshotDisk", path);
    };

    /// the most specialized path
@ -272,7 +272,7 @@ KeeperContext::Storage KeeperContext::getStatePathFromConfig(const Poco::Util::A
        if (!fs::exists(path))
            fs::create_directories(path);

-        return std::make_shared<DiskLocal>("LocalStateFileDisk", path, 0);
+        return std::make_shared<DiskLocal>("LocalStateFileDisk", path);
    };

    if (config.has("keeper_server.state_storage_disk"))
--- a/src/Coordination/KeeperSnapshotManagerS3.cpp
+++ b/src/Coordination/KeeperSnapshotManagerS3.cpp
@ -145,14 +145,14 @@ void KeeperSnapshotManagerS3::uploadSnapshotImpl(const SnapshotFileInfo & snapsh

        const auto create_writer = [&](const auto & key)
        {
-            return WriteBufferFromS3
-            {
+            return WriteBufferFromS3(
+                s3_client->client,
                s3_client->client,
                s3_client->uri.bucket,
                key,
                DBMS_DEFAULT_BUFFER_SIZE,
                request_settings_1
-            };
+            );
        };

        LOG_INFO(log, "Will try to upload snapshot on {} to S3", snapshot_file_info.path);
--- a/src/Coordination/tests/gtest_coordination.cpp
+++ b/src/Coordination/tests/gtest_coordination.cpp
@ -71,16 +71,16 @@ protected:
    DB::KeeperContextPtr keeper_context = std::make_shared<DB::KeeperContext>(true);
    Poco::Logger * log{&Poco::Logger::get("CoordinationTest")};

-    void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared<DB::DiskLocal>("LogDisk", path, 0)); }
+    void setLogDirectory(const std::string & path) { keeper_context->setLogDisk(std::make_shared<DB::DiskLocal>("LogDisk", path)); }

    void setSnapshotDirectory(const std::string & path)
    {
-        keeper_context->setSnapshotDisk(std::make_shared<DB::DiskLocal>("SnapshotDisk", path, 0));
+        keeper_context->setSnapshotDisk(std::make_shared<DB::DiskLocal>("SnapshotDisk", path));
    }

    void setStateFileDirectory(const std::string & path)
    {
-        keeper_context->setStateFileDisk(std::make_shared<DB::DiskLocal>("StateFile", path, 0));
+        keeper_context->setStateFileDisk(std::make_shared<DB::DiskLocal>("StateFile", path));
    }
 };

@ -1503,9 +1503,9 @@ void testLogAndStateMachine(
    using namespace DB;

    ChangelogDirTest snapshots("./snapshots");
-    keeper_context->setSnapshotDisk(std::make_shared<DiskLocal>("SnapshotDisk", "./snapshots", 0));
+    keeper_context->setSnapshotDisk(std::make_shared<DiskLocal>("SnapshotDisk", "./snapshots"));
    ChangelogDirTest logs("./logs");
-    keeper_context->setLogDisk(std::make_shared<DiskLocal>("LogDisk", "./logs", 0));
+    keeper_context->setLogDisk(std::make_shared<DiskLocal>("LogDisk", "./logs"));

    ResponsesQueue queue(std::numeric_limits<size_t>::max());
    SnapshotsQueue snapshots_queue{1};
--- a/src/Core/DecimalFunctions.h
+++ b/src/Core/DecimalFunctions.h
@ -48,7 +48,11 @@ inline auto scaleMultiplier(UInt32 scale)

 /** Components of DecimalX value:
 * whole - represents whole part of decimal, can be negative or positive.
- * fractional - for fractional part of decimal, always positive.
+ * fractional - for fractional part of decimal.
+ *
+ *  0.123 represents  0 /  0.123
+ * -0.123 represents  0 / -0.123
+ * -1.123 represents -1 /  0.123
 */
 template <typename DecimalType>
 struct DecimalComponents
--- a/src/Core/Defines.h
+++ b/src/Core/Defines.h
@ -41,7 +41,7 @@
 /// The boundary on which the blocks for asynchronous file operations should be aligned.
 #define DEFAULT_AIO_FILE_BLOCK_SIZE 4096

-#define DEFAULT_HTTP_READ_BUFFER_TIMEOUT 180
+#define DEFAULT_HTTP_READ_BUFFER_TIMEOUT 30
 #define DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT 1
 /// Maximum number of http-connections between two endpoints
 /// the number is unmotivated
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@ -102,6 +102,7 @@ class IColumn;
    M(Bool, s3_allow_parallel_part_upload, true, "Use multiple threads for s3 multipart upload. It may lead to slightly higher memory usage", 0) \
    M(Bool, s3_throw_on_zero_files_match, false, "Throw an error, when ListObjects request cannot match any files", 0) \
    M(UInt64, s3_retry_attempts, 10, "Setting for Aws::Client::RetryStrategy, Aws::Client does retries itself, 0 means no retries", 0) \
+    M(UInt64, s3_request_timeout_ms, 3000, "Idleness timeout for sending and receiving data to/from S3. Fail if a single TCP read or write call blocks for this long.", 0) \
    M(Bool, enable_s3_requests_logging, false, "Enable very explicit logging of S3 requests. Makes sense for debug only.", 0) \
    M(UInt64, hdfs_replication, 0, "The actual number of replications can be specified when the hdfs file is created.", 0) \
    M(Bool, hdfs_truncate_on_insert, false, "Enables or disables truncate before insert in s3 engine tables", 0) \
@ -659,7 +660,7 @@ class IColumn;
    M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \
    M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \
    \
-    M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::mmap, "Method of reading data from storage file, one of: read, pread, mmap. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local).", 0) \
+    M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::pread, "Method of reading data from storage file, one of: read, pread, mmap. The mmap method does not apply to clickhouse-server (it's intended for clickhouse-local).", 0) \
    M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, io_uring, pread_threadpool. The 'io_uring' method is experimental and does not work for Log, TinyLog, StripeLog, File, Set and Join, and other tables with append-able files in presence of concurrent reads and writes.", 0) \
    M(String, remote_filesystem_read_method, "threadpool", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \
    M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \
@ -1010,6 +1011,7 @@ class IColumn;
    M(Bool, regexp_dict_allow_hyperscan, true, "Allow regexp_tree dictionary using Hyperscan library.", 0) \
    \
    M(Bool, dictionary_use_async_executor, false, "Execute a pipeline for reading from a dictionary with several threads. It's supported only by DIRECT dictionary with CLICKHOUSE source.", 0) \
+    M(Bool, input_format_csv_allow_variable_number_of_columns, false, "Ignore extra columns in CSV input (if file has more columns than expected) and treat missing fields in CSV input as default values", 0) \

 // End of FORMAT_FACTORY_SETTINGS
 // Please add settings non-related to formats into the COMMON_SETTINGS above.
--- a/src/Core/SettingsChangesHistory.h
+++ b/src/Core/SettingsChangesHistory.h
@ -80,6 +80,8 @@ namespace SettingsChangesHistory
 /// It's used to implement `compatibility` setting (see https://github.com/ClickHouse/ClickHouse/issues/35972)
 static std::map<ClickHouseVersion, SettingsChangesHistory::SettingsChanges> settings_changes_history =
 {
+    {"23.6", {{"http_send_timeout", 180, 30, "3 minutes seems crazy long. Note that this is timeout for a single network write call, not for the whole upload operation."},
+              {"http_receive_timeout", 180, 30, "See http_send_timeout."}}},
    {"23.5", {{"input_format_parquet_preserve_order", true, false, "Allow Parquet reader to reorder rows for better parallelism."},
              {"parallelize_output_from_storages", false, true, "Allow parallelism when executing queries that read from file/url/s3/etc. This may reorder rows."},
              {"use_with_fill_by_sorting_prefix", false, true, "Columns preceding WITH FILL columns in ORDER BY clause form sorting prefix. Rows with different values in sorting prefix are filled independently"},
--- a/src/Daemon/BaseDaemon.cpp
+++ b/src/Daemon/BaseDaemon.cpp
@ -154,7 +154,7 @@ static void signalHandler(int sig, siginfo_t * info, void * context)
    writePODBinary(*info, out);
    writePODBinary(signal_context, out);
    writePODBinary(stack_trace, out);
-    writeVectorBinary(Exception::thread_frame_pointers, out);
+    writeVectorBinary(Exception::enable_job_stack_trace ? Exception::thread_frame_pointers : std::vector<StackTrace::FramePointers>{}, out);
    writeBinary(static_cast<UInt32>(getThreadId()), out);
    writePODBinary(current_thread, out);

@ -310,6 +310,57 @@ private:
    {
        ThreadStatus thread_status;

+        /// First log those fields that are safe to access and that should not cause new fault.
+        /// That way we will have some duplicated info in the log but we don't lose important info
+        /// in case of double fault.
+
+        LOG_FATAL(log, "########## Short fault info ############");
+        LOG_FATAL(log, "(version {}{}, build id: {}, git hash: {}) (from thread {}) Received signal {}",
+                VERSION_STRING, VERSION_OFFICIAL, daemon.build_id, daemon.git_hash,
+                thread_num, sig);
+
+        std::string signal_description = "Unknown signal";
+
+        /// Some of these are not really signals, but our own indications on failure reason.
+        if (sig == StdTerminate)
+            signal_description = "std::terminate";
+        else if (sig == SanitizerTrap)
+            signal_description = "sanitizer trap";
+        else if (sig >= 0)
+            signal_description = strsignal(sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context
+
+        LOG_FATAL(log, "Signal description: {}", signal_description);
+
+        String error_message;
+
+        if (sig != SanitizerTrap)
+            error_message = signalToErrorMessage(sig, info, *context);
+        else
+            error_message = "Sanitizer trap.";
+
+        LOG_FATAL(log, fmt::runtime(error_message));
+
+        String bare_stacktrace_str;
+        if (stack_trace.getSize())
+        {
+            /// Write bare stack trace (addresses) just in case if we will fail to print symbolized stack trace.
+            /// NOTE: This still require memory allocations and mutex lock inside logger.
+            ///       BTW we can also print it to stderr using write syscalls.
+
+            WriteBufferFromOwnString bare_stacktrace;
+            writeString("Stack trace:", bare_stacktrace);
+            for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i)
+            {
+                writeChar(' ', bare_stacktrace);
+                writePointerHex(stack_trace.getFramePointers()[i], bare_stacktrace);
+            }
+
+            LOG_FATAL(log, fmt::runtime(bare_stacktrace.str()));
+            bare_stacktrace_str = bare_stacktrace.str();
+        }
+
+        /// Now try to access potentially unsafe data in thread_ptr.
+
        String query_id;
        String query;

@ -326,16 +377,6 @@ private:
            }
        }

-        std::string signal_description = "Unknown signal";
-
-        /// Some of these are not really signals, but our own indications on failure reason.
-        if (sig == StdTerminate)
-            signal_description = "std::terminate";
-        else if (sig == SanitizerTrap)
-            signal_description = "sanitizer trap";
-        else if (sig >= 0)
-            signal_description = strsignal(sig); // NOLINT(concurrency-mt-unsafe) // it is not thread-safe but ok in this context
-
        LOG_FATAL(log, "########################################");

        if (query_id.empty())
@ -351,30 +392,11 @@ private:
                thread_num, query_id, query, signal_description, sig);
        }

-        String error_message;
-
-        if (sig != SanitizerTrap)
-            error_message = signalToErrorMessage(sig, info, *context);
-        else
-            error_message = "Sanitizer trap.";
-
        LOG_FATAL(log, fmt::runtime(error_message));

-        if (stack_trace.getSize())
+        if (!bare_stacktrace_str.empty())
        {
-            /// Write bare stack trace (addresses) just in case if we will fail to print symbolized stack trace.
-            /// NOTE: This still require memory allocations and mutex lock inside logger.
-            ///       BTW we can also print it to stderr using write syscalls.
-
-            WriteBufferFromOwnString bare_stacktrace;
-            writeString("Stack trace:", bare_stacktrace);
-            for (size_t i = stack_trace.getOffset(); i < stack_trace.getSize(); ++i)
-            {
-                writeChar(' ', bare_stacktrace);
-                writePointerHex(stack_trace.getFramePointers()[i], bare_stacktrace);
-            }
-
-            LOG_FATAL(log, fmt::runtime(bare_stacktrace.str()));
+            LOG_FATAL(log, fmt::runtime(bare_stacktrace_str));
        }

        /// Write symbolized stack trace line by line for better grep-ability.
@ -964,7 +986,7 @@ void BaseDaemon::initializeTerminationAndSignalProcessing()
    signal_listener_thread.start(*signal_listener);

 #if defined(__ELF__) && !defined(OS_FREEBSD)
-    String build_id_hex = SymbolIndex::instance()->getBuildIDHex();
+    String build_id_hex = SymbolIndex::instance().getBuildIDHex();
    if (build_id_hex.empty())
        build_id = "";
    else
@ -1101,6 +1123,7 @@ void BaseDaemon::setupWatchdog()

        if (0 == pid)
        {
+            updateCurrentThreadIdAfterFork();
            logger().information("Forked a child process to watch");
 #if defined(OS_LINUX)
            if (0 != prctl(PR_SET_PDEATHSIG, SIGKILL))
--- a/src/Daemon/SentryWriter.cpp
+++ b/src/Daemon/SentryWriter.cpp
@ -150,7 +150,7 @@ void SentryWriter::onFault(int sig, const std::string & error_message, const Sta
        sentry_set_extra("signal_number", sentry_value_new_int32(sig));

        #if defined(__ELF__) && !defined(OS_FREEBSD)
-            const String & build_id_hex = DB::SymbolIndex::instance()->getBuildIDHex();
+            const String & build_id_hex = DB::SymbolIndex::instance().getBuildIDHex();
            sentry_set_tag("build_id", build_id_hex.c_str());
        #endif

--- a/src/Databases/DatabaseOrdinary.cpp
+++ b/src/Databases/DatabaseOrdinary.cpp
@ -13,6 +13,7 @@
 #include <IO/WriteHelpers.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/InterpreterCreateQuery.h>
+#include <Interpreters/FunctionNameNormalizer.h>
 #include <Parsers/ASTCreateQuery.h>
 #include <Parsers/ASTSetQuery.h>
 #include <Parsers/ParserCreateQuery.h>
@ -182,6 +183,7 @@ void DatabaseOrdinary::loadTablesMetadata(ContextPtr local_context, ParsedTables
            auto ast = parseQueryFromMetadata(log, getContext(), full_path.string(), /*throw_on_error*/ true, /*remove_empty*/ false);
            if (ast)
            {
+                FunctionNameNormalizer().visit(ast.get());
                auto * create_query = ast->as<ASTCreateQuery>();
                /// NOTE No concurrent writes are possible during database loading
                create_query->setDatabase(TSA_SUPPRESS_WARNING_FOR_READ(database_name));
--- a/src/Dictionaries/CacheDictionary.cpp
+++ b/src/Dictionaries/CacheDictionary.cpp
@ -138,7 +138,7 @@ Columns CacheDictionary<dictionary_key_type>::getColumns(
    const Columns & default_values_columns) const
 {
    /**
-    * Flow of getColumsImpl
+    * Flow of getColumnsImpl
    * 1. Get fetch result from storage
    * 2. If all keys are found in storage and not expired
    *   2.1. If storage returns fetched columns in order of keys then result is returned to client.
@ -549,16 +549,17 @@ void CacheDictionary<dictionary_key_type>::update(CacheDictionaryUpdateUnitPtr<d

    for (size_t i = 0; i < key_index_to_state_from_storage.size(); ++i)
    {
-        if (key_index_to_state_from_storage[i].isExpired()
-            || key_index_to_state_from_storage[i].isNotFound())
+        if (key_index_to_state_from_storage[i].isExpired() || key_index_to_state_from_storage[i].isNotFound())
        {
-            if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
-                requested_keys_vector.emplace_back(requested_keys[i]);
-            else
-                requested_complex_key_rows.emplace_back(i);
-
            auto requested_key = requested_keys[i];
-            not_found_keys.insert(requested_key);
+            auto [_, inserted] = not_found_keys.insert(requested_key);
+            if (inserted)
+            {
+                if constexpr (dictionary_key_type == DictionaryKeyType::Simple)
+                    requested_keys_vector.emplace_back(requested_keys[i]);
+                else
+                    requested_complex_key_rows.emplace_back(i);
+            }
        }
    }

--- a/src/Disks/DiskEncrypted.cpp
+++ b/src/Disks/DiskEncrypted.cpp
@ -266,7 +266,7 @@ public:
    }

    UInt64 getSize() const override { return reservation->getSize(); }
-    UInt64 getUnreservedSpace() const override { return reservation->getUnreservedSpace(); }
+    std::optional<UInt64> getUnreservedSpace() const override { return reservation->getUnreservedSpace(); }

    DiskPtr getDisk(size_t i) const override
    {
@ -285,19 +285,32 @@ private:
 };

 DiskEncrypted::DiskEncrypted(
-    const String & name_, const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_, const DisksMap & map_, bool use_fake_transaction_)
-    : DiskEncrypted(name_, parseDiskEncryptedSettings(name_, config_, config_prefix_, map_), use_fake_transaction_)
+    const String & name_, const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_, const DisksMap & map_)
+    : DiskEncrypted(name_, parseDiskEncryptedSettings(name_, config_, config_prefix_, map_), config_, config_prefix_)
 {
 }

-DiskEncrypted::DiskEncrypted(const String & name_, std::unique_ptr<const DiskEncryptedSettings> settings_, bool use_fake_transaction_)
+DiskEncrypted::DiskEncrypted(const String & name_, std::unique_ptr<const DiskEncryptedSettings> settings_,
+                             const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_)
+    : IDisk(name_, config_, config_prefix_)
+    , delegate(settings_->wrapped_disk)
+    , encrypted_name(name_)
+    , disk_path(settings_->disk_path)
+    , disk_absolute_path(settings_->wrapped_disk->getPath() + settings_->disk_path)
+    , current_settings(std::move(settings_))
+    , use_fake_transaction(config_.getBool(config_prefix_ + ".use_fake_transaction", true))
+{
+    delegate->createDirectories(disk_path);
+}
+
+DiskEncrypted::DiskEncrypted(const String & name_, std::unique_ptr<const DiskEncryptedSettings> settings_)
    : IDisk(name_)
    , delegate(settings_->wrapped_disk)
    , encrypted_name(name_)
    , disk_path(settings_->disk_path)
    , disk_absolute_path(settings_->wrapped_disk->getPath() + settings_->disk_path)
    , current_settings(std::move(settings_))
-    , use_fake_transaction(use_fake_transaction_)
+    , use_fake_transaction(true)
 {
    delegate->createDirectories(disk_path);
 }
@ -310,32 +323,6 @@ ReservationPtr DiskEncrypted::reserve(UInt64 bytes)
    return std::make_unique<DiskEncryptedReservation>(std::static_pointer_cast<DiskEncrypted>(shared_from_this()), std::move(reservation));
 }

-void DiskEncrypted::copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
-{
-    /// Check if we can copy the file without deciphering.
-    if (isSameDiskType(*this, *to_disk))
-    {
-        /// Disk type is the same, check if the key is the same too.
-        if (auto * to_disk_enc = typeid_cast<DiskEncrypted *>(to_disk.get()))
-        {
-            auto from_settings = current_settings.get();
-            auto to_settings = to_disk_enc->current_settings.get();
-            if (from_settings->all_keys == to_settings->all_keys)
-            {
-                /// Keys are the same so we can simply copy the encrypted file.
-                auto wrapped_from_path = wrappedPath(from_path);
-                auto to_delegate = to_disk_enc->delegate;
-                auto wrapped_to_path = to_disk_enc->wrappedPath(to_path);
-                delegate->copy(wrapped_from_path, to_delegate, wrapped_to_path);
-                return;
-            }
-        }
-    }
-
-    /// Copy the file through buffers with deciphering.
-    copyThroughBuffers(from_path, to_disk, to_path);
-}
-

 void DiskEncrypted::copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir)
 {
@ -359,11 +346,8 @@ void DiskEncrypted::copyDirectoryContent(const String & from_dir, const std::sha
        }
    }

-    if (!to_disk->exists(to_dir))
-        to_disk->createDirectories(to_dir);
-
    /// Copy the file through buffers with deciphering.
-    copyThroughBuffers(from_dir, to_disk, to_dir);
+    IDisk::copyDirectoryContent(from_dir, to_disk, to_dir);
 }

 std::unique_ptr<ReadBufferFromFileBase> DiskEncrypted::readFile(
@ -443,7 +427,7 @@ std::unordered_map<String, String> DiskEncrypted::getSerializedMetadata(const st

 void DiskEncrypted::applyNewSettings(
    const Poco::Util::AbstractConfiguration & config,
-    ContextPtr /*context*/,
+    ContextPtr context,
    const String & config_prefix,
    const DisksMap & disk_map)
 {
@ -455,6 +439,7 @@ void DiskEncrypted::applyNewSettings(
        throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Сhanging disk path on the fly is not supported. Disk {}", name);

    current_settings.set(std::move(new_settings));
+    IDisk::applyNewSettings(config, context, config_prefix, disk_map);
 }

 void registerDiskEncrypted(DiskFactory & factory, bool global_skip_access_check)
@ -467,7 +452,7 @@ void registerDiskEncrypted(DiskFactory & factory, bool global_skip_access_check)
        const DisksMap & map) -> DiskPtr
    {
        bool skip_access_check = global_skip_access_check || config.getBool(config_prefix + ".skip_access_check", false);
-        DiskPtr disk = std::make_shared<DiskEncrypted>(name, config, config_prefix, map, config.getBool(config_prefix + ".use_fake_transaction", true));
+        DiskPtr disk = std::make_shared<DiskEncrypted>(name, config, config_prefix, map);
        disk->startup(context, skip_access_check);
        return disk;
    };
--- a/src/Disks/DiskEncrypted.h
+++ b/src/Disks/DiskEncrypted.h
@ -21,8 +21,10 @@ class WriteBufferFromFileBase;
 class DiskEncrypted : public IDisk
 {
 public:
-    DiskEncrypted(const String & name_, const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_, const DisksMap & map_, bool use_fake_transaction_);
-    DiskEncrypted(const String & name_, std::unique_ptr<const DiskEncryptedSettings> settings_, bool use_fake_transaction_);
+    DiskEncrypted(const String & name_, const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_, const DisksMap & map_);
+    DiskEncrypted(const String & name_, std::unique_ptr<const DiskEncryptedSettings> settings_,
+                  const Poco::Util::AbstractConfiguration & config_, const String & config_prefix_);
+    DiskEncrypted(const String & name_, std::unique_ptr<const DiskEncryptedSettings> settings_);

    const String & getName() const override { return encrypted_name; }
    const String & getPath() const override { return disk_absolute_path; }
@ -110,8 +112,6 @@ public:
        delegate->listFiles(wrapped_path, file_names);
    }

-    void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path) override;
-
    void copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir) override;

    std::unique_ptr<ReadBufferFromFileBase> readFile(
@ -312,17 +312,17 @@ public:
        }
    }

-    UInt64 getTotalSpace() const override
+    std::optional<UInt64> getTotalSpace() const override
    {
        return delegate->getTotalSpace();
    }

-    UInt64 getAvailableSpace() const override
+    std::optional<UInt64> getAvailableSpace() const override
    {
        return delegate->getAvailableSpace();
    }

-    UInt64 getUnreservedSpace() const override
+    std::optional<UInt64> getUnreservedSpace() const override
    {
        return delegate->getUnreservedSpace();
    }
--- a/src/Disks/DiskLocal.cpp
+++ b/src/Disks/DiskLocal.cpp
@ -78,7 +78,7 @@ public:
    {}

    UInt64 getSize() const override { return size; }
-    UInt64 getUnreservedSpace() const override { return unreserved_space; }
+    std::optional<UInt64> getUnreservedSpace() const override { return unreserved_space; }

    DiskPtr getDisk(size_t i) const override
    {
@ -175,8 +175,11 @@ std::optional<UInt64> DiskLocal::tryReserve(UInt64 bytes)
 {
    std::lock_guard lock(DiskLocal::reservation_mutex);

-    UInt64 available_space = getAvailableSpace();
-    UInt64 unreserved_space = available_space - std::min(available_space, reserved_bytes);
+    auto available_space = getAvailableSpace();
+
+    UInt64 unreserved_space = available_space
+        ? *available_space - std::min(*available_space, reserved_bytes)
+        : std::numeric_limits<UInt64>::max();

    if (bytes == 0)
    {
@ -187,12 +190,24 @@ std::optional<UInt64> DiskLocal::tryReserve(UInt64 bytes)

    if (unreserved_space >= bytes)
    {
-        LOG_TRACE(
-            logger,
-            "Reserved {} on local disk {}, having unreserved {}.",
-            ReadableSize(bytes),
-            backQuote(name),
-            ReadableSize(unreserved_space));
+        if (available_space)
+        {
+            LOG_TRACE(
+                logger,
+                "Reserved {} on local disk {}, having unreserved {}.",
+                ReadableSize(bytes),
+                backQuote(name),
+                ReadableSize(unreserved_space));
+        }
+        else
+        {
+            LOG_TRACE(
+                logger,
+                "Reserved {} on local disk {}.",
+                ReadableSize(bytes),
+                backQuote(name));
+        }
+
        ++reservation_count;
        reserved_bytes += bytes;
        return {unreserved_space - bytes};
@ -218,14 +233,14 @@ static UInt64 getTotalSpaceByName(const String & name, const String & disk_path,
    return total_size - keep_free_space_bytes;
 }

-UInt64 DiskLocal::getTotalSpace() const
+std::optional<UInt64> DiskLocal::getTotalSpace() const
 {
    if (broken || readonly)
        return 0;
    return getTotalSpaceByName(name, disk_path, keep_free_space_bytes);
 }

-UInt64 DiskLocal::getAvailableSpace() const
+std::optional<UInt64> DiskLocal::getAvailableSpace() const
 {
    if (broken || readonly)
        return 0;
@ -242,10 +257,10 @@ UInt64 DiskLocal::getAvailableSpace() const
    return total_size - keep_free_space_bytes;
 }

-UInt64 DiskLocal::getUnreservedSpace() const
+std::optional<UInt64> DiskLocal::getUnreservedSpace() const
 {
    std::lock_guard lock(DiskLocal::reservation_mutex);
-    auto available_space = getAvailableSpace();
+    auto available_space = *getAvailableSpace();
    available_space -= std::min(available_space, reserved_bytes);
    return available_space;
 }
@ -417,29 +432,12 @@ bool inline isSameDiskType(const IDisk & one, const IDisk & another)
    return typeid(one) == typeid(another);
 }

-void DiskLocal::copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
-{
-    if (isSameDiskType(*this, *to_disk))
-    {
-        fs::path to = fs::path(to_disk->getPath()) / to_path;
-        fs::path from = fs::path(disk_path) / from_path;
-        if (from_path.ends_with('/'))
-            from = from.parent_path();
-        if (fs::is_directory(from))
-            to /= from.filename();
-
-        fs::copy(from, to, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way.
-    }
-    else
-        copyThroughBuffers(from_path, to_disk, to_path, /* copy_root_dir */ true); /// Base implementation.
-}
-
 void DiskLocal::copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir)
 {
    if (isSameDiskType(*this, *to_disk))
-        fs::copy(from_dir, to_dir, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way.
+        fs::copy(fs::path(disk_path) / from_dir, fs::path(to_disk->getPath()) / to_dir, fs::copy_options::recursive | fs::copy_options::overwrite_existing); /// Use more optimal way.
    else
-        copyThroughBuffers(from_dir, to_disk, to_dir, /* copy_root_dir */ false); /// Base implementation.
+        IDisk::copyDirectoryContent(from_dir, to_disk, to_dir);
 }

 SyncGuardPtr DiskLocal::getDirectorySyncGuard(const String & path) const
@ -448,7 +446,7 @@ SyncGuardPtr DiskLocal::getDirectorySyncGuard(const String & path) const
 }


-void DiskLocal::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap &)
+void DiskLocal::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr context, const String & config_prefix, const DisksMap & disk_map)
 {
    String new_disk_path;
    UInt64 new_keep_free_space_bytes;
@ -460,10 +458,13 @@ void DiskLocal::applyNewSettings(const Poco::Util::AbstractConfiguration & confi

    if (keep_free_space_bytes != new_keep_free_space_bytes)
        keep_free_space_bytes = new_keep_free_space_bytes;
+
+    IDisk::applyNewSettings(config, context, config_prefix, disk_map);
 }

-DiskLocal::DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_)
-    : IDisk(name_)
+DiskLocal::DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_,
+                     const Poco::Util::AbstractConfiguration & config, const String & config_prefix)
+    : IDisk(name_, config, config_prefix)
    , disk_path(path_)
    , keep_free_space_bytes(keep_free_space_bytes_)
    , logger(&Poco::Logger::get("DiskLocal"))
@ -472,13 +473,24 @@ DiskLocal::DiskLocal(const String & name_, const String & path_, UInt64 keep_fre
 }

 DiskLocal::DiskLocal(
-    const String & name_, const String & path_, UInt64 keep_free_space_bytes_, ContextPtr context, UInt64 local_disk_check_period_ms)
-    : DiskLocal(name_, path_, keep_free_space_bytes_)
+    const String & name_, const String & path_, UInt64 keep_free_space_bytes_, ContextPtr context,
+    const Poco::Util::AbstractConfiguration & config, const String & config_prefix)
+    : DiskLocal(name_, path_, keep_free_space_bytes_, config, config_prefix)
 {
+    auto local_disk_check_period_ms = config.getUInt("local_disk_check_period_ms", 0);
    if (local_disk_check_period_ms > 0)
        disk_checker = std::make_unique<DiskLocalCheckThread>(this, context, local_disk_check_period_ms);
 }

+DiskLocal::DiskLocal(const String & name_, const String & path_)
+    : IDisk(name_)
+    , disk_path(path_)
+    , keep_free_space_bytes(0)
+    , logger(&Poco::Logger::get("DiskLocal"))
+    , data_source_description(getLocalDataSourceDescription(disk_path))
+{
+}
+
 DataSourceDescription DiskLocal::getDataSourceDescription() const
 {
    return data_source_description;
@ -720,7 +732,7 @@ void registerDiskLocal(DiskFactory & factory, bool global_skip_access_check)

        bool skip_access_check = global_skip_access_check || config.getBool(config_prefix + ".skip_access_check", false);
        std::shared_ptr<IDisk> disk
-            = std::make_shared<DiskLocal>(name, path, keep_free_space_bytes, context, config.getUInt("local_disk_check_period_ms", 0));
+            = std::make_shared<DiskLocal>(name, path, keep_free_space_bytes, context, config, config_prefix);
        disk->startup(context, skip_access_check);
        return disk;
    };
--- a/src/Disks/DiskLocal.h
+++ b/src/Disks/DiskLocal.h
@ -19,23 +19,25 @@ public:
    friend class DiskLocalCheckThread;
    friend class DiskLocalReservation;

-    DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_);
+    DiskLocal(const String & name_, const String & path_, UInt64 keep_free_space_bytes_,
+              const Poco::Util::AbstractConfiguration & config, const String & config_prefix);
    DiskLocal(
        const String & name_,
        const String & path_,
        UInt64 keep_free_space_bytes_,
        ContextPtr context,
-        UInt64 local_disk_check_period_ms);
+        const Poco::Util::AbstractConfiguration & config,
+        const String & config_prefix);
+
+    DiskLocal(const String & name_, const String & path_);

    const String & getPath() const override { return disk_path; }

    ReservationPtr reserve(UInt64 bytes) override;

-    UInt64 getTotalSpace() const override;
-
-    UInt64 getAvailableSpace() const override;
-
-    UInt64 getUnreservedSpace() const override;
+    std::optional<UInt64> getTotalSpace() const override;
+    std::optional<UInt64> getAvailableSpace() const override;
+    std::optional<UInt64> getUnreservedSpace() const override;

    UInt64 getKeepingFreeSpace() const override { return keep_free_space_bytes; }

@ -63,8 +65,6 @@ public:

    void replaceFile(const String & from_path, const String & to_path) override;

-    void copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path) override;
-
    void copyDirectoryContent(const String & from_dir, const std::shared_ptr<IDisk> & to_disk, const String & to_dir) override;

    void listFiles(const String & path, std::vector<String> & file_names) const override;
--- a/src/Disks/DiskSelector.cpp
+++ b/src/Disks/DiskSelector.cpp
@ -53,7 +53,7 @@ void DiskSelector::initialize(const Poco::Util::AbstractConfiguration & config,
        disks.emplace(
            default_disk_name,
            std::make_shared<DiskLocal>(
-                default_disk_name, context->getPath(), 0, context, config.getUInt("local_disk_check_period_ms", 0)));
+                default_disk_name, context->getPath(), 0, context, config, config_prefix));
    }

    is_initialized = true;
--- a/src/Disks/Executor.h
+++ b/src/Disks/Executor.h
@ -1,42 +0,0 @@
-#pragma once
-
-#include <future>
-#include <functional>
-
-namespace DB
-{
-
-/// Interface to run task asynchronously with possibility to wait for execution.
-class Executor
-{
-public:
-    virtual ~Executor() = default;
-    virtual std::future<void> execute(std::function<void()> task) = 0;
-};
-
-/// Executes task synchronously in case when disk doesn't support async operations.
-class SyncExecutor : public Executor
-{
-public:
-    SyncExecutor() = default;
-    std::future<void> execute(std::function<void()> task) override
-    {
-        auto promise = std::make_shared<std::promise<void>>();
-        try
-        {
-            task();
-            promise->set_value();
-        }
-        catch (...)
-        {
-            try
-            {
-                promise->set_exception(std::current_exception());
-            }
-            catch (...) { }
-        }
-        return promise->get_future();
-    }
-};
-
-}
--- a/src/Disks/IDisk.cpp
+++ b/src/Disks/IDisk.cpp
@ -1,5 +1,4 @@
 #include "IDisk.h"
-#include "Disks/Executor.h"
 #include <IO/ReadBufferFromFileBase.h>
 #include <IO/WriteBufferFromFileBase.h>
 #include <IO/copyData.h>
@ -80,18 +79,33 @@ UInt128 IDisk::getEncryptedFileIV(const String &) const

 using ResultsCollector = std::vector<std::future<void>>;

-void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_path, Executor & exec, ResultsCollector & results, bool copy_root_dir, const WriteSettings & settings)
+void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_path, ThreadPool & pool, ResultsCollector & results, bool copy_root_dir, const WriteSettings & settings)
 {
    if (from_disk.isFile(from_path))
    {
-        auto result = exec.execute(
-            [&from_disk, from_path, &to_disk, to_path, &settings]()
+        auto promise = std::make_shared<std::promise<void>>();
+        auto future = promise->get_future();
+
+        pool.scheduleOrThrowOnError(
+            [&from_disk, from_path, &to_disk, to_path, &settings, promise, thread_group = CurrentThread::getGroup()]()
            {
-                setThreadName("DiskCopier");
-                from_disk.copyFile(from_path, to_disk, fs::path(to_path) / fileName(from_path), settings);
+                try
+                {
+                    SCOPE_EXIT_SAFE(if (thread_group) CurrentThread::detachFromGroupIfNotDetached(););
+
+                    if (thread_group)
+                        CurrentThread::attachToGroup(thread_group);
+
+                    from_disk.copyFile(from_path, to_disk, fs::path(to_path) / fileName(from_path), settings);
+                    promise->set_value();
+                }
+                catch (...)
+                {
+                    promise->set_exception(std::current_exception());
+                }
            });

-        results.push_back(std::move(result));
+        results.push_back(std::move(future));
    }
    else
    {
@ -104,13 +118,12 @@ void asyncCopy(IDisk & from_disk, String from_path, IDisk & to_disk, String to_p
        }

        for (auto it = from_disk.iterateDirectory(from_path); it->isValid(); it->next())
-            asyncCopy(from_disk, it->path(), to_disk, dest, exec, results, true, settings);
+            asyncCopy(from_disk, it->path(), to_disk, dest, pool, results, true, settings);
    }
 }

 void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path, bool copy_root_dir)
 {
-    auto & exec = to_disk->getExecutor();
    ResultsCollector results;

    WriteSettings settings;
@ -118,17 +131,12 @@ void IDisk::copyThroughBuffers(const String & from_path, const std::shared_ptr<I
    /// Avoid high memory usage. See test_s3_zero_copy_ttl/test.py::test_move_and_s3_memory_usage
    settings.s3_allow_parallel_part_upload = false;

-    asyncCopy(*this, from_path, *to_disk, to_path, exec, results, copy_root_dir, settings);
+    asyncCopy(*this, from_path, *to_disk, to_path, copying_thread_pool, results, copy_root_dir, settings);

    for (auto & result : results)
        result.wait();
    for (auto & result : results)
-        result.get();
-}
-
-void IDisk::copy(const String & from_path, const std::shared_ptr<IDisk> & to_disk, const String & to_path)
-{
-    copyThroughBuffers(from_path, to_disk, to_path, true);
+        result.get();   /// May rethrow an exception
 }


@ -137,7 +145,7 @@ void IDisk::copyDirectoryContent(const String & from_dir, const std::shared_ptr<
    if (!to_disk->exists(to_dir))
        to_disk->createDirectories(to_dir);

-    copyThroughBuffers(from_dir, to_disk, to_dir, false);
+    copyThroughBuffers(from_dir, to_disk, to_dir, /* copy_root_dir */ false);
 }

 void IDisk::truncateFile(const String &, size_t)
@ -233,4 +241,9 @@ catch (Exception & e)
    throw;
 }

+void IDisk::applyNewSettings(const Poco::Util::AbstractConfiguration & config, ContextPtr /*context*/, const String & config_prefix, const DisksMap & /*map*/)
+{
+    copying_thread_pool.setMaxThreads(config.getInt(config_prefix + ".thread_pool_size", 16));
+}
+
 }
--- a/Show More
+++ b/Show More