Merge remote-tracking branch 'ck/master' into rocksdb_metacache

2024-11-21 23:21:59 +00:00 · 2022-03-28 21:03:22 +08:00 · 2022-03-28 21:03:22 +08:00 · 38702be40c
commit 38702be40c
parent a75ded9da3 2db8d354c6
259 changed files with 5284 additions and 2011 deletions
--- a/.clang-tidy
+++ b/.clang-tidy
@ -210,3 +210,6 @@ CheckOptions:
    value: false
  - key: performance-move-const-arg.CheckTriviallyCopyableMove
    value: false
+    # Workaround clang-tidy bug: https://github.com/llvm/llvm-project/issues/46097
+  - key: readability-identifier-naming.TypeTemplateParameterIgnoredRegexp
+    value: expr-type
--- a/.github/workflows/nightly.yml
+++ b/.github/workflows/nightly.yml
@ -7,6 +7,7 @@ env:
 "on":
  schedule:
    - cron: '13 3 * * *'
+  workflow_dispatch:

 jobs:
  DockerHubPushAarch64:
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -11,6 +11,7 @@

 * Make `arrayCompact` function behave as other higher-order functions: perform compaction not of lambda function results but on the original array. If you're using nontrivial lambda functions in arrayCompact you may restore old behaviour by wrapping `arrayCompact` arguments into `arrayMap`. Closes [#34010](https://github.com/ClickHouse/ClickHouse/issues/34010) [#18535](https://github.com/ClickHouse/ClickHouse/issues/18535) [#14778](https://github.com/ClickHouse/ClickHouse/issues/14778). [#34795](https://github.com/ClickHouse/ClickHouse/pull/34795) ([Alexandre Snarskii](https://github.com/snar)).
 * Change implementation specific behavior on overflow of function `toDatetime`. It will be saturated to the nearest min/max supported instant of datetime instead of wraparound. This change is highlighted as "backward incompatible" because someone may unintentionally rely on the old behavior. [#32898](https://github.com/ClickHouse/ClickHouse/pull/32898) ([HaiBo Li](https://github.com/marising)).
+* Make functions `cast(value, 'IPv4')`, `cast(value, 'IPv6')` behave the same as the `toIPv4`, `toIPv6` functions. Changed the behavior for an incorrect IP address passed into functions `toIPv4`, `toIPv6`: now, if an invalid IP address is passed into these functions, an exception is raised; previously these functions returned a default value. Added functions `IPv4StringToNumOrDefault`, `IPv4StringToNumOrNull`, `IPv6StringToNumOrDefault`, `IPv6StringToNumOrNull`, `toIPv4OrDefault`, `toIPv4OrNull`, `toIPv6OrDefault`, `toIPv6OrNull`. Functions `IPv4StringToNumOrDefault`, `toIPv4OrDefault`, `toIPv6OrDefault` should be used if previous logic relied on `IPv4StringToNum`, `toIPv4`, `toIPv6` returning a default value for an invalid address. Added setting `cast_ipv4_ipv6_default_on_conversion_error`; if this setting is enabled, IP address conversion functions will behave as before. Closes [#22825](https://github.com/ClickHouse/ClickHouse/issues/22825). Closes [#5799](https://github.com/ClickHouse/ClickHouse/issues/5799). Closes [#35156](https://github.com/ClickHouse/ClickHouse/issues/35156). [#35240](https://github.com/ClickHouse/ClickHouse/pull/35240) ([Maksim Kita](https://github.com/kitaisreal)).

 #### New Feature

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -266,7 +266,7 @@ if (OBJCOPY_PATH AND YANDEX_OFFICIAL_BUILD AND (NOT CMAKE_TOOLCHAIN_FILE))
 endif ()

 # Allows to build stripped binary in a separate directory
-if (OBJCOPY_PATH AND READELF_PATH)
+if (OBJCOPY_PATH AND STRIP_PATH)
    option(INSTALL_STRIPPED_BINARIES "Build stripped binaries with debug info in separate directory" OFF)
    if (INSTALL_STRIPPED_BINARIES)
        set(STRIPPED_BINARIES_OUTPUT "stripped" CACHE STRING "A separate directory for stripped information")
--- a/cmake/strip.sh
+++ b/cmake/strip.sh
@ -1,28 +0,0 @@
-#!/usr/bin/env bash
-
-BINARY_PATH=$1
-BINARY_NAME=$(basename "$BINARY_PATH")
-DESTINATION_STRIPPED_DIR=$2
-OBJCOPY_PATH=${3:objcopy}
-READELF_PATH=${4:readelf}
-
-BUILD_ID=$($READELF_PATH -n "$1" | sed -n '/Build ID/ { s/.*: //p; q; }')
-BUILD_ID_PREFIX=${BUILD_ID:0:2}
-BUILD_ID_SUFFIX=${BUILD_ID:2}
-
-DESTINATION_DEBUG_INFO_DIR="$DESTINATION_STRIPPED_DIR/lib/debug/.build-id"
-DESTINATION_STRIP_BINARY_DIR="$DESTINATION_STRIPPED_DIR/bin"
-
-mkdir -p "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX"
-mkdir -p "$DESTINATION_STRIP_BINARY_DIR"
-
-
-cp "$BINARY_PATH" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
-
-$OBJCOPY_PATH --only-keep-debug --compress-debug-sections "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME" "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
-chmod 0644 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
-chown 0:0 "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug"
-
-strip --remove-section=.comment --remove-section=.note "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
-
-$OBJCOPY_PATH --add-gnu-debuglink "$DESTINATION_DEBUG_INFO_DIR/$BUILD_ID_PREFIX/$BUILD_ID_SUFFIX.debug" "$DESTINATION_STRIP_BINARY_DIR/$BINARY_NAME"
--- a/cmake/strip_binary.cmake
+++ b/cmake/strip_binary.cmake
@ -11,16 +11,43 @@ macro(clickhouse_strip_binary)
       message(FATAL_ERROR "A binary path name must be provided for stripping binary")
   endif()

-
   if (NOT DEFINED STRIP_DESTINATION_DIR)
       message(FATAL_ERROR "Destination directory for stripped binary must be provided")
   endif()

   add_custom_command(TARGET ${STRIP_TARGET} POST_BUILD
-     COMMAND bash ${ClickHouse_SOURCE_DIR}/cmake/strip.sh ${STRIP_BINARY_PATH} ${STRIP_DESTINATION_DIR} ${OBJCOPY_PATH} ${READELF_PATH}
+       COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/lib/debug/bin"
+       COMMAND mkdir -p "${STRIP_DESTINATION_DIR}/bin"
+       COMMAND cp "${STRIP_BINARY_PATH}" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
+       COMMAND "${OBJCOPY_PATH}" --only-keep-debug --compress-debug-sections "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}" "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
+       COMMAND chmod 0644 "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug"
+       COMMAND "${STRIP_PATH}" --remove-section=.comment --remove-section=.note "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
+       COMMAND "${OBJCOPY_PATH}" --add-gnu-debuglink "${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug" "${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET}"
       COMMENT "Stripping clickhouse binary" VERBATIM
   )

   install(PROGRAMS ${STRIP_DESTINATION_DIR}/bin/${STRIP_TARGET} DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
-   install(DIRECTORY ${STRIP_DESTINATION_DIR}/lib/debug DESTINATION ${CMAKE_INSTALL_LIBDIR} COMPONENT clickhouse)
+   # install(FILES ... DESTINATION <dir>) expects a directory, not a file name: the
+   # .debug file keeps its own name inside <libdir>/debug/<full bindir>.
+   install(FILES ${STRIP_DESTINATION_DIR}/lib/debug/bin/${STRIP_TARGET}.debug DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR} COMPONENT clickhouse)
+endmacro()
+
+
+# Creates an empty "<TARGET>.debug" placeholder file after TARGET is built and
+# registers it for installation under <libdir>/debug/<full bindir>.
+#
+# Arguments:
+#   TARGET          - existing build target; also used as the base name of the .debug file.
+#   DESTINATION_DIR - directory under which "lib/debug/<TARGET>.debug" is created.
+#
+# NOTE(review): presumably used when binaries are not stripped, so that packaging
+# which lists the .debug files explicitly still finds them - confirm against callers.
+macro(clickhouse_make_empty_debug_info_for_nfpm)
+   set(oneValueArgs TARGET DESTINATION_DIR)
+   cmake_parse_arguments(EMPTY_DEBUG "" "${oneValueArgs}" "" ${ARGN})
+
+   if (NOT DEFINED EMPTY_DEBUG_TARGET)
+       message(FATAL_ERROR "A target name must be provided for creating empty debug info")
+   endif()
+
+   if (NOT DEFINED EMPTY_DEBUG_DESTINATION_DIR)
+       message(FATAL_ERROR "Destination directory for empty debug must be provided")
+   endif()
+
+   # Runs after every build of the target; touch leaves an existing file's content intact.
+   add_custom_command(TARGET ${EMPTY_DEBUG_TARGET} POST_BUILD
+       COMMAND mkdir -p "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug"
+       COMMAND touch "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug"
+       COMMENT "Adding empty debug info for NFPM" VERBATIM
+   )
+
+   install(FILES "${EMPTY_DEBUG_DESTINATION_DIR}/lib/debug/${EMPTY_DEBUG_TARGET}.debug" DESTINATION "${CMAKE_INSTALL_LIBDIR}/debug/${CMAKE_INSTALL_FULL_BINDIR}" COMPONENT clickhouse)
 endmacro()
--- a/cmake/tools.cmake
+++ b/cmake/tools.cmake
@ -170,32 +170,32 @@ else ()
    message (FATAL_ERROR "Cannot find objcopy.")
 endif ()

-# Readelf (FIXME copypaste)
+# Strip (FIXME copypaste)

 if (COMPILER_GCC)
-    find_program (READELF_PATH NAMES "llvm-readelf" "llvm-readelf-13" "llvm-readelf-12" "llvm-readelf-11" "readelf")
+    find_program (STRIP_PATH NAMES "llvm-strip" "llvm-strip-13" "llvm-strip-12" "llvm-strip-11" "strip")
 else ()
-    find_program (READELF_PATH NAMES "llvm-readelf-${COMPILER_VERSION_MAJOR}" "llvm-readelf" "readelf")
+    find_program (STRIP_PATH NAMES "llvm-strip-${COMPILER_VERSION_MAJOR}" "llvm-strip" "strip")
 endif ()

-if (NOT READELF_PATH AND OS_DARWIN)
+if (NOT STRIP_PATH AND OS_DARWIN)
    find_program (BREW_PATH NAMES "brew")
    if (BREW_PATH)
        execute_process (COMMAND ${BREW_PATH} --prefix llvm ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE LLVM_PREFIX)
        if (LLVM_PREFIX)
-            find_program (READELF_PATH NAMES "llvm-readelf" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH)
+            find_program (STRIP_PATH NAMES "llvm-strip" PATHS "${LLVM_PREFIX}/bin" NO_DEFAULT_PATH)
        endif ()
-        if (NOT READELF_PATH)
+        if (NOT STRIP_PATH)
            execute_process (COMMAND ${BREW_PATH} --prefix binutils ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE OUTPUT_VARIABLE BINUTILS_PREFIX)
            if (BINUTILS_PREFIX)
-                find_program (READELF_PATH NAMES "readelf" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH)
+                find_program (STRIP_PATH NAMES "strip" PATHS "${BINUTILS_PREFIX}/bin" NO_DEFAULT_PATH)
            endif ()
        endif ()
    endif ()
 endif ()

-if (READELF_PATH)
-    message (STATUS "Using readelf: ${READELF_PATH}")
+if (STRIP_PATH)
+    message (STATUS "Using strip: ${STRIP_PATH}")
 else ()
-    message (FATAL_ERROR "Cannot find readelf.")
+    message (FATAL_ERROR "Cannot find strip.")
 endif ()
--- a/contrib/hyperscan
+++ b/contrib/hyperscan
@ -1 +1 @@
-Subproject commit e9f08df0213fc637aac0a5bbde9beeaeba2fe9fa
+Subproject commit 5edc68c5ac68d2d4f876159e9ee84def6d3dc87c
--- a/contrib/libcxx
+++ b/contrib/libcxx
@ -1 +1 @@
-Subproject commit 61e60294b1de01483caa9f5d00f437c99b674de6
+Subproject commit 172b2ae074f6755145b91c53a95c8540c1468239
--- a/contrib/libcxx-cmake/CMakeLists.txt
+++ b/contrib/libcxx-cmake/CMakeLists.txt
@ -18,12 +18,14 @@ set(SRCS
 "${LIBCXX_SOURCE_DIR}/src/filesystem/directory_iterator.cpp"
 "${LIBCXX_SOURCE_DIR}/src/filesystem/int128_builtins.cpp"
 "${LIBCXX_SOURCE_DIR}/src/filesystem/operations.cpp"
+"${LIBCXX_SOURCE_DIR}/src/format.cpp"
 "${LIBCXX_SOURCE_DIR}/src/functional.cpp"
 "${LIBCXX_SOURCE_DIR}/src/future.cpp"
 "${LIBCXX_SOURCE_DIR}/src/hash.cpp"
 "${LIBCXX_SOURCE_DIR}/src/ios.cpp"
 "${LIBCXX_SOURCE_DIR}/src/ios.instantiations.cpp"
 "${LIBCXX_SOURCE_DIR}/src/iostream.cpp"
+"${LIBCXX_SOURCE_DIR}/src/legacy_pointer_safety.cpp"
 "${LIBCXX_SOURCE_DIR}/src/locale.cpp"
 "${LIBCXX_SOURCE_DIR}/src/memory.cpp"
 "${LIBCXX_SOURCE_DIR}/src/mutex.cpp"
@ -33,6 +35,9 @@ set(SRCS
 "${LIBCXX_SOURCE_DIR}/src/random.cpp"
 "${LIBCXX_SOURCE_DIR}/src/random_shuffle.cpp"
 "${LIBCXX_SOURCE_DIR}/src/regex.cpp"
+"${LIBCXX_SOURCE_DIR}/src/ryu/d2fixed.cpp"
+"${LIBCXX_SOURCE_DIR}/src/ryu/d2s.cpp"
+"${LIBCXX_SOURCE_DIR}/src/ryu/f2s.cpp"
 "${LIBCXX_SOURCE_DIR}/src/shared_mutex.cpp"
 "${LIBCXX_SOURCE_DIR}/src/stdexcept.cpp"
 "${LIBCXX_SOURCE_DIR}/src/string.cpp"
@ -49,7 +54,9 @@ set(SRCS
 add_library(cxx ${SRCS})
 set_target_properties(cxx PROPERTIES FOLDER "contrib/libcxx-cmake")

-target_include_directories(cxx SYSTEM BEFORE PUBLIC $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include>)
+# Expose libc++'s public headers and its src/ directory to build-tree consumers.
+# The full path has to sit inside $<BUILD_INTERFACE:...>; a suffix written after the
+# closing '>' would be left behind as a bare "/src" wherever the expression is empty.
+target_include_directories(cxx SYSTEM BEFORE PUBLIC
+        $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/include>
+        $<BUILD_INTERFACE:${LIBCXX_SOURCE_DIR}/src>)
 target_compile_definitions(cxx PRIVATE -D_LIBCPP_BUILDING_LIBRARY -DLIBCXX_BUILDING_LIBCXXABI)

 # Enable capturing stack traces for all exceptions.
--- a/contrib/libcxxabi
+++ b/contrib/libcxxabi
@ -1 +1 @@
-Subproject commit df8f1e727dbc9e2bedf2282096fa189dc3fe0076
+Subproject commit 6eb7cc7a7bdd779e6734d1b9fb451df2274462d7
--- a/contrib/libcxxabi-cmake/CMakeLists.txt
+++ b/contrib/libcxxabi-cmake/CMakeLists.txt
@ -1,24 +1,24 @@
 set(LIBCXXABI_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libcxxabi")

 set(SRCS
-"${LIBCXXABI_SOURCE_DIR}/src/stdlib_stdexcept.cpp"
-"${LIBCXXABI_SOURCE_DIR}/src/cxa_virtual.cpp"
-"${LIBCXXABI_SOURCE_DIR}/src/cxa_thread_atexit.cpp"
-"${LIBCXXABI_SOURCE_DIR}/src/fallback_malloc.cpp"
-"${LIBCXXABI_SOURCE_DIR}/src/cxa_guard.cpp"
-"${LIBCXXABI_SOURCE_DIR}/src/cxa_default_handlers.cpp"
-"${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp"
-"${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp"
 "${LIBCXXABI_SOURCE_DIR}/src/abort_message.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/cxa_aux_runtime.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/cxa_default_handlers.cpp"
 "${LIBCXXABI_SOURCE_DIR}/src/cxa_demangle.cpp"
 "${LIBCXXABI_SOURCE_DIR}/src/cxa_exception.cpp"
-"${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp"
 "${LIBCXXABI_SOURCE_DIR}/src/cxa_exception_storage.cpp"
-"${LIBCXXABI_SOURCE_DIR}/src/private_typeinfo.cpp"
-"${LIBCXXABI_SOURCE_DIR}/src/stdlib_typeinfo.cpp"
-"${LIBCXXABI_SOURCE_DIR}/src/cxa_aux_runtime.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/cxa_guard.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/cxa_thread_atexit.cpp"
 "${LIBCXXABI_SOURCE_DIR}/src/cxa_vector.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/cxa_virtual.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/fallback_malloc.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/private_typeinfo.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp"
 "${LIBCXXABI_SOURCE_DIR}/src/stdlib_new_delete.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/stdlib_stdexcept.cpp"
+"${LIBCXXABI_SOURCE_DIR}/src/stdlib_typeinfo.cpp"
 )

 add_library(cxxabi ${SRCS})
@ -30,6 +30,7 @@ target_compile_options(cxxabi PRIVATE -w)
 target_include_directories(cxxabi SYSTEM BEFORE
    PUBLIC $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/include>
    PRIVATE $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/../libcxx/include>
+    PRIVATE $<BUILD_INTERFACE:${LIBCXXABI_SOURCE_DIR}/../libcxx/src>
 )
 target_compile_definitions(cxxabi PRIVATE -D_LIBCPP_BUILDING_LIBRARY)
 target_compile_options(cxxabi PRIVATE -nostdinc++ -fno-sanitize=undefined -Wno-macro-redefined) # If we don't disable UBSan, infinite recursion happens in dynamic_cast.
--- a/contrib/libxml2
+++ b/contrib/libxml2
@ -1 +1 @@
-Subproject commit 18890f471c420411aa3c989e104d090966ec9dbf
+Subproject commit a075d256fd9ff15590b86d981b75a50ead124fca
--- a/contrib/replxx
+++ b/contrib/replxx
@ -1 +1 @@
-Subproject commit 9460e5e0fc10f78f460af26a6bd928798cac864d
+Subproject commit 6f0b6f151ae2a044625ae93acd19ca365fcea64d
--- a/docker/docs/check/Dockerfile
+++ b/docker/docs/check/Dockerfile
@ -1,4 +1,3 @@
-# rebuild in #33610
 # docker build -t clickhouse/docs-check .
 ARG FROM_TAG=latest
 FROM clickhouse/docs-builder:$FROM_TAG
--- a/docker/test/stress/run.sh
+++ b/docker/test/stress/run.sh
@ -131,9 +131,6 @@ function start()
        # use root to match with current uid
        clickhouse start --user root >/var/log/clickhouse-server/stdout.log 2>>/var/log/clickhouse-server/stderr.log
        sleep 0.5
-        cat /var/log/clickhouse-server/stdout.log
-        tail -n200 /var/log/clickhouse-server/stderr.log
-        tail -n200 /var/log/clickhouse-server/clickhouse-server.log
        counter=$((counter + 1))
    done

@ -211,14 +208,12 @@ stop
 start

 clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/test_results.tsv \
-                       || echo -e 'Server failed to start\tFAIL' >> /test_output/test_results.tsv
+                       || (echo -e 'Server failed to start (see application_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
+                       && grep -Fa "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log > /test_output/application_errors.txt)

 [ -f /var/log/clickhouse-server/clickhouse-server.log ] || echo -e "Server log does not exist\tFAIL"
 [ -f /var/log/clickhouse-server/stderr.log ] || echo -e "Stderr log does not exist\tFAIL"

-# Print Fatal log messages to stdout
-zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log*
-
 # Grep logs for sanitizer asserts, crashes and other critical errors

 # Sanitizer asserts
@ -235,20 +230,26 @@ zgrep -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/
    || echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv

 # Logical errors
-zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \
-    && echo -e 'Logical error thrown (see clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
+zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log* > /test_output/logical_errors.txt \
+    && echo -e 'Logical error thrown (see clickhouse-server.log or logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
    || echo -e 'No logical errors\tOK' >> /test_output/test_results.tsv

+# Remove file logical_errors.txt if it's empty
+[ -s /test_output/logical_errors.txt ] || rm /test_output/logical_errors.txt
+
 # Crash
 zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \
    && echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
    || echo -e 'Not crashed\tOK' >> /test_output/test_results.tsv

 # It also checks for crash without stacktrace (printed by watchdog)
-zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log* > /dev/null \
-    && echo -e 'Fatal message in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \
+zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log* > /test_output/fatal_messages.txt \
+    && echo -e 'Fatal message in clickhouse-server.log (see fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
    || echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv

+# Remove file fatal_messages.txt if it's empty
+[ -s /test_output/fatal_messages.txt ] || rm /test_output/fatal_messages.txt
+
 zgrep -Fa "########################################" /test_output/* > /dev/null \
    && echo -e 'Killed by signal (output files)\tFAIL' >> /test_output/test_results.tsv

@ -259,12 +260,12 @@ echo -e "Backward compatibility check\n"

 echo "Download previous release server"
 mkdir previous_release_package_folder
-clickhouse-client --query="SELECT version()" | ./download_previous_release && echo -e 'Download script exit code\tOK' >> /test_output/backward_compatibility_check_results.tsv \
-    || echo -e 'Download script failed\tFAIL' >> /test_output/backward_compatibility_check_results.tsv
+clickhouse-client --query="SELECT version()" | ./download_previous_release && echo -e 'Download script exit code\tOK' >> /test_output/test_results.tsv \
+    || echo -e 'Download script failed\tFAIL' >> /test_output/test_results.tsv

 if [ "$(ls -A previous_release_package_folder/clickhouse-common-static_*.deb && ls -A previous_release_package_folder/clickhouse-server_*.deb)" ]
 then
-    echo -e "Successfully downloaded previous release packets\tOK" >> /test_output/backward_compatibility_check_results.tsv
+    echo -e "Successfully downloaded previous release packets\tOK" >> /test_output/test_results.tsv
    stop

    # Uninstall current packages
@ -290,8 +291,8 @@ then
    mkdir tmp_stress_output
    
    ./stress --backward-compatibility-check --output-folder tmp_stress_output --global-time-limit=1200 \
-        && echo -e 'Test script exit code\tOK' >> /test_output/backward_compatibility_check_results.tsv \
-        || echo -e 'Test script failed\tFAIL' >> /test_output/backward_compatibility_check_results.tsv
+        && echo -e 'Backward compatibility check: Test script exit code\tOK' >> /test_output/test_results.tsv \
+        || echo -e 'Backward compatibility check: Test script failed\tFAIL' >> /test_output/test_results.tsv
    rm -rf tmp_stress_output

    clickhouse-client --query="SELECT 'Tables count:', count() FROM system.tables"
@ -301,8 +302,9 @@ then
    # Start new server
    configure
    start 500
-    clickhouse-client --query "SELECT 'Server successfully started', 'OK'" >> /test_output/backward_compatibility_check_results.tsv \
-        || echo -e 'Server failed to start\tFAIL' >> /test_output/backward_compatibility_check_results.tsv
+    # Record whether the new server came up; on failure also save the Application error lines.
+    # The grep pattern is a regex, so -F (fixed-string matching) must not be used here.
+    clickhouse-client --query "SELECT 'Backward compatibility check: Server successfully started', 'OK'" >> /test_output/test_results.tsv \
+        || (echo -e 'Backward compatibility check: Server failed to start\tFAIL' >> /test_output/test_results.tsv \
+        && grep -a "<Error>.*Application" /var/log/clickhouse-server/clickhouse-server.log >> /test_output/bc_check_application_errors.txt)

    clickhouse-client --query="SELECT 'Server version: ', version()"

@ -312,10 +314,12 @@ then
    stop

    # Error messages (we should ignore some errors)
+    echo "Check for Error messages in server log:"
    zgrep -Fav -e "Code: 236. DB::Exception: Cancelled merging parts" \
               -e "Code: 236. DB::Exception: Cancelled mutating parts" \
               -e "REPLICA_IS_ALREADY_ACTIVE" \
               -e "REPLICA_IS_ALREADY_EXIST" \
+               -e "ALL_REPLICAS_LOST" \
               -e "DDLWorker: Cannot parse DDL task query" \
               -e "RaftInstance: failed to accept a rpc connection due to error 125" \
               -e "UNKNOWN_DATABASE" \
@ -328,47 +332,53 @@ then
               -e "Code: 1000, e.code() = 111, Connection refused" \
               -e "UNFINISHED" \
               -e "Renaming unexpected part" \
-        /var/log/clickhouse-server/clickhouse-server.log | zgrep -Fa "<Error>" > /dev/null \
-        && echo -e 'Error message in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
-        || echo -e 'No Error messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv
+        /var/log/clickhouse-server/clickhouse-server.log | zgrep -Fa "<Error>" > /test_output/bc_check_error_messages.txt \
+        && echo -e 'Backward compatibility check: Error message in clickhouse-server.log (see bc_check_error_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
+        || echo -e 'Backward compatibility check: No Error messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
+
+    # Remove file bc_check_error_messages.txt if it's empty
+    [ -s /test_output/bc_check_error_messages.txt ] || rm /test_output/bc_check_error_messages.txt

    # Sanitizer asserts
    zgrep -Fa "==================" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
    zgrep -Fa "WARNING" /var/log/clickhouse-server/stderr.log >> /test_output/tmp
    zgrep -Fav "ASan doesn't fully support makecontext/swapcontext functions" /test_output/tmp > /dev/null \
-        && echo -e 'Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
-        || echo -e 'No sanitizer asserts\tOK' >> /test_output/backward_compatibility_check_results.tsv
+        && echo -e 'Backward compatibility check: Sanitizer assert (in stderr.log)\tFAIL' >> /test_output/test_results.tsv \
+        || echo -e 'Backward compatibility check: No sanitizer asserts\tOK' >> /test_output/test_results.tsv
    rm -f /test_output/tmp

    # OOM
    zgrep -Fa " <Fatal> Application: Child process was terminated by signal 9" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
-        && echo -e 'OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
-        || echo -e 'No OOM messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv
+        && echo -e 'Backward compatibility check: OOM killer (or signal 9) in clickhouse-server.log\tFAIL' >> /test_output/test_results.tsv \
+        || echo -e 'Backward compatibility check: No OOM messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv

    # Logical errors
-    zgrep -Fa "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
-        && echo -e 'Logical error thrown (see clickhouse-server.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
-        || echo -e 'No logical errors\tOK' >> /test_output/backward_compatibility_check_results.tsv
+    echo "Check for Logical errors in server log:"
+    zgrep -Fa -A20 "Code: 49, e.displayText() = DB::Exception:" /var/log/clickhouse-server/clickhouse-server.log > /test_output/bc_check_logical_errors.txt \
+        && echo -e 'Backward compatibility check: Logical error thrown (see clickhouse-server.log or bc_check_logical_errors.txt)\tFAIL' >> /test_output/test_results.tsv \
+        || echo -e 'Backward compatibility check: No logical errors\tOK' >> /test_output/test_results.tsv
+
+    # Remove file bc_check_logical_errors.txt if it's empty
+    [ -s /test_output/bc_check_logical_errors.txt ] || rm /test_output/bc_check_logical_errors.txt

    # Crash
    zgrep -Fa "########################################" /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
-        && echo -e 'Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
-        || echo -e 'Not crashed\tOK' >> /test_output/backward_compatibility_check_results.tsv
+        && echo -e 'Backward compatibility check: Killed by signal (in clickhouse-server.log)\tFAIL' >> /test_output/test_results.tsv \
+        || echo -e 'Backward compatibility check: Not crashed\tOK' >> /test_output/test_results.tsv

    # It also checks for crash without stacktrace (printed by watchdog)
-    zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log > /dev/null \
-        && echo -e 'Fatal message in clickhouse-server.log\tFAIL' >> /test_output/backward_compatibility_check_results.tsv \
-        || echo -e 'No fatal messages in clickhouse-server.log\tOK' >> /test_output/backward_compatibility_check_results.tsv
+    echo "Check for Fatal message in server log:"
+    zgrep -Fa " <Fatal> " /var/log/clickhouse-server/clickhouse-server.log > /test_output/bc_check_fatal_messages.txt \
+        && echo -e 'Backward compatibility check: Fatal message in clickhouse-server.log (see bc_check_fatal_messages.txt)\tFAIL' >> /test_output/test_results.tsv \
+        || echo -e 'Backward compatibility check: No fatal messages in clickhouse-server.log\tOK' >> /test_output/test_results.tsv
+
+    # Remove file bc_check_fatal_messages.txt if it's empty
+    [ -s /test_output/bc_check_fatal_messages.txt ] || rm /test_output/bc_check_fatal_messages.txt

 else
-    echo -e "Failed to download previous release packets\tFAIL" >> /test_output/backward_compatibility_check_results.tsv
+    echo -e "Backward compatibility check: Failed to download previous release packets\tFAIL" >> /test_output/test_results.tsv
 fi

-zgrep -Fa "FAIL" /test_output/backward_compatibility_check_results.tsv > /dev/null \
-        && echo -e 'Backward compatibility check\tFAIL' >> /test_output/test_results.tsv \
-        || echo -e 'Backward compatibility check\tOK' >> /test_output/test_results.tsv
-
-
 # Put logs into /test_output/
 for log_file in /var/log/clickhouse-server/clickhouse-server.log*
 do
--- a/docs/en/engines/table-engines/integrations/hive.md
+++ b/docs/en/engines/table-engines/integrations/hive.md
@ -137,7 +137,7 @@ CREATE TABLE test.test_orc
    `f_array_array_float` Array(Array(Float32)),
    `day` String
 )
-ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc')
+ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc')
 PARTITION BY day

 ```
--- a/docs/en/introduction/adopters.md
+++ b/docs/en/introduction/adopters.md
@ -195,5 +195,6 @@ toc_title: Adopters
 | <a href="https://shop.okraina.ru/" class="favicon">ООО «МПЗ Богородский»</a> | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) |
 | <a href="https://domclick.ru/" class="favicon">ДомКлик</a> | Real Estate | — | — | — | [Article in Russian, October 2021](https://habr.com/ru/company/domclick/blog/585936/) |
 | <a href="https://magenta-technology.ru/sistema-upravleniya-marshrutami-inkassacii-as-strela/" class="favicon">АС "Стрела"</a> | Transportation | — | — | — | [Job posting, Jan 2022](https://vk.com/topic-111905078_35689124?post=3553) |
+| <a href="https://piwik.pro/" class="favicon">Piwik PRO</a> | Web Analytics | — | — | — | [Official website, Dec 2018](https://piwik.pro/blog/piwik-pro-clickhouse-faster-efficient-reports/) |

 [Original article](https://clickhouse.com/docs/en/introduction/adopters/) <!--hide-->
--- a/docs/en/operations/caches.md
+++ b/docs/en/operations/caches.md
@ -5,7 +5,7 @@ toc_title: Caches

 # Cache Types {#cache-types}

-When performing queries, ClichHouse uses different caches.
+When performing queries, ClickHouse uses different caches.

 Main cache types:

--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@ -1616,3 +1616,14 @@ Possible values:

 Default value: `10000`.

+## global_memory_usage_overcommit_max_wait_microseconds {#global_memory_usage_overcommit_max_wait_microseconds}
+
+Sets maximum waiting time for global overcommit tracker.
+
+Possible values:
+
+-   Positive integer.
+
+Default value: `0`.
+
+
--- a/docs/en/operations/settings/memory-overcommit.md
+++ b/docs/en/operations/settings/memory-overcommit.md
@ -0,0 +1,31 @@
+# Memory overcommit
+
+Memory overcommit is an experimental technique intended to allow setting more flexible memory limits for queries.
+
+The idea of this technique is to introduce settings which can represent guaranteed amount of memory a query can use.
+When memory overcommit is enabled and the memory limit is reached ClickHouse will select the most overcommitted query and try to free memory by killing this query.
+
+When the memory limit is reached, any query will wait for some time during an attempt to allocate new memory.
+If memory is freed before the timeout expires, the query continues execution. Otherwise an exception is thrown and the query is killed.
+
+Selection of query to stop or kill is performed by either global or user overcommit trackers depending on what memory limit is reached.
+
+## User overcommit tracker
+
+User overcommit tracker finds a query with the biggest overcommit ratio in the user's query list.
+Overcommit ratio is computed as number of allocated bytes divided by value of `max_guaranteed_memory_usage` setting.
+
+Waiting timeout is set by `memory_usage_overcommit_max_wait_microseconds` setting.
+
+**Example**
+
+```sql
+SELECT number FROM numbers(1000) GROUP BY number SETTINGS max_guaranteed_memory_usage=4000, memory_usage_overcommit_max_wait_microseconds=500
+```
+
+## Global overcommit tracker
+
+Global overcommit tracker finds a query with the biggest overcommit ratio in the list of all queries.
+In this case overcommit ratio is computed as number of allocated bytes divided by value of `max_guaranteed_memory_usage_for_user` setting.
+
+Waiting timeout is set by `global_memory_usage_overcommit_max_wait_microseconds` parameter in the configuration file.
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@ -4220,10 +4220,36 @@ Possible values:
 -   0 — Disabled.
 -   1 — Enabled. The wait time equal shutdown_wait_unfinished config.

-Default value: 0.
+Default value: `0`.

 ## shutdown_wait_unfinished

 The waiting time in seconds for currently handled connections when shutdown server.

-Default Value: 5.
+Default Value: `5`.
+
+## max_guaranteed_memory_usage
+
+Maximum guaranteed memory usage for processing of single query.
+It represents soft limit in case when hard limit is reached on user level.
+Zero means unlimited.
+Read more about [memory overcommit](memory-overcommit.md).
+
+Default value: `0`.
+
+## memory_usage_overcommit_max_wait_microseconds
+
+Maximum time a thread will wait for memory to be freed in the case of memory overcommit at the user level.
+If the timeout is reached and memory is not freed, an exception is thrown.
+Read more about [memory overcommit](memory-overcommit.md).
+
+Default value: `0`.
+
+## max_guaranteed_memory_usage_for_user
+
+Maximum guaranteed memory usage for processing all concurrently running queries for the user.
+It represents soft limit in case when hard limit is reached on global level.
+Zero means unlimited.
+Read more about [memory overcommit](memory-overcommit.md).
+
+Default value: `0`.
--- a/docs/tools/requirements.txt
+++ b/docs/tools/requirements.txt
@ -10,7 +10,7 @@ cssmin==0.2.0
 future==0.18.2
 htmlmin==0.1.12
 idna==2.10
-Jinja2>=3.0.3
+Jinja2==3.0.3
 jinja2-highlight==0.6.1
 jsmin==3.0.0
 livereload==2.6.3
--- a/docs/zh/engines/table-engines/integrations/hive.md
+++ b/docs/zh/engines/table-engines/integrations/hive.md
@ -140,7 +140,7 @@ CREATE TABLE test.test_orc
    `f_array_array_float` Array(Array(Float32)),
    `day` String
 )
-ENGINE = Hive('thrift://202.168.117.26:9083', 'test', 'test_orc')
+ENGINE = Hive('thrift://localhost:9083', 'test', 'test_orc')
 PARTITION BY day

 ```
--- a/docs/zh/operations/system-tables/functions.md
+++ b/docs/zh/operations/system-tables/functions.md
@ -15,7 +15,7 @@
 ```
 ┌─name─────────────────────┬─is_aggregate─┬─case_insensitive─┬─alias_to─┐
 │ sumburConsistentHash     │            0 │                0 │          │
-│ yandexConsistentHash     │            0 │                0 │          │
+│ kostikConsistentHash     │            0 │                0 │          │
 │ demangle                 │            0 │                0 │          │
 │ addressToLine            │            0 │                0 │          │
 │ JSONExtractRaw           │            0 │                0 │          │
--- a/packages/clickhouse-common-static-dbg.yaml
+++ b/packages/clickhouse-common-static-dbg.yaml
@ -21,8 +21,12 @@ description: |
    This package contains the debugging symbols for clickhouse-common.

 contents:
- src: root/usr/lib/debug
-  dst: /usr/lib/debug
+- src: root/usr/lib/debug/usr/bin/clickhouse.debug
+  dst: /usr/lib/debug/usr/bin/clickhouse.debug
+- src: root/usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug
+  dst: /usr/lib/debug/usr/bin/clickhouse-odbc-bridge.debug
+- src: root/usr/lib/debug/usr/bin/clickhouse-library-bridge.debug
+  dst: /usr/lib/debug/usr/bin/clickhouse-library-bridge.debug
 # docs
 - src: ../AUTHORS
  dst: /usr/share/doc/clickhouse-common-static-dbg/AUTHORS
--- a/programs/CMakeLists.txt
+++ b/programs/CMakeLists.txt
@ -473,18 +473,11 @@ else ()
    if (INSTALL_STRIPPED_BINARIES)
        clickhouse_strip_binary(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT} BINARY_PATH clickhouse)
    else()
+        clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/${STRIPPED_BINARIES_OUTPUT})
        install (TARGETS clickhouse RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
    endif()
 endif()

-if (NOT INSTALL_STRIPPED_BINARIES)
-    # Install dunny debug directory
-    # TODO: move logic to every place where clickhouse_strip_binary is used
-    add_custom_command(TARGET clickhouse POST_BUILD COMMAND echo > .empty )
-    install(FILES "${CMAKE_CURRENT_BINARY_DIR}/.empty" DESTINATION ${CMAKE_INSTALL_LIBDIR}/debug/.empty)
-endif()
-
-
 if (ENABLE_TESTS)
    set (CLICKHOUSE_UNIT_TESTS_TARGETS unit_tests_dbms)
    add_custom_target (clickhouse-tests ALL DEPENDS ${CLICKHOUSE_UNIT_TESTS_TARGETS})
--- a/programs/keeper/CMakeLists.txt
+++ b/programs/keeper/CMakeLists.txt
@ -137,5 +137,10 @@ if (BUILD_STANDALONE_KEEPER)
    add_dependencies(clickhouse-keeper clickhouse_keeper_configs)
    set_target_properties(clickhouse-keeper PROPERTIES RUNTIME_OUTPUT_DIRECTORY ../)

+    if (INSTALL_STRIPPED_BINARIES)
+        clickhouse_strip_binary(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-keeper)
+    else()
+        clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-keeper DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
        install(TARGETS clickhouse-keeper RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
+    endif()
 endif()
--- a/programs/library-bridge/CMakeLists.txt
+++ b/programs/library-bridge/CMakeLists.txt
@ -27,5 +27,6 @@ set_target_properties(clickhouse-library-bridge PROPERTIES RUNTIME_OUTPUT_DIRECT
 if (INSTALL_STRIPPED_BINARIES)
    clickhouse_strip_binary(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-library-bridge)
 else()
+    clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-library-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
    install(TARGETS clickhouse-library-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
 endif()
--- a/programs/odbc-bridge/CMakeLists.txt
+++ b/programs/odbc-bridge/CMakeLists.txt
@ -42,6 +42,7 @@ endif()
 if (INSTALL_STRIPPED_BINARIES)
    clickhouse_strip_binary(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT} BINARY_PATH ../clickhouse-odbc-bridge)
 else()
+    clickhouse_make_empty_debug_info_for_nfpm(TARGET clickhouse-odbc-bridge DESTINATION_DIR ${CMAKE_CURRENT_BINARY_DIR}/../${STRIPPED_BINARIES_OUTPUT})
    install(TARGETS clickhouse-odbc-bridge RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
 endif()

--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@ -45,6 +45,7 @@
 #include <Core/ServerUUID.h>
 #include <IO/HTTPCommon.h>
 #include <IO/ReadHelpers.h>
+#include <IO/IOThreadPool.h>
 #include <IO/UseSSL.h>
 #include <Interpreters/AsynchronousMetrics.h>
 #include <Interpreters/DDLWorker.h>
@ -554,6 +555,10 @@ if (ThreadFuzzer::instance().isEffective())
        config().getUInt("thread_pool_queue_size", 10000)
    );

+    IOThreadPool::initialize(
+        config().getUInt("max_io_thread_pool_size", 100),
+        config().getUInt("max_io_thread_pool_free_size", 0),
+        config().getUInt("io_thread_pool_queue_size", 10000));

    /// Initialize global local cache for remote filesystem.
    if (config().has("local_cache_for_remote_fs"))
--- a/src/Client/ConnectionPoolWithFailover.cpp
+++ b/src/Client/ConnectionPoolWithFailover.cpp
@ -29,15 +29,15 @@ ConnectionPoolWithFailover::ConnectionPoolWithFailover(
        time_t decrease_error_period_,
        size_t max_error_cap_)
    : Base(std::move(nested_pools_), decrease_error_period_, max_error_cap_, &Poco::Logger::get("ConnectionPoolWithFailover"))
-    , default_load_balancing(load_balancing)
+    , get_priority_load_balancing(load_balancing)
 {
    const std::string & local_hostname = getFQDNOrHostName();

-    hostname_differences.resize(nested_pools.size());
+    get_priority_load_balancing.hostname_differences.resize(nested_pools.size());
    for (size_t i = 0; i < nested_pools.size(); ++i)
    {
        ConnectionPool & connection_pool = dynamic_cast<ConnectionPool &>(*nested_pools[i]);
-        hostname_differences[i] = getHostNameDifference(local_hostname, connection_pool.getHost());
+        get_priority_load_balancing.hostname_differences[i] = getHostNameDifference(local_hostname, connection_pool.getHost());
    }
 }

@ -51,36 +51,15 @@ IConnectionPool::Entry ConnectionPoolWithFailover::get(const ConnectionTimeouts
    };

    size_t offset = 0;
+    LoadBalancing load_balancing = get_priority_load_balancing.load_balancing;
    if (settings)
-        offset = settings->load_balancing_first_offset % nested_pools.size();
-    GetPriorityFunc get_priority;
-    switch (settings ? LoadBalancing(settings->load_balancing) : default_load_balancing)
    {
-    case LoadBalancing::NEAREST_HOSTNAME:
-        get_priority = [&](size_t i) { return hostname_differences[i]; };
-        break;
-    case LoadBalancing::IN_ORDER:
-        get_priority = [](size_t i) { return i; };
-        break;
-    case LoadBalancing::RANDOM:
-        break;
-    case LoadBalancing::FIRST_OR_RANDOM:
-        get_priority = [offset](size_t i) -> size_t { return i != offset; };
-        break;
-    case LoadBalancing::ROUND_ROBIN:
-        if (last_used >= nested_pools.size())
-            last_used = 0;
-        ++last_used;
-        /* Consider nested_pools.size() equals to 5
-         * last_used = 1 -> get_priority: 0 1 2 3 4
-         * last_used = 2 -> get_priority: 4 0 1 2 3
-         * last_used = 3 -> get_priority: 4 3 0 1 2
-         * ...
-         * */
-        get_priority = [&](size_t i) { ++i; return i < last_used ? nested_pools.size() - i : i - last_used; };
-        break;
+        offset = settings->load_balancing_first_offset % nested_pools.size();
+        load_balancing = LoadBalancing(settings->load_balancing);
    }

+    GetPriorityFunc get_priority = get_priority_load_balancing.getPriorityFunc(load_balancing, offset, nested_pools.size());
+
    UInt64 max_ignored_errors = settings ? settings->distributed_replica_max_ignored_errors.value : 0;
    bool fallback_to_stale_replicas = settings ? settings->fallback_to_stale_replicas_for_distributed_queries.value : true;

@ -173,38 +152,14 @@ std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::g
 ConnectionPoolWithFailover::Base::GetPriorityFunc ConnectionPoolWithFailover::makeGetPriorityFunc(const Settings * settings)
 {
    size_t offset = 0;
+    LoadBalancing load_balancing = get_priority_load_balancing.load_balancing;
    if (settings)
-        offset = settings->load_balancing_first_offset % nested_pools.size();
-
-    GetPriorityFunc get_priority;
-    switch (settings ? LoadBalancing(settings->load_balancing) : default_load_balancing)
    {
-        case LoadBalancing::NEAREST_HOSTNAME:
-            get_priority = [&](size_t i) { return hostname_differences[i]; };
-            break;
-        case LoadBalancing::IN_ORDER:
-            get_priority = [](size_t i) { return i; };
-            break;
-        case LoadBalancing::RANDOM:
-            break;
-        case LoadBalancing::FIRST_OR_RANDOM:
-            get_priority = [offset](size_t i) -> size_t { return i != offset; };
-            break;
-        case LoadBalancing::ROUND_ROBIN:
-            if (last_used >= nested_pools.size())
-                last_used = 0;
-            ++last_used;
-            /* Consider nested_pools.size() equals to 5
-             * last_used = 1 -> get_priority: 0 1 2 3 4
-             * last_used = 2 -> get_priority: 5 0 1 2 3
-             * last_used = 3 -> get_priority: 5 4 0 1 2
-             * ...
-             * */
-            get_priority = [&](size_t i) { ++i; return i < last_used ? nested_pools.size() - i : i - last_used; };
-            break;
+        offset = settings->load_balancing_first_offset % nested_pools.size();
+        load_balancing = LoadBalancing(settings->load_balancing);
    }

-    return get_priority;
+    return get_priority_load_balancing.getPriorityFunc(load_balancing, offset, nested_pools.size());
 }

 std::vector<ConnectionPoolWithFailover::TryResult> ConnectionPoolWithFailover::getManyImpl(
--- a/src/Client/ConnectionPoolWithFailover.h
+++ b/src/Client/ConnectionPoolWithFailover.h
@ -1,6 +1,7 @@
 #pragma once

 #include <Common/PoolWithFailoverBase.h>
+#include <Common/GetPriorityForLoadBalancing.h>
 #include <Client/ConnectionPool.h>

 #include <chrono>
@ -109,9 +110,7 @@ private:

    GetPriorityFunc makeGetPriorityFunc(const Settings * settings);

-    std::vector<size_t> hostname_differences; /// Distances from name of this host to the names of hosts of pools.
-    size_t last_used = 0; /// Last used for round_robin policy.
-    LoadBalancing default_load_balancing;
+    GetPriorityForLoadBalancing get_priority_load_balancing;
 };

 using ConnectionPoolWithFailoverPtr = std::shared_ptr<ConnectionPoolWithFailover>;
--- a/src/Columns/MaskOperations.cpp
+++ b/src/Columns/MaskOperations.cpp
@ -83,11 +83,20 @@ size_t extractMaskNumericImpl(
    const PaddedPODArray<UInt8> * null_bytemap,
    PaddedPODArray<UInt8> * nulls)
 {
+    if constexpr (!column_is_short)
+    {
+        if (data.size() != mask.size())
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a full data column is not equal to the size of a mask");
+    }
+
    size_t ones_count = 0;
    size_t data_index = 0;
-    size_t mask_size = mask.size();

-    for (size_t i = 0; i != mask_size; ++i)
+    size_t mask_size = mask.size();
+    size_t data_size = data.size();
+
+    size_t i = 0;
+    for (; i != mask_size && data_index != data_size; ++i)
    {
        // Change mask only where value is 1.
        if (!mask[i])
@ -120,6 +129,13 @@ size_t extractMaskNumericImpl(

        mask[i] = value;
    }
+
+    if constexpr (column_is_short)
+    {
+        if (data_index != data_size)
+            throw Exception(ErrorCodes::LOGICAL_ERROR, "The size of a short column is not equal to the number of ones in a mask");
+    }
+
    return ones_count;
 }

--- a/src/Common/ArenaWithFreeLists.h
+++ b/src/Common/ArenaWithFreeLists.h
@ -113,5 +113,35 @@ public:
    }
 };

+/// Wrapper over ArenaWithFreeLists that holds an internal mutex for the duration of every call.
+class SynchronizedArenaWithFreeLists : private ArenaWithFreeLists
+{
+public:
+    explicit SynchronizedArenaWithFreeLists(
+        const size_t initial_size = 4096, const size_t growth_factor = 2, const size_t linear_growth_threshold = 128 * 1024 * 1024)
+        : ArenaWithFreeLists{initial_size, growth_factor, linear_growth_threshold}
+    {}
+
+    char * alloc(const size_t size)
+    {
+        std::lock_guard guard(mutex);
+        return ArenaWithFreeLists::alloc(size);
+    }
+
+    void free(char * ptr, const size_t size)
+    {
+        std::lock_guard guard(mutex);
+        ArenaWithFreeLists::free(ptr, size);
+    }
+
+    /// Size of the allocated pool in bytes
+    size_t size() const
+    {
+        std::lock_guard guard(mutex);
+        return ArenaWithFreeLists::size();
+    }
+private:
+    mutable std::mutex mutex; /// mutable: the const size() locks it as well
+};

 }
--- a/src/Common/FiberStack.h
+++ b/src/Common/FiberStack.h
@ -31,8 +31,8 @@ public:
    /// probably it worth to try to increase stack size for coroutines.
    ///
    /// Current value is just enough for all tests in our CI. It's not selected in some special
-    /// way. We will have 40 pages with 4KB page size.
-    static constexpr size_t default_stack_size = 192 * 1024; /// 64KB was not enough for tests
+    /// way. We will have 80 pages with 4KB page size.
+    static constexpr size_t default_stack_size = 320 * 1024; /// 64KB was not enough for tests

    explicit FiberStack(size_t stack_size_ = default_stack_size) : stack_size(stack_size_)
    {
--- a/src/Common/GetPriorityForLoadBalancing.cpp
+++ b/src/Common/GetPriorityForLoadBalancing.cpp
@ -0,0 +1,49 @@
+#include <Common/GetPriorityForLoadBalancing.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
+/// Builds the function that maps a pool index to its priority for `load_balance`; it stays empty for RANDOM.
+std::function<size_t(size_t index)> GetPriorityForLoadBalancing::getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const
+{
+    std::function<size_t(size_t index)> get_priority;
+    switch (load_balance)
+    {
+        case LoadBalancing::NEAREST_HOSTNAME:
+            if (hostname_differences.empty())
+                throw Exception(ErrorCodes::LOGICAL_ERROR, "It's a bug: hostname_differences is not initialized");
+            get_priority = [this](size_t i) { return hostname_differences[i]; };
+            break;
+        case LoadBalancing::IN_ORDER:
+            get_priority = [](size_t i) { return i; };
+            break;
+        case LoadBalancing::RANDOM:
+            break;
+        case LoadBalancing::FIRST_OR_RANDOM:
+            get_priority = [offset](size_t i) -> size_t { return i != offset; };
+            break;
+        case LoadBalancing::ROUND_ROBIN:
+            if (last_used >= pool_size)
+                last_used = 0;
+            ++last_used;
+            /* Consider pool_size equals to 5
+             * last_used = 1 -> get_priority: 0 1 2 3 4
+             * last_used = 2 -> get_priority: 4 0 1 2 3
+             * last_used = 3 -> get_priority: 4 3 0 1 2
+             * ... */
+            get_priority = [this, pool_size](size_t i) /// `pool_size` is copied: the functor outlives this call.
+            {
+                ++i;
+                return i < last_used ? pool_size - i : i - last_used;
+            };
+            break;
+    }
+    return get_priority;
+}
+
+}
--- a/src/Common/GetPriorityForLoadBalancing.h
+++ b/src/Common/GetPriorityForLoadBalancing.h
@ -0,0 +1,34 @@
+#pragma once
+
+#include <Core/SettingsEnums.h>
+
+namespace DB
+{
+
+/// Holds the load balancing mode plus the state needed to build per-index priority functions.
+class GetPriorityForLoadBalancing
+{
+public:
+    GetPriorityForLoadBalancing() = default;
+    GetPriorityForLoadBalancing(LoadBalancing load_balancing_) : load_balancing(load_balancing_) {}
+
+    /// Equality looks at the mode and the hostname distances only; `last_used` is not compared.
+    bool operator == (const GetPriorityForLoadBalancing & other) const
+    {
+        return hostname_differences == other.hostname_differences && load_balancing == other.load_balancing;
+    }
+
+    bool operator != (const GetPriorityForLoadBalancing & other) const { return !operator==(other); }
+
+    /// Returns a function mapping a pool index to its priority for the given mode.
+    std::function<size_t(size_t index)> getPriorityFunc(LoadBalancing load_balance, size_t offset, size_t pool_size) const;
+
+    std::vector<size_t> hostname_differences; /// Distances from name of this host to the names of hosts of pools.
+    /// Default mode; callers pass the effective mode to getPriorityFunc explicitly.
+    LoadBalancing load_balancing = LoadBalancing::RANDOM;
+
+private:
+    mutable size_t last_used = 0; /// Last used for round_robin policy.
+};
+
+}
--- a/src/Common/IntervalKind.cpp
+++ b/src/Common/IntervalKind.cpp
@ -13,6 +13,9 @@ Int32 IntervalKind::toAvgSeconds() const
 {
    switch (kind)
    {
+        case IntervalKind::Nanosecond: return 0;    /// fractional parts of seconds have 0 seconds
+        case IntervalKind::Microsecond: return 0;
+        case IntervalKind::Millisecond: return 0;
        case IntervalKind::Second: return 1;
        case IntervalKind::Minute: return 60;
        case IntervalKind::Hour: return 3600;
@ -52,6 +55,9 @@ const char * IntervalKind::toKeyword() const
 {
    switch (kind)
    {
+        case IntervalKind::Nanosecond: return "NANOSECOND";
+        case IntervalKind::Microsecond: return "MICROSECOND";
+        case IntervalKind::Millisecond: return "MILLISECOND";
        case IntervalKind::Second: return "SECOND";
        case IntervalKind::Minute: return "MINUTE";
        case IntervalKind::Hour: return "HOUR";
@ -69,6 +75,9 @@ const char * IntervalKind::toLowercasedKeyword() const
 {
    switch (kind)
    {
+        case IntervalKind::Nanosecond: return "nanosecond";
+        case IntervalKind::Microsecond: return "microsecond";
+        case IntervalKind::Millisecond: return "millisecond";
        case IntervalKind::Second: return "second";
        case IntervalKind::Minute: return "minute";
        case IntervalKind::Hour: return "hour";
@ -86,6 +95,12 @@ const char * IntervalKind::toDateDiffUnit() const
 {
    switch (kind)
    {
+        case IntervalKind::Nanosecond:
+            return "nanosecond";
+        case IntervalKind::Microsecond:
+            return "microsecond";
+        case IntervalKind::Millisecond:
+            return "millisecond";
        case IntervalKind::Second:
            return "second";
        case IntervalKind::Minute:
@ -111,6 +126,12 @@ const char * IntervalKind::toNameOfFunctionToIntervalDataType() const
 {
    switch (kind)
    {
+        case IntervalKind::Nanosecond:
+            return "toIntervalNanosecond";
+        case IntervalKind::Microsecond:
+            return "toIntervalMicrosecond";
+        case IntervalKind::Millisecond:
+            return "toIntervalMillisecond";
        case IntervalKind::Second:
            return "toIntervalSecond";
        case IntervalKind::Minute:
@ -136,6 +157,12 @@ const char * IntervalKind::toNameOfFunctionExtractTimePart() const
 {
    switch (kind)
    {
+        case IntervalKind::Nanosecond:
+            return "toNanosecond";
+        case IntervalKind::Microsecond:
+            return "toMicrosecond";
+        case IntervalKind::Millisecond:
+            return "toMillisecond";
        case IntervalKind::Second:
            return "toSecond";
        case IntervalKind::Minute:
@ -162,6 +189,21 @@ const char * IntervalKind::toNameOfFunctionExtractTimePart() const

 bool IntervalKind::tryParseString(const std::string & kind, IntervalKind::Kind & result)
 {
+    if ("nanosecond" == kind)
+    {
+        result = IntervalKind::Nanosecond;
+        return true;
+    }
+    if ("microsecond" == kind)
+    {
+        result = IntervalKind::Microsecond;
+        return true;
+    }
+    if ("millisecond" == kind)
+    {
+        result = IntervalKind::Millisecond;
+        return true;
+    }
    if ("second" == kind)
    {
        result = IntervalKind::Second;
--- a/src/Common/IntervalKind.h
+++ b/src/Common/IntervalKind.h
@ -10,6 +10,9 @@ struct IntervalKind
 {
    enum Kind
    {
+        Nanosecond,
+        Microsecond,
+        Millisecond,
        Second,
        Minute,
        Hour,
@ -61,6 +64,9 @@ struct IntervalKind

 /// NOLINTNEXTLINE
 #define FOR_EACH_INTERVAL_KIND(M) \
+    M(Nanosecond) \
+    M(Microsecond) \
+    M(Millisecond) \
    M(Second) \
    M(Minute) \
    M(Hour) \
--- a/src/Common/OvercommitTracker.cpp
+++ b/src/Common/OvercommitTracker.cpp
@ -23,6 +23,12 @@ void OvercommitTracker::setMaxWaitTime(UInt64 wait_time)

 bool OvercommitTracker::needToStopQuery(MemoryTracker * tracker)
 {
+    // NOTE: Do not change the order of locks
+    //
+    // global_mutex must be acquired before overcommit_m, because
+    // method OvercommitTracker::unsubscribe(MemoryTracker *) is
+    // always called with already acquired global_mutex in
+    // ProcessListEntry::~ProcessListEntry().
    std::unique_lock<std::mutex> global_lock(global_mutex);
    std::unique_lock<std::mutex> lk(overcommit_m);

@ -76,7 +82,7 @@ void UserOvercommitTracker::pickQueryToExcludeImpl()
    MemoryTracker * query_tracker = nullptr;
    OvercommitRatio current_ratio{0, 0};
    // At this moment query list must be read only.
-    // BlockQueryIfMemoryLimit is used in ProcessList to guarantee this.
+    // This is guaranteed by locking global_mutex in OvercommitTracker::needToStopQuery.
    auto & queries = user_process_list->queries;
    LOG_DEBUG(logger, "Trying to choose query to stop from {} queries", queries.size());
    for (auto const & query : queries)
@ -111,9 +117,9 @@ void GlobalOvercommitTracker::pickQueryToExcludeImpl()
    MemoryTracker * query_tracker = nullptr;
    OvercommitRatio current_ratio{0, 0};
    // At this moment query list must be read only.
-    // BlockQueryIfMemoryLimit is used in ProcessList to guarantee this.
-    LOG_DEBUG(logger, "Trying to choose query to stop");
-    process_list->processEachQueryStatus([&](DB::QueryStatus const & query)
+    // This is guaranteed by locking global_mutex in OvercommitTracker::needToStopQuery.
+    LOG_DEBUG(logger, "Trying to choose query to stop from {} queries", process_list->size());
+    for (auto const & query : process_list->processes)
    {
        if (query.isKilled())
            return;
@ -134,7 +140,7 @@ void GlobalOvercommitTracker::pickQueryToExcludeImpl()
            query_tracker = memory_tracker;
            current_ratio   = ratio;
        }
-    });
+    }
    LOG_DEBUG(logger, "Selected to stop query with overcommit ratio {}/{}",
        current_ratio.committed, current_ratio.soft_limit);
    picked_tracker = query_tracker;
--- a/src/Common/OvercommitTracker.h
+++ b/src/Common/OvercommitTracker.h
@ -43,8 +43,6 @@ class MemoryTracker;
 // is killed to free memory.
 struct OvercommitTracker : boost::noncopyable
 {
-    explicit OvercommitTracker(std::mutex & global_mutex_);
-
    void setMaxWaitTime(UInt64 wait_time);

    bool needToStopQuery(MemoryTracker * tracker);
@ -54,8 +52,12 @@ struct OvercommitTracker : boost::noncopyable
    virtual ~OvercommitTracker() = default;

 protected:
+    explicit OvercommitTracker(std::mutex & global_mutex_);
+
    virtual void pickQueryToExcludeImpl() = 0;

+    // This mutex is used to disallow concurrent access
+    // to picked_tracker and cancelation_state variables.
    mutable std::mutex overcommit_m;
    mutable std::condition_variable cv;

@ -87,6 +89,11 @@ private:
        }
    }

+    // Global mutex which is used in ProcessList to synchronize
+    // insertion and deletion of queries.
+    // OvercommitTracker::pickQueryToExcludeImpl() implementations
+    // require this mutex to be locked, because they read list (or sublist)
+    // of queries.
    std::mutex & global_mutex;
 };

--- a/src/Common/RadixSort.h
+++ b/src/Common/RadixSort.h
@ -515,6 +515,11 @@ public:
        radixSortLSDInternal<false>(arr, size, false, nullptr);
    }

+    static void executeLSD(Element * arr, size_t size, bool reverse)
+    {
+        radixSortLSDInternal<false>(arr, size, reverse, nullptr); /// Like the overload above, but `reverse` comes from the caller.
+    }
+
    /** This function will start to sort inplace (modify 'arr')
      *  but on the last step it will write result directly to the destination
      *  instead of finishing sorting 'arr'.
--- a/src/Common/ZooKeeper/CMakeLists.txt
+++ b/src/Common/ZooKeeper/CMakeLists.txt
@ -22,7 +22,6 @@ target_link_libraries (clickhouse_common_zookeeper_no_log
    PRIVATE
        string_utils
 )
-
 if (ENABLE_EXAMPLES)
    add_subdirectory(examples)
 endif()
--- a/src/Common/ZooKeeper/ZooKeeper.cpp
+++ b/src/Common/ZooKeeper/ZooKeeper.cpp
@ -5,15 +5,15 @@

 #include <functional>
 #include <filesystem>
-#include <pcg-random/pcg_random.hpp>

-#include <base/logger_useful.h>
 #include <base/find_symbols.h>
-#include <Common/randomSeed.h>
+#include <base/getFQDNOrHostName.h>
 #include <Common/StringUtils/StringUtils.h>
 #include <Common/Exception.h>
+#include <Common/isLocalAddress.h>

 #include <Poco/Net/NetException.h>
+#include <Poco/Net/DNS.h>


 #define ZOOKEEPER_CONNECTION_TIMEOUT_MS 1000
@ -48,7 +48,7 @@ static void check(Coordination::Error code, const std::string & path)


 void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_,
-                     int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_)
+                     int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const GetPriorityForLoadBalancing & get_priority_load_balancing_)
 {
    log = &Poco::Logger::get("ZooKeeper");
    hosts = hosts_;
@ -57,6 +57,7 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
    operation_timeout_ms = operation_timeout_ms_;
    chroot = chroot_;
    implementation = implementation_;
+    get_priority_load_balancing = get_priority_load_balancing_;

    if (implementation == "zookeeper")
    {
@ -66,14 +67,13 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
        Coordination::ZooKeeper::Nodes nodes;
        nodes.reserve(hosts.size());

-        Strings shuffled_hosts = hosts;
        /// Shuffle the hosts to distribute the load among ZooKeeper nodes.
-        pcg64 generator(randomSeed());
-        std::shuffle(shuffled_hosts.begin(), shuffled_hosts.end(), generator);
+        std::vector<ShuffleHost> shuffled_hosts = shuffleHosts();

        bool dns_error = false;
-        for (auto & host_string : shuffled_hosts)
+        for (auto & host : shuffled_hosts)
        {
+            auto & host_string = host.host;
            try
            {
                bool secure = bool(startsWith(host_string, "secure://"));
@ -81,6 +81,7 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
                if (secure)
                    host_string.erase(0, strlen("secure://"));

+                LOG_TEST(log, "Adding ZooKeeper host {} ({})", host_string, Poco::Net::SocketAddress{host_string}.toString());
                nodes.emplace_back(Coordination::ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, secure});
            }
            catch (const Poco::Net::HostNotFoundException & e)
@ -154,23 +155,47 @@ void ZooKeeper::init(const std::string & implementation_, const Strings & hosts_
    }
 }

+/// Returns the configured hosts ordered by ShuffleHost::compare, using the load balancing priority when one exists.
+std::vector<ShuffleHost> ZooKeeper::shuffleHosts() const
+{
+    const size_t hosts_count = hosts.size();
+    /// The functor is empty for modes without a priority function; `priority` then keeps its default value.
+    const auto priority_of = get_priority_load_balancing.getPriorityFunc(get_priority_load_balancing.load_balancing, 0, hosts_count);
+
+    std::vector<ShuffleHost> ordered_hosts;
+    ordered_hosts.reserve(hosts_count);
+    for (size_t idx = 0; idx != hosts_count; ++idx)
+    {
+        /// Fill the entry in place instead of copying a temporary into the vector.
+        ShuffleHost & entry = ordered_hosts.emplace_back();
+        entry.host = hosts[idx];
+        if (priority_of)
+            entry.priority = priority_of(idx);
+        entry.randomize();
+    }
+
+    std::sort(ordered_hosts.begin(), ordered_hosts.end(), ShuffleHost::compare);
+
+    return ordered_hosts;
+}
+
 ZooKeeper::ZooKeeper(const std::string & hosts_string, const std::string & identity_, int32_t session_timeout_ms_,
                     int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_,
-                     std::shared_ptr<DB::ZooKeeperLog> zk_log_)
+                     std::shared_ptr<DB::ZooKeeperLog> zk_log_, const GetPriorityForLoadBalancing & get_priority_load_balancing_)
 {
    zk_log = std::move(zk_log_);
    Strings hosts_strings;
    splitInto<','>(hosts_strings, hosts_string);

-    init(implementation_, hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_);
+    init(implementation_, hosts_strings, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, get_priority_load_balancing_);
 }

 ZooKeeper::ZooKeeper(const Strings & hosts_, const std::string & identity_, int32_t session_timeout_ms_,
                     int32_t operation_timeout_ms_, const std::string & chroot_, const std::string & implementation_,
-                     std::shared_ptr<DB::ZooKeeperLog> zk_log_)
+                     std::shared_ptr<DB::ZooKeeperLog> zk_log_, const GetPriorityForLoadBalancing & get_priority_load_balancing_)
 {
    zk_log = std::move(zk_log_);
-    init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_);
+    init(implementation_, hosts_, identity_, session_timeout_ms_, operation_timeout_ms_, chroot_, get_priority_load_balancing_);
 }

 struct ZooKeeperArgs
@ -213,6 +238,15 @@ struct ZooKeeperArgs
            {
                implementation = config.getString(config_name + "." + key);
            }
+            else if (key == "zookeeper_load_balancing")
+            {
+                String load_balancing_str = config.getString(config_name + "." + key);
+                /// Use magic_enum to avoid dependency from dbms (`SettingFieldLoadBalancingTraits::fromString(...)`)
+                auto load_balancing = magic_enum::enum_cast<DB::LoadBalancing>(Poco::toUpper(load_balancing_str));
+                if (!load_balancing)
+                    throw DB::Exception(DB::ErrorCodes::BAD_ARGUMENTS, "Unknown load balancing: {}", load_balancing_str);
+                get_priority_load_balancing.load_balancing = *load_balancing;
+            }
            else
                throw KeeperException(std::string("Unknown key ") + key + " in config file", Coordination::Error::ZBADARGUMENTS);
        }
@ -224,6 +258,15 @@ struct ZooKeeperArgs
            if (chroot.back() == '/')
                chroot.pop_back();
        }
+
+        /// init get_priority_load_balancing
+        get_priority_load_balancing.hostname_differences.resize(hosts.size());
+        const String & local_hostname = getFQDNOrHostName();
+        for (size_t i = 0; i < hosts.size(); ++i)
+        {
+            const String & node_host = hosts[i].substr(0, hosts[i].find_last_of(':'));
+            get_priority_load_balancing.hostname_differences[i] = DB::getHostNameDifference(local_hostname, node_host);
+        }
    }

    Strings hosts;
@ -232,13 +275,14 @@ struct ZooKeeperArgs
    int operation_timeout_ms;
    std::string chroot;
    std::string implementation;
+    GetPriorityForLoadBalancing get_priority_load_balancing;
 };

 ZooKeeper::ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr<DB::ZooKeeperLog> zk_log_)
    : zk_log(std::move(zk_log_))
 {
    ZooKeeperArgs args(config, config_name);
-    init(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot);
+    init(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot, args.get_priority_load_balancing);
 }

 bool ZooKeeper::configChanged(const Poco::Util::AbstractConfiguration & config, const std::string & config_name) const
@ -249,8 +293,11 @@ bool ZooKeeper::configChanged(const Poco::Util::AbstractConfiguration & config,
    if (args.implementation == implementation && implementation == "testkeeper")
        return false;

-    return std::tie(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot)
-        != std::tie(implementation, hosts, identity, session_timeout_ms, operation_timeout_ms, chroot);
+    /// Load-balancing settings are compared on their own; the remaining connection parameters are compared as a tuple.
+    /// (They must not be listed in the tuples as `args.get_priority_load_balancing` on both sides: that compares the object with itself.)
+    if (args.get_priority_load_balancing != get_priority_load_balancing)
+        return true;
+
+    return std::tie(args.implementation, args.hosts, args.identity, args.session_timeout_ms, args.operation_timeout_ms, args.chroot)
+        != std::tie(implementation, hosts, identity, session_timeout_ms, operation_timeout_ms, chroot);
 }


@ -757,7 +804,7 @@ bool ZooKeeper::waitForDisappear(const std::string & path, const WaitCondition &

 ZooKeeperPtr ZooKeeper::startNewSession() const
 {
-    return std::make_shared<ZooKeeper>(hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, implementation, zk_log);
+    return std::make_shared<ZooKeeper>(hosts, identity, session_timeout_ms, operation_timeout_ms, chroot, implementation, zk_log, get_priority_load_balancing);
 }


--- a/src/Common/ZooKeeper/ZooKeeper.h
+++ b/src/Common/ZooKeeper/ZooKeeper.h
@ -13,7 +13,10 @@
 #include <Common/Stopwatch.h>
 #include <Common/ZooKeeper/IKeeper.h>
 #include <Common/ZooKeeper/ZooKeeperConstants.h>
+#include <Common/GetPriorityForLoadBalancing.h>
+#include <Common/thread_local_rng.h>
 #include <unistd.h>
+#include <random>


 namespace ProfileEvents
@ -37,6 +40,25 @@ namespace zkutil
 /// Preferred size of multi() command (in number of ops)
 constexpr size_t MULTI_BATCH_SIZE = 100;

+/// A host together with the keys used to order it among other hosts.
+struct ShuffleHost
+{
+    String host;
+    /// Primary ordering key: `compare` puts smaller values first.
+    Int64 priority = 0;
+    /// Secondary ordering key, consulted only when priorities are equal.
+    UInt32 random = 0;
+
+    /// Draws a fresh value for the secondary key from the thread-local generator.
+    void randomize() { random = thread_local_rng(); }
+
+    /// Strict "less than" over (priority, random), compared lexicographically.
+    static bool compare(const ShuffleHost & lhs, const ShuffleHost & rhs)
+    {
+        if (lhs.priority != rhs.priority)
+            return lhs.priority < rhs.priority;
+
+        return lhs.random < rhs.random;
+    }
+};
+
+using GetPriorityForLoadBalancing = DB::GetPriorityForLoadBalancing;

 /// ZooKeeper session. The interface is substantially different from the usual libzookeeper API.
 ///
@ -58,14 +80,16 @@ public:
              int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS,
              const std::string & chroot_ = "",
              const std::string & implementation_ = "zookeeper",
-              std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr);
+              std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr,
+              const GetPriorityForLoadBalancing & get_priority_load_balancing_ = {});

    explicit ZooKeeper(const Strings & hosts_, const std::string & identity_ = "",
              int32_t session_timeout_ms_ = Coordination::DEFAULT_SESSION_TIMEOUT_MS,
              int32_t operation_timeout_ms_ = Coordination::DEFAULT_OPERATION_TIMEOUT_MS,
              const std::string & chroot_ = "",
              const std::string & implementation_ = "zookeeper",
-              std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr);
+              std::shared_ptr<DB::ZooKeeperLog> zk_log_ = nullptr,
+              const GetPriorityForLoadBalancing & get_priority_load_balancing_ = {});

    /** Config of the form:
        <zookeeper>
@ -91,6 +115,8 @@ public:
    */
    ZooKeeper(const Poco::Util::AbstractConfiguration & config, const std::string & config_name, std::shared_ptr<DB::ZooKeeperLog> zk_log_);

+    std::vector<ShuffleHost> shuffleHosts() const;
+
    /// Creates a new session with the same parameters. This method can be used for reconnecting
    /// after the session has expired.
    /// This object remains unchanged, and the new session is returned.
@ -284,7 +310,7 @@ private:
    friend class EphemeralNodeHolder;

    void init(const std::string & implementation_, const Strings & hosts_, const std::string & identity_,
-              int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_);
+              int32_t session_timeout_ms_, int32_t operation_timeout_ms_, const std::string & chroot_, const GetPriorityForLoadBalancing & get_priority_load_balancing_);

    /// The following methods don't any throw exceptions but return error codes.
    Coordination::Error createImpl(const std::string & path, const std::string & data, int32_t mode, std::string & path_created);
@ -311,6 +337,8 @@ private:
    Poco::Logger * log = nullptr;
    std::shared_ptr<DB::ZooKeeperLog> zk_log;

+    GetPriorityForLoadBalancing get_priority_load_balancing;
+
    AtomicStopwatch session_uptime;
 };

--- a/src/Common/ZooKeeper/ZooKeeperImpl.cpp
+++ b/src/Common/ZooKeeper/ZooKeeperImpl.cpp
@ -451,7 +451,7 @@ void ZooKeeper::connect(
    }
    else
    {
-        LOG_TEST(log, "Connected to ZooKeeper at {} with session_id {}", socket.peerAddress().toString(), session_id);
+        LOG_TEST(log, "Connected to ZooKeeper at {} with session_id {}{}", socket.peerAddress().toString(), session_id, fail_reasons.str());
    }
 }

--- a/src/Common/formatIPv6.h
+++ b/src/Common/formatIPv6.h
@ -11,7 +11,7 @@
 constexpr size_t IPV4_BINARY_LENGTH = 4;
 constexpr size_t IPV6_BINARY_LENGTH = 16;
 constexpr size_t IPV4_MAX_TEXT_LENGTH = 15;     /// Does not count tail zero byte.
-constexpr size_t IPV6_MAX_TEXT_LENGTH = 39;
+constexpr size_t IPV6_MAX_TEXT_LENGTH = 45;     /// Does not count tail zero byte.

 namespace DB
 {
--- a/src/Common/isLocalAddress.cpp
+++ b/src/Common/isLocalAddress.cpp
@ -124,6 +124,7 @@ bool isLocalAddress(const Poco::Net::SocketAddress & address, UInt16 clickhouse_

 size_t getHostNameDifference(const std::string & local_hostname, const std::string & host)
 {
+    /// FIXME should we replace it with Levenshtein distance? (we already have it in NamePrompter)
    size_t hostname_difference = 0;
    for (size_t i = 0; i < std::min(local_hostname.length(), host.length()); ++i)
        if (local_hostname[i] != host[i])
--- a/src/Core/Block.cpp
+++ b/src/Core/Block.cpp
@ -13,6 +13,7 @@

 #include <iterator>
 #include <base/sort.h>
+#include <boost/algorithm/string.hpp>


 namespace DB
@ -269,8 +270,18 @@ const ColumnWithTypeAndName & Block::safeGetByPosition(size_t position) const
 }


-const ColumnWithTypeAndName * Block::findByName(const std::string & name) const
+const ColumnWithTypeAndName * Block::findByName(const std::string & name, bool case_insensitive) const
 {
+    if (case_insensitive)
+    {
+        auto found = std::find_if(data.begin(), data.end(), [&](const auto & column) { return boost::iequals(column.name, name); });
+        if (found == data.end())
+        {
+            return nullptr;
+        }
+        return &*found;
+    }
+
    auto it = index_by_name.find(name);
    if (index_by_name.end() == it)
    {
@ -280,19 +291,23 @@ const ColumnWithTypeAndName * Block::findByName(const std::string & name) const
 }


-const ColumnWithTypeAndName & Block::getByName(const std::string & name) const
+const ColumnWithTypeAndName & Block::getByName(const std::string & name, bool case_insensitive) const
 {
-    const auto * result = findByName(name);
+    const auto * result = findByName(name, case_insensitive);
    if (!result)
-        throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
-            , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
+        throw Exception(
+            "Not found column " + name + " in block. There are only columns: " + dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);

    return *result;
 }


-bool Block::has(const std::string & name) const
+bool Block::has(const std::string & name, bool case_insensitive) const
 {
+    /// Case-insensitive lookup cannot use `index_by_name`, so it scans the columns one by one.
+    if (case_insensitive)
+    {
+        const auto same_name = [&name](const auto & column) { return boost::iequals(column.name, name); };
+        return std::any_of(data.begin(), data.end(), same_name);
+    }
+
    return index_by_name.end() != index_by_name.find(name);
 }

@ -301,8 +316,8 @@ size_t Block::getPositionByName(const std::string & name) const
 {
    auto it = index_by_name.find(name);
    if (index_by_name.end() == it)
-        throw Exception("Not found column " + name + " in block. There are only columns: " + dumpNames()
-            , ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);
+        throw Exception(
+            "Not found column " + name + " in block. There are only columns: " + dumpNames(), ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK);

    return it->second;
 }
--- a/src/Core/Block.h
+++ b/src/Core/Block.h
@ -60,21 +60,21 @@ public:
    ColumnWithTypeAndName & safeGetByPosition(size_t position);
    const ColumnWithTypeAndName & safeGetByPosition(size_t position) const;

-    ColumnWithTypeAndName* findByName(const std::string & name)
+    ColumnWithTypeAndName* findByName(const std::string & name, bool case_insensitive = false)
    {
        return const_cast<ColumnWithTypeAndName *>(
-            const_cast<const Block *>(this)->findByName(name));
+            const_cast<const Block *>(this)->findByName(name, case_insensitive));
    }

-    const ColumnWithTypeAndName * findByName(const std::string & name) const;
+    const ColumnWithTypeAndName * findByName(const std::string & name, bool case_insensitive = false) const;

-    ColumnWithTypeAndName & getByName(const std::string & name)
+    ColumnWithTypeAndName & getByName(const std::string & name, bool case_insensitive = false)
    {
        return const_cast<ColumnWithTypeAndName &>(
-            const_cast<const Block *>(this)->getByName(name));
+            const_cast<const Block *>(this)->getByName(name, case_insensitive));
    }

-    const ColumnWithTypeAndName & getByName(const std::string & name) const;
+    const ColumnWithTypeAndName & getByName(const std::string & name, bool case_insensitive = false) const;

    Container::iterator begin() { return data.begin(); }
    Container::iterator end() { return data.end(); }
@ -83,7 +83,7 @@ public:
    Container::const_iterator cbegin() const { return data.cbegin(); }
    Container::const_iterator cend() const { return data.cend(); }

-    bool has(const std::string & name) const;
+    bool has(const std::string & name, bool case_insensitive = false) const;

    size_t getPositionByName(const std::string & name) const;

--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@ -47,6 +47,8 @@ class IColumn;
    M(UInt64, max_insert_delayed_streams_for_parallel_write, 0, "The maximum number of streams (columns) to delay final part flush. Default - auto (1000 in case of underlying storage supports parallel write, for example S3 and disabled otherwise)", 0) \
    M(UInt64, max_final_threads, 16, "The maximum number of threads to read from table with FINAL.", 0) \
    M(MaxThreads, max_threads, 0, "The maximum number of threads to execute the request. By default, it is determined automatically.", 0) \
+    M(MaxThreads, max_download_threads, 4, "The maximum number of threads to download data (e.g. for URL engine).", 0) \
+    M(UInt64, max_download_buffer_size, 10*1024*1024, "The maximal size of buffer for parallel downloading (e.g. for URL engine) per each thread.", 0) \
    M(UInt64, max_read_buffer_size, DBMS_DEFAULT_BUFFER_SIZE, "The maximum size of the buffer to read from the filesystem.", 0) \
    M(UInt64, max_distributed_connections, 1024, "The maximum number of connections for distributed processing of one query (should be greater than max_threads).", 0) \
    M(UInt64, max_query_size, DBMS_DEFAULT_MAX_QUERY_SIZE, "Which part of the query can be read into RAM for parsing (the remaining data for INSERT, if any, is read later)", 0) \
@ -614,11 +616,13 @@ class IColumn;
    M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \
    M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices \\N", 0) \
    M(Bool, input_format_null_as_default, true, "For text input formats initialize null fields with default values if data type of this field is not nullable", 0) \
-    M(Bool, input_format_use_lowercase_column_name, false, "Use lowercase column name while reading input formats", 0) \
    M(Bool, input_format_arrow_import_nested, false, "Allow to insert array of structs into Nested table in Arrow input format.", 0) \
+    M(Bool, input_format_arrow_case_insensitive_column_matching, false, "Ignore case when matching Arrow columns with CH columns.", 0) \
    M(Bool, input_format_orc_import_nested, false, "Allow to insert array of structs into Nested table in ORC input format.", 0) \
    M(Int64, input_format_orc_row_batch_size, 100'000, "Batch size when reading ORC stripes.", 0) \
+    M(Bool, input_format_orc_case_insensitive_column_matching, false, "Ignore case when matching ORC columns with CH columns.", 0) \
    M(Bool, input_format_parquet_import_nested, false, "Allow to insert array of structs into Nested table in Parquet input format.", 0) \
+    M(Bool, input_format_parquet_case_insensitive_column_matching, false, "Ignore case when matching Parquet columns with CH columns.", 0) \
    M(Bool, input_format_allow_seeks, true, "Allow seeks while reading in ORC/Parquet/Arrow input formats", 0) \
    M(Bool, input_format_orc_allow_missing_columns, false, "Allow missing columns while reading ORC input formats", 0) \
    M(Bool, input_format_parquet_allow_missing_columns, false, "Allow missing columns while reading Parquet input formats", 0) \
--- a/src/Core/SettingsEnums.cpp
+++ b/src/Core/SettingsEnums.cpp
@ -149,4 +149,5 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS,
                        {"str", FormatSettings::MsgPackUUIDRepresentation::STR},
                        {"ext", FormatSettings::MsgPackUUIDRepresentation::EXT}})

+
 }
--- a/src/DataTypes/DataTypeInterval.cpp
+++ b/src/DataTypes/DataTypeInterval.cpp
@ -13,6 +13,9 @@ bool DataTypeInterval::equals(const IDataType & rhs) const

 void registerDataTypeInterval(DataTypeFactory & factory)
 {
+    factory.registerSimpleDataType("IntervalNanosecond", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Nanosecond)); });
+    factory.registerSimpleDataType("IntervalMicrosecond", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Microsecond)); });
+    factory.registerSimpleDataType("IntervalMillisecond", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Millisecond)); });
    factory.registerSimpleDataType("IntervalSecond", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Second)); });
    factory.registerSimpleDataType("IntervalMinute", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Minute)); });
    factory.registerSimpleDataType("IntervalHour", [] { return DataTypePtr(std::make_shared<DataTypeInterval>(IntervalKind::Hour)); });
--- a/src/DataTypes/NestedUtils.cpp
+++ b/src/DataTypes/NestedUtils.cpp
@ -15,6 +15,8 @@

 #include <Parsers/IAST.h>

+#include <boost/algorithm/string/case_conv.hpp>
+

 namespace DB
 {
@ -227,14 +229,17 @@ void validateArraySizes(const Block & block)
 }


-std::unordered_set<String> getAllTableNames(const Block & block)
+/// Collects the distinct, non-empty results of `Nested::extractTableName` over all column names of `block`.
+/// When `to_lower_case` is set, each extracted name is lower-cased before it is stored.
+std::unordered_set<String> getAllTableNames(const Block & block, bool to_lower_case)
 {
    std::unordered_set<String> nested_table_names;
-    for (auto & name : block.getNames())
+    for (const auto & name : block.getNames())
    {
        auto nested_table_name = Nested::extractTableName(name);
+        /// Lower-casing an empty name is harmless; the emptiness check below still filters it out.
+        if (to_lower_case)
+            boost::to_lower(nested_table_name);
+
        if (!nested_table_name.empty())
-            nested_table_names.insert(nested_table_name);
+            nested_table_names.insert(std::move(nested_table_name));
    }
    return nested_table_names;
 }
--- a/src/DataTypes/NestedUtils.h
+++ b/src/DataTypes/NestedUtils.h
@ -32,7 +32,7 @@ namespace Nested
    void validateArraySizes(const Block & block);

    /// Get all nested tables names from a block.
-    std::unordered_set<String> getAllTableNames(const Block & block);
+    std::unordered_set<String> getAllTableNames(const Block & block, bool to_lower_case = false);
 }

 }
--- a/src/Databases/DatabaseReplicated.cpp
+++ b/src/Databases/DatabaseReplicated.cpp
@ -88,6 +88,9 @@ DatabaseReplicated::DatabaseReplicated(
    /// If zookeeper chroot prefix is used, path should start with '/', because chroot concatenates without it.
    if (zookeeper_path.front() != '/')
        zookeeper_path = "/" + zookeeper_path;
+
+    if (!db_settings.collection_name.value.empty())
+        fillClusterAuthInfo(db_settings.collection_name.value, context_->getConfigRef());
 }

 String DatabaseReplicated::getFullReplicaName() const
@ -191,22 +194,36 @@ ClusterPtr DatabaseReplicated::getClusterImpl() const
        shards.back().emplace_back(unescapeForFileName(host_port));
    }

-    String username = db_settings.cluster_username;
-    String password = db_settings.cluster_password;
    UInt16 default_port = getContext()->getTCPPort();
-    bool secure = db_settings.cluster_secure_connection;

    bool treat_local_as_remote = false;
    bool treat_local_port_as_remote = getContext()->getApplicationType() == Context::ApplicationType::LOCAL;
    return std::make_shared<Cluster>(
        getContext()->getSettingsRef(),
        shards,
-        username,
-        password,
+        cluster_auth_info.cluster_username,
+        cluster_auth_info.cluster_password,
        default_port,
        treat_local_as_remote,
        treat_local_port_as_remote,
-        secure);
+        cluster_auth_info.cluster_secure_connection,
+        /*priority=*/1,
+        database_name,
+        cluster_auth_info.cluster_secret);
+}
+
+
+/// Loads `cluster_auth_info` from the config section `named_collections.<collection_name>`.
+/// Throws BAD_ARGUMENTS when the section does not exist.
+void DatabaseReplicated::fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config_ref)
+{
+    const String collection_path = fmt::format("named_collections.{}", collection_name);
+    if (!config_ref.has(collection_path))
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "There is no collection named `{}` in config", collection_name);
+
+    /// Builds the full config key of one option inside the collection.
+    const auto option_path = [&collection_path](const char * option) { return collection_path + "." + option; };
+
+    /// Options that are absent from the collection fall back to an empty string / `false`.
+    auto & auth = cluster_auth_info;
+    auth.cluster_username = config_ref.getString(option_path("cluster_username"), "");
+    auth.cluster_password = config_ref.getString(option_path("cluster_password"), "");
+    auth.cluster_secret = config_ref.getString(option_path("cluster_secret"), "");
+    auth.cluster_secure_connection = config_ref.getBool(option_path("cluster_secure_connection"), false);
 }

 void DatabaseReplicated::tryConnectToZooKeeperAndInitDatabase(bool force_attach)
--- a/src/Databases/DatabaseReplicated.h
+++ b/src/Databases/DatabaseReplicated.h
@ -75,6 +75,16 @@ private:
    bool createDatabaseNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);
    void createReplicaNodesInZooKeeper(const ZooKeeperPtr & current_zookeeper);

+    struct
+    {
+        String cluster_username{"default"};
+        String cluster_password;
+        String cluster_secret;
+        bool cluster_secure_connection{false};
+    } cluster_auth_info;
+
+    void fillClusterAuthInfo(String collection_name, const Poco::Util::AbstractConfiguration & config);
+
    void checkQueryValid(const ASTPtr & query, ContextPtr query_context) const;

    void recoverLostReplica(const ZooKeeperPtr & current_zookeeper, UInt32 our_log_ptr, UInt32 max_log_ptr);
--- a/src/Databases/DatabaseReplicatedSettings.h
+++ b/src/Databases/DatabaseReplicatedSettings.h
@ -11,9 +11,8 @@ class ASTStorage;
    M(Float,  max_broken_tables_ratio, 0.5, "Do not recover replica automatically if the ratio of staled tables to all tables is greater", 0) \
    M(UInt64, max_replication_lag_to_enqueue, 10, "Replica will throw exception on attempt to execute query if its replication lag greater", 0) \
    M(UInt64, wait_entry_commited_timeout_sec, 3600, "Replicas will try to cancel query if timeout exceed, but initiator host has not executed it yet", 0) \
-    M(String, cluster_username, "default", "Username to use when connecting to hosts of cluster", 0) \
-    M(String, cluster_password, "", "Password to use when connecting to hosts of cluster", 0) \
-    M(Bool, cluster_secure_connection, false, "Enable TLS when connecting to hosts of cluster", 0) \
+    M(String, collection_name, "", "A name of a collection defined in server's config where all info for cluster authentication is defined", 0) \
+

 DECLARE_SETTINGS_TRAITS(DatabaseReplicatedSettingsTraits, LIST_OF_DATABASE_REPLICATED_SETTINGS)

--- a/src/Disks/S3/DiskS3.cpp
+++ b/src/Disks/S3/DiskS3.cpp
@ -20,6 +20,7 @@
 #include <Common/getRandomASCIIString.h>

 #include <Interpreters/Context.h>
+#include <Interpreters/threadPoolCallbackRunner.h>
 #include <IO/ReadBufferFromS3.h>
 #include <IO/ReadBufferFromString.h>
 #include <IO/ReadHelpers.h>
@ -264,32 +265,6 @@ std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path,
    LOG_TRACE(log, "{} to file by path: {}. S3 path: {}",
              mode == WriteMode::Rewrite ? "Write" : "Append", backQuote(metadata_disk->getPath() + path), remote_fs_root_path + blob_name);

-    ScheduleFunc schedule = [pool = &getThreadPoolWriter(), thread_group = CurrentThread::getGroup()](auto callback)
-    {
-        pool->scheduleOrThrow([callback = std::move(callback), thread_group]()
-        {
-            if (thread_group)
-                CurrentThread::attachTo(thread_group);
-
-            SCOPE_EXIT_SAFE(
-                if (thread_group)
-                    CurrentThread::detachQueryIfNotDetached();
-
-                /// After we detached from the thread_group, parent for memory_tracker inside ThreadStatus will be reset to it's parent.
-                /// Typically, it may be changes from Process to User.
-                /// Usually it could be ok, because thread pool task is executed before user-level memory tracker is destroyed.
-                /// However, thread could stay alive inside the thread pool, and it's ThreadStatus as well.
-                /// When, finally, we destroy the thread (and the ThreadStatus),
-                /// it can use memory tracker in the ~ThreadStatus in order to alloc/free untracked_memory,\
-                /// and by this time user-level memory tracker may be already destroyed.
-                ///
-                /// As a work-around, reset memory tracker to total, which is always alive.
-                CurrentThread::get().memory_tracker.setParent(&total_memory_tracker);
-            );
-            callback();
-        });
-    };
-
    auto s3_buffer = std::make_unique<WriteBufferFromS3>(
        settings->client,
        bucket,
@ -299,7 +274,7 @@ std::unique_ptr<WriteBufferFromFileBase> DiskS3::writeFile(const String & path,
        settings->s3_upload_part_size_multiply_parts_count_threshold,
        settings->s3_max_single_part_upload_size,
        std::move(object_metadata),
-        buf_size, std::move(schedule));
+        buf_size, threadPoolCallbackRunner(getThreadPoolWriter()));

    auto create_metadata_callback = [this, path, blob_name, mode] (size_t count)
    {
--- a/src/Formats/FormatFactory.cpp
+++ b/src/Formats/FormatFactory.cpp
@ -89,10 +89,10 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
    format_settings.json.quote_64bit_integers = settings.output_format_json_quote_64bit_integers;
    format_settings.json.quote_denormals = settings.output_format_json_quote_denormals;
    format_settings.null_as_default = settings.input_format_null_as_default;
-    format_settings.use_lowercase_column_name = settings.input_format_use_lowercase_column_name;
    format_settings.decimal_trailing_zeros = settings.output_format_decimal_trailing_zeros;
    format_settings.parquet.row_group_size = settings.output_format_parquet_row_group_size;
    format_settings.parquet.import_nested = settings.input_format_parquet_import_nested;
+    format_settings.parquet.case_insensitive_column_matching = settings.input_format_parquet_case_insensitive_column_matching;
    format_settings.parquet.allow_missing_columns = settings.input_format_parquet_allow_missing_columns;
    format_settings.pretty.charset = settings.output_format_pretty_grid_charset.toString() == "ASCII" ? FormatSettings::Pretty::Charset::ASCII : FormatSettings::Pretty::Charset::UTF8;
    format_settings.pretty.color = settings.output_format_pretty_color;
@ -123,9 +123,11 @@ FormatSettings getFormatSettings(ContextPtr context, const Settings & settings)
    format_settings.arrow.low_cardinality_as_dictionary = settings.output_format_arrow_low_cardinality_as_dictionary;
    format_settings.arrow.import_nested = settings.input_format_arrow_import_nested;
    format_settings.arrow.allow_missing_columns = settings.input_format_arrow_allow_missing_columns;
+    format_settings.arrow.case_insensitive_column_matching = settings.input_format_arrow_case_insensitive_column_matching;
    format_settings.orc.import_nested = settings.input_format_orc_import_nested;
    format_settings.orc.allow_missing_columns = settings.input_format_orc_allow_missing_columns;
    format_settings.orc.row_batch_size = settings.input_format_orc_row_batch_size;
+    format_settings.orc.case_insensitive_column_matching = settings.input_format_orc_case_insensitive_column_matching;
    format_settings.defaults_for_omitted_fields = settings.input_format_defaults_for_omitted_fields;
    format_settings.capn_proto.enum_comparing_mode = settings.format_capn_proto_enum_comparising_mode;
    format_settings.seekable_read = settings.input_format_allow_seeks;
--- a/src/Formats/FormatSettings.h
+++ b/src/Formats/FormatSettings.h
@ -32,7 +32,6 @@ struct FormatSettings
    bool null_as_default = true;
    bool decimal_trailing_zeros = false;
    bool defaults_for_omitted_fields = true;
-    bool use_lowercase_column_name = false;

    bool seekable_read = true;
    UInt64 max_rows_to_read_for_schema_inference = 100;
@ -75,6 +74,7 @@ struct FormatSettings
        bool low_cardinality_as_dictionary = false;
        bool import_nested = false;
        bool allow_missing_columns = false;
+        bool case_insensitive_column_matching = false;
    } arrow;

    struct
@ -137,6 +137,7 @@ struct FormatSettings
        UInt64 row_group_size = 1000000;
        bool import_nested = false;
        bool allow_missing_columns = false;
+        bool case_insensitive_column_matching = false;
    } parquet;

    struct Pretty
@ -217,6 +218,7 @@ struct FormatSettings
        bool import_nested = false;
        bool allow_missing_columns = false;
        int64_t row_batch_size = 100'000;
+        bool case_insensitive_column_matching = false;
    } orc;

    /// For capnProto format we should determine how to
--- a/src/Functions/DateTimeTransforms.h
+++ b/src/Functions/DateTimeTransforms.h
@ -41,6 +41,11 @@ namespace ErrorCodes
        throw Exception("Illegal type Date of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
    }

+    /// Always throws ILLEGAL_TYPE_OF_ARGUMENT, naming `name` as the function that was given a DateTime argument.
+    /// Never returns a value; the UInt32 return type presumably exists only so that callers can write
+    /// `return dateTimeIsNotSupported(name);`, as is done with `dateIsNotSupported`.
+    static inline UInt32 dateTimeIsNotSupported(const char * name)
+    {
+        throw Exception("Illegal type DateTime of argument for function " + std::string(name), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+    }
+
 /// This factor transformation will say that the function is monotone everywhere.
 struct ZeroTransform
 {
@ -311,6 +316,133 @@ struct ToStartOfSecondImpl
    using FactorTransform = ZeroTransform;
 };

+/// Transform behind `toStartOfMillisecond`: only DateTime64 arguments are supported,
+/// the DateTime / Date / Date32 overloads throw ILLEGAL_TYPE_OF_ARGUMENT.
+struct ToStartOfMillisecondImpl
+{
+    static constexpr auto name = "toStartOfMillisecond";
+
+    /// Rounds a DateTime64 value down to a whole millisecond.
+    /// `scale_multiplier` is the scale multiplier of the argument (1000000 for scale 6).
+    /// Arguments coarser than milliseconds are multiplied up to millisecond ticks instead.
+    static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &)
+    {
+        // given that scale is 6, scale_multiplier is 1000000
+        // for DateTime64 value of 123.456789:
+        // 123456789 - 789 = 123456000
+        // for DateTime64 value of -123.456789:
+        // -123456789 - (1000 + (-789)) = -123457000
+
+        if (scale_multiplier == 1000)
+        {
+            return datetime64;
+        }
+        else if (scale_multiplier <= 1000)
+        {
+            return datetime64 * (1000 / scale_multiplier);
+        }
+        else
+        {
+            /// Number of argument ticks in one millisecond (1000 in the example above).
+            const Int64 ticks_per_millisecond = scale_multiplier / 1000;
+            auto droppable_part_with_sign = DecimalUtils::getFractionalPartWithScaleMultiplier<DateTime64, true>(datetime64, ticks_per_millisecond);
+
+            /// A negative remainder must be wrapped by one millisecond (not by a whole second),
+            /// so that the subtraction below rounds towards negative infinity.
+            if (droppable_part_with_sign < 0)
+                droppable_part_with_sign += ticks_per_millisecond;
+
+            return datetime64 - droppable_part_with_sign;
+        }
+    }
+
+    static inline UInt32 execute(UInt32, const DateLUTImpl &)
+    {
+        return dateTimeIsNotSupported(name);
+    }
+    static inline UInt32 execute(Int32, const DateLUTImpl &)
+    {
+        return dateIsNotSupported(name);
+    }
+    static inline UInt32 execute(UInt16, const DateLUTImpl &)
+    {
+        return dateIsNotSupported(name);
+    }
+
+    using FactorTransform = ZeroTransform;
+};
+
+/// Transform behind `toStartOfMicrosecond`: only DateTime64 arguments are supported,
+/// the DateTime / Date / Date32 overloads throw ILLEGAL_TYPE_OF_ARGUMENT.
+struct ToStartOfMicrosecondImpl
+{
+    static constexpr auto name = "toStartOfMicrosecond";
+
+    /// Rounds a DateTime64 value down to a whole microsecond.
+    /// `scale_multiplier` is the scale multiplier of the argument (1000000 for scale 6).
+    /// Arguments coarser than microseconds are multiplied up to microsecond ticks instead.
+    static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &)
+    {
+        // @see ToStartOfMillisecondImpl
+
+        if (scale_multiplier == 1000000)
+        {
+            return datetime64;
+        }
+        else if (scale_multiplier <= 1000000)
+        {
+            return datetime64 * (1000000 / scale_multiplier);
+        }
+        else
+        {
+            /// Number of argument ticks in one microsecond.
+            const Int64 ticks_per_microsecond = scale_multiplier / 1000000;
+            auto droppable_part_with_sign = DecimalUtils::getFractionalPartWithScaleMultiplier<DateTime64, true>(datetime64, ticks_per_microsecond);
+
+            /// A negative remainder must be wrapped by one microsecond (not by a whole second),
+            /// so that the subtraction below rounds towards negative infinity.
+            if (droppable_part_with_sign < 0)
+                droppable_part_with_sign += ticks_per_microsecond;
+
+            return datetime64 - droppable_part_with_sign;
+        }
+    }
+
+    static inline UInt32 execute(UInt32, const DateLUTImpl &)
+    {
+        return dateTimeIsNotSupported(name);
+    }
+    static inline UInt32 execute(Int32, const DateLUTImpl &)
+    {
+        return dateIsNotSupported(name);
+    }
+    static inline UInt32 execute(UInt16, const DateLUTImpl &)
+    {
+        return dateIsNotSupported(name);
+    }
+
+    using FactorTransform = ZeroTransform;
+};
+
+/// Transform behind `toStartOfNanosecond`: only DateTime64 arguments are supported,
+/// the DateTime / Date / Date32 overloads throw ILLEGAL_TYPE_OF_ARGUMENT.
+struct ToStartOfNanosecondImpl
+{
+    static constexpr auto name = "toStartOfNanosecond";
+
+    /// Converts a DateTime64 value to nanosecond ticks; a value that already has
+    /// nanosecond scale is returned unchanged, a finer one is rejected.
+    static inline DateTime64 execute(const DateTime64 & datetime64, Int64 scale_multiplier, const DateLUTImpl &)
+    {
+        // @see ToStartOfMillisecondImpl
+        constexpr Int64 nanosecond_multiplier = 1000000000;
+
+        if (scale_multiplier > nanosecond_multiplier)
+            throw Exception("Illegal type of argument for function " + std::string(name) + ", DateTime64 expected", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+
+        if (scale_multiplier == nanosecond_multiplier)
+            return datetime64;
+
+        return datetime64 * (nanosecond_multiplier / scale_multiplier);
+    }
+
+    static inline UInt32 execute(UInt32, const DateLUTImpl &)
+    {
+        return dateTimeIsNotSupported(name);
+    }
+    static inline UInt32 execute(Int32, const DateLUTImpl &)
+    {
+        return dateIsNotSupported(name);
+    }
+    static inline UInt32 execute(UInt16, const DateLUTImpl &)
+    {
+        return dateIsNotSupported(name);
+    }
+
+    using FactorTransform = ZeroTransform;
+};
+
 struct ToStartOfFiveMinuteImpl
 {
    static constexpr auto name = "toStartOfFiveMinute";
--- a/src/Functions/FunctionDateOrDateTimeAddInterval.h
+++ b/src/Functions/FunctionDateOrDateTimeAddInterval.h
@ -40,26 +40,158 @@ namespace ErrorCodes
 ///  - 'AddSecondsImpl::execute(UInt32, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(DateTime, ...) -> DateTime'
 ///  - 'AddSecondsImpl::execute(UInt16, ...) -> UInt32' is available to the ClickHouse users as 'addSeconds(Date, ...) -> DateTime'

+struct AddNanosecondsImpl
+{
+    static constexpr auto name = "addNanoseconds";
+    /// Component form. Assumes t.whole is in seconds and t.fractional in 10^-scale ticks (TODO confirm with the caller); result is split at 10^9.
+    static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
+    execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale)
+    {
+        Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(9 - scale);
+        auto division = std::div(t.fractional * multiplier + delta, static_cast<Int64>(1000000000));
+        return {t.whole + division.quot, division.rem}; /// carry full seconds into `whole`, keep only the sub-second remainder
+    }
+    /// Raw-tick form: rescales `t` from 10^-scale ticks to nanosecond ticks, then adds `delta` nanoseconds.
+    static inline NO_SANITIZE_UNDEFINED DateTime64
+    execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
+    {
+        Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(9 - scale);
+        return t * multiplier + delta;
+    }
+    /// NOTE(review): the Int64 result is narrowed to the declared UInt32 return type; widening it would change the deduced result type, so it is left as is.
+    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
+    {
+        Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(9);
+        return t * multiplier + delta;
+    }
+    /// UInt16 (Date) input is rejected.
+    static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
+    {
+        throw Exception("addNanoseconds() cannot be used with Date", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+    }
+    /// Int32 (Date32) input is rejected.
+    static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
+    {
+        throw Exception("addNanoseconds() cannot be used with Date32", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+    }
+};
+
+struct AddMicrosecondsImpl
+{
+    static constexpr auto name = "addMicroseconds";
+    /// Component form. Assumes t.whole is in seconds and t.fractional in 10^-scale ticks (TODO confirm with the caller).
+    static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
+    execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
+    {
+        Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(6 - scale));
+        if (scale <= 6)
+        {
+            auto division = std::div(t.fractional * multiplier + delta, static_cast<Int64>(1000000)); /// rescale the fraction to microsecond ticks first
+            return {t.whole + division.quot, division.rem};
+        }
+        else
+        {
+            auto division = std::div(t.fractional + delta * multiplier, static_cast<Int64>(1000000) * multiplier); /// keep the finer input ticks, rescale delta instead
+            return {t.whole + division.quot, division.rem};
+        }
+    }
+    /// Raw-tick form: for scale <= 6 `t` is rescaled to microsecond ticks before adding `delta`; otherwise `delta` is rescaled to the finer ticks of `t`.
+    static inline NO_SANITIZE_UNDEFINED DateTime64
+    execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
+    {
+        Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(6 - scale));
+        return scale <= 6 ? t * multiplier + delta : t + delta * multiplier;
+    }
+    /// NOTE(review): the Int64 result is narrowed to the declared UInt32 return type; widening it would change the deduced result type, so it is left as is.
+    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
+    {
+        Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(6);
+        return t * multiplier + delta;
+    }
+    /// UInt16 (Date) input is rejected.
+    static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
+    {
+        throw Exception("addMicroseconds() cannot be used with Date", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+    }
+    /// Int32 (Date32) input is rejected.
+    static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
+    {
+        throw Exception("addMicroseconds() cannot be used with Date32", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+    }
+};
+
+struct AddMillisecondsImpl
+{
+    static constexpr auto name = "addMilliseconds";
+    /// Component form. Assumes t.whole is in seconds and t.fractional in 10^-scale ticks (TODO confirm with the caller).
+    static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
+    execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 scale = DataTypeDateTime64::default_scale)
+    {
+        Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(3 - scale));
+        if (scale <= 3)
+        {
+            auto division = std::div(t.fractional * multiplier + delta, static_cast<Int64>(1000)); /// rescale the fraction to millisecond ticks first
+            return {t.whole + division.quot, division.rem};
+        }
+        else
+        {
+            auto division = std::div(t.fractional + delta * multiplier, static_cast<Int64>(1000) * multiplier); /// keep the finer input ticks, rescale delta instead
+            return {t.whole + division.quot, division.rem};
+        }
+    }
+    /// Raw-tick form: for scale <= 3 `t` is rescaled to millisecond ticks before adding `delta`; otherwise `delta` is rescaled to the finer ticks of `t`.
+    static inline NO_SANITIZE_UNDEFINED DateTime64
+    execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
+    {
+        Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(std::abs(3 - scale));
+        return scale <= 3 ? t * multiplier + delta : t + delta * multiplier;
+    }
+    /// NOTE(review): the Int64 result is narrowed to the declared UInt32 return type; widening it would change the deduced result type, so it is left as is.
+    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
+    {
+        Int64 multiplier = DecimalUtils::scaleMultiplier<DateTime64>(3);
+        return t * multiplier + delta;
+    }
+    /// UInt16 (Date) input is rejected.
+    static inline NO_SANITIZE_UNDEFINED DateTime64 execute(UInt16, Int64, const DateLUTImpl &, UInt16 = 0)
+    {
+        throw Exception("addMilliseconds() cannot be used with Date", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+    }
+    /// Int32 (Date32) input is rejected.
+    static inline NO_SANITIZE_UNDEFINED DateTime64 execute(Int32, Int64, const DateLUTImpl &, UInt16 = 0)
+    {
+        throw Exception("addMilliseconds() cannot be used with Date32", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+    }
+};
+
 struct AddSecondsImpl
 {
    static constexpr auto name = "addSeconds";

    static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
-    execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &)
+    execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
    {
        return {t.whole + delta, t.fractional};
    }

-    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &)
+    static inline NO_SANITIZE_UNDEFINED DateTime64
+    execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
+    {
+        return t + delta * DecimalUtils::scaleMultiplier<DateTime64>(scale);
+    }
+
+    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
    {
        return t + delta;
    }
-    static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone)
+
+    static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
    {
        // use default datetime64 scale
        return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta) * 1000;
    }
-    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
+
+    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
    {
        return time_zone.fromDayNum(DayNum(d)) + delta;
    }
@ -70,21 +202,29 @@ struct AddMinutesImpl
    static constexpr auto name = "addMinutes";

    static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
-    execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &)
+    execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
    {
        return {t.whole + delta * 60, t.fractional};
    }

-    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &)
+    static inline NO_SANITIZE_UNDEFINED DateTime64
+    execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
+    {
+        return t + 60 * delta * DecimalUtils::scaleMultiplier<DateTime64>(scale);
+    }
+
+    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
    {
        return t + delta * 60;
    }
-    static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone)
+
+    static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
    {
        // use default datetime64 scale
        return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 60) * 1000;
    }
-    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
+
+    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
    {
        return time_zone.fromDayNum(DayNum(d)) + delta * 60;
    }
@ -95,20 +235,29 @@ struct AddHoursImpl
    static constexpr auto name = "addHours";

    static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
-    execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &)
+    execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
    {
        return {t.whole + delta * 3600, t.fractional};
    }
-    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &)
+
+    static inline NO_SANITIZE_UNDEFINED DateTime64
+    execute(DateTime64 t, Int64 delta, const DateLUTImpl &, UInt16 scale = 0)
+    {
+        return t + 3600 * delta * DecimalUtils::scaleMultiplier<DateTime64>(scale);
+    }
+
+    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl &, UInt16 = 0)
    {
        return t + delta * 3600;
    }
-    static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone)
+
+    static inline NO_SANITIZE_UNDEFINED Int64 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
    {
        // use default datetime64 scale
        return (time_zone.fromDayNum(ExtendedDayNum(d)) + delta * 3600) * 1000;
    }
-    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
+
+    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
    {
        return time_zone.fromDayNum(DayNum(d)) + delta * 3600;
    }
@ -119,22 +268,30 @@ struct AddDaysImpl
    static constexpr auto name = "addDays";

    static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
-    execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
+    execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
    {
        return {time_zone.addDays(t.whole, delta), t.fractional};
    }

-    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone)
+    static inline NO_SANITIZE_UNDEFINED DateTime64
+    execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
+    {
+        auto multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);
+        auto d = std::div(t, multiplier);
+        return time_zone.addDays(d.quot, delta) * multiplier + d.rem;
+    }
+
+    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
    {
        return time_zone.addDays(t, delta);
    }

-    static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &)
+    static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl &, UInt16 = 0)
    {
        return d + delta;
    }

-    static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl &)
+    static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl &, UInt16 = 0)
    {
        return d + delta;
    }
@ -145,22 +302,30 @@ struct AddWeeksImpl
    static constexpr auto name = "addWeeks";

    static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
-    execute(DecimalUtils::DecimalComponents<DateTime64> t, Int32 delta, const DateLUTImpl & time_zone)
+    execute(DecimalUtils::DecimalComponents<DateTime64> t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
    {
        return {time_zone.addWeeks(t.whole, delta), t.fractional};
    }

-    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone)
+    static inline NO_SANITIZE_UNDEFINED DateTime64
+    execute(DateTime64 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
+    {
+        auto multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);
+        auto d = std::div(t, multiplier);
+        return time_zone.addDays(d.quot, delta * 7) * multiplier + d.rem;
+    }
+
+    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
    {
        return time_zone.addWeeks(t, delta);
    }

-    static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl &)
+    static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl &, UInt16 = 0)
    {
        return d + delta * 7;
    }

-    static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int32 delta, const DateLUTImpl &)
+    static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int32 delta, const DateLUTImpl &, UInt16 = 0)
    {
        return d + delta * 7;
    }
@ -170,23 +335,31 @@ struct AddMonthsImpl
 {
    static constexpr auto name = "addMonths";

-    static inline DecimalUtils::DecimalComponents<DateTime64>
-    execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
+    static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
+    execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
    {
        return {time_zone.addMonths(t.whole, delta), t.fractional};
    }

-    static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone)
+    static inline NO_SANITIZE_UNDEFINED DateTime64
+    execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
+    {
+        auto multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);
+        auto d = std::div(t, multiplier);
+        return time_zone.addMonths(d.quot, delta) * multiplier + d.rem;
+    }
+
+    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
    {
        return time_zone.addMonths(t, delta);
    }

-    static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
+    static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
    {
        return time_zone.addMonths(DayNum(d), delta);
    }

-    static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone)
+    static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
    {
        return time_zone.addMonths(ExtendedDayNum(d), delta);
    }
@ -197,22 +370,30 @@ struct AddQuartersImpl
    static constexpr auto name = "addQuarters";

    static inline DecimalUtils::DecimalComponents<DateTime64>
-    execute(DecimalUtils::DecimalComponents<DateTime64> t, Int32 delta, const DateLUTImpl & time_zone)
+    execute(DecimalUtils::DecimalComponents<DateTime64> t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
    {
        return {time_zone.addQuarters(t.whole, delta), t.fractional};
    }

-    static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone)
+    static inline NO_SANITIZE_UNDEFINED DateTime64
+    execute(DateTime64 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
+    {
+        auto multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);
+        auto d = std::div(t, multiplier);
+        return time_zone.addQuarters(d.quot, delta) * multiplier + d.rem;
+    }
+
+    static inline UInt32 execute(UInt32 t, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
    {
        return time_zone.addQuarters(t, delta);
    }

-    static inline UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl & time_zone)
+    static inline UInt16 execute(UInt16 d, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
    {
        return time_zone.addQuarters(DayNum(d), delta);
    }

-    static inline Int32 execute(Int32 d, Int32 delta, const DateLUTImpl & time_zone)
+    static inline Int32 execute(Int32 d, Int32 delta, const DateLUTImpl & time_zone, UInt16 = 0)
    {
        return time_zone.addQuarters(ExtendedDayNum(d), delta);
    }
@ -222,23 +403,31 @@ struct AddYearsImpl
 {
    static constexpr auto name = "addYears";

-    static inline DecimalUtils::DecimalComponents<DateTime64>
-    execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone)
+    static inline NO_SANITIZE_UNDEFINED DecimalUtils::DecimalComponents<DateTime64>
+    execute(DecimalUtils::DecimalComponents<DateTime64> t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
    {
        return {time_zone.addYears(t.whole, delta), t.fractional};
    }

-    static inline UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone)
+    static inline NO_SANITIZE_UNDEFINED DateTime64
+    execute(DateTime64 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale = 0)
+    {
+        auto multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);
+        auto d = std::div(t, multiplier);
+        return time_zone.addYears(d.quot, delta) * multiplier + d.rem;
+    }
+
+    static inline NO_SANITIZE_UNDEFINED UInt32 execute(UInt32 t, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
    {
        return time_zone.addYears(t, delta);
    }

-    static inline UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone)
+    static inline NO_SANITIZE_UNDEFINED UInt16 execute(UInt16 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
    {
        return time_zone.addYears(DayNum(d), delta);
    }

-    static inline Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone)
+    static inline NO_SANITIZE_UNDEFINED Int32 execute(Int32 d, Int64 delta, const DateLUTImpl & time_zone, UInt16 = 0)
    {
        return time_zone.addYears(ExtendedDayNum(d), delta);
    }
@ -250,13 +439,16 @@ struct SubtractIntervalImpl : public Transform
    using Transform::Transform;

    template <typename T>
-    inline NO_SANITIZE_UNDEFINED auto execute(T t, Int64 delta, const DateLUTImpl & time_zone) const
+    inline NO_SANITIZE_UNDEFINED auto execute(T t, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale) const
    {
        /// Signed integer overflow is Ok.
-        return Transform::execute(t, -delta, time_zone);
+        return Transform::execute(t, -delta, time_zone, scale);
    }
 };

+struct SubtractNanosecondsImpl : SubtractIntervalImpl<AddNanosecondsImpl> { static constexpr auto name = "subtractNanoseconds"; };
+struct SubtractMicrosecondsImpl : SubtractIntervalImpl<AddMicrosecondsImpl> { static constexpr auto name = "subtractMicroseconds"; };
+struct SubtractMillisecondsImpl : SubtractIntervalImpl<AddMillisecondsImpl> { static constexpr auto name = "subtractMilliseconds"; };
 struct SubtractSecondsImpl : SubtractIntervalImpl<AddSecondsImpl> { static constexpr auto name = "subtractSeconds"; };
 struct SubtractMinutesImpl : SubtractIntervalImpl<AddMinutesImpl> { static constexpr auto name = "subtractMinutes"; };
 struct SubtractHoursImpl : SubtractIntervalImpl<AddHoursImpl> { static constexpr auto name = "subtractHours"; };
@ -277,17 +469,17 @@ struct Adder
    {}

    template <typename FromVectorType, typename ToVectorType>
-    void NO_INLINE vectorConstant(const FromVectorType & vec_from, ToVectorType & vec_to, Int64 delta, const DateLUTImpl & time_zone) const
+    void NO_INLINE vectorConstant(const FromVectorType & vec_from, ToVectorType & vec_to, Int64 delta, const DateLUTImpl & time_zone, UInt16 scale) const
    {
        size_t size = vec_from.size();
        vec_to.resize(size);

        for (size_t i = 0; i < size; ++i)
-            vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta), time_zone);
+            vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta), time_zone, scale);
    }

    template <typename FromVectorType, typename ToVectorType>
-    void vectorVector(const FromVectorType & vec_from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone) const
+    void vectorVector(const FromVectorType & vec_from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const
    {
        size_t size = vec_from.size();
        vec_to.resize(size);
@ -296,11 +488,11 @@ struct Adder
            ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64,
            ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64,
            ColumnFloat32, ColumnFloat64>(
-            &delta, [&](const auto & column){ vectorVector(vec_from, vec_to, column, time_zone, size); return true; });
+            &delta, [&](const auto & column){ vectorVector(vec_from, vec_to, column, time_zone, scale, size); return true; });
    }

    template <typename FromType, typename ToVectorType>
-    void constantVector(const FromType & from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone) const
+    void constantVector(const FromType & from, ToVectorType & vec_to, const IColumn & delta, const DateLUTImpl & time_zone, UInt16 scale) const
    {
        size_t size = delta.size();
        vec_to.resize(size);
@ -309,7 +501,7 @@ struct Adder
            ColumnUInt8, ColumnUInt16, ColumnUInt32, ColumnUInt64,
            ColumnInt8, ColumnInt16, ColumnInt32, ColumnInt64,
            ColumnFloat32, ColumnFloat64>(
-            &delta, [&](const auto & column){ constantVector(from, vec_to, column, time_zone, size); return true; });
+            &delta, [&](const auto & column){ constantVector(from, vec_to, column, time_zone, scale, size); return true; });
    }

 private:
@ -325,18 +517,18 @@ private:

    template <typename FromVectorType, typename ToVectorType, typename DeltaColumnType>
    NO_INLINE NO_SANITIZE_UNDEFINED void vectorVector(
-        const FromVectorType & vec_from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, size_t size) const
+        const FromVectorType & vec_from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t size) const
    {
        for (size_t i = 0; i < size; ++i)
-            vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta.getData()[i]), time_zone);
+            vec_to[i] = transform.execute(vec_from[i], checkOverflow(delta.getData()[i]), time_zone, scale);
    }

    template <typename FromType, typename ToVectorType, typename DeltaColumnType>
    NO_INLINE NO_SANITIZE_UNDEFINED void constantVector(
-        const FromType & from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, size_t size) const
+        const FromType & from, ToVectorType & vec_to, const DeltaColumnType & delta, const DateLUTImpl & time_zone, UInt16 scale, size_t size) const
    {
        for (size_t i = 0; i < size; ++i)
-            vec_to[i] = transform.execute(from, checkOverflow(delta.getData()[i]), time_zone);
+            vec_to[i] = transform.execute(from, checkOverflow(delta.getData()[i]), time_zone, scale);
    }
 };

@ -344,7 +536,7 @@ private:
 template <typename FromDataType, typename ToDataType, typename Transform>
 struct DateTimeAddIntervalImpl
 {
-    static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type)
+    static ColumnPtr execute(Transform transform, const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, UInt16 scale = 0)
    {
        using FromValueType = typename FromDataType::FieldType;
        using FromColumnType = typename FromDataType::ColumnType;
@ -363,16 +555,15 @@ struct DateTimeAddIntervalImpl
        if (const auto * sources = checkAndGetColumn<FromColumnType>(source_col.get()))
        {
            if (const auto * delta_const_column = typeid_cast<const ColumnConst *>(&delta_column))
-                op.vectorConstant(sources->getData(), col_to->getData(), delta_const_column->getInt(0), time_zone);
+                op.vectorConstant(sources->getData(), col_to->getData(), delta_const_column->getInt(0), time_zone, scale);
            else
-                op.vectorVector(sources->getData(), col_to->getData(), delta_column, time_zone);
+                op.vectorVector(sources->getData(), col_to->getData(), delta_column, time_zone, scale);
        }
        else if (const auto * sources_const = checkAndGetColumnConst<FromColumnType>(source_col.get()))
        {
            op.constantVector(
                sources_const->template getValue<FromValueType>(),
-                col_to->getData(),
-                delta_column, time_zone);
+                col_to->getData(), delta_column, time_zone, scale);
        }
        else
        {
@ -463,18 +654,10 @@ public:
        }
    }

-    // TransformDateTime64 helps choosing correct overload of exec and does some transformations
-    // on input and output parameters to simplify support of DateTime64 in concrete Transform.
-    template <typename FieldType>
-    using TransformType = std::conditional_t<
-        std::is_same_v<FieldType, DateTime64>,
-        TransformDateTime64<Transform>,
-        Transform>;
-
    /// Helper templates to deduce return type based on argument type, since some overloads may promote or demote types,
    /// e.g. addSeconds(Date, 1) => DateTime
    template <typename FieldType>
-    using TransformExecuteReturnType = decltype(std::declval<TransformType<FieldType>>().execute(FieldType(), 0, std::declval<DateLUTImpl>()));
+    using TransformExecuteReturnType = decltype(std::declval<Transform>().execute(FieldType(), 0, std::declval<DateLUTImpl>(), 0));

    // Deduces RETURN DataType from INPUT DataType, based on return type of Transform{}.execute(INPUT_TYPE, UInt64, DateLUTImpl).
    // e.g. for Transform-type that has execute()-overload with 'UInt16' input and 'UInt32' return,
@ -500,11 +683,33 @@ public:
            if (typeid_cast<const DataTypeDateTime64 *>(arguments[0].type.get()))
            {
                const auto & datetime64_type = assert_cast<const DataTypeDateTime64 &>(*arguments[0].type);
-                return std::make_shared<DataTypeDateTime64>(datetime64_type.getScale(), extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
+
+                auto from_scale = datetime64_type.getScale();
+                auto scale = from_scale;
+                /// is_base_of (not is_same): the Subtract*Impl transforms derive from the Add*Impl ones and need the same result scale.
+                if (std::is_base_of_v<AddNanosecondsImpl, Transform>)
+                    scale = 9;
+                else if (std::is_base_of_v<AddMicrosecondsImpl, Transform>)
+                    scale = 6;
+                else if (std::is_base_of_v<AddMillisecondsImpl, Transform>)
+                    scale = 3;
+
+                scale = std::max(scale, from_scale);
+
+                return std::make_shared<DataTypeDateTime64>(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
            }
            else
            {
-                return std::make_shared<DataTypeDateTime64>(DataTypeDateTime64::default_scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
+                auto scale = DataTypeDateTime64::default_scale;
+                /// is_base_of (not is_same): the Subtract*Impl transforms derive from the Add*Impl ones and need the same result scale.
+                if (std::is_base_of_v<AddNanosecondsImpl, Transform>)
+                    scale = 9;
+                else if (std::is_base_of_v<AddMicrosecondsImpl, Transform>)
+                    scale = 6;
+                else if (std::is_base_of_v<AddMillisecondsImpl, Transform>)
+                    scale = 3;
+
+                return std::make_shared<DataTypeDateTime64>(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
            }
        }
        else
@ -541,9 +746,9 @@ public:
        }
        else if (const auto * datetime64_type = assert_cast<const DataTypeDateTime64 *>(from_type))
        {
-            using WrappedTransformType = TransformType<typename DataTypeDateTime64::FieldType>;
-            return DateTimeAddIntervalImpl<DataTypeDateTime64, TransformResultDataType<DataTypeDateTime64>, WrappedTransformType>::execute(
-                    WrappedTransformType{datetime64_type->getScale()}, arguments, result_type);
+            auto from_scale = datetime64_type->getScale();
+            return DateTimeAddIntervalImpl<DataTypeDateTime64, TransformResultDataType<DataTypeDateTime64>, Transform>::execute(
+                Transform{}, arguments, result_type, from_scale);
        }
        else
            throw Exception("Illegal type " + arguments[0].type->getName() + " of first argument of function " + getName(),
--- a/src/Functions/FunctionDateOrDateTimeToSomething.h
+++ b/src/Functions/FunctionDateOrDateTimeToSomething.h
@ -88,6 +88,20 @@ public:
            Int64 scale = DataTypeDateTime64::default_scale;
            if (const auto * dt64 =  checkAndGetDataType<DataTypeDateTime64>(arguments[0].type.get()))
                scale = dt64->getScale();
+            auto source_scale = scale;
+
+            if constexpr (std::is_same_v<ToStartOfMillisecondImpl, Transform>)
+            {
+                scale = std::max(source_scale, static_cast<Int64>(3));
+            }
+            else if constexpr (std::is_same_v<ToStartOfMicrosecondImpl, Transform>)
+            {
+                scale = std::max(source_scale, static_cast<Int64>(6));
+            }
+            else if constexpr (std::is_same_v<ToStartOfNanosecondImpl, Transform>)
+            {
+                scale = std::max(source_scale, static_cast<Int64>(9));
+            }

            return std::make_shared<ToDataType>(scale, extractTimeZoneNameFromFunctionArguments(arguments, 1, 0));
        }
--- a/src/Functions/FunctionsConversion.cpp
+++ b/src/Functions/FunctionsConversion.cpp
@ -112,6 +112,9 @@ void registerFunctionsConversion(FunctionFactory & factory)
    factory.registerFunction<FunctionParseDateTime64BestEffortOrZero>();
    factory.registerFunction<FunctionParseDateTime64BestEffortOrNull>();

+    factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalNanosecond, PositiveMonotonicity>>();
+    factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalMicrosecond, PositiveMonotonicity>>();
+    factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalMillisecond, PositiveMonotonicity>>();
    factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalSecond, PositiveMonotonicity>>();
    factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalMinute, PositiveMonotonicity>>();
    factory.registerFunction<FunctionConvert<DataTypeInterval, NameToIntervalHour, PositiveMonotonicity>>();
--- a/src/Functions/FunctionsConversion.h
+++ b/src/Functions/FunctionsConversion.h
@ -1487,6 +1487,9 @@ struct NameToDecimal256 { static constexpr auto name = "toDecimal256"; };
        static constexpr auto kind = IntervalKind::INTERVAL_KIND; \
    };

+DEFINE_NAME_TO_INTERVAL(Nanosecond)
+DEFINE_NAME_TO_INTERVAL(Microsecond)
+DEFINE_NAME_TO_INTERVAL(Millisecond)
 DEFINE_NAME_TO_INTERVAL(Second)
 DEFINE_NAME_TO_INTERVAL(Minute)
 DEFINE_NAME_TO_INTERVAL(Hour)
@ -2703,13 +2706,10 @@ private:
        return createWrapper<ToDataType>(from_type, to_type, requested_result_is_nullable);
    }

-    WrapperType createUInt8ToUInt8Wrapper(const DataTypePtr from_type, const DataTypePtr to_type) const
+    WrapperType createUInt8ToBoolWrapper(const DataTypePtr from_type, const DataTypePtr to_type) const
    {
        return [from_type, to_type] (ColumnsWithTypeAndName & arguments, const DataTypePtr &, const ColumnNullable *, size_t /*input_rows_count*/) -> ColumnPtr
        {
-            if (isBool(from_type) || !isBool(to_type))
-                return arguments.front().column;
-
            /// Special case when we convert UInt8 column to Bool column.
            /// both columns have type UInt8, but we shouldn't use identity wrapper,
            /// because Bool column can contain only 0 and 1.
@ -3506,15 +3506,19 @@ private:
    /// 'requested_result_is_nullable' is true if CAST to Nullable type is requested.
    WrapperType prepareImpl(const DataTypePtr & from_type, const DataTypePtr & to_type, bool requested_result_is_nullable) const
    {
-        bool convert_to_ipv6 = to_type->getCustomName() && to_type->getCustomName()->getName() == "IPv6";
+        if (isUInt8(from_type) && isBool(to_type))
+            return createUInt8ToBoolWrapper(from_type, to_type);

-        if (from_type->equals(*to_type) && !convert_to_ipv6)
-        {
-            if (isUInt8(from_type))
-                return createUInt8ToUInt8Wrapper(from_type, to_type);
+        /// IPv6 can be cast into IPv6 and IPv4 into IPv4, but casting FixedString(16) into IPv6 must not be treated as an identity cast
+        bool safe_convert_custom_types = true;

+        if (const auto * to_type_custom_name = to_type->getCustomName())
+            safe_convert_custom_types = from_type->getCustomName() && from_type->getCustomName()->getName() == to_type_custom_name->getName();
+        else if (const auto * from_type_custom_name = from_type->getCustomName())
+            safe_convert_custom_types = to_type->getCustomName() && from_type_custom_name->getName() == to_type->getCustomName()->getName();
+
+        if (from_type->equals(*to_type) && safe_convert_custom_types)
            return createIdentityWrapper(from_type);
-        }
        else if (WhichDataType(from_type).isNothing())
            return createNothingWrapper(to_type.get());

--- a/src/Functions/FunctionsTimeWindow.cpp
+++ b/src/Functions/FunctionsTimeWindow.cpp
@ -20,6 +20,7 @@ namespace ErrorCodes
    extern const int ILLEGAL_COLUMN;
    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
    extern const int ARGUMENT_OUT_OF_BOUND;
+    extern const int SYNTAX_ERROR;
 }

 namespace
@ -167,6 +168,13 @@ struct TimeWindowImpl<TUMBLE>

        switch (std::get<0>(interval))
        {
+                //TODO: add proper support for fractional seconds
+//            case IntervalKind::Nanosecond:
+//                return executeTumble<UInt32, IntervalKind::Nanosecond>(*time_column_vec, std::get<1>(interval), time_zone);
+//            case IntervalKind::Microsecond:
+//                return executeTumble<UInt32, IntervalKind::Microsecond>(*time_column_vec, std::get<1>(interval), time_zone);
+//            case IntervalKind::Millisecond:
+//                return executeTumble<UInt32, IntervalKind::Millisecond>(*time_column_vec, std::get<1>(interval), time_zone);
            case IntervalKind::Second:
                return executeTumble<UInt32, IntervalKind::Second>(*time_column_vec, std::get<1>(interval), time_zone);
            case IntervalKind::Minute:
@ -183,6 +191,8 @@ struct TimeWindowImpl<TUMBLE>
                return executeTumble<UInt16, IntervalKind::Quarter>(*time_column_vec, std::get<1>(interval), time_zone);
            case IntervalKind::Year:
                return executeTumble<UInt16, IntervalKind::Year>(*time_column_vec, std::get<1>(interval), time_zone);
+            default:
+                throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR);
        }
        __builtin_unreachable();
    }
@ -350,6 +360,16 @@ struct TimeWindowImpl<HOP>

        switch (std::get<0>(window_interval))
        {
+                //TODO: add proper support for fractional seconds
+//            case IntervalKind::Nanosecond:
+//                return executeHop<UInt32, IntervalKind::Nanosecond>(
+//                    *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
+//            case IntervalKind::Microsecond:
+//                return executeHop<UInt32, IntervalKind::Microsecond>(
+//                    *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
+//            case IntervalKind::Millisecond:
+//                return executeHop<UInt32, IntervalKind::Millisecond>(
+//                    *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
            case IntervalKind::Second:
                return executeHop<UInt32, IntervalKind::Second>(
                    *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
@ -374,6 +394,8 @@ struct TimeWindowImpl<HOP>
            case IntervalKind::Year:
                return executeHop<UInt16, IntervalKind::Year>(
                    *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
+            default:
+                throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR);
        }
        __builtin_unreachable();
    }
@ -487,6 +509,16 @@ struct TimeWindowImpl<WINDOW_ID>

        switch (std::get<0>(window_interval))
        {
+                //TODO: add proper support for fractional seconds
+//            case IntervalKind::Nanosecond:
+//                return executeHopSlice<UInt32, IntervalKind::Nanosecond>(
+//                    *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
+//            case IntervalKind::Microsecond:
+//                return executeHopSlice<UInt32, IntervalKind::Microsecond>(
+//                    *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
+//            case IntervalKind::Millisecond:
+//                return executeHopSlice<UInt32, IntervalKind::Millisecond>(
+//                    *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
            case IntervalKind::Second:
                return executeHopSlice<UInt32, IntervalKind::Second>(
                    *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
@ -511,6 +543,8 @@ struct TimeWindowImpl<WINDOW_ID>
            case IntervalKind::Year:
                return executeHopSlice<UInt16, IntervalKind::Year>(
                    *time_column_vec, std::get<1>(hop_interval), std::get<1>(window_interval), time_zone);
+            default:
+                throw Exception("Fraction seconds are unsupported by windows yet", ErrorCodes::SYNTAX_ERROR);
        }
        __builtin_unreachable();
    }
--- a/src/Functions/FunctionsTimeWindow.h
+++ b/src/Functions/FunctionsTimeWindow.h
@ -80,7 +80,32 @@ struct ToStartOfTransform;
    TRANSFORM_TIME(Hour)
    TRANSFORM_TIME(Minute)
    TRANSFORM_TIME(Second)
-#undef TRANSFORM_DATE
+#undef TRANSFORM_TIME
+
+#define TRANSFORM_SUBSECONDS(INTERVAL_KIND, DEF_SCALE) \
+template<> \
+    struct ToStartOfTransform<IntervalKind::INTERVAL_KIND> \
+    { \
+        /* Aligns `t` by subtracting its remainder modulo `delta`, rescaled via DecimalUtils::scaleMultiplier. */ \
+        static Int64 execute(Int64 t, UInt64 delta, const UInt32 scale) \
+        { \
+            if (scale > DEF_SCALE) \
+            { \
+                /* Scale above DEF_SCALE: the step is `delta` times the multiplier; `t` is not rescaled. */ \
+                const auto step = delta * DecimalUtils::scaleMultiplier<DateTime64>(scale - DEF_SCALE); \
+                return t - (t % step); \
+            } \
+            /* Scale at or below DEF_SCALE: multiply the value itself before aligning it. */ \
+            auto rescaled = t * DecimalUtils::scaleMultiplier<DateTime64>(DEF_SCALE - scale); \
+            if (delta != 1) \
+                return rescaled - (rescaled % delta); \
+            return rescaled; \
+        } \
+    };
+    TRANSFORM_SUBSECONDS(Millisecond, 3)
+    TRANSFORM_SUBSECONDS(Microsecond, 6)
+    TRANSFORM_SUBSECONDS(Nanosecond, 9)
+#undef TRANSFORM_SUBSECONDS

    template <IntervalKind::Kind unit>
    struct AddTime;
@ -117,6 +142,25 @@ struct ToStartOfTransform;
    ADD_TIME(Second, 1)
 #undef ADD_TIME

+#define ADD_SUBSECONDS(INTERVAL_KIND, DEF_SCALE) \
+template <> \
+    struct AddTime<IntervalKind::INTERVAL_KIND> \
+    { \
+        /* Returns `t` plus `delta` times the DecimalUtils::scaleMultiplier value */ \
+        /* for the absolute difference between `scale` and DEF_SCALE. */ \
+        static inline NO_SANITIZE_UNDEFINED Int64 execute(Int64 t, UInt64 delta, const UInt32 scale) \
+        { \
+            /* The same formula applies on both sides of DEF_SCALE; only the operand order of the subtraction differs. */ \
+            const UInt32 scale_gap = (scale < DEF_SCALE) ? (DEF_SCALE - scale) : (scale - DEF_SCALE); \
+            const auto factor = DecimalUtils::scaleMultiplier<DateTime64>(scale_gap); \
+            return t + delta * factor; \
+        } \
+    };
+    ADD_SUBSECONDS(Millisecond, 3)
+    ADD_SUBSECONDS(Microsecond, 6)
+    ADD_SUBSECONDS(Nanosecond, 9)
+#undef ADD_SUBSECONDS
+
 template <TimeWindowFunctionName type>
 struct TimeWindowImpl
 {
--- a/src/Functions/SubtractSubSeconds.cpp
+++ b/src/Functions/SubtractSubSeconds.cpp
@ -0,0 +1,28 @@
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionDateOrDateTimeAddInterval.h>
+
+
+namespace DB
+{
+
+/// Factory registration for the sub-second subtraction functions. Each alias instantiates
+/// FunctionDateOrDateTimeAddInterval with the corresponding Subtract*Impl type.
+using FunctionSubtractNanoseconds = FunctionDateOrDateTimeAddInterval<SubtractNanosecondsImpl>;
+void registerFunctionSubtractNanoseconds(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionSubtractNanoseconds>();
+}
+
+using FunctionSubtractMicroseconds = FunctionDateOrDateTimeAddInterval<SubtractMicrosecondsImpl>;
+void registerFunctionSubtractMicroseconds(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionSubtractMicroseconds>();
+}
+
+using FunctionSubtractMilliseconds = FunctionDateOrDateTimeAddInterval<SubtractMillisecondsImpl>;
+void registerFunctionSubtractMilliseconds(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionSubtractMilliseconds>();
+}
+
+}
--- a/src/Functions/TransformDateTime64.h
+++ b/src/Functions/TransformDateTime64.h
@ -13,7 +13,7 @@ namespace DB
 * * DateTime64 value and scale factor (2)
 * * DateTime64 broken down to components, result of execute is then re-assembled back into DateTime64 value (3)
 *
- * Suitable Transfotm-types are commonly used in Date/DateTime manipulation functions,
+ * Suitable Transform-types are commonly used in Date/DateTime manipulation functions,
 * and should implement static (or const) function with following signatures:
 * 1:
 *     R execute(Int64 whole_value, ... )
--- a/src/Functions/addSubSeconds.cpp
+++ b/src/Functions/addSubSeconds.cpp
@ -0,0 +1,28 @@
+#include <Functions/FunctionFactory.h>
+#include <Functions/FunctionDateOrDateTimeAddInterval.h>
+
+
+namespace DB
+{
+
+/// Factory registration for the sub-second addition functions. Each alias instantiates
+/// FunctionDateOrDateTimeAddInterval with the corresponding Add*Impl type.
+using FunctionAddNanoseconds = FunctionDateOrDateTimeAddInterval<AddNanosecondsImpl>;
+void registerFunctionAddNanoseconds(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionAddNanoseconds>();
+}
+
+using FunctionAddMicroseconds = FunctionDateOrDateTimeAddInterval<AddMicrosecondsImpl>;
+void registerFunctionAddMicroseconds(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionAddMicroseconds>();
+}
+
+using FunctionAddMilliseconds = FunctionDateOrDateTimeAddInterval<AddMillisecondsImpl>;
+void registerFunctionAddMilliseconds(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionAddMilliseconds>();
+}
+
+}
--- a/src/Functions/caseWithExpression.cpp
+++ b/src/Functions/caseWithExpression.cpp
@ -43,6 +43,9 @@ public:
        for (size_t i = 2; i < args.size() - 1; i += 2)
            dst_array_types.push_back(args[i]);

+        // Type of the ELSE branch
+        dst_array_types.push_back(args.back());
+
        return getLeastSupertype(dst_array_types);
    }

--- a/src/Functions/kostikConsistentHash.cpp
+++ b/src/Functions/kostikConsistentHash.cpp
@ -7,9 +7,9 @@ namespace DB
 {

 /// An O(1) time and space consistent hash algorithm by Konstantin Oblakov
-struct YandexConsistentHashImpl
+struct KostikConsistentHashImpl
 {
-    static constexpr auto name = "yandexConsistentHash";
+    static constexpr auto name = "kostikConsistentHash";

    using HashType = UInt64;
    /// Actually it supports UInt64, but it is efficient only if n <= 32768
@ -23,12 +23,12 @@ struct YandexConsistentHashImpl
    }
 };

-using FunctionYandexConsistentHash = FunctionConsistentHashImpl<YandexConsistentHashImpl>;
+using FunctionKostikConsistentHash = FunctionConsistentHashImpl<KostikConsistentHashImpl>;

-void registerFunctionYandexConsistentHash(FunctionFactory & factory)
+void registerFunctionKostikConsistentHash(FunctionFactory & factory)
 {
-    factory.registerFunction<FunctionYandexConsistentHash>();
+    factory.registerFunction<FunctionKostikConsistentHash>();
+    factory.registerAlias("yandexConsistentHash", "kostikConsistentHash");
 }

 }
-
--- a/src/Functions/registerFunctionsConsistentHashing.cpp
+++ b/src/Functions/registerFunctionsConsistentHashing.cpp
@ -2,12 +2,12 @@ namespace DB
 {
 class FunctionFactory;

-void registerFunctionYandexConsistentHash(FunctionFactory & factory);
+void registerFunctionKostikConsistentHash(FunctionFactory & factory);
 void registerFunctionJumpConsistentHash(FunctionFactory & factory);

 void registerFunctionsConsistentHashing(FunctionFactory & factory)
 {
-    registerFunctionYandexConsistentHash(factory);
+    registerFunctionKostikConsistentHash(factory);
    registerFunctionJumpConsistentHash(factory);
 }

--- a/src/Functions/registerFunctionsDateTime.cpp
+++ b/src/Functions/registerFunctionsDateTime.cpp
@ -11,6 +11,9 @@ void registerFunctionToDayOfWeek(FunctionFactory &);
 void registerFunctionToDayOfYear(FunctionFactory &);
 void registerFunctionToHour(FunctionFactory &);
 void registerFunctionToMinute(FunctionFactory &);
+void registerFunctionToStartOfNanosecond(FunctionFactory &);
+void registerFunctionToStartOfMicrosecond(FunctionFactory &);
+void registerFunctionToStartOfMillisecond(FunctionFactory &);
 void registerFunctionToStartOfSecond(FunctionFactory &);
 void registerFunctionToSecond(FunctionFactory &);
 void registerFunctionToStartOfDay(FunctionFactory &);
@ -47,6 +50,9 @@ void registerFunctionTimeSlots(FunctionFactory &);
 void registerFunctionToYYYYMM(FunctionFactory &);
 void registerFunctionToYYYYMMDD(FunctionFactory &);
 void registerFunctionToYYYYMMDDhhmmss(FunctionFactory &);
+void registerFunctionAddNanoseconds(FunctionFactory &);
+void registerFunctionAddMicroseconds(FunctionFactory &);
+void registerFunctionAddMilliseconds(FunctionFactory &);
 void registerFunctionAddSeconds(FunctionFactory &);
 void registerFunctionAddMinutes(FunctionFactory &);
 void registerFunctionAddHours(FunctionFactory &);
@ -55,6 +61,9 @@ void registerFunctionAddWeeks(FunctionFactory &);
 void registerFunctionAddMonths(FunctionFactory &);
 void registerFunctionAddQuarters(FunctionFactory &);
 void registerFunctionAddYears(FunctionFactory &);
+void registerFunctionSubtractNanoseconds(FunctionFactory &);
+void registerFunctionSubtractMicroseconds(FunctionFactory &);
+void registerFunctionSubtractMilliseconds(FunctionFactory &);
 void registerFunctionSubtractSeconds(FunctionFactory &);
 void registerFunctionSubtractMinutes(FunctionFactory &);
 void registerFunctionSubtractHours(FunctionFactory &);
@ -93,6 +102,9 @@ void registerFunctionsDateTime(FunctionFactory & factory)
    registerFunctionToStartOfMonth(factory);
    registerFunctionToStartOfQuarter(factory);
    registerFunctionToStartOfYear(factory);
+    registerFunctionToStartOfNanosecond(factory);
+    registerFunctionToStartOfMicrosecond(factory);
+    registerFunctionToStartOfMillisecond(factory);
    registerFunctionToStartOfSecond(factory);
    registerFunctionToStartOfMinute(factory);
    registerFunctionToStartOfFiveMinute(factory);
@ -119,6 +131,9 @@ void registerFunctionsDateTime(FunctionFactory & factory)
    registerFunctionToYYYYMM(factory);
    registerFunctionToYYYYMMDD(factory);
    registerFunctionToYYYYMMDDhhmmss(factory);
+    registerFunctionAddNanoseconds(factory);
+    registerFunctionAddMicroseconds(factory);
+    registerFunctionAddMilliseconds(factory);
    registerFunctionAddSeconds(factory);
    registerFunctionAddMinutes(factory);
    registerFunctionAddHours(factory);
@ -127,6 +142,9 @@ void registerFunctionsDateTime(FunctionFactory & factory)
    registerFunctionAddMonths(factory);
    registerFunctionAddQuarters(factory);
    registerFunctionAddYears(factory);
+    registerFunctionSubtractNanoseconds(factory);
+    registerFunctionSubtractMicroseconds(factory);
+    registerFunctionSubtractMilliseconds(factory);
    registerFunctionSubtractSeconds(factory);
    registerFunctionSubtractMinutes(factory);
    registerFunctionSubtractHours(factory);
--- a/src/Functions/toStartOfInterval.cpp
+++ b/src/Functions/toStartOfInterval.cpp
@ -33,184 +33,273 @@ namespace
    template <>
    struct Transform<IntervalKind::Year>
    {
-        static constexpr auto name = function_name;
-
-        static UInt16 execute(UInt16 d, UInt64 years, const DateLUTImpl & time_zone)
+        static UInt16 execute(UInt16 d, Int64 years, const DateLUTImpl & time_zone, Int64)
        {
            return time_zone.toStartOfYearInterval(DayNum(d), years);
        }

-        static UInt16 execute(Int32 d, UInt64 years, const DateLUTImpl & time_zone)
+        static UInt16 execute(Int32 d, Int64 years, const DateLUTImpl & time_zone, Int64)
        {
            return time_zone.toStartOfYearInterval(ExtendedDayNum(d), years);
        }

-        static UInt16 execute(UInt32 t, UInt64 years, const DateLUTImpl & time_zone)
+        static UInt16 execute(UInt32 t, Int64 years, const DateLUTImpl & time_zone, Int64)
        {
            return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years);
        }

-        static UInt16 execute(Int64 t, UInt64 years, const DateLUTImpl & time_zone)
+        static UInt16 execute(Int64 t, Int64 years, const DateLUTImpl & time_zone, Int64 scale_multiplier)
        {
-            return time_zone.toStartOfYearInterval(time_zone.toDayNum(t), years);
+            return time_zone.toStartOfYearInterval(time_zone.toDayNum(t / scale_multiplier), years);
        }
    };

    template <>
    struct Transform<IntervalKind::Quarter>
    {
-        static constexpr auto name = function_name;
-
-        static UInt16 execute(UInt16 d, UInt64 quarters, const DateLUTImpl & time_zone)
+        static UInt16 execute(UInt16 d, Int64 quarters, const DateLUTImpl & time_zone, Int64)
        {
            return time_zone.toStartOfQuarterInterval(DayNum(d), quarters);
        }

-        static UInt16 execute(Int32 d, UInt64 quarters, const DateLUTImpl & time_zone)
+        static UInt16 execute(Int32 d, Int64 quarters, const DateLUTImpl & time_zone, Int64)
        {
            return time_zone.toStartOfQuarterInterval(ExtendedDayNum(d), quarters);
        }

-        static UInt16 execute(UInt32 t, UInt64 quarters, const DateLUTImpl & time_zone)
+        static UInt16 execute(UInt32 t, Int64 quarters, const DateLUTImpl & time_zone, Int64)
        {
            return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters);
        }

-        static UInt16 execute(Int64 t, UInt64 quarters, const DateLUTImpl & time_zone)
+        static UInt16 execute(Int64 t, Int64 quarters, const DateLUTImpl & time_zone, Int64 scale_multiplier)
        {
-            return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t), quarters);
+            return time_zone.toStartOfQuarterInterval(time_zone.toDayNum(t / scale_multiplier), quarters);
        }
    };

    template <>
    struct Transform<IntervalKind::Month>
    {
-        static constexpr auto name = function_name;
-
-        static UInt16 execute(UInt16 d, UInt64 months, const DateLUTImpl & time_zone)
+        static UInt16 execute(UInt16 d, Int64 months, const DateLUTImpl & time_zone, Int64)
        {
            return time_zone.toStartOfMonthInterval(DayNum(d), months);
        }

-        static UInt16 execute(Int32 d, UInt64 months, const DateLUTImpl & time_zone)
+        static UInt16 execute(Int32 d, Int64 months, const DateLUTImpl & time_zone, Int64)
        {
            return time_zone.toStartOfMonthInterval(ExtendedDayNum(d), months);
        }

-        static UInt16 execute(UInt32 t, UInt64 months, const DateLUTImpl & time_zone)
+        static UInt16 execute(UInt32 t, Int64 months, const DateLUTImpl & time_zone, Int64)
        {
            return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months);
        }

-        static UInt16 execute(Int64 t, UInt64 months, const DateLUTImpl & time_zone)
+        static UInt16 execute(Int64 t, Int64 months, const DateLUTImpl & time_zone, Int64 scale_multiplier)
        {
-            return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t), months);
+            return time_zone.toStartOfMonthInterval(time_zone.toDayNum(t / scale_multiplier), months);
        }
    };

    template <>
    struct Transform<IntervalKind::Week>
    {
-        static constexpr auto name = function_name;
-
-        static UInt16 execute(UInt16 d, UInt64 weeks, const DateLUTImpl & time_zone)
+        static UInt16 execute(UInt16 d, Int64 weeks, const DateLUTImpl & time_zone, Int64)
        {
            return time_zone.toStartOfWeekInterval(DayNum(d), weeks);
        }

-        static UInt16 execute(Int32 d, UInt64 weeks, const DateLUTImpl & time_zone)
+        static UInt16 execute(Int32 d, Int64 weeks, const DateLUTImpl & time_zone, Int64)
        {
            return time_zone.toStartOfWeekInterval(ExtendedDayNum(d), weeks);
        }

-        static UInt16 execute(UInt32 t, UInt64 weeks, const DateLUTImpl & time_zone)
+        static UInt16 execute(UInt32 t, Int64 weeks, const DateLUTImpl & time_zone, Int64)
        {
            return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks);
        }

-        static UInt16 execute(Int64 t, UInt64 weeks, const DateLUTImpl & time_zone)
+        static UInt16 execute(Int64 t, Int64 weeks, const DateLUTImpl & time_zone, Int64 scale_multiplier)
        {
-            return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t), weeks);
+            return time_zone.toStartOfWeekInterval(time_zone.toDayNum(t / scale_multiplier), weeks);
        }
    };

    template <>
    struct Transform<IntervalKind::Day>
    {
-        static constexpr auto name = function_name;
-
-        static UInt32 execute(UInt16 d, UInt64 days, const DateLUTImpl & time_zone)
+        static UInt32 execute(UInt16 d, Int64 days, const DateLUTImpl & time_zone, Int64)
        {
            return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days);
        }

-        static UInt32 execute(Int32 d, UInt64 days, const DateLUTImpl & time_zone)
+        static UInt32 execute(Int32 d, Int64 days, const DateLUTImpl & time_zone, Int64)
        {
            return time_zone.toStartOfDayInterval(ExtendedDayNum(d), days);
        }

-        static UInt32 execute(UInt32 t, UInt64 days, const DateLUTImpl & time_zone)
+        static UInt32 execute(UInt32 t, Int64 days, const DateLUTImpl & time_zone, Int64)
        {
            return time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days);
        }

-        static UInt32 execute(Int64 t, UInt64 days, const DateLUTImpl & time_zone)
+        static Int64 execute(Int64 t, Int64 days, const DateLUTImpl & time_zone, Int64 scale_multiplier)
        {
-            return time_zone.toStartOfDayInterval(time_zone.toDayNum(t), days);
+            return time_zone.toStartOfDayInterval(time_zone.toDayNum(t / scale_multiplier), days);
        }
    };

    template <>
    struct Transform<IntervalKind::Hour>
    {
-        static constexpr auto name = function_name;
+        static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }

-        static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
-        static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
-        static UInt32 execute(UInt32 t, UInt64 hours, const DateLUTImpl & time_zone) { return time_zone.toStartOfHourInterval(t, hours); }
-        static UInt32 execute(Int64 t, UInt64 hours, const DateLUTImpl & time_zone) { return time_zone.toStartOfHourInterval(t, hours); }
+        static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
+
+        static UInt32 execute(UInt32 t, Int64 hours, const DateLUTImpl & time_zone, Int64)
+        {
+            return time_zone.toStartOfHourInterval(t, hours);
+        }
+
+        static UInt32 execute(Int64 t, Int64 hours, const DateLUTImpl & time_zone, Int64 scale_multiplier)
+        {
+            return time_zone.toStartOfHourInterval(t / scale_multiplier, hours);
+        }
    };

    template <>
    struct Transform<IntervalKind::Minute>
    {
-        static constexpr auto name = function_name;
+        static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }

-        static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
+        static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }

-        static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
-
-        static UInt32 execute(UInt32 t, UInt64 minutes, const DateLUTImpl & time_zone)
+        static UInt32 execute(UInt32 t, Int64 minutes, const DateLUTImpl & time_zone, Int64)
        {
            return time_zone.toStartOfMinuteInterval(t, minutes);
        }

-        static UInt32 execute(Int64 t, UInt64 minutes, const DateLUTImpl & time_zone)
+        static UInt32 execute(Int64 t, Int64 minutes, const DateLUTImpl & time_zone, Int64 scale_multiplier)
        {
-            return time_zone.toStartOfMinuteInterval(t, minutes);
+            return time_zone.toStartOfMinuteInterval(t / scale_multiplier, minutes);
        }
    };

    template <>
    struct Transform<IntervalKind::Second>
    {
-        static constexpr auto name = function_name;
+        static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }

-        static UInt32 execute(UInt16, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
+        static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }

-        static UInt32 execute(Int32, UInt64, const DateLUTImpl &) { return dateIsNotSupported(function_name); }
-
-        static UInt32 execute(UInt32 t, UInt64 seconds, const DateLUTImpl & time_zone)
+        static UInt32 execute(UInt32 t, Int64 seconds, const DateLUTImpl & time_zone, Int64)
        {
            return time_zone.toStartOfSecondInterval(t, seconds);
        }

-        static Int64 execute(Int64 t, UInt64 seconds, const DateLUTImpl & time_zone)
+        static UInt32 execute(Int64 t, Int64 seconds, const DateLUTImpl & time_zone, Int64 scale_multiplier)
        {
-            return time_zone.toStartOfSecondInterval(t, seconds);
+            return time_zone.toStartOfSecondInterval(t / scale_multiplier, seconds);
        }
    };

+    template <>
+    struct Transform<IntervalKind::Millisecond>
+    {
+        /// The UInt16, Int32 and UInt32 overloads only report that the argument type is not supported.
+        static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
+        static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
+        static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { return dateTimeIsNotSupported(function_name); }
+
+        /// Floors `t` to a multiple of `milliseconds` (for a positive interval), also when `t` is negative.
+        /// NOTE(review): `scale_multiplier` looks like the number of ticks of `t` per second - confirm at the call site.
+        static Int64 execute(Int64 t, Int64 milliseconds, const DateLUTImpl &, Int64 scale_multiplier)
+        {
+            constexpr Int64 millis_per_second = 1000;
+            const bool non_negative = likely(t >= 0);
+            if (scale_multiplier < millis_per_second)
+            {
+                /// Multiplier below 1000: multiply the value up before rounding.
+                const Int64 widened = t * (millis_per_second / scale_multiplier);
+                const Int64 buckets = non_negative ? widened / milliseconds : (widened + 1) / milliseconds - 1;
+                return buckets * milliseconds;
+            }
+
+            if (scale_multiplier > millis_per_second)
+            {
+                /// Multiplier above 1000: the additional division removes the surplus precision.
+                const Int64 narrowing = scale_multiplier / millis_per_second;
+                const Int64 buckets = non_negative ? t / milliseconds / narrowing : (t + 1) / milliseconds / narrowing - 1;
+                return buckets * milliseconds;
+            }
+
+            const Int64 buckets = non_negative ? t / milliseconds : (t + 1) / milliseconds - 1;
+            return buckets * milliseconds;
+        }
+    };
+
+    template <>
+    struct Transform<IntervalKind::Microsecond>
+    {
+        /// The UInt16, Int32 and UInt32 overloads only report that the argument type is not supported.
+        static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
+        static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
+        static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { return dateTimeIsNotSupported(function_name); }
+
+        /// Floors `t` to a multiple of `microseconds` (for a positive interval), also when `t` is negative.
+        /// NOTE(review): `scale_multiplier` looks like the number of ticks of `t` per second - confirm at the call site.
+        static Int64 execute(Int64 t, Int64 microseconds, const DateLUTImpl &, Int64 scale_multiplier)
+        {
+            constexpr Int64 micros_per_second = 1000000;
+            const bool non_negative = likely(t >= 0);
+            if (scale_multiplier < micros_per_second)
+            {
+                /// Multiplier below 1000000: multiply the value up before rounding.
+                const Int64 widened = t * (micros_per_second / scale_multiplier);
+                const Int64 buckets = non_negative ? widened / microseconds : (widened + 1) / microseconds - 1;
+                return buckets * microseconds;
+            }
+
+            if (scale_multiplier > micros_per_second)
+            {
+                /// Multiplier above 1000000: the additional division removes the surplus precision.
+                const Int64 narrowing = scale_multiplier / micros_per_second;
+                const Int64 buckets = non_negative ? t / microseconds / narrowing : (t + 1) / microseconds / narrowing - 1;
+                return buckets * microseconds;
+            }
+
+            const Int64 buckets = non_negative ? t / microseconds : (t + 1) / microseconds - 1;
+            return buckets * microseconds;
+        }
+    };
+
+    template <>
+    struct Transform<IntervalKind::Nanosecond>
+    {
+        /// The UInt16, Int32 and UInt32 overloads only report that the argument type is not supported.
+        static UInt32 execute(UInt16, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
+        static UInt32 execute(Int32, Int64, const DateLUTImpl &, Int64) { return dateIsNotSupported(function_name); }
+        static UInt32 execute(UInt32, Int64, const DateLUTImpl &, Int64) { return dateTimeIsNotSupported(function_name); }
+
+        /// Floors `t` to a multiple of `nanoseconds` (for a positive interval), also when `t` is negative.
+        static Int64 execute(Int64 t, Int64 nanoseconds, const DateLUTImpl &, Int64 scale_multiplier)
+        {
+            constexpr Int64 nanos_per_second = 1000000000;
+            const bool non_negative = likely(t >= 0);
+            if (scale_multiplier < nanos_per_second)
+            {
+                /// Multiplier below 1000000000: multiply the value up before rounding.
+                const Int64 widened = t * (nanos_per_second / scale_multiplier);
+                const Int64 buckets = non_negative ? widened / nanoseconds : (widened + 1) / nanoseconds - 1;
+                return buckets * nanoseconds;
+            }
+
+            /// Any other multiplier is rounded without rescaling; there is no separate branch for larger ones.
+            const Int64 buckets = non_negative ? t / nanoseconds : (t + 1) / nanoseconds - 1;
+            return buckets * nanoseconds;
+        }
+    };

 class FunctionToStartOfInterval : public IFunction
 {
@ -240,6 +329,7 @@ public:

        const DataTypeInterval * interval_type = nullptr;
        bool result_type_is_date = false;
+        bool result_type_is_datetime = false;
        auto check_interval_argument = [&]
        {
            interval_type = checkAndGetDataType<DataTypeInterval>(arguments[1].type.get());
@ -251,6 +341,8 @@ public:
            result_type_is_date = (interval_type->getKind() == IntervalKind::Year)
                || (interval_type->getKind() == IntervalKind::Quarter) || (interval_type->getKind() == IntervalKind::Month)
                || (interval_type->getKind() == IntervalKind::Week);
+            result_type_is_datetime = (interval_type->getKind() == IntervalKind::Day) || (interval_type->getKind() == IntervalKind::Hour)
+                || (interval_type->getKind() == IntervalKind::Minute) || (interval_type->getKind() == IntervalKind::Second);
        };

        auto check_timezone_argument = [&]
@ -263,7 +355,7 @@ public:
            if (first_argument_is_date && result_type_is_date)
                throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                    "The timezone argument of function {} with interval type {} is allowed only when the 1st argument "
-                    "has the type DateTime",
+                    "has the type DateTime or DateTime64",
                        getName(), interval_type->getKind().toString());
        };

@ -288,19 +380,33 @@ public:

        if (result_type_is_date)
            return std::make_shared<DataTypeDate>();
-        else
+        else if (result_type_is_datetime)
            return std::make_shared<DataTypeDateTime>(extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
+        else
+        {
+            auto scale = 0;
+
+            if (interval_type->getKind() == IntervalKind::Nanosecond)
+                scale = 9;
+            else if (interval_type->getKind() == IntervalKind::Microsecond)
+                scale = 6;
+            else if (interval_type->getKind() == IntervalKind::Millisecond)
+                scale = 3;
+
+            return std::make_shared<DataTypeDateTime64>(scale, extractTimeZoneNameFromFunctionArguments(arguments, 2, 0));
+        }
+
    }

    bool useDefaultImplementationForConstants() const override { return true; }
    ColumnNumbers getArgumentsThatAreAlwaysConstant() const override { return {1, 2}; }

-    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t /* input_rows_count */) const override
+    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t /* input_rows_count */) const override
    {
        const auto & time_column = arguments[0];
        const auto & interval_column = arguments[1];
        const auto & time_zone = extractTimeZoneFromFunctionArguments(arguments, 2, 0);
-        auto result_column = dispatchForColumns(time_column, interval_column, time_zone);
+        auto result_column = dispatchForColumns(time_column, interval_column, result_type, time_zone);
        return result_column;
    }

@ -316,33 +422,36 @@ public:

 private:
    ColumnPtr dispatchForColumns(
-        const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const DateLUTImpl & time_zone) const
+        const ColumnWithTypeAndName & time_column, const ColumnWithTypeAndName & interval_column, const DataTypePtr & result_type, const DateLUTImpl & time_zone) const
    {
        const auto & from_datatype = *time_column.type.get();
        const auto which_type = WhichDataType(from_datatype);
+
+        if (which_type.isDateTime64())
+        {
+            const auto * time_column_vec = checkAndGetColumn<DataTypeDateTime64::ColumnType>(time_column.column.get());
+            auto scale = assert_cast<const DataTypeDateTime64 &>(from_datatype).getScale();
+
+            if (time_column_vec)
+                return dispatchForIntervalColumn(assert_cast<const DataTypeDateTime64&>(from_datatype), *time_column_vec, interval_column, result_type, time_zone, scale);
+        }
        if (which_type.isDateTime())
        {
            const auto * time_column_vec = checkAndGetColumn<ColumnUInt32>(time_column.column.get());
            if (time_column_vec)
-                return dispatchForIntervalColumn(assert_cast<const DataTypeDateTime&>(from_datatype), *time_column_vec, interval_column, time_zone);
+                return dispatchForIntervalColumn(assert_cast<const DataTypeDateTime&>(from_datatype), *time_column_vec, interval_column, result_type, time_zone);
        }
        if (which_type.isDate())
        {
            const auto * time_column_vec = checkAndGetColumn<ColumnUInt16>(time_column.column.get());
            if (time_column_vec)
-                return dispatchForIntervalColumn(assert_cast<const DataTypeDate&>(from_datatype), *time_column_vec, interval_column, time_zone);
+                return dispatchForIntervalColumn(assert_cast<const DataTypeDate&>(from_datatype), *time_column_vec, interval_column, result_type, time_zone);
        }
        if (which_type.isDate32())
        {
            const auto * time_column_vec = checkAndGetColumn<ColumnInt32>(time_column.column.get());
            if (time_column_vec)
-                return dispatchForIntervalColumn(assert_cast<const DataTypeDate32&>(from_datatype), *time_column_vec, interval_column, time_zone);
-        }
-        if (which_type.isDateTime64())
-        {
-            const auto * time_column_vec = checkAndGetColumn<DataTypeDateTime64::ColumnType>(time_column.column.get());
-            if (time_column_vec)
-                return dispatchForIntervalColumn(assert_cast<const DataTypeDateTime64&>(from_datatype), *time_column_vec, interval_column, time_zone);
+                return dispatchForIntervalColumn(assert_cast<const DataTypeDate32&>(from_datatype), *time_column_vec, interval_column, result_type, time_zone);
        }
        throw Exception(
            "Illegal column for first argument of function " + getName() + ". Must contain dates or dates with time",
@ -351,7 +460,8 @@ private:

    template <typename ColumnType, typename FromDataType>
    ColumnPtr dispatchForIntervalColumn(
-        const FromDataType & from, const ColumnType & time_column, const ColumnWithTypeAndName & interval_column, const DateLUTImpl & time_zone) const
+        const FromDataType & from, const ColumnType & time_column, const ColumnWithTypeAndName & interval_column,
+        const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale = 1) const
    {
        const auto * interval_type = checkAndGetDataType<DataTypeInterval>(interval_column.type.get());
        if (!interval_type)
@ -368,49 +478,52 @@ private:

        switch (interval_type->getKind())
        {
+            case IntervalKind::Nanosecond:
+                return execute<FromDataType, DataTypeDateTime64, IntervalKind::Nanosecond>(from, time_column, num_units, result_type, time_zone, scale);
+            case IntervalKind::Microsecond:
+                return execute<FromDataType, DataTypeDateTime64, IntervalKind::Microsecond>(from, time_column, num_units, result_type, time_zone, scale);
+            case IntervalKind::Millisecond:
+                return execute<FromDataType, DataTypeDateTime64, IntervalKind::Millisecond>(from, time_column, num_units, result_type, time_zone, scale);
            case IntervalKind::Second:
-                return execute<FromDataType, UInt32, IntervalKind::Second>(from, time_column, num_units, time_zone);
+                return execute<FromDataType, DataTypeDateTime, IntervalKind::Second>(from, time_column, num_units, result_type, time_zone, scale);
            case IntervalKind::Minute:
-                return execute<FromDataType, UInt32, IntervalKind::Minute>(from, time_column, num_units, time_zone);
+                return execute<FromDataType, DataTypeDateTime, IntervalKind::Minute>(from, time_column, num_units, result_type, time_zone, scale);
            case IntervalKind::Hour:
-                return execute<FromDataType, UInt32, IntervalKind::Hour>(from, time_column, num_units, time_zone);
+                return execute<FromDataType, DataTypeDateTime, IntervalKind::Hour>(from, time_column, num_units, result_type, time_zone, scale);
            case IntervalKind::Day:
-                return execute<FromDataType, UInt32, IntervalKind::Day>(from, time_column, num_units, time_zone);
+                return execute<FromDataType, DataTypeDateTime, IntervalKind::Day>(from, time_column, num_units, result_type, time_zone, scale);
            case IntervalKind::Week:
-                return execute<FromDataType, UInt16, IntervalKind::Week>(from, time_column, num_units, time_zone);
+                return execute<FromDataType, DataTypeDate, IntervalKind::Week>(from, time_column, num_units, result_type, time_zone, scale);
            case IntervalKind::Month:
-                return execute<FromDataType, UInt16, IntervalKind::Month>(from, time_column, num_units, time_zone);
+                return execute<FromDataType, DataTypeDate, IntervalKind::Month>(from, time_column, num_units, result_type, time_zone, scale);
            case IntervalKind::Quarter:
-                return execute<FromDataType, UInt16, IntervalKind::Quarter>(from, time_column, num_units, time_zone);
+                return execute<FromDataType, DataTypeDate, IntervalKind::Quarter>(from, time_column, num_units, result_type, time_zone, scale);
            case IntervalKind::Year:
-                return execute<FromDataType, UInt16, IntervalKind::Year>(from, time_column, num_units, time_zone);
+                return execute<FromDataType, DataTypeDate, IntervalKind::Year>(from, time_column, num_units, result_type, time_zone, scale);
        }

        __builtin_unreachable();
    }

-
-    template <typename FromDataType, typename ToType, IntervalKind::Kind unit, typename ColumnType>
-    ColumnPtr execute(const FromDataType & from_datatype, const ColumnType & time_column, UInt64 num_units, const DateLUTImpl & time_zone) const
+    template <typename FromDataType, typename ToDataType, IntervalKind::Kind unit, typename ColumnType>
+    ColumnPtr execute(const FromDataType &, const ColumnType & time_column_type, Int64 num_units, const DataTypePtr & result_type, const DateLUTImpl & time_zone, const UInt16 scale) const
    {
-        const auto & time_data = time_column.getData();
-        size_t size = time_column.size();
-        auto result = ColumnVector<ToType>::create();
-        auto & result_data = result->getData();
+        using ToColumnType = typename ToDataType::ColumnType;
+
+        const auto & time_data = time_column_type.getData();
+        size_t size = time_data.size();
+
+        auto result_col = result_type->createColumn();
+        auto *col_to = assert_cast<ToColumnType *>(result_col.get());
+        auto & result_data = col_to->getData();
        result_data.resize(size);

-        if constexpr (std::is_same_v<FromDataType, DataTypeDateTime64>)
-        {
-            const auto transform = TransformDateTime64<Transform<unit>>{from_datatype.getScale()};
+        Int64 scale_multiplier = DecimalUtils::scaleMultiplier<DateTime64>(scale);
+
        for (size_t i = 0; i != size; ++i)
-                result_data[i] = transform.execute(time_data[i], num_units, time_zone);
-        }
-        else
-        {
-            for (size_t i = 0; i != size; ++i)
-                result_data[i] = Transform<unit>::execute(time_data[i], num_units, time_zone);
-        }
-        return result;
+            result_data[i] = Transform<unit>::execute(time_data[i], num_units, time_zone, scale_multiplier);
+
+        return result_col;
    }
 };

--- a/src/Functions/toStartOfSubsecond.cpp
+++ b/src/Functions/toStartOfSubsecond.cpp
@ -0,0 +1,30 @@
+#include <Functions/FunctionFactory.h>
+#include <Functions/DateTimeTransforms.h>
+#include <Functions/FunctionDateOrDateTimeToSomething.h>
+
+
+namespace DB
+{
+
+/// Sub-second "toStartOf..." functions; each one produces a DateTime64.
+using FunctionToStartOfMillisecond = FunctionDateOrDateTimeToSomething<DataTypeDateTime64, ToStartOfMillisecondImpl>;
+using FunctionToStartOfMicrosecond = FunctionDateOrDateTimeToSomething<DataTypeDateTime64, ToStartOfMicrosecondImpl>;
+using FunctionToStartOfNanosecond = FunctionDateOrDateTimeToSomething<DataTypeDateTime64, ToStartOfNanosecondImpl>;
+
+/// Registers the millisecond variant in the function factory.
+void registerFunctionToStartOfMillisecond(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionToStartOfMillisecond>();
+}
+
+/// Registers the microsecond variant in the function factory.
+void registerFunctionToStartOfMicrosecond(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionToStartOfMicrosecond>();
+}
+
+/// Registers the nanosecond variant in the function factory.
+void registerFunctionToStartOfNanosecond(FunctionFactory & factory)
+{
+    factory.registerFunction<FunctionToStartOfNanosecond>();
+}
+
+}
--- a/src/IO/IOThreadPool.cpp
+++ b/src/IO/IOThreadPool.cpp
@ -0,0 +1,34 @@
+#include <IO/IOThreadPool.h>
+#include "Core/Field.h"
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+}
+
+/// The single pool instance; stays empty until initialize() is called.
+std::unique_ptr<ThreadPool> IOThreadPool::instance;
+
+/// Creates the pool with the given limits. Throws LOGICAL_ERROR if the pool already exists.
+void IOThreadPool::initialize(size_t max_threads, size_t max_free_threads, size_t queue_size)
+{
+    if (instance)
+        throw Exception(ErrorCodes::LOGICAL_ERROR, "The IO thread pool is initialized twice");
+
+    instance = std::make_unique<ThreadPool>(max_threads, max_free_threads, queue_size, false /*shutdown_on_exception*/);
+}
+
+/// Returns the pool. Throws LOGICAL_ERROR if initialize() has not been called yet.
+ThreadPool & IOThreadPool::get()
+{
+    if (instance)
+        return *instance;
+
+    throw Exception(ErrorCodes::LOGICAL_ERROR, "The IO thread pool is not initialized");
+}
+
+}
--- a/src/IO/IOThreadPool.h
+++ b/src/IO/IOThreadPool.h
@ -0,0 +1,20 @@
+#pragma once
+
+#include <Common/ThreadPool.h>
+
+namespace DB
+{
+
+/// Holder of the single ThreadPool used for IO.
+/// initialize() creates the pool, get() hands out a reference to it.
+class IOThreadPool
+{
+public:
+    static void initialize(size_t max_threads, size_t max_free_threads, size_t queue_size);
+    static ThreadPool & get();
+
+private:
+    static std::unique_ptr<ThreadPool> instance;
+};
+
+}
--- a/src/IO/ParallelReadBuffer.cpp
+++ b/src/IO/ParallelReadBuffer.cpp
@ -0,0 +1,290 @@
+#include <IO/ParallelReadBuffer.h>
+#include <base/logger_useful.h>
+#include <Poco/Logger.h>
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int LOGICAL_ERROR;
+    extern const int CANNOT_SEEK_THROUGH_FILE;
+    extern const int SEEK_POSITION_OUT_OF_BOUND;
+
+}
+
+/// Stores the reader factory, the thread pool and the worker callbacks,
+/// then immediately starts up to `max_working_readers_` background readers.
+ParallelReadBuffer::ParallelReadBuffer(
+    std::unique_ptr<ReadBufferFactory> reader_factory_,
+    ThreadPool * pool_,
+    size_t max_working_readers_,
+    WorkerSetup worker_setup_,
+    WorkerCleanup worker_cleanup_)
+    : SeekableReadBufferWithSize(nullptr, 0)
+    , pool(pool_)
+    , max_working_readers(max_working_readers_)
+    , reader_factory(std::move(reader_factory_))
+    , worker_setup(std::move(worker_setup_))
+    , worker_cleanup(std::move(worker_cleanup_))
+{
+    /// addReaders() requires the buffer mutex to be held and takes the lock as an argument.
+    std::unique_lock<std::mutex> guard(mutex);
+    addReaders(guard);
+}
+
+/// Asks the factory for the next reader and schedules a background job that drains it.
+/// Returns false when the factory has no more readers, true when a job was scheduled.
+/// The (unused) lock parameter documents that the caller must hold `mutex`.
+/// Rethrows whatever ThreadPool::scheduleOrThrow throws; in that case the worker is not kept.
+bool ParallelReadBuffer::addReaderToPool(std::unique_lock<std::mutex> & /*buffer_lock*/)
+{
+    auto reader = reader_factory->getReader();
+    if (!reader)
+    {
+        return false;
+    }
+
+    auto worker = read_workers.emplace_back(std::make_shared<ReadWorker>(std::move(reader)));
+
+    /// Count the job before it is scheduled (we are under `mutex` here). If the counter were
+    /// incremented by the job itself, finishAndWait() could observe zero while the job is still
+    /// queued and let the buffer be destroyed underneath it.
+    ++active_working_reader;
+
+    try
+    {
+        /// Only `this` and the worker are captured: the job outlives this stack frame,
+        /// so nothing may be captured by reference.
+        pool->scheduleOrThrow(
+            [this, worker = std::move(worker)]() mutable
+            {
+                ThreadStatus thread_status;
+
+                SCOPE_EXIT({
+                    /// The callbacks default to empty std::function objects; calling an empty one throws.
+                    if (worker_cleanup)
+                        worker_cleanup(thread_status);
+
+                    std::lock_guard lock{mutex};
+                    --active_working_reader;
+                    if (active_working_reader == 0)
+                    {
+                        readers_done.notify_all();
+                    }
+                });
+
+                if (worker_setup)
+                    worker_setup(thread_status);
+
+                readerThreadFunction(std::move(worker));
+            });
+    }
+    catch (...)
+    {
+        /// The job was never queued: undo the bookkeeping so nobody waits for a worker that will never run.
+        --active_working_reader;
+        read_workers.pop_back();
+        throw;
+    }
+
+    return true;
+}
+
+/// Keeps scheduling readers until `max_working_readers` workers are queued
+/// or the factory has no more readers to give.
+void ParallelReadBuffer::addReaders(std::unique_lock<std::mutex> & buffer_lock)
+{
+    for (;;)
+    {
+        if (read_workers.size() >= max_working_readers)
+            return;
+
+        if (!addReaderToPool(buffer_lock))
+            return;
+    }
+}
+
+/// Moves the read position to the absolute `offset` and returns it.
+/// Only SEEK_SET is accepted; a negative offset is rejected.
+/// Three strategies are tried in order:
+///  1. the offset lies inside the current working buffer - only `pos` is moved;
+///  2. a queued worker's range contains the offset - earlier workers are cancelled and
+///     the segments of that worker are skipped until the one containing the offset;
+///  3. otherwise all workers are stopped, the factory is repositioned and new readers are started.
+off_t ParallelReadBuffer::seek(off_t offset, int whence)
+{
+    if (whence != SEEK_SET)
+        throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
+
+    if (offset < 0)
+        throw Exception("Seek position is out of bounds. Offset: " + std::to_string(offset), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);
+
+    /// `current_position` is the position right after the working buffer, so the buffer covers
+    /// [current_position - working_buffer.size(), current_position).
+    if (!working_buffer.empty() && static_cast<size_t>(offset) >= current_position - working_buffer.size() && offset < current_position)
+    {
+        pos = working_buffer.end() - (current_position - offset);
+        assert(pos >= working_buffer.begin());
+        assert(pos <= working_buffer.end());
+
+        return offset;
+    }
+
+    std::unique_lock lock{mutex};
+    const auto offset_is_in_range
+        = [&](const auto & range) { return static_cast<size_t>(offset) >= range.left && static_cast<size_t>(offset) <= *range.right; };
+
+    /// Cancel and drop front workers until one whose range contains the offset is found.
+    /// For a backward seek (offset < current_position) every worker is dropped.
+    while (!read_workers.empty() && (offset < current_position || !offset_is_in_range(read_workers.front()->range)))
+    {
+        read_workers.front()->cancel = true;
+        read_workers.pop_front();
+    }
+
+    if (!read_workers.empty())
+    {
+        auto & front_worker = read_workers.front();
+        auto & segments = front_worker->segments;
+        /// ReadWorker::nextSegment() advances range.left by every segment it hands out,
+        /// so range.left is the position of the first byte not yet taken from this worker.
+        current_position = front_worker->range.left;
+        while (true)
+        {
+            next_condvar.wait(lock, [&] { return emergency_stop || !segments.empty(); });
+
+            if (emergency_stop)
+                handleEmergencyStop();
+
+            auto next_segment = front_worker->nextSegment();
+            /// The offset falls into this segment: make it the working buffer and position `pos` inside it.
+            if (static_cast<size_t>(offset) < current_position + next_segment.size())
+            {
+                current_segment = std::move(next_segment);
+                working_buffer = internal_buffer = Buffer(current_segment.data(), current_segment.data() + current_segment.size());
+                current_position += current_segment.size();
+                pos = working_buffer.end() - (current_position - offset);
+                addReaders(lock);
+                return offset;
+            }
+
+            /// The whole segment lies before the offset: skip it.
+            current_position += next_segment.size();
+        }
+    }
+
+    /// No queued worker covers the offset. finishAndWait() takes the mutex itself, so release it first.
+    lock.unlock();
+    finishAndWait();
+
+    reader_factory->seek(offset, whence);
+    all_completed = false;
+    read_workers.clear();
+
+    current_position = offset;
+    resetWorkingBuffer();
+
+    /// finishAndWait() raised the flag to stop the workers; clear it before starting new ones.
+    emergency_stop = false;
+
+    lock.lock();
+    addReaders(lock);
+    return offset;
+}
+
+/// Returns the total size reported by the reader factory; the query is made under the buffer mutex.
+std::optional<size_t> ParallelReadBuffer::getTotalSize()
+{
+    std::lock_guard guard(mutex);
+    return reader_factory->getTotalSize();
+}
+
+/// Position of the next byte to be read: `current_position` minus the bytes still available in the buffer.
+off_t ParallelReadBuffer::getPosition()
+{
+    const auto unread_bytes = available();
+    return current_position - unread_bytes;
+}
+
+/// True when the front worker has finished or has at least one segment queued.
+/// Must not be called with an empty worker deque.
+bool ParallelReadBuffer::currentWorkerReady() const
+{
+    assert(!read_workers.empty());
+    const auto & front_worker = *read_workers.front();
+    return front_worker.finished || !front_worker.segments.empty();
+}
+
+/// True when the front worker has finished and all of its segments were already taken.
+/// Must not be called with an empty worker deque.
+bool ParallelReadBuffer::currentWorkerCompleted() const
+{
+    assert(!read_workers.empty());
+    const auto & front_worker = *read_workers.front();
+    return front_worker.finished && front_worker.segments.empty();
+}
+
+/// Rethrows the exception stored by a background reader, if there is one.
+void ParallelReadBuffer::handleEmergencyStop()
+{
+    // this can only be called from the main thread when there is an exception
+    assert(background_exception);
+    if (!background_exception)
+        return;
+
+    std::rethrow_exception(background_exception);
+}
+
+/// Makes the next segment of the front reader the working buffer.
+/// Blocks until the front reader has data or has finished, drops completed readers and
+/// schedules replacements for them. Returns false once every reader has been consumed.
+/// Rethrows the exception of a background reader if one has failed.
+bool ParallelReadBuffer::nextImpl()
+{
+    if (all_completed)
+        return false;
+
+    while (true)
+    {
+        std::unique_lock lock(mutex);
+        next_condvar.wait(
+            lock,
+            [this]()
+            {
+                /// Check if no more readers left or current reader can be processed.
+                /// The emptiness check has to come before currentWorkerReady(), which reads
+                /// read_workers.front(): the deque is empty when the factory produced no reader
+                /// at construction or after a seek.
+                return emergency_stop || read_workers.empty() || currentWorkerReady();
+            });
+
+        bool worker_removed = false;
+        /// Remove completed units
+        while (!read_workers.empty() && currentWorkerCompleted() && !emergency_stop)
+        {
+            read_workers.pop_front();
+            worker_removed = true;
+        }
+
+        if (emergency_stop)
+            handleEmergencyStop();
+
+        if (worker_removed)
+            addReaders(lock);
+
+        /// All readers processed, stop
+        if (read_workers.empty())
+        {
+            all_completed = true;
+            return false;
+        }
+
+        auto & front_worker = read_workers.front();
+        /// Read data from first segment of the first reader
+        if (!front_worker->segments.empty())
+        {
+            current_segment = front_worker->nextSegment();
+            /// That was the last segment of a finished reader: drop it right away and try to start another one.
+            if (currentWorkerCompleted())
+            {
+                read_workers.pop_front();
+                all_completed = !addReaderToPool(lock) && read_workers.empty();
+            }
+            break;
+        }
+    }
+    working_buffer = internal_buffer = Buffer(current_segment.data(), current_segment.data() + current_segment.size());
+    current_position += working_buffer.size();
+    return true;
+}
+
+/// Body of a background job: copies the worker's data, one segment per reader->next() call,
+/// into the worker's segment queue until its range is exhausted, the worker is cancelled
+/// or an emergency stop is requested. Any exception is handed to onBackgroundException().
+void ParallelReadBuffer::readerThreadFunction(ReadWorkerPtr read_worker)
+{
+    const auto should_stop = [&] { return emergency_stop || read_worker->cancel; };
+
+    try
+    {
+        while (!should_stop())
+        {
+            if (!read_worker->reader->next())
+                throw Exception("Failed to read all the data from the reader", ErrorCodes::LOGICAL_ERROR);
+
+            /// next() may have taken a while; re-check before doing more work.
+            if (should_stop())
+                break;
+
+            Buffer chunk = read_worker->reader->buffer();
+            /// Never copy past the end of the range assigned to this worker.
+            const size_t chunk_size = std::min(chunk.size(), read_worker->bytes_left);
+            Segment segment(chunk_size, &arena);
+            memcpy(segment.data(), chunk.begin(), chunk_size);
+            read_worker->reader->ignore(chunk_size);
+            read_worker->bytes_left -= chunk_size;
+
+            bool range_exhausted = false;
+            {
+                /// New data ready to be read
+                std::lock_guard lock(mutex);
+                read_worker->segments.emplace_back(std::move(segment));
+                range_exhausted = read_worker->bytes_left == 0;
+                read_worker->finished = range_exhausted;
+                next_condvar.notify_all();
+            }
+
+            if (range_exhausted)
+                break;
+        }
+    }
+    catch (...)
+    {
+        onBackgroundException();
+    }
+}
+
+/// Called from a catch block of a background reader: remembers the first exception,
+/// raises the emergency stop flag and wakes up everybody waiting for data.
+void ParallelReadBuffer::onBackgroundException()
+{
+    std::lock_guard guard(mutex);
+
+    /// Only the first failure is kept; later ones do not overwrite it.
+    if (!background_exception)
+        background_exception = std::current_exception();
+
+    emergency_stop = true;
+    next_condvar.notify_all();
+}
+
+/// Asks all background readers to stop and blocks until none of them is active.
+void ParallelReadBuffer::finishAndWait()
+{
+    emergency_stop = true;
+
+    std::unique_lock guard(mutex);
+    readers_done.wait(guard, [this] { return active_working_reader == 0; });
+}
+
+}
--- a/src/IO/ParallelReadBuffer.h
+++ b/src/IO/ParallelReadBuffer.h
@ -0,0 +1,174 @@
+#pragma once
+
+#include <IO/BufferWithOwnMemory.h>
+#include <IO/ReadBuffer.h>
+#include <IO/SeekableReadBuffer.h>
+#include <Common/ArenaWithFreeLists.h>
+#include <Common/ThreadPool.h>
+
+namespace DB
+{
+
+/**
+ * Reads from multiple ReadBuffers in parallel.
+ * Preserves the order of the readers obtained from ReadBufferFactory.
+ *
+ * It consumes multiple readers and yields their data in the order the readers were obtained.
+ * Each working reader saves segments of data to its own internal queue.
+ *
+ * nextImpl() takes the first available segment from the first reader in the deque and feeds it to the user.
+ * When the first reader finishes reading, it is removed from the worker deque and data from the next reader is consumed.
+ *
+ * The number of working readers is limited by max_working_readers.
+ */
+class ParallelReadBuffer : public SeekableReadBufferWithSize
+{
+private:
+    /// Blocks until data appears in the first reader or this reader indicates that it has finished.
+    /// Finished readers are removed from the queue and data from the next readers is processed.
+    bool nextImpl() override;
+
+    /// Move-only owner of a chunk of memory allocated from a SynchronizedArenaWithFreeLists.
+    /// The memory is returned to the arena on destruction.
+    class Segment : private boost::noncopyable
+    {
+    public:
+        Segment(size_t size_, SynchronizedArenaWithFreeLists * arena_) : arena(arena_), m_data(arena->alloc(size_)), m_size(size_) { }
+
+        Segment() = default;
+
+        /// Takes over the memory of `other`, leaving it empty.
+        Segment(Segment && other) noexcept : arena(other.arena)
+        {
+            std::swap(m_data, other.m_data);
+            std::swap(m_size, other.m_size);
+        }
+
+        /// Exchanges the contents with `other`; the previously owned memory is released when `other` is destroyed.
+        /// The arena is swapped together with the data: each chunk must be freed through the arena
+        /// that allocated it, and a default-constructed segment has no arena at all.
+        Segment & operator=(Segment && other) noexcept
+        {
+            std::swap(arena, other.arena);
+            std::swap(m_data, other.m_data);
+            std::swap(m_size, other.m_size);
+            return *this;
+        }
+
+        ~Segment()
+        {
+            if (m_data)
+            {
+                arena->free(m_data, m_size);
+            }
+        }
+
+        auto data() const noexcept { return m_data; }
+        auto size() const noexcept { return m_size; }
+
+    private:
+        SynchronizedArenaWithFreeLists * arena{nullptr};
+        char * m_data{nullptr};
+        size_t m_size{0};
+    };
+
+public:
+    /// Source of the readers that are processed in parallel.
+    class ReadBufferFactory
+    {
+    public:
+        /// Returns the next reader; an empty pointer means there are no more readers.
+        virtual SeekableReadBufferPtr getReader() = 0;
+        virtual ~ReadBufferFactory() = default;
+        virtual off_t seek(off_t off, int whence) = 0;
+        virtual std::optional<size_t> getTotalSize() = 0;
+    };
+
+    /// Callbacks invoked in a background job with its ThreadStatus: before reading starts and when the job exits.
+    using WorkerSetup = std::function<void(ThreadStatus &)>;
+    using WorkerCleanup = std::function<void(ThreadStatus &)>;
+    explicit ParallelReadBuffer(
+        std::unique_ptr<ReadBufferFactory> reader_factory_,
+        ThreadPool * pool,
+        size_t max_working_readers,
+        WorkerSetup worker_setup = {},
+        WorkerCleanup worker_cleanup = {});
+
+    /// Background jobs use `this`, so they must all be finished before the object goes away.
+    ~ParallelReadBuffer() override { finishAndWait(); }
+
+    off_t seek(off_t off, int whence) override;
+    std::optional<size_t> getTotalSize() override;
+    off_t getPosition() override;
+
+private:
+    /// Reader in progress with a list of read segments
+    struct ReadWorker
+    {
+        /// `reader` is declared before `range`, so it is already initialized when `range` is computed from it.
+        explicit ReadWorker(SeekableReadBufferPtr reader_) : reader(std::move(reader_)), range(reader->getRemainingReadRange())
+        {
+            assert(range.right);
+            /// The right bound is inclusive.
+            bytes_left = *range.right - range.left + 1;
+        }
+
+        /// Pops the oldest segment and advances `range.left` past it.
+        Segment nextSegment()
+        {
+            assert(!segments.empty());
+            auto next_segment = std::move(segments.front());
+            segments.pop_front();
+            range.left += next_segment.size();
+            return next_segment;
+        }
+
+        SeekableReadBufferPtr reader;
+        std::deque<Segment> segments;
+        bool finished{false};
+        SeekableReadBuffer::Range range;
+        size_t bytes_left{0};
+        std::atomic_bool cancel{false};
+    };
+
+    using ReadWorkerPtr = std::shared_ptr<ReadWorker>;
+
+    /// The first worker in the deque has new data or has processed everything it had to
+    bool currentWorkerReady() const;
+    /// The first worker in the deque has processed and flushed all its data
+    bool currentWorkerCompleted() const;
+
+    void handleEmergencyStop();
+
+    void addReaders(std::unique_lock<std::mutex> & buffer_lock);
+    bool addReaderToPool(std::unique_lock<std::mutex> & buffer_lock);
+
+    /// Process read_worker, read data and save into internal segments queue
+    void readerThreadFunction(ReadWorkerPtr read_worker);
+
+    void onBackgroundException();
+    void finishAndWait();
+
+    SynchronizedArenaWithFreeLists arena;
+
+    Segment current_segment;
+
+    ThreadPool * pool;
+    size_t max_working_readers;
+    size_t active_working_reader{0};
+    // Triggered when all reader workers are done
+    std::condition_variable readers_done;
+
+    std::unique_ptr<ReadBufferFactory> reader_factory;
+
+    WorkerSetup worker_setup;
+    WorkerCleanup worker_cleanup;
+
+    /**
+     * FIFO queue of readers.
+     * Each worker contains the reader itself and the downloaded segments.
+     * When a reader has read all available data, it is removed from the
+     * deque and data from the next reader is handed to the user.
+     */
+    std::deque<ReadWorkerPtr> read_workers;
+
+    std::mutex mutex;
+    /// Triggered when new data available
+    std::condition_variable next_condvar;
+
+    std::exception_ptr background_exception = nullptr;
+    std::atomic_bool emergency_stop{false};
+
+    off_t current_position{0};
+
+    bool all_completed{false};
+};
+
+}
--- a/src/IO/ReadWriteBufferFromHTTP.h
+++ b/src/IO/ReadWriteBufferFromHTTP.h
@ -1,32 +1,33 @@
 #pragma once

 #include <functional>
-#include <base/types.h>
-#include <base/sleep.h>
 #include <IO/ConnectionTimeouts.h>
 #include <IO/HTTPCommon.h>
+#include <IO/ParallelReadBuffer.h>
 #include <IO/ReadBuffer.h>
 #include <IO/ReadBufferFromIStream.h>
 #include <IO/ReadHelpers.h>
 #include <IO/ReadSettings.h>
+#include <base/logger_useful.h>
+#include <base/sleep.h>
+#include <base/types.h>
 #include <Poco/Any.h>
 #include <Poco/Net/HTTPBasicCredentials.h>
 #include <Poco/Net/HTTPClientSession.h>
 #include <Poco/Net/HTTPRequest.h>
 #include <Poco/Net/HTTPResponse.h>
 #include <Poco/URI.h>
+#include <Poco/URIStreamFactory.h>
 #include <Poco/Version.h>
 #include <Common/DNSResolver.h>
 #include <Common/RemoteHostFilter.h>
 #include <Common/config.h>
 #include <Common/config_version.h>
-#include <base/logger_useful.h>
-#include <Poco/URIStreamFactory.h>


 namespace ProfileEvents
 {
-    extern const Event ReadBufferSeekCancelConnection;
+extern const Event ReadBufferSeekCancelConnection;
 }

 namespace DB
@ -48,7 +49,7 @@ class UpdatableSessionBase
 {
 protected:
    SessionPtr session;
-    UInt64 redirects { 0 };
+    UInt64 redirects{0};
    Poco::URI initial_uri;
    ConnectionTimeouts timeouts;
    UInt64 max_redirects;
@ -56,19 +57,12 @@ protected:
 public:
    virtual void buildNewSession(const Poco::URI & uri) = 0;

-    explicit UpdatableSessionBase(const Poco::URI uri,
-        const ConnectionTimeouts & timeouts_,
-        UInt64 max_redirects_)
-        : initial_uri { uri }
-        , timeouts { timeouts_ }
-        , max_redirects { max_redirects_ }
+    explicit UpdatableSessionBase(const Poco::URI uri, const ConnectionTimeouts & timeouts_, UInt64 max_redirects_)
+        : initial_uri{uri}, timeouts{timeouts_}, max_redirects{max_redirects_}
    {
    }

-    SessionPtr getSession()
-    {
-        return session;
-    }
+    SessionPtr getSession() { return session; }

    void updateSession(const Poco::URI & uri)
    {
@ -99,7 +93,7 @@ namespace detail
        /// HTTP range, including right bound [begin, end].
        struct Range
        {
-            size_t begin = 0;
+            std::optional<size_t> begin;
            std::optional<size_t> end;
        };

@ -144,10 +138,9 @@ namespace detail
            return read_range.begin || read_range.end || retry_with_range_header;
        }

-        size_t getOffset() const
-        {
-            return read_range.begin + offset_from_begin_pos;
-        }
+        size_t getRangeBegin() const { return read_range.begin.value_or(0); }
+
+        size_t getOffset() const { return getRangeBegin() + offset_from_begin_pos; }

        std::istream * callImpl(Poco::URI uri_, Poco::Net::HTTPResponse & response, const std::string & method_)
        {
@ -161,7 +154,7 @@ namespace detail
            if (out_stream_callback)
                request.setChunkedTransferEncoding(true);

-            for (auto & http_header_entry: http_header_entries)
+            for (auto & http_header_entry : http_header_entries)
                request.set(std::get<0>(http_header_entry), std::get<1>(http_header_entry));

            if (withPartialContent())
@ -207,26 +200,14 @@ namespace detail
        std::optional<size_t> getTotalSize() override
        {
            if (read_range.end)
-                return *read_range.end - read_range.begin;
+                return *read_range.end - getRangeBegin();

            Poco::Net::HTTPResponse response;
            for (size_t i = 0; i < 10; ++i)
            {
                try
                {
-                    call(response, Poco::Net::HTTPRequest::HTTP_HEAD);
-
-                    while (isRedirect(response.getStatus()))
-                    {
-                        Poco::URI uri_redirect(response.get("Location"));
-                        if (remote_host_filter)
-                            remote_host_filter->checkURL(uri_redirect);
-
-                        session->updateSession(uri_redirect);
-
-                        istr = callImpl(uri_redirect, response, method);
-                    }
-
+                    callWithRedirects(response, Poco::Net::HTTPRequest::HTTP_HEAD);
                    break;
                }
                catch (const Poco::Exception & e)
@ -236,7 +217,7 @@ namespace detail
            }

            if (response.hasContentLength())
-                read_range.end = read_range.begin + response.getContentLength();
+                read_range.end = getRangeBegin() + response.getContentLength();

            return read_range.end;
        }
@ -252,6 +233,21 @@ namespace detail

        InitializeError initialization_error = InitializeError::NONE;

+    private:
+        /// Makes `impl` read into the memory currently described by `internal_buffer`.
+        void setupExternalBuffer()
+        {
+            /**
+            * use_external_buffer -- means we read into the buffer which
+            * was passed to us from somewhere else. We do not check whether
+            * previously returned buffer was read or not (no hasPendingData() check is needed),
+            * because this branch means we are prefetching data,
+            * each nextImpl() call we can fill a different buffer.
+            */
+            impl->set(internal_buffer.begin(), internal_buffer.size());
+            /// Sanity checks on the buffers (active only when assertions are enabled).
+            assert(working_buffer.begin() != nullptr);
+            assert(!internal_buffer.empty());
+        }
+
    public:
        using NextCallback = std::function<void(size_t)>;
        using OutStreamCallback = std::function<void(std::ostream &)>;
@ -276,7 +272,7 @@ namespace detail
            , session {session_}
            , out_stream_callback {out_stream_callback_}
            , credentials {credentials_}
-            , http_header_entries {http_header_entries_}
+            , http_header_entries {std::move(http_header_entries_)}
            , remote_host_filter {remote_host_filter_}
            , buffer_size {buffer_size_}
            , use_external_buffer {use_external_buffer_}
@ -287,18 +283,21 @@ namespace detail
        {
            if (settings.http_max_tries <= 0 || settings.http_retry_initial_backoff_ms <= 0
                || settings.http_retry_initial_backoff_ms >= settings.http_retry_max_backoff_ms)
-                throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                throw Exception(
+                    ErrorCodes::BAD_ARGUMENTS,
                    "Invalid setting for http backoff, "
                    "must be http_max_tries >= 1 (current is {}) and "
                    "0 < http_retry_initial_backoff_ms < settings.http_retry_max_backoff_ms (now 0 < {} < {})",
-                                settings.http_max_tries, settings.http_retry_initial_backoff_ms, settings.http_retry_max_backoff_ms);
+                    settings.http_max_tries,
+                    settings.http_retry_initial_backoff_ms,
+                    settings.http_retry_max_backoff_ms);

            // Configure User-Agent if it not already set.
            const std::string user_agent = "User-Agent";
-            auto iter = std::find_if(http_header_entries.begin(), http_header_entries.end(), [&user_agent](const HTTPHeaderEntry & entry)
-            {
-                return std::get<0>(entry) == user_agent;
-            });
+            auto iter = std::find_if(
+                http_header_entries.begin(),
+                http_header_entries.end(),
+                [&user_agent](const HTTPHeaderEntry & entry) { return std::get<0>(entry) == user_agent; });

            if (iter == http_header_entries.end())
            {
@ -313,7 +312,36 @@ namespace detail
            }
        }

-        void call(Poco::Net::HTTPResponse & response, const String & method_)
+        /// Tells whether a failed request with the given HTTP status may be retried.
+        /// Returns false for 400 Bad Request, 401 Unauthorized, 403 Forbidden, 404 Not Found
+        /// and 405 Method Not Allowed; returns true for every other status.
+        static bool isRetriableError(const Poco::Net::HTTPResponse::HTTPStatus http_status) noexcept
+        {
+            constexpr std::array non_retriable_errors{
+                Poco::Net::HTTPResponse::HTTPStatus::HTTP_BAD_REQUEST,
+                Poco::Net::HTTPResponse::HTTPStatus::HTTP_UNAUTHORIZED,
+                Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND,
+                Poco::Net::HTTPResponse::HTTPStatus::HTTP_FORBIDDEN,
+                Poco::Net::HTTPResponse::HTTPStatus::HTTP_METHOD_NOT_ALLOWED};
+
+            for (const auto non_retriable_status : non_retriable_errors)
+            {
+                if (non_retriable_status == http_status)
+                    return false;
+            }
+
+            return true;
+        }
+
+        /// Issues the request with `method_` and then follows HTTP redirects until the
+        /// response status is no longer a redirect.
+        /// For every redirect the target is taken from the `Location` header, checked against
+        /// `remote_host_filter` (when one is set) and passed to the session via updateSession()
+        /// before the request is re-issued. `throw_on_all_errors` is forwarded to call() for
+        /// the initial request.
+        void callWithRedirects(Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false)
+        {
+            call(response, method_, throw_on_all_errors);
+
+            while (isRedirect(response.getStatus()))
+            {
+                Poco::URI uri_redirect(response.get("Location"));
+                if (remote_host_filter)
+                    remote_host_filter->checkURL(uri_redirect);
+
+                session->updateSession(uri_redirect);
+
+                /// Re-issue with the method the caller asked for (e.g. HEAD from getTotalSize()),
+                /// not with the buffer's own `method`, so a redirected HEAD stays a HEAD.
+                istr = callImpl(uri_redirect, response, method_);
+            }
+        }
+
+        void call(Poco::Net::HTTPResponse & response, const String & method_, bool throw_on_all_errors = false)
        {
            try
            {
@ -321,18 +349,18 @@ namespace detail
            }
            catch (...)
            {
+                if (throw_on_all_errors)
+                {
+                    throw;
+                }
+
                auto http_status = response.getStatus();

-                if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND
-                    && http_skip_not_found_url)
+                if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND && http_skip_not_found_url)
                {
                    initialization_error = InitializeError::SKIP_NOT_FOUND_URL;
                }
-                else if (http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_BAD_REQUEST
-                    || http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_UNAUTHORIZED
-                    || http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_NOT_FOUND
-                    || http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_FORBIDDEN
-                    || http_status == Poco::Net::HTTPResponse::HTTPStatus::HTTP_METHOD_NOT_ALLOWED)
+                else if (!isRetriableError(http_status))
                {
                    initialization_error = InitializeError::NON_RETRIABLE_ERROR;
                    exception = std::current_exception();
@ -372,12 +400,14 @@ namespace detail
            if (withPartialContent() && response.getStatus() != Poco::Net::HTTPResponse::HTTPStatus::HTTP_PARTIAL_CONTENT)
            {
                /// Having `200 OK` instead of `206 Partial Content` is acceptable in case we retried with range.begin == 0.
-                if (read_range.begin)
+                if (read_range.begin && *read_range.begin != 0)
                {
                    if (!exception)
-                        exception = std::make_exception_ptr(
-                            Exception(ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE,
-                                      "Cannot read with range: [{}, {}]", read_range.begin, read_range.end ? *read_range.end : '-'));
+                        exception = std::make_exception_ptr(Exception(
+                            ErrorCodes::HTTP_RANGE_NOT_SATISFIABLE,
+                            "Cannot read with range: [{}, {}]",
+                            *read_range.begin,
+                            read_range.end ? *read_range.end : '-'));

                    initialization_error = InitializeError::NON_RETRIABLE_ERROR;
                    return;
@ -386,12 +416,12 @@ namespace detail
                {
                    /// We could have range.begin == 0 and range.end != 0 in case of DiskWeb and failing to read with partial content
                    /// will affect only performance, so a warning is enough.
-                    LOG_WARNING(log, "Unable to read with range header: [{}, {}]", read_range.begin, *read_range.end);
+                    LOG_WARNING(log, "Unable to read with range header: [{}, {}]", getRangeBegin(), *read_range.end);
                }
            }

            if (!offset_from_begin_pos && !read_range.end && response.hasContentLength())
-                read_range.end = read_range.begin + response.getContentLength();
+                read_range.end = getRangeBegin() + response.getContentLength();

            try
            {
@ -399,12 +429,7 @@ namespace detail

                if (use_external_buffer)
                {
-                    /**
-                    * See comment 30 lines below.
-                    */
-                    impl->set(internal_buffer.begin(), internal_buffer.size());
-                    assert(working_buffer.begin() != nullptr);
-                    assert(!internal_buffer.empty());
+                    setupExternalBuffer();
                }
            }
            catch (const Poco::Exception & e)
@ -426,23 +451,17 @@ namespace detail
            if (next_callback)
                next_callback(count());

-            if (read_range.end && getOffset() == read_range.end.value())
+            if (read_range.end && getOffset() > read_range.end.value())
+            {
+                assert(getOffset() == read_range.end.value() + 1);
                return false;
+            }

            if (impl)
            {
                if (use_external_buffer)
                {
-                    /**
-                    * use_external_buffer -- means we read into the buffer which
-                    * was passed to us from somewhere else. We do not check whether
-                    * previously returned buffer was read or not (no hasPendingData() check is needed),
-                    * because this branch means we are prefetching data,
-                    * each nextImpl() call we can fill a different buffer.
-                    */
-                    impl->set(internal_buffer.begin(), internal_buffer.size());
-                    assert(working_buffer.begin() != nullptr);
-                    assert(!internal_buffer.empty());
+                    setupExternalBuffer();
                }
                else
                {
@ -477,10 +496,7 @@ namespace detail

                        if (use_external_buffer)
                        {
-                            /// See comment 40 lines above.
-                            impl->set(internal_buffer.begin(), internal_buffer.size());
-                            assert(working_buffer.begin() != nullptr);
-                            assert(!internal_buffer.empty());
+                            setupExternalBuffer();
                        }
                    }

@ -498,13 +514,18 @@ namespace detail
                    if (!can_retry_request)
                        throw;

-                    LOG_ERROR(log,
+                    LOG_ERROR(
+                        log,
                        "HTTP request to `{}` failed at try {}/{} with bytes read: {}/{}. "
                        "Error: {}. (Current backoff wait is {}/{} ms)",
-                              uri.toString(), i + 1, settings.http_max_tries,
-                              getOffset(), read_range.end ? toString(*read_range.end) : "unknown",
+                        uri.toString(),
+                        i + 1,
+                        settings.http_max_tries,
+                        getOffset(),
+                        read_range.end ? toString(*read_range.end) : "unknown",
                        e.displayText(),
-                              milliseconds_to_wait, settings.http_retry_max_backoff_ms);
+                        milliseconds_to_wait,
+                        settings.http_retry_max_backoff_ms);

                    retry_with_range_header = true;
                    exception = std::current_exception();
@ -529,10 +550,7 @@ namespace detail
            return true;
        }

-        off_t getPosition() override
-        {
-            return getOffset() - available();
-        }
+        off_t getPosition() override { return getOffset() - available(); }

        off_t seek(off_t offset_, int whence) override
        {
@ -540,12 +558,11 @@ namespace detail
                throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);

            if (offset_ < 0)
-                throw Exception("Seek position is out of bounds. Offset: " + std::to_string(offset_), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);
+                throw Exception(
+                    "Seek position is out of bounds. Offset: " + std::to_string(offset_), ErrorCodes::SEEK_POSITION_OUT_OF_BOUND);

            off_t current_offset = getOffset();
-            if (!working_buffer.empty()
-                && size_t(offset_) >= current_offset - working_buffer.size()
-                && offset_ < current_offset)
+            if (!working_buffer.empty() && size_t(offset_) >= current_offset - working_buffer.size() && offset_ < current_offset)
            {
                pos = working_buffer.end() - (current_offset - offset_);
                assert(pos >= working_buffer.begin());
@ -567,7 +584,6 @@ namespace detail

            if (impl)
            {
-
                ProfileEvents::increment(ProfileEvents::ReadBufferSeekCancelConnection);
                impl.reset();
            }
@ -580,6 +596,8 @@ namespace detail
            return offset_;
        }

+        /// Range that is still to be read: starts at the current offset (getOffset()) and ends at
+        /// `read_range.end`, which may be unset.
+        SeekableReadBuffer::Range getRemainingReadRange() const override { return {getOffset(), read_range.end}; }
+
        std::string getResponseCookie(const std::string & name, const std::string & def) const
        {
            for (const auto & cookie : cookies)
@ -599,10 +617,7 @@ namespace detail
            next_callback(count());
        }

-        const std::string & getCompressionMethod() const
-        {
-            return content_encoding;
-        }
+        const std::string & getCompressionMethod() const { return content_encoding; }
    };
 }

@ -611,19 +626,50 @@ class UpdatableSession : public UpdatableSessionBase<HTTPSessionPtr>
    using Parent = UpdatableSessionBase<HTTPSessionPtr>;

 public:
-    UpdatableSession(
-        const Poco::URI uri,
-        const ConnectionTimeouts & timeouts_,
-        const UInt64 max_redirects_)
+    UpdatableSession(const Poco::URI uri, const ConnectionTimeouts & timeouts_, const UInt64 max_redirects_)
        : Parent(uri, timeouts_, max_redirects_)
    {
        session = makeHTTPSession(initial_uri, timeouts);
    }

-    void buildNewSession(const Poco::URI & uri) override
+    void buildNewSession(const Poco::URI & uri) override { session = makeHTTPSession(uri, timeouts); }
+};
+
+class RangeGenerator
+{
+public:
+    explicit RangeGenerator(size_t total_size_, size_t range_step_, size_t range_start = 0)
+        : from(range_start), range_step(range_step_), total_size(total_size_)
    {
-        session = makeHTTPSession(uri, timeouts);
    }
+
+    /// Number of ranges of at most `range_step` bytes needed to cover [from, total_size),
+    /// i.e. ceil((total_size - from) / range_step).
+    /// Computed in integer arithmetic: rounding to nearest through `float` undercounted
+    /// (9 bytes with step 4 is 3 ranges, not 2) and lost precision for large sizes.
+    /// Returns 0 when nothing is left to cover or when the step is 0 (which would divide by zero).
+    size_t totalRanges() const
+    {
+        if (range_step == 0 || from >= total_size)
+            return 0;
+
+        /// Ceiling division written so that the intermediate value cannot overflow.
+        return (total_size - from - 1) / range_step + 1;
+    }
+
+    using Range = std::pair<size_t, size_t>;
+
+    /// Returns the next half-open range [from, to) of at most `range_step` bytes and advances
+    /// the generator past it, or std::nullopt once `from` has reached `total_size`.
+    std::optional<Range> nextRange()
+    {
+        if (from >= total_size)
+        {
+            return std::nullopt;
+        }
+
+        /// Clamp the step to what is left before adding it, so that `from + range_step`
+        /// cannot wrap around size_t for very large steps.
+        const size_t remaining = total_size - from;
+        const size_t to = from + (range_step < remaining ? range_step : remaining);
+
+        Range range{from, to};
+        from = to;
+        return range;
+    }
+
+private:
+    size_t from;
+    size_t range_step;
+    size_t total_size;
 };

 class ReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatableSession>>
@ -646,14 +692,117 @@ public:
        bool delay_initialization_ = true,
        bool use_external_buffer_ = false,
        bool skip_not_found_url_ = false)
-        : Parent(std::make_shared<UpdatableSession>(uri_, timeouts, max_redirects),
-            uri_, credentials_, method_, out_stream_callback_, buffer_size_,
-            settings_, http_header_entries_, read_range_, remote_host_filter_,
-            delay_initialization_, use_external_buffer_, skip_not_found_url_)
+        : Parent(
+            std::make_shared<UpdatableSession>(uri_, timeouts, max_redirects),
+            uri_,
+            credentials_,
+            method_,
+            out_stream_callback_,
+            buffer_size_,
+            settings_,
+            http_header_entries_,
+            read_range_,
+            remote_host_filter_,
+            delay_initialization_,
+            use_external_buffer_,
+            skip_not_found_url_)
    {
    }
 };

+/// Factory that hands out one ReadWriteBufferFromHTTP per consecutive byte range of a single
+/// HTTP object. Ranges are produced by a RangeGenerator over [0, total_object_size) in steps of
+/// `range_step`; all other constructor arguments are stored and forwarded unchanged to every
+/// reader that is created.
+class RangedReadWriteBufferFromHTTPFactory : public ParallelReadBuffer::ReadBufferFactory
+{
+    using OutStreamCallback = ReadWriteBufferFromHTTP::OutStreamCallback;
+
+public:
+    /// Stores the connection parameters and starts range generation at offset 0.
+    RangedReadWriteBufferFromHTTPFactory(
+        size_t total_object_size_,
+        size_t range_step_,
+        Poco::URI uri_,
+        std::string method_,
+        OutStreamCallback out_stream_callback_,
+        ConnectionTimeouts timeouts_,
+        const Poco::Net::HTTPBasicCredentials & credentials_,
+        UInt64 max_redirects_ = 0,
+        size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
+        ReadSettings settings_ = {},
+        ReadWriteBufferFromHTTP::HTTPHeaderEntries http_header_entries_ = {},
+        const RemoteHostFilter * remote_host_filter_ = nullptr,
+        bool delay_initialization_ = true,
+        bool use_external_buffer_ = false,
+        bool skip_not_found_url_ = false)
+        : range_generator(total_object_size_, range_step_)
+        , total_object_size(total_object_size_)
+        , range_step(range_step_)
+        , uri(uri_)
+        , method(std::move(method_))
+        , out_stream_callback(out_stream_callback_)
+        , timeouts(std::move(timeouts_))
+        , credentials(credentials_)
+        , max_redirects(max_redirects_)
+        , buffer_size(buffer_size_)
+        , settings(std::move(settings_))
+        , http_header_entries(std::move(http_header_entries_))
+        , remote_host_filter(remote_host_filter_)
+        , delay_initialization(delay_initialization_)
+        , use_external_buffer(use_external_buffer_)
+        , skip_not_found_url(skip_not_found_url_)
+    {
+    }
+
+    /// Creates a reader for the next range, or returns nullptr when the generator is exhausted.
+    SeekableReadBufferPtr getReader() override
+    {
+        const auto next_range = range_generator.nextRange();
+        if (!next_range)
+        {
+            return nullptr;
+        }
+
+        return std::make_shared<ReadWriteBufferFromHTTP>(
+            uri,
+            method,
+            out_stream_callback,
+            timeouts,
+            credentials,
+            max_redirects,
+            buffer_size,
+            settings,
+            http_header_entries,
+            // HTTP Range has inclusive bounds, i.e. [from, to]
+            // (the generator's upper bound is exclusive, hence the `- 1`).
+            ReadWriteBufferFromHTTP::Range{next_range->first, next_range->second - 1},
+            remote_host_filter,
+            delay_initialization,
+            use_external_buffer,
+            skip_not_found_url);
+    }
+
+    /// Restarts range generation at absolute offset `off` and returns `off`.
+    /// `whence` is ignored: the offset is always used as an absolute position.
+    /// NOTE(review): a negative `off` is cast to a very large size_t, after which getReader()
+    /// yields no readers -- presumably callers validate the offset beforehand; confirm.
+    off_t seek(off_t off, [[maybe_unused]] int whence) override
+    {
+        range_generator = RangeGenerator{total_object_size, range_step, static_cast<size_t>(off)};
+        return off;
+    }
+
+    /// Returns the object size that was passed to the constructor.
+    std::optional<size_t> getTotalSize() override { return total_object_size; }
+
+private:
+    RangeGenerator range_generator;
+    size_t total_object_size;
+    size_t range_step;
+    Poco::URI uri;
+    std::string method;
+    OutStreamCallback out_stream_callback;
+    ConnectionTimeouts timeouts;
+    /// Held by reference, not copied.
+    /// NOTE(review): the referenced credentials object must outlive this factory -- confirm at call sites.
+    const Poco::Net::HTTPBasicCredentials & credentials;
+    UInt64 max_redirects;
+    size_t buffer_size;
+    ReadSettings settings;
+    ReadWriteBufferFromHTTP::HTTPHeaderEntries http_header_entries;
+    const RemoteHostFilter * remote_host_filter;
+    bool delay_initialization;
+    bool use_external_buffer;
+    bool skip_not_found_url;
+};
+
 class UpdatablePooledSession : public UpdatableSessionBase<PooledHTTPSessionPtr>
 {
    using Parent = UpdatableSessionBase<PooledHTTPSessionPtr>;
@ -662,20 +811,14 @@ private:
    size_t per_endpoint_pool_size;

 public:
-    explicit UpdatablePooledSession(const Poco::URI uri,
-        const ConnectionTimeouts & timeouts_,
-        const UInt64 max_redirects_,
-        size_t per_endpoint_pool_size_)
-        : Parent(uri, timeouts_, max_redirects_)
-        , per_endpoint_pool_size { per_endpoint_pool_size_ }
+    explicit UpdatablePooledSession(
+        const Poco::URI uri, const ConnectionTimeouts & timeouts_, const UInt64 max_redirects_, size_t per_endpoint_pool_size_)
+        : Parent(uri, timeouts_, max_redirects_), per_endpoint_pool_size{per_endpoint_pool_size_}
    {
        session = makePooledHTTPSession(initial_uri, timeouts, per_endpoint_pool_size);
    }

-    void buildNewSession(const Poco::URI & uri) override
-    {
-        session = makePooledHTTPSession(uri, timeouts, per_endpoint_pool_size);
-    }
+    void buildNewSession(const Poco::URI & uri) override { session = makePooledHTTPSession(uri, timeouts, per_endpoint_pool_size); }
 };

 class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatablePooledSession>>
@ -683,7 +826,8 @@ class PooledReadWriteBufferFromHTTP : public detail::ReadWriteBufferFromHTTPBase
    using Parent = detail::ReadWriteBufferFromHTTPBase<std::shared_ptr<UpdatablePooledSession>>;

 public:
-    explicit PooledReadWriteBufferFromHTTP(Poco::URI uri_,
+    explicit PooledReadWriteBufferFromHTTP(
+        Poco::URI uri_,
        const std::string & method_ = {},
        OutStreamCallback out_stream_callback_ = {},
        const ConnectionTimeouts & timeouts_ = {},
@ -691,7 +835,8 @@ public:
        size_t buffer_size_ = DBMS_DEFAULT_BUFFER_SIZE,
        const UInt64 max_redirects = 0,
        size_t max_connections_per_endpoint = DEFAULT_COUNT_OF_HTTP_CONNECTIONS_PER_ENDPOINT)
-        : Parent(std::make_shared<UpdatablePooledSession>(uri_, timeouts_, max_redirects, max_connections_per_endpoint),
+        : Parent(
+            std::make_shared<UpdatablePooledSession>(uri_, timeouts_, max_redirects, max_connections_per_endpoint),
            uri_,
            credentials_,
            method_,
--- a/src/Interpreters/ActionsVisitor.cpp
+++ b/src/Interpreters/ActionsVisitor.cpp
@ -372,8 +372,8 @@ SetPtr makeExplicitSet(
            element_type = low_cardinality_type->getDictionaryType();

    auto set_key = PreparedSetKey::forLiteral(*right_arg, set_element_types);
-    if (prepared_sets.count(set_key))
-        return prepared_sets.at(set_key); /// Already prepared.
+    if (auto it = prepared_sets.find(set_key); it != prepared_sets.end())
+        return it->second; /// Already prepared.

    Block block;
    const auto & right_arg_func = std::dynamic_pointer_cast<ASTFunction>(right_arg);
@ -388,7 +388,7 @@ SetPtr makeExplicitSet(
    set->insertFromBlock(block.getColumnsWithTypeAndName());
    set->finishInsert();

-    prepared_sets[set_key] = set;
+    prepared_sets.emplace(set_key, set);
    return set;
 }

@ -707,7 +707,7 @@ ASTs ActionsMatcher::doUntuple(const ASTFunction * function, ActionsMatcher::Dat
        if (tid != 0)
            tuple_ast = tuple_ast->clone();

-        auto literal = std::make_shared<ASTLiteral>(UInt64(++tid));
+        auto literal = std::make_shared<ASTLiteral>(UInt64{++tid});
        visit(*literal, literal, data);

        auto func = makeASTFunction("tupleElement", tuple_ast, literal);
@ -814,13 +814,12 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data &
            if (!data.only_consts)
            {
                /// We are in the part of the tree that we are not going to compute. You just need to define types.
-                /// Do not subquery and create sets. We replace "in*" function to "in*IgnoreSet".
+                /// Do not evaluate subquery and create sets. We replace "in*" function to "in*IgnoreSet".

                auto argument_name = node.arguments->children.at(0)->getColumnName();
-
                data.addFunction(
                    FunctionFactory::instance().get(node.name + "IgnoreSet", data.getContext()),
-                        { argument_name, argument_name },
+                    {argument_name, argument_name},
                    column_name);
            }
            return;
@ -1145,8 +1144,8 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
        if (no_subqueries)
            return {};
        auto set_key = PreparedSetKey::forSubquery(*right_in_operand);
-        if (data.prepared_sets.count(set_key))
-            return data.prepared_sets.at(set_key);
+        if (auto it = data.prepared_sets.find(set_key); it != data.prepared_sets.end())
+            return it->second;

        /// A special case is if the name of the table is specified on the right side of the IN statement,
        ///  and the table has the type Set (a previously prepared set).
@ -1160,7 +1159,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
                StorageSet * storage_set = dynamic_cast<StorageSet *>(table.get());
                if (storage_set)
                {
-                    data.prepared_sets[set_key] = storage_set->getSet();
+                    data.prepared_sets.emplace(set_key, storage_set->getSet());
                    return storage_set->getSet();
                }
            }
@ -1174,7 +1173,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
        /// If you already created a Set with the same subquery / table.
        if (subquery_for_set.set)
        {
-            data.prepared_sets[set_key] = subquery_for_set.set;
+            data.prepared_sets.emplace(set_key, subquery_for_set.set);
            return subquery_for_set.set;
        }

@ -1196,7 +1195,7 @@ SetPtr ActionsMatcher::makeSet(const ASTFunction & node, Data & data, bool no_su
        }

        subquery_for_set.set = set;
-        data.prepared_sets[set_key] = set;
+        data.prepared_sets.emplace(set_key, set);
        return set;
    }
    else
--- a/src/Interpreters/ActionsVisitor.h
+++ b/src/Interpreters/ActionsVisitor.h
@ -10,6 +10,7 @@
 namespace DB
 {

+class ASTExpressionList;
 class ASTFunction;

 class ExpressionActions;
@ -89,10 +90,7 @@ struct ScopeStack : WithContext
    void addColumn(ColumnWithTypeAndName column);
    void addAlias(const std::string & name, std::string alias);
    void addArrayJoin(const std::string & source_name, std::string result_name);
-    void addFunction(
-            const FunctionOverloadResolverPtr & function,
-            const Names & argument_names,
-            std::string result_name);
+    void addFunction(const FunctionOverloadResolverPtr & function, const Names & argument_names, std::string result_name);

    ActionsDAGPtr popLevel();

--- a/src/Interpreters/CatBoostModel.cpp
+++ b/src/Interpreters/CatBoostModel.cpp
@ -26,10 +26,10 @@ extern const int CANNOT_LOAD_CATBOOST_MODEL;
 extern const int CANNOT_APPLY_CATBOOST_MODEL;
 }

-
 /// CatBoost wrapper interface functions.
-struct CatBoostWrapperAPI
+class CatBoostWrapperAPI
 {
+public:
    using ModelCalcerHandle = void;

    ModelCalcerHandle * (* ModelCalcerCreate)(); // NOLINT
@ -68,9 +68,6 @@ struct CatBoostWrapperAPI
 };


-namespace
-{
-
 class CatBoostModelHolder
 {
 private:
@ -84,7 +81,61 @@ public:
 };


-class CatBoostModelImpl : public ICatBoostModel
+/// Owns the CatBoost wrapper shared library opened from `lib_path` and the table of
+/// wrapper entry points (CatBoostWrapperAPI) resolved from it at construction.
+class CatBoostLibHolder
+{
+public:
+    explicit CatBoostLibHolder(std::string lib_path_)
+        : lib_path(std::move(lib_path_))
+        , lib(lib_path)
+    {
+        initAPI();
+    }
+
+    /// Entry points resolved from the library.
+    const CatBoostWrapperAPI & getAPI() const
+    {
+        return api;
+    }
+
+    /// Path the library was opened from.
+    const std::string & getCurrentPath() const
+    {
+        return lib_path;
+    }
+
+private:
+    /// Declaration order matters: `lib_path` must be initialized before `lib`, which is constructed from it.
+    CatBoostWrapperAPI api;
+    std::string lib_path;
+    SharedLibrary lib;
+
+    /// Resolves symbol `name` through lib.get<>() and stores it in `func`.
+    template <typename Symbol>
+    void load(Symbol & func, const std::string & name)
+    {
+        func = lib.get<Symbol>(name);
+    }
+
+    /// Resolves symbol `name` through lib.tryGet<>() and stores it in `func`.
+    /// NOTE(review): presumably tryGet tolerates a missing symbol while get does not -- confirm against SharedLibrary.
+    template <typename Symbol>
+    void tryLoad(Symbol & func, const std::string & name)
+    {
+        func = lib.tryGet<Symbol>(name);
+    }
+
+    /// Fills every entry of `api` from the library: the first group via load(), the rest via tryLoad().
+    void initAPI()
+    {
+        load(api.ModelCalcerCreate, "ModelCalcerCreate");
+        load(api.ModelCalcerDelete, "ModelCalcerDelete");
+        load(api.GetErrorString, "GetErrorString");
+        load(api.LoadFullModelFromFile, "LoadFullModelFromFile");
+        load(api.CalcModelPredictionFlat, "CalcModelPredictionFlat");
+        load(api.CalcModelPrediction, "CalcModelPrediction");
+        load(api.CalcModelPredictionWithHashedCatFeatures, "CalcModelPredictionWithHashedCatFeatures");
+        load(api.GetStringCatFeatureHash, "GetStringCatFeatureHash");
+        load(api.GetIntegerCatFeatureHash, "GetIntegerCatFeatureHash");
+        load(api.GetFloatFeaturesCount, "GetFloatFeaturesCount");
+        load(api.GetCatFeaturesCount, "GetCatFeaturesCount");
+
+        tryLoad(api.CheckModelMetadataHasKey, "CheckModelMetadataHasKey");
+        tryLoad(api.GetModelInfoValueSize, "GetModelInfoValueSize");
+        tryLoad(api.GetModelInfoValue, "GetModelInfoValue");
+        tryLoad(api.GetTreeCount, "GetTreeCount");
+        tryLoad(api.GetDimensionsCount, "GetDimensionsCount");
+    }
+};
+
+/// Returns the cached library holder for `lib_path`.
+/// A single holder is kept in a function-local static; it is (re)created when none exists yet
+/// or when the cached one was opened from a different path. Access is guarded by a
+/// function-local mutex.
+std::shared_ptr<CatBoostLibHolder> getCatBoostWrapperHolder(const std::string & lib_path)
+{
+    static std::shared_ptr<CatBoostLibHolder> cached_holder;
+    static std::mutex holder_mutex;
+
+    std::lock_guard guard(holder_mutex);
+
+    const bool can_reuse = cached_holder && cached_holder->getCurrentPath() == lib_path;
+    if (!can_reuse)
+        cached_holder = std::make_shared<CatBoostLibHolder>(lib_path);
+
+    return cached_holder;
+}
+
+class CatBoostModelImpl
 {
 public:
    CatBoostModelImpl(const CatBoostWrapperAPI * api_, const std::string & model_path) : api(api_)
@ -92,13 +143,15 @@ public:
        handle = std::make_unique<CatBoostModelHolder>(api);
        if (!handle)
        {
-            std::string msg = "Cannot create CatBoost model: ";
-            throw Exception(msg + api->GetErrorString(), ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL);
+            throw Exception(ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL,
+                "Cannot create CatBoost model: {}",
+                api->GetErrorString());
        }
        if (!api->LoadFullModelFromFile(handle->get(), model_path.c_str()))
        {
-            std::string msg = "Cannot load CatBoost model: ";
-            throw Exception(msg + api->GetErrorString(), ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL);
+            throw Exception(ErrorCodes::CANNOT_LOAD_CATBOOST_MODEL,
+                "Cannot load CatBoost model: {}",
+                api->GetErrorString());
        }

        float_features_count = api->GetFloatFeaturesCount(handle->get());
@ -108,32 +161,22 @@ public:
            tree_count = api->GetDimensionsCount(handle->get());
    }

-    ColumnPtr evaluate(const ColumnRawPtrs & columns) const override
+    ColumnPtr evaluate(const ColumnRawPtrs & columns) const
    {
        if (columns.empty())
-            throw Exception("Got empty columns list for CatBoost model.", ErrorCodes::BAD_ARGUMENTS);
+            throw Exception(ErrorCodes::BAD_ARGUMENTS, "Got empty columns list for CatBoost model.");

        if (columns.size() != float_features_count + cat_features_count)
-        {
-            std::string msg;
-            {
-                WriteBufferFromString buffer(msg);
-                buffer << "Number of columns is different with number of features: ";
-                buffer << columns.size() << " vs " << float_features_count << " + " << cat_features_count;
-            }
-            throw Exception(msg, ErrorCodes::BAD_ARGUMENTS);
-        }
+            /// Three `{}` placeholders need three arguments: the actual column count comes first.
+            throw Exception(ErrorCodes::BAD_ARGUMENTS,
+                "Number of columns is different with number of features: columns size {} float features size {} + cat features size {}",
+                columns.size(), float_features_count, cat_features_count);

        for (size_t i = 0; i < float_features_count; ++i)
        {
            if (!columns[i]->isNumeric())
            {
-                std::string msg;
-                {
-                    WriteBufferFromString buffer(msg);
-                    buffer << "Column " << i << " should be numeric to make float feature.";
-                }
-                throw Exception(msg, ErrorCodes::BAD_ARGUMENTS);
+                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column {} should be numeric to make float feature.", i);
            }
        }

@ -142,16 +185,13 @@ public:
        {
            const auto * column = columns[i];
            if (column->isNumeric())
+            {
                cat_features_are_strings = false;
+            }
            else if (!(typeid_cast<const ColumnString *>(column)
                       || typeid_cast<const ColumnFixedString *>(column)))
            {
-                std::string msg;
-                {
-                    WriteBufferFromString buffer(msg);
-                    buffer << "Column " << i << " should be numeric or string.";
-                }
-                throw Exception(msg, ErrorCodes::BAD_ARGUMENTS);
+                throw Exception(ErrorCodes::BAD_ARGUMENTS, "Column {} should be numeric or string.", i);
            }
        }

@ -187,9 +227,9 @@ public:
        return ColumnTuple::create(std::move(mutable_columns));
    }

-    size_t getFloatFeaturesCount() const override { return float_features_count; }
-    size_t getCatFeaturesCount() const override { return cat_features_count; }
-    size_t getTreeCount() const override { return tree_count; }
+    size_t getFloatFeaturesCount() const { return float_features_count; }
+    size_t getCatFeaturesCount() const { return cat_features_count; }
+    size_t getTreeCount() const { return tree_count; }

 private:
    std::unique_ptr<CatBoostModelHolder> handle;
@ -435,66 +475,6 @@ private:
    }
 };

-
-/// Holds CatBoost wrapper library and provides wrapper interface.
-class CatBoostLibHolder: public CatBoostWrapperAPIProvider
-{
-public:
-    explicit CatBoostLibHolder(std::string lib_path_) : lib_path(std::move(lib_path_)), lib(lib_path) { initAPI(); }
-
-    const CatBoostWrapperAPI & getAPI() const override { return api; }
-    const std::string & getCurrentPath() const { return lib_path; }
-
-private:
-    CatBoostWrapperAPI api;
-    std::string lib_path;
-    SharedLibrary lib;
-
-    void initAPI();
-
-    template <typename T>
-    void load(T& func, const std::string & name) { func = lib.get<T>(name); }
-
-    template <typename T>
-    void tryLoad(T& func, const std::string & name) { func = lib.tryGet<T>(name); }
-};
-
-void CatBoostLibHolder::initAPI()
-{
-    load(api.ModelCalcerCreate, "ModelCalcerCreate");
-    load(api.ModelCalcerDelete, "ModelCalcerDelete");
-    load(api.GetErrorString, "GetErrorString");
-    load(api.LoadFullModelFromFile, "LoadFullModelFromFile");
-    load(api.CalcModelPredictionFlat, "CalcModelPredictionFlat");
-    load(api.CalcModelPrediction, "CalcModelPrediction");
-    load(api.CalcModelPredictionWithHashedCatFeatures, "CalcModelPredictionWithHashedCatFeatures");
-    load(api.GetStringCatFeatureHash, "GetStringCatFeatureHash");
-    load(api.GetIntegerCatFeatureHash, "GetIntegerCatFeatureHash");
-    load(api.GetFloatFeaturesCount, "GetFloatFeaturesCount");
-    load(api.GetCatFeaturesCount, "GetCatFeaturesCount");
-    tryLoad(api.CheckModelMetadataHasKey, "CheckModelMetadataHasKey");
-    tryLoad(api.GetModelInfoValueSize, "GetModelInfoValueSize");
-    tryLoad(api.GetModelInfoValue, "GetModelInfoValue");
-    tryLoad(api.GetTreeCount, "GetTreeCount");
-    tryLoad(api.GetDimensionsCount, "GetDimensionsCount");
-}
-
-std::shared_ptr<CatBoostLibHolder> getCatBoostWrapperHolder(const std::string & lib_path)
-{
-    static std::shared_ptr<CatBoostLibHolder> ptr;
-    static std::mutex mutex;
-
-    std::lock_guard lock(mutex);
-
-    if (!ptr || ptr->getCurrentPath() != lib_path)
-        ptr = std::make_shared<CatBoostLibHolder>(lib_path);
-
-    return ptr;
-}
-
-}
-
-
 CatBoostModel::CatBoostModel(std::string name_, std::string model_path_, std::string lib_path_,
                             const ExternalLoadableLifetime & lifetime_)
    : name(std::move(name_)), model_path(std::move(model_path_)), lib_path(std::move(lib_path_)), lifetime(lifetime_)
@ -502,43 +482,28 @@ CatBoostModel::CatBoostModel(std::string name_, std::string model_path_, std::st
    api_provider = getCatBoostWrapperHolder(lib_path);
    api = &api_provider->getAPI();
    model = std::make_unique<CatBoostModelImpl>(api, model_path);
-    float_features_count = model->getFloatFeaturesCount();
-    cat_features_count = model->getCatFeaturesCount();
-    tree_count = model->getTreeCount();
 }

-const ExternalLoadableLifetime & CatBoostModel::getLifetime() const
-{
-    return lifetime;
-}
-
-bool CatBoostModel::isModified() const
-{
-    return true;
-}
-
-std::shared_ptr<const IExternalLoadable> CatBoostModel::clone() const
-{
-    return std::make_shared<CatBoostModel>(name, model_path, lib_path, lifetime);
-}
+CatBoostModel::~CatBoostModel() = default;

 size_t CatBoostModel::getFloatFeaturesCount() const
 {
-    return float_features_count;
+    return model->getFloatFeaturesCount();
 }

 size_t CatBoostModel::getCatFeaturesCount() const
 {
-    return cat_features_count;
+    return model->getCatFeaturesCount();
 }

 size_t CatBoostModel::getTreeCount() const
 {
-    return tree_count;
+    return model->getTreeCount();
 }

 DataTypePtr CatBoostModel::getReturnType() const
 {
+    size_t tree_count = getTreeCount();
    auto type = std::make_shared<DataTypeFloat64>();
    if (tree_count == 1)
        return type;
@ -552,6 +517,7 @@ ColumnPtr CatBoostModel::evaluate(const ColumnRawPtrs & columns) const
 {
    if (!model)
        throw Exception("CatBoost model was not loaded.", ErrorCodes::LOGICAL_ERROR);
+
    return model->evaluate(columns);
 }

--- a/src/Interpreters/CatBoostModel.h
+++ b/src/Interpreters/CatBoostModel.h
@ -8,47 +8,32 @@
 namespace DB
 {

-/// CatBoost wrapper interface functions.
-struct CatBoostWrapperAPI;
-class CatBoostWrapperAPIProvider
-{
-public:
-    virtual ~CatBoostWrapperAPIProvider() = default;
-    virtual const CatBoostWrapperAPI & getAPI() const = 0;
-};
-
-/// CatBoost model interface.
-class ICatBoostModel
-{
-public:
-    virtual ~ICatBoostModel() = default;
-    /// Evaluate model. Use first `float_features_count` columns as float features,
-    /// the others `cat_features_count` as categorical features.
-    virtual ColumnPtr evaluate(const ColumnRawPtrs & columns) const = 0;
-
-    virtual size_t getFloatFeaturesCount() const = 0;
-    virtual size_t getCatFeaturesCount() const = 0;
-    virtual size_t getTreeCount() const = 0;
-};
+class CatBoostLibHolder;
+class CatBoostWrapperAPI;
+class CatBoostModelImpl;

 class IDataType;
 using DataTypePtr = std::shared_ptr<const IDataType>;

 /// General ML model evaluator interface.
-class IModel : public IExternalLoadable
+class IMLModel : public IExternalLoadable
 {
 public:
+    IMLModel() = default;
    virtual ColumnPtr evaluate(const ColumnRawPtrs & columns) const = 0;
    virtual std::string getTypeName() const = 0;
    virtual DataTypePtr getReturnType() const = 0;
+    virtual ~IMLModel() override = default;
 };

-class CatBoostModel : public IModel
+class CatBoostModel : public IMLModel
 {
 public:
    CatBoostModel(std::string name, std::string model_path,
                  std::string lib_path, const ExternalLoadableLifetime & lifetime);

+    ~CatBoostModel() override;
+
    ColumnPtr evaluate(const ColumnRawPtrs & columns) const override;
    std::string getTypeName() const override { return "catboost"; }

@ -59,29 +44,28 @@ public:

    /// IExternalLoadable interface.

-    const ExternalLoadableLifetime & getLifetime() const override;
+    const ExternalLoadableLifetime & getLifetime() const override { return lifetime; }

    std::string getLoadableName() const override { return name; }

    bool supportUpdates() const override { return true; }

-    bool isModified() const override;
+    bool isModified() const override { return true; }

-    std::shared_ptr<const IExternalLoadable> clone() const override;
+    std::shared_ptr<const IExternalLoadable> clone() const override
+    {
+        return std::make_shared<CatBoostModel>(name, model_path, lib_path, lifetime);
+    }

 private:
    const std::string name;
    std::string model_path;
    std::string lib_path;
    ExternalLoadableLifetime lifetime;
-    std::shared_ptr<CatBoostWrapperAPIProvider> api_provider;
+    std::shared_ptr<CatBoostLibHolder> api_provider;
    const CatBoostWrapperAPI * api;

-    std::unique_ptr<ICatBoostModel> model;
-
-    size_t float_features_count;
-    size_t cat_features_count;
-    size_t tree_count;
+    std::unique_ptr<CatBoostModelImpl> model;

    void init();
 };
--- a/src/Interpreters/Cluster.cpp
+++ b/src/Interpreters/Cluster.cpp
@ -132,7 +132,9 @@ Cluster::Address::Address(
    bool secure_,
    Int64 priority_,
    UInt32 shard_index_,
-    UInt32 replica_index_)
+    UInt32 replica_index_,
+    String cluster_name_,
+    String cluster_secret_)
    : user(user_), password(password_)
 {
    bool can_be_local = true;
@ -164,6 +166,8 @@ Cluster::Address::Address(
    is_local = can_be_local && isLocal(clickhouse_port);
    shard_index = shard_index_;
    replica_index = replica_index_;
+    cluster = cluster_name_;
+    cluster_secret = cluster_secret_;
 }


@ -537,10 +541,14 @@ Cluster::Cluster(
    bool treat_local_as_remote,
    bool treat_local_port_as_remote,
    bool secure,
-    Int64 priority)
+    Int64 priority,
+    String cluster_name,
+    String cluster_secret)
 {
    UInt32 current_shard_num = 1;

+    secret = cluster_secret;
+
    for (const auto & shard : names)
    {
        Addresses current;
@ -554,7 +562,9 @@ Cluster::Cluster(
                secure,
                priority,
                current_shard_num,
-                current.size() + 1);
+                current.size() + 1,
+                cluster_name,
+                cluster_secret);

        addresses_with_failover.emplace_back(current);

@ -690,6 +700,9 @@ Cluster::Cluster(Cluster::ReplicasAsShardsTag, const Cluster & from, const Setti
        }
    }

+    secret = from.secret;
+    name = from.name;
+
    initMisc();
 }

@ -704,6 +717,9 @@ Cluster::Cluster(Cluster::SubclusterTag, const Cluster & from, const std::vector
            addresses_with_failover.emplace_back(from.addresses_with_failover.at(index));
    }

+    secret = from.secret;
+    name = from.name;
+
    initMisc();
 }

--- a/src/Interpreters/Cluster.h
+++ b/src/Interpreters/Cluster.h
@ -55,7 +55,9 @@ public:
        bool treat_local_as_remote,
        bool treat_local_port_as_remote,
        bool secure = false,
-        Int64 priority = 1);
+        Int64 priority = 1,
+        String cluster_name = "",
+        String cluster_secret = "");

    Cluster(const Cluster &)= delete;
    Cluster & operator=(const Cluster &) = delete;
@ -127,7 +129,9 @@ public:
            bool secure_ = false,
            Int64 priority_ = 1,
            UInt32 shard_index_ = 0,
-            UInt32 replica_index_ = 0);
+            UInt32 replica_index_ = 0,
+            String cluster_name_ = "",
+            String cluster_secret_ = "");

        /// Returns 'escaped_host_name:port'
        String toString() const;
--- a/src/Interpreters/DDLWorker.cpp
+++ b/src/Interpreters/DDLWorker.cpp
@ -350,6 +350,12 @@ void DDLWorker::scheduleTasks(bool reinitialized)
            bool maybe_concurrently_deleting = task && !zookeeper->exists(fs::path(task->entry_path) / "active");
            return task && !maybe_concurrently_deleting && !maybe_currently_processing;
        }
+        else if (last_skipped_entry_name.has_value() && !queue_fully_loaded_after_initialization_debug_helper)
+        {
+            /// If connection was lost during queue loading
+            /// we may start processing from finished task (because we don't know yet that it's finished) and it's ok.
+            return false;
+        }
        else
        {
            /// Return true if entry should not be scheduled.
@ -365,7 +371,11 @@ void DDLWorker::scheduleTasks(bool reinitialized)

        String reason;
        auto task = initAndCheckTask(entry_name, reason, zookeeper);
-        if (!task)
+        if (task)
+        {
+            queue_fully_loaded_after_initialization_debug_helper = true;
+        }
+        else
        {
            LOG_DEBUG(log, "Will not execute task {}: {}", entry_name, reason);
            updateMaxDDLEntryID(entry_name);
--- a/src/Interpreters/DDLWorker.h
+++ b/src/Interpreters/DDLWorker.h
@ -131,6 +131,9 @@ protected:
    std::optional<String> first_failed_task_name;
    std::list<DDLTaskPtr> current_tasks;

+    /// This flag is needed for debug assertions only
+    bool queue_fully_loaded_after_initialization_debug_helper = false;
+
    Coordination::Stat queue_node_stat;
    std::shared_ptr<Poco::Event> queue_updated_event = std::make_shared<Poco::Event>();
    std::shared_ptr<Poco::Event> cleanup_event = std::make_shared<Poco::Event>();
--- a/src/Interpreters/ExpressionAnalyzer.cpp
+++ b/src/Interpreters/ExpressionAnalyzer.cpp
@ -100,20 +100,9 @@ bool checkPositionalArguments(ASTPtr & argument, const ASTSelectQuery * select_q
 {
    auto columns = select_query->select()->children;

-    const auto * group_by_expr_with_alias = dynamic_cast<const ASTWithAlias *>(argument.get());
-    if (group_by_expr_with_alias && !group_by_expr_with_alias->alias.empty())
-    {
-        for (const auto & column : columns)
-        {
-            const auto * col_with_alias = dynamic_cast<const ASTWithAlias *>(column.get());
-            if (col_with_alias)
-            {
-                const auto & alias = col_with_alias->alias;
-                if (!alias.empty() && alias == group_by_expr_with_alias->alias)
+    const auto * expr_with_alias = dynamic_cast<const ASTWithAlias *>(argument.get());
+    if (expr_with_alias && !expr_with_alias->alias.empty())
        return false;
-            }
-        }
-    }

    const auto * ast_literal = typeid_cast<const ASTLiteral *>(argument.get());
    if (!ast_literal)
@ -130,7 +119,7 @@ bool checkPositionalArguments(ASTPtr & argument, const ASTSelectQuery * select_q
                        pos, columns.size());

    const auto & column = columns[--pos];
-    if (typeid_cast<const ASTIdentifier *>(column.get()))
+    if (typeid_cast<const ASTIdentifier *>(column.get()) || typeid_cast<const ASTLiteral *>(column.get()))
    {
        argument = column->clone();
    }
@ -259,7 +248,7 @@ NamesAndTypesList ExpressionAnalyzer::getColumnsAfterArrayJoin(ActionsDAGPtr & a
    if (!array_join_expression_list)
        return src_columns;

-    getRootActionsNoMakeSet(array_join_expression_list, true, actions, false);
+    getRootActionsNoMakeSet(array_join_expression_list, actions, false);

    auto array_join = addMultipleArrayJoinAction(actions, is_array_join_left);
    auto sample_columns = actions->getResultColumns();
@ -294,7 +283,7 @@ NamesAndTypesList ExpressionAnalyzer::analyzeJoin(ActionsDAGPtr & actions, const
    const ASTTablesInSelectQueryElement * join = select_query->join();
    if (join)
    {
-        getRootActionsNoMakeSet(analyzedJoin().leftKeysList(), true, actions, false);
+        getRootActionsNoMakeSet(analyzedJoin().leftKeysList(), actions, false);
        auto sample_columns = actions->getNamesAndTypesList();
        syntax->analyzed_join->addJoinedColumnsAndCorrectTypes(sample_columns, true);
        actions = std::make_shared<ActionsDAG>(sample_columns);
@ -332,14 +321,14 @@ void ExpressionAnalyzer::analyzeAggregation(ActionsDAGPtr & temp_actions)
        {
            NameSet unique_keys;
            ASTs & group_asts = group_by_ast->children;
-            for (ssize_t i = 0; i < ssize_t(group_asts.size()); ++i)
+            for (ssize_t i = 0; i < static_cast<ssize_t>(group_asts.size()); ++i)
            {
                ssize_t size = group_asts.size();

                if (getContext()->getSettingsRef().enable_positional_arguments)
                    replaceForPositionalArguments(group_asts[i], select_query, ASTSelectQuery::Expression::GROUP_BY);

-                getRootActionsNoMakeSet(group_asts[i], true, temp_actions, false);
+                getRootActionsNoMakeSet(group_asts[i], temp_actions, false);

                const auto & column_name = group_asts[i]->getColumnName();

@ -405,8 +394,8 @@ void ExpressionAnalyzer::initGlobalSubqueriesAndExternalTables(bool do_global)
 {
    if (do_global)
    {
-        GlobalSubqueriesVisitor::Data subqueries_data(getContext(), subquery_depth, isRemoteStorage(),
-                                                   external_tables, subqueries_for_sets, has_global_subqueries);
+        GlobalSubqueriesVisitor::Data subqueries_data(
+            getContext(), subquery_depth, isRemoteStorage(), external_tables, subqueries_for_sets, has_global_subqueries);
        GlobalSubqueriesVisitor(subqueries_data).visit(query);
    }
 }
@ -416,7 +405,7 @@ void ExpressionAnalyzer::tryMakeSetForIndexFromSubquery(const ASTPtr & subquery_
 {
    auto set_key = PreparedSetKey::forSubquery(*subquery_or_table_name);

-    if (prepared_sets.count(set_key))
+    if (prepared_sets.contains(set_key))
        return; /// Already prepared.

    if (auto set_ptr_from_storage_set = isPlainStorageSetInSubquery(subquery_or_table_name))
@ -509,33 +498,62 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node)
 }


-void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts)
+void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts)
 {
    LogAST log;
-    ActionsVisitor::Data visitor_data(getContext(), settings.size_limits_for_set, subquery_depth,
-                                   sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets,
-                                   no_subqueries, false, only_consts, !isRemoteStorage());
+    ActionsVisitor::Data visitor_data(
+        getContext(),
+        settings.size_limits_for_set,
+        subquery_depth,
+        sourceColumns(),
+        std::move(actions),
+        prepared_sets,
+        subqueries_for_sets,
+        no_makeset_for_subqueries,
+        false /* no_makeset */,
+        only_consts,
+        !isRemoteStorage() /* create_source_for_in */);
    ActionsVisitor(visitor_data, log.stream()).visit(ast);
    actions = visitor_data.getActions();
 }


-void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts)
+void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGPtr & actions, bool only_consts)
 {
    LogAST log;
-    ActionsVisitor::Data visitor_data(getContext(), settings.size_limits_for_set, subquery_depth,
-                                   sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets,
-                                   no_subqueries, true, only_consts, !isRemoteStorage());
+    ActionsVisitor::Data visitor_data(
+        getContext(),
+        settings.size_limits_for_set,
+        subquery_depth,
+        sourceColumns(),
+        std::move(actions),
+        prepared_sets,
+        subqueries_for_sets,
+        true /* no_makeset_for_subqueries, no_makeset implies no_makeset_for_subqueries */,
+        true /* no_makeset */,
+        only_consts,
+        !isRemoteStorage() /* create_source_for_in */);
    ActionsVisitor(visitor_data, log.stream()).visit(ast);
    actions = visitor_data.getActions();
 }

-void ExpressionAnalyzer::getRootActionsForHaving(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts)
+
+void ExpressionAnalyzer::getRootActionsForHaving(
+    const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts)
 {
    LogAST log;
-    ActionsVisitor::Data visitor_data(getContext(), settings.size_limits_for_set, subquery_depth,
-                                   sourceColumns(), std::move(actions), prepared_sets, subqueries_for_sets,
-                                   no_subqueries, false, only_consts, true);
+    ActionsVisitor::Data visitor_data(
+        getContext(),
+        settings.size_limits_for_set,
+        subquery_depth,
+        sourceColumns(),
+        std::move(actions),
+        prepared_sets,
+        subqueries_for_sets,
+        no_makeset_for_subqueries,
+        false /* no_makeset */,
+        only_consts,
+        true /* create_source_for_in */);
    ActionsVisitor(visitor_data, log.stream()).visit(ast);
    actions = visitor_data.getActions();
 }
@ -547,7 +565,7 @@ void ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions, Aggr
    {
        AggregateDescription aggregate;
        if (node->arguments)
-            getRootActionsNoMakeSet(node->arguments, true, actions);
+            getRootActionsNoMakeSet(node->arguments, actions);

        aggregate.column_name = node->getColumnName();

@ -746,8 +764,7 @@ void ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr actions)
        // Requiring a constant reference to a shared pointer to non-const AST
        // doesn't really look sane, but the visitor does indeed require it.
        // Hence we clone the node (not very sane either, I know).
-        getRootActionsNoMakeSet(window_function.function_node->clone(),
-            true, actions);
+        getRootActionsNoMakeSet(window_function.function_node->clone(), actions);

        const ASTs & arguments
            = window_function.function_node->arguments->children;
@ -867,8 +884,7 @@ ArrayJoinActionPtr SelectQueryExpressionAnalyzer::appendArrayJoin(ExpressionActi
    auto array_join = addMultipleArrayJoinAction(step.actions(), is_array_join_left);
    before_array_join = chain.getLastActions();

-    chain.steps.push_back(std::make_unique<ExpressionActionsChain::ArrayJoinStep>(
-            array_join, step.getResultColumns()));
+    chain.steps.push_back(std::make_unique<ExpressionActionsChain::ArrayJoinStep>(array_join, step.getResultColumns()));

    chain.addStep();

@ -1099,8 +1115,8 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendPrewhere(
            }
        }

-        chain.steps.emplace_back(std::make_unique<ExpressionActionsChain::ExpressionActionsStep>(
-                std::make_shared<ActionsDAG>(std::move(columns))));
+        chain.steps.emplace_back(
+            std::make_unique<ExpressionActionsChain::ExpressionActionsStep>(std::make_shared<ActionsDAG>(std::move(columns))));
        chain.steps.back()->additional_input = std::move(unused_source_columns);
        chain.getLastActions();
        chain.addStep();
@ -1210,8 +1226,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments(
    // recursively together with (1b) as ASTFunction::window_definition.
    if (getSelectQuery()->window())
    {
-        getRootActionsNoMakeSet(getSelectQuery()->window(),
-            true /* no_subqueries */, step.actions());
+        getRootActionsNoMakeSet(getSelectQuery()->window(), step.actions());
    }

    for (const auto & [_, w] : window_descriptions)
@ -1222,8 +1237,7 @@ void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments(
            // definitions (1a).
            // Requiring a constant reference to a shared pointer to non-const AST
            // doesn't really look sane, but the visitor does indeed require it.
-            getRootActionsNoMakeSet(f.function_node->clone(),
-                true /* no_subqueries */, step.actions());
+            getRootActionsNoMakeSet(f.function_node->clone(), step.actions());

            // (2b) Required function argument columns.
            for (const auto & a : f.function_node->arguments->children)
@ -1299,8 +1313,10 @@ ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChai
            throw Exception("Bad ORDER BY expression AST", ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE);

        if (getContext()->getSettingsRef().enable_positional_arguments)
+        {
            replaceForPositionalArguments(ast->children.at(0), select_query, ASTSelectQuery::Expression::ORDER_BY);
        }
+    }

    getRootActions(select_query->orderBy(), only_types, step.actions());

@ -1456,7 +1472,7 @@ ActionsDAGPtr ExpressionAnalyzer::getActionsDAG(bool add_aliases, bool project_r
            alias = name;
        result_columns.emplace_back(name, alias);
        result_names.push_back(alias);
-        getRootActions(ast, false, actions_dag);
+        getRootActions(ast, false /* no_makeset_for_subqueries */, actions_dag);
    }

    if (add_aliases)
@ -1496,7 +1512,7 @@ ExpressionActionsPtr ExpressionAnalyzer::getConstActions(const ColumnsWithTypeAn
 {
    auto actions = std::make_shared<ActionsDAG>(constant_inputs);

-    getRootActions(query, true, actions, true);
+    getRootActions(query, true /* no_makeset_for_subqueries */, actions, true /* only_consts */);
    return std::make_shared<ExpressionActions>(actions, ExpressionActionsSettings::fromContext(getContext()));
 }

--- a/src/Interpreters/ExpressionAnalyzer.h
+++ b/src/Interpreters/ExpressionAnalyzer.h
@ -172,15 +172,15 @@ protected:

    ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool is_left) const;

-    void getRootActions(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
+    void getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false);

    /** Similar to getRootActions but do not make sets when analyzing IN functions. It's used in
      * analyzeAggregation which happens earlier than analyzing PREWHERE and WHERE. If we did, the
      * prepared sets would not be applicable for MergeTree index optimization.
      */
-    void getRootActionsNoMakeSet(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
+    void getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGPtr & actions, bool only_consts = false);

-    void getRootActionsForHaving(const ASTPtr & ast, bool no_subqueries, ActionsDAGPtr & actions, bool only_consts = false);
+    void getRootActionsForHaving(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false);

    /** Add aggregation keys to aggregation_keys, aggregate functions to aggregate_descriptions,
      * Create a set of columns aggregated_columns resulting after the aggregation, if any,
--- a/src/Interpreters/ExternalModelsLoader.h
+++ b/src/Interpreters/ExternalModelsLoader.h
@ -15,14 +15,14 @@ namespace DB
 class ExternalModelsLoader : public ExternalLoader, WithContext
 {
 public:
-    using ModelPtr = std::shared_ptr<const IModel>;
+    using ModelPtr = std::shared_ptr<const IMLModel>;

    /// Models will be loaded immediately and then will be updated in separate thread, each 'reload_period' seconds.
    explicit ExternalModelsLoader(ContextPtr context_);

    ModelPtr getModel(const std::string & model_name) const
    {
-        return std::static_pointer_cast<const IModel>(load(model_name));
+        return std::static_pointer_cast<const IMLModel>(load(model_name));
    }

    void reloadModel(const std::string & model_name) const
--- a/src/Interpreters/GlobalSubqueriesVisitor.h
+++ b/src/Interpreters/GlobalSubqueriesVisitor.h
@ -10,6 +10,7 @@
 #include <Interpreters/interpretSubquery.h>
 #include <Interpreters/SubqueryForSet.h>
 #include <Parsers/ASTFunction.h>
+#include <Parsers/ASTIdentifier.h>
 #include <Parsers/ASTLiteral.h>
 #include <Parsers/ASTSelectQuery.h>
 #include <Parsers/ASTSubquery.h>
@ -17,7 +18,11 @@
 #include <Parsers/IAST.h>
 #include <Processors/Executors/CompletedPipelineExecutor.h>
 #include <Processors/Sinks/SinkToStorage.h>
+#include <Processors/QueryPlan/QueryPlan.h>
 #include <Common/typeid_cast.h>
+#include <Storages/ColumnsDescription.h>
+#include <Storages/ConstraintsDescription.h>
+#include <Storages/IStorage.h>

 namespace DB
 {
@ -34,7 +39,6 @@ public:
    {
        size_t subquery_depth;
        bool is_remote;
-        size_t external_table_id;
        TemporaryTablesMapping & external_tables;
        SubqueriesForSets & subqueries_for_sets;
        bool & has_global_subqueries;
@ -49,7 +53,6 @@ public:
            : WithContext(context_)
            , subquery_depth(subquery_depth_)
            , is_remote(is_remote_)
-            , external_table_id(1)
            , external_tables(tables)
            , subqueries_for_sets(subqueries_for_sets_)
            , has_global_subqueries(has_global_subqueries_)
@ -92,48 +95,33 @@ public:
            {
                /// If this is already an external table, you do not need to add anything. Just remember its presence.
                auto temporary_table_name = getIdentifierName(subquery_or_table_name);
-                bool exists_in_local_map = external_tables.end() != external_tables.find(temporary_table_name);
+                bool exists_in_local_map = external_tables.contains(temporary_table_name);
                bool exists_in_context = static_cast<bool>(getContext()->tryResolveStorageID(
                    StorageID("", temporary_table_name), Context::ResolveExternal));
                if (exists_in_local_map || exists_in_context)
                    return;
            }

-            String external_table_name = subquery_or_table_name->tryGetAlias();
-            if (external_table_name.empty())
+            String alias = subquery_or_table_name->tryGetAlias();
+            String external_table_name;
+            if (alias.empty())
            {
-                /// Generate the name for the external table.
-                external_table_name = "_data" + toString(external_table_id);
-                while (external_tables.count(external_table_name))
-                {
-                    ++external_table_id;
-                    external_table_name = "_data" + toString(external_table_id);
+                auto hash = subquery_or_table_name->getTreeHash();
+                external_table_name = fmt::format("_data_{}_{}", hash.first, hash.second);
            }
-            }
-
-            auto interpreter = interpretSubquery(subquery_or_table_name, getContext(), subquery_depth, {});
-
-            Block sample = interpreter->getSampleBlock();
-            NamesAndTypesList columns = sample.getNamesAndTypesList();
-
-            auto external_storage_holder = std::make_shared<TemporaryTableHolder>(
-                getContext(),
-                ColumnsDescription{columns},
-                ConstraintsDescription{},
-                nullptr,
-                /*create_for_global_subquery*/ true);
-            StoragePtr external_storage = external_storage_holder->getTable();
+            else
+                external_table_name = alias;

            /** We replace the subquery with the name of the temporary table.
                * It is in this form, the request will go to the remote server.
                * This temporary table will go to the remote server, and on its side,
                *  instead of doing a subquery, you just need to read it.
+                *  TODO We can do better than using alias to name external tables
                */

            auto database_and_table_name = std::make_shared<ASTTableIdentifier>(external_table_name);
            if (set_alias)
            {
-                String alias = subquery_or_table_name->tryGetAlias();
                if (auto * table_name = subquery_or_table_name->as<ASTTableIdentifier>())
                    if (alias.empty())
                        alias = table_name->shortName();
@ -151,8 +139,27 @@ public:
            else
                ast = database_and_table_name;

-            external_tables[external_table_name] = external_storage_holder;
+            if (external_tables.contains(external_table_name))
+                return;

+            auto interpreter = interpretSubquery(subquery_or_table_name, getContext(), subquery_depth, {});
+
+            Block sample = interpreter->getSampleBlock();
+            NamesAndTypesList columns = sample.getNamesAndTypesList();
+
+            auto external_storage_holder = std::make_shared<TemporaryTableHolder>(
+                getContext(),
+                ColumnsDescription{columns},
+                ConstraintsDescription{},
+                nullptr,
+                /*create_for_global_subquery*/ true);
+            StoragePtr external_storage = external_storage_holder->getTable();
+
+            external_tables.emplace(external_table_name, external_storage_holder);
+
+            /// We need to materialize external tables immediately because reading from distributed
+            /// tables might generate local plans which can refer to external tables during index
+            /// analysis. It's too late to populate the external table via CreatingSetsTransform.
            if (getContext()->getSettingsRef().use_index_for_in_with_subqueries)
            {
                auto external_table = external_storage_holder->getTable();
--- a/src/Interpreters/HashJoin.cpp
+++ b/src/Interpreters/HashJoin.cpp
@ -962,18 +962,29 @@ public:
            /// If it's joinGetOrNull, we need to wrap not-nullable columns in StorageJoin.
            for (size_t j = 0, size = right_indexes.size(); j < size; ++j)
            {
-                const auto & column = *block.getByPosition(right_indexes[j]).column;
-                if (auto * nullable_col = typeid_cast<ColumnNullable *>(columns[j].get()); nullable_col && !column.isNullable())
-                    nullable_col->insertFromNotNullable(column, row_num);
+                auto column_from_block = block.getByPosition(right_indexes[j]);
+                if (type_name[j].type->lowCardinality() != column_from_block.type->lowCardinality())
+                {
+                    JoinCommon::changeLowCardinalityInplace(column_from_block);
+                }
+
+                if (auto * nullable_col = typeid_cast<ColumnNullable *>(columns[j].get());
+                    nullable_col && !column_from_block.column->isNullable())
+                    nullable_col->insertFromNotNullable(*column_from_block.column, row_num);
                else
-                    columns[j]->insertFrom(column, row_num);
+                    columns[j]->insertFrom(*column_from_block.column, row_num);
            }
        }
        else
        {
            for (size_t j = 0, size = right_indexes.size(); j < size; ++j)
            {
-                columns[j]->insertFrom(*block.getByPosition(right_indexes[j]).column, row_num);
+                auto column_from_block = block.getByPosition(right_indexes[j]);
+                if (type_name[j].type->lowCardinality() != column_from_block.type->lowCardinality())
+                {
+                    JoinCommon::changeLowCardinalityInplace(column_from_block);
+                }
+                columns[j]->insertFrom(*column_from_block.column, row_num);
            }
        }
    }
@ -1013,6 +1024,7 @@ private:

    void addColumn(const ColumnWithTypeAndName & src_column, const std::string & qualified_name)
    {
+
        columns.push_back(src_column.column->cloneEmpty());
        columns.back()->reserve(src_column.column->size());
        type_name.emplace_back(src_column.type, src_column.name, qualified_name);
@ -1237,16 +1249,16 @@ NO_INLINE IColumn::Filter joinRightColumns(
                {
                    const IColumn & left_asof_key = added_columns.leftAsofKey();

-                    auto [block, row_num] = mapped->findAsof(left_asof_key, i);
-                    if (block)
+                    auto row_ref = mapped->findAsof(left_asof_key, i);
+                    if (row_ref.block)
                    {
                        setUsed<need_filter>(filter, i);
                        if constexpr (multiple_disjuncts)
-                            used_flags.template setUsed<jf.need_flags, multiple_disjuncts>(block, row_num, 0);
+                            used_flags.template setUsed<jf.need_flags, multiple_disjuncts>(row_ref.block, row_ref.row_num, 0);
                        else
                            used_flags.template setUsed<jf.need_flags, multiple_disjuncts>(find_result);

-                        added_columns.appendFromBlock<jf.add_missing>(*block, row_num);
+                        added_columns.appendFromBlock<jf.add_missing>(*row_ref.block, row_ref.row_num);
                    }
                    else
                        addNotFoundRow<jf.add_missing, jf.need_replication>(added_columns, current_offset);
--- a/src/Interpreters/InterpreterCreateQuery.cpp
+++ b/src/Interpreters/InterpreterCreateQuery.cpp
@ -1180,11 +1180,10 @@ bool InterpreterCreateQuery::doCreateTable(ASTCreateQuery & create,
        /// old instance of the storage. For example, AsynchronousMetrics may cause ATTACH to fail,
        /// so we allow waiting here. If database_atomic_wait_for_drop_and_detach_synchronously is disabled
        /// and old storage instance still exists it will throw exception.
-        bool throw_if_table_in_use = getContext()->getSettingsRef().database_atomic_wait_for_drop_and_detach_synchronously;
-        if (throw_if_table_in_use)
-            database->checkDetachedTableNotInUse(create.uuid);
-        else
+        if (getContext()->getSettingsRef().database_atomic_wait_for_drop_and_detach_synchronously)
            database->waitDetachedTableNotInUse(create.uuid);
+        else
+            database->checkDetachedTableNotInUse(create.uuid);
    }

    StoragePtr res;
--- a/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/src/Interpreters/InterpreterSelectQuery.cpp
@ -359,6 +359,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
            table_lock.reset();
            table_id = StorageID::createEmpty();
            metadata_snapshot = nullptr;
+            storage_snapshot = nullptr;
        }
    }

@ -1241,10 +1242,6 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
            }

            preliminary_sort();
-
-            // If there is no global subqueries, we can run subqueries only when receive them on server.
-            if (!query_analyzer->hasGlobalSubqueries() && !subqueries_for_sets.empty())
-                executeSubqueriesInSetsAndJoins(query_plan, subqueries_for_sets);
        }

        if (expressions.second_stage || from_aggregation_stage)
@ -1427,7 +1424,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, std::optional<P
        }
    }

-    if (!subqueries_for_sets.empty() && (expressions.hasHaving() || query_analyzer->hasGlobalSubqueries()))
+    if (!subqueries_for_sets.empty())
        executeSubqueriesInSetsAndJoins(query_plan, subqueries_for_sets);
 }

@ -1891,7 +1888,7 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
        && limit_length <= std::numeric_limits<UInt64>::max() - limit_offset
        && limit_length + limit_offset < max_block_size)
    {
-        max_block_size = std::max(UInt64(1), limit_length + limit_offset);
+        max_block_size = std::max(UInt64{1}, limit_length + limit_offset);
        max_threads_execute_query = max_streams = 1;
    }

@ -2577,11 +2574,11 @@ void InterpreterSelectQuery::executeExtremes(QueryPlan & query_plan)

 void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(QueryPlan & query_plan, SubqueriesForSets & subqueries_for_sets)
 {
-    const auto & input_order_info = query_info.input_order_info
-        ? query_info.input_order_info
-        : (query_info.projection ? query_info.projection->input_order_info : nullptr);
-    if (input_order_info)
-        executeMergeSorted(query_plan, input_order_info->order_key_prefix_descr, 0, "before creating sets for subqueries and joins");
+    // const auto & input_order_info = query_info.input_order_info
+    //     ? query_info.input_order_info
+    //     : (query_info.projection ? query_info.projection->input_order_info : nullptr);
+    // if (input_order_info)
+    //     executeMergeSorted(query_plan, input_order_info->order_key_prefix_descr, 0, "before creating sets for subqueries and joins");

    const Settings & settings = context->getSettingsRef();

--- a/src/Interpreters/ProcessList.h
+++ b/src/Interpreters/ProcessList.h
@ -351,15 +351,6 @@ public:
        max_size = max_size_;
    }

-    // Before calling this method you should be sure
-    // that lock is acquired.
-    template <typename F>
-    void processEachQueryStatus(F && func) const
-    {
-        for (auto && query : processes)
-            func(query);
-    }
-
    void setMaxInsertQueriesAmount(size_t max_insert_queries_amount_)
    {
        std::lock_guard lock(mutex);
--- a/src/Interpreters/RowRefs.cpp
+++ b/src/Interpreters/RowRefs.cpp
@ -1,5 +1,6 @@
 #include <Interpreters/RowRefs.h>

+#include <Common/RadixSort.h>
 #include <AggregateFunctions/Helpers.h>
 #include <Columns/IColumn.h>
 #include <DataTypes/IDataType.h>
@ -44,38 +45,52 @@ class SortedLookupVector : public SortedLookupVectorBase
 {
    struct Entry
    {
-        /// We don't store a RowRef and instead keep it's members separately (and return a tuple) to reduce the memory usage.
-        /// For example, for sizeof(T) == 4 => sizeof(Entry) == 16 (while before it would be 20). Then when you put it into a vector, the effect is even greater
-        decltype(RowRef::block) block;
-        decltype(RowRef::row_num) row_num;
-        TKey asof_value;
+        TKey value; /// Key read from the ASOF column in insert(); entries are sorted by this field.
+        uint32_t row_ref_index; /// Position of the corresponding RowRef inside row_refs.

        Entry() = delete;
-        Entry(TKey v, const Block * b, size_t r) : block(b), row_num(r), asof_value(v) { }
+        Entry(TKey value_, uint32_t row_ref_index_)
+            : value(value_)
+            , row_ref_index(row_ref_index_)
+        { }

-        bool operator<(const Entry & other) const { return asof_value < other.asof_value; }
+    };
+
+    struct LessEntryOperator /// Compares entries by key, ascending; sort() uses it when is_descending is false.
+    {
+        ALWAYS_INLINE bool operator()(const Entry & lhs, const Entry & rhs) const
+        {
+            return lhs.value < rhs.value;
+        }
    };

    struct GreaterEntryOperator
    {
-        bool operator()(Entry const & a, Entry const & b) const { return a.asof_value > b.asof_value; }
+        ALWAYS_INLINE bool operator()(const Entry & lhs, const Entry & rhs) const
+        {
+            return lhs.value > rhs.value; /// Descending order by key; sort() uses this when is_descending is true.
+        }
    };


 public:
-    using Base = std::vector<Entry>;
    using Keys = std::vector<TKey>;
-    static constexpr bool isDescending = (inequality == ASOF::Inequality::Greater || inequality == ASOF::Inequality::GreaterOrEquals);
-    static constexpr bool isStrict = (inequality == ASOF::Inequality::Less) || (inequality == ASOF::Inequality::Greater);
+    using Entries = PaddedPODArray<Entry>;
+    using RowRefs = PaddedPODArray<RowRef>;
+
+    static constexpr bool is_descending = (inequality == ASOF::Inequality::Greater || inequality == ASOF::Inequality::GreaterOrEquals);
+    static constexpr bool is_strict = (inequality == ASOF::Inequality::Less) || (inequality == ASOF::Inequality::Greater);

    void insert(const IColumn & asof_column, const Block * block, size_t row_num) override
    {
        using ColumnType = ColumnVectorOrDecimal<TKey>;
        const auto & column = assert_cast<const ColumnType &>(asof_column);
-        TKey k = column.getElement(row_num);
+        TKey key = column.getElement(row_num);

        assert(!sorted.load(std::memory_order_acquire));
-        array.emplace_back(k, block, row_num);
+
+        entries.emplace_back(key, row_refs.size());
+        row_refs.emplace_back(RowRef(block, row_num));
    }

    /// Unrolled version of upper_bound and lower_bound
@ -84,30 +99,30 @@ public:
    /// at https://en.algorithmica.org/hpc/data-structures/s-tree/
    size_t boundSearch(TKey value)
    {
-        size_t size = array.size();
+        size_t size = entries.size();
        size_t low = 0;

        /// This is a single binary search iteration as a macro to unroll. Takes into account the inequality:
-        /// isStrict -> Equal values are not requested
-        /// isDescending -> The vector is sorted in reverse (for greater or greaterOrEquals)
+        /// is_strict -> Equal values are not requested
+        /// is_descending -> The vector is sorted in reverse (for greater or greaterOrEquals)
 #define BOUND_ITERATION \
    { \
        size_t half = size / 2; \
        size_t other_half = size - half; \
        size_t probe = low + half; \
        size_t other_low = low + other_half; \
-        TKey v = array[probe].asof_value; \
+        TKey & v = entries[probe].value; \
        size = half; \
-        if constexpr (isDescending) \
+        if constexpr (is_descending) \
        { \
-            if constexpr (isStrict) \
+            if constexpr (is_strict) \
                low = value <= v ? other_low : low; \
            else \
                low = value < v ? other_low : low; \
        } \
        else \
        { \
-            if constexpr (isStrict) \
+            if constexpr (is_strict) \
                low = value >= v ? other_low : low; \
            else \
                low = value > v ? other_low : low; \
@ -130,7 +145,7 @@ public:
        return low;
    }

-    std::tuple<decltype(RowRef::block), decltype(RowRef::row_num)> findAsof(const IColumn & asof_column, size_t row_num) override
+    RowRef findAsof(const IColumn & asof_column, size_t row_num) override
    {
        sort();

@ -139,8 +154,11 @@ public:
        TKey k = column.getElement(row_num);

        size_t pos = boundSearch(k);
-        if (pos != array.size())
-            return std::make_tuple(array[pos].block, array[pos].row_num);
+        if (pos != entries.size())
+        {
+            size_t row_ref_index = entries[pos].row_ref_index;
+            return row_refs[row_ref_index];
+        }

        return {nullptr, 0};
    }
@ -148,7 +166,8 @@ public:
 private:
    std::atomic<bool> sorted = false;
    mutable std::mutex lock;
-    Base array;
+    Entries entries;
+    RowRefs row_refs;

    // Double checked locking with SC atomics works in C++
    // https://preshing.com/20130930/double-checked-locking-is-fixed-in-cpp11/
@ -160,12 +179,37 @@ private:
        if (!sorted.load(std::memory_order_acquire))
        {
            std::lock_guard<std::mutex> l(lock);
+
            if (!sorted.load(std::memory_order_relaxed))
            {
-                if constexpr (isDescending)
-                    ::sort(array.begin(), array.end(), GreaterEntryOperator());
+                if constexpr (std::is_arithmetic_v<TKey> && !std::is_floating_point_v<TKey>)
+                {
+                    if (likely(entries.size() > 256))
+                    {
+                        struct RadixSortTraits : RadixSortNumTraits<TKey>
+                        {
+                            using Element = Entry;
+                            using Result = Element;
+
+                            static TKey & extractKey(Element & elem) { return elem.value; }
+                            static Element extractResult(Element & elem) { return elem; }
+                        };
+
+                        if constexpr (is_descending)
+                            RadixSort<RadixSortTraits>::executeLSD(entries.data(), entries.size(), true);
                        else
-                    ::sort(array.begin(), array.end());
+                            RadixSort<RadixSortTraits>::executeLSD(entries.data(), entries.size(), false);
+
+                        sorted.store(true, std::memory_order_release);
+                        return;
+                    }
+                }
+
+                if constexpr (is_descending)
+                    ::sort(entries.begin(), entries.end(), GreaterEntryOperator());
+                else
+                    ::sort(entries.begin(), entries.end(), LessEntryOperator());
+
                sorted.store(true, std::memory_order_release);
            }
        }
--- a/Show More
+++ b/Show More