Merge branch 'master' of github.com:ClickHouse/ClickHouse into fix-web-disk-2

2024-11-24 08:32:02 +00:00 · 2021-09-08 20:54:07 +03:00 · 2021-09-08 20:54:07 +03:00 · 6108a4139c
commit 6108a4139c
parent 7141b5d041 5ff689a5cd
170 changed files with 3274 additions and 972 deletions
--- a/.clang-tidy
+++ b/.clang-tidy
@ -203,3 +203,5 @@ CheckOptions:
    value: CamelCase
  - key: readability-identifier-naming.UsingCase
    value: CamelCase
+  - key: modernize-loop-convert.UseCxx20ReverseRanges
+    value: false
--- a/base/common/logger_useful.h
+++ b/base/common/logger_useful.h
@ -42,6 +42,7 @@ namespace
 } while (false)


+#define LOG_TEST(logger, ...)    LOG_IMPL(logger, DB::LogsLevel::test, Poco::Message::PRIO_TEST, __VA_ARGS__)
 #define LOG_TRACE(logger, ...)   LOG_IMPL(logger, DB::LogsLevel::trace, Poco::Message::PRIO_TRACE, __VA_ARGS__)
 #define LOG_DEBUG(logger, ...)   LOG_IMPL(logger, DB::LogsLevel::debug, Poco::Message::PRIO_DEBUG, __VA_ARGS__)
 #define LOG_INFO(logger, ...)    LOG_IMPL(logger, DB::LogsLevel::information, Poco::Message::PRIO_INFORMATION, __VA_ARGS__)
--- a/base/mysqlxx/Pool.cpp
+++ b/base/mysqlxx/Pool.cpp
@ -7,10 +7,22 @@
 #endif

 #include <mysqlxx/Pool.h>
-
 #include <common/sleep.h>
-
 #include <Poco/Util/LayeredConfiguration.h>
+#include <ctime>
+
+
+namespace
+{
+
+inline uint64_t clock_gettime_ns(clockid_t clock_type = CLOCK_MONOTONIC) /// Reads the given clock (CLOCK_MONOTONIC by default) and returns its value in nanoseconds.
+{
+    struct timespec ts;
+    clock_gettime(clock_type, &ts); /// NOTE(review): the return code is not checked; presumably on failure `ts` stays uninitialized -- confirm.
+    return uint64_t(ts.tv_sec * 1000000000LL + ts.tv_nsec); /// Seconds are scaled to nanoseconds (long long arithmetic) before the sub-second part is added.
+}
+
+}


 namespace mysqlxx
@ -124,10 +136,15 @@ Pool::~Pool()
 }


-Pool::Entry Pool::get()
+Pool::Entry Pool::get(uint64_t wait_timeout)
 {
    std::unique_lock<std::mutex> lock(mutex);

+    uint64_t deadline = 0;
+    /// UINT64_MAX -- wait indefinitely
+    if (wait_timeout && wait_timeout != UINT64_MAX)
+        deadline = clock_gettime_ns() + wait_timeout * 1'000'000'000;
+
    initialize();
    for (;;)
    {
@ -153,6 +170,12 @@ Pool::Entry Pool::get()
            logger.trace("(%s): Unable to create a new connection: Max number of connections has been reached.", getDescription());
        }

+        if (!wait_timeout)
+            throw Poco::Exception("mysqlxx::Pool is full (wait is disabled, see connection_wait_timeout setting)");
+
+        if (deadline && clock_gettime_ns() >= deadline)
+            throw Poco::Exception("mysqlxx::Pool is full (connection_wait_timeout is exceeded)");
+
        lock.unlock();
        logger.trace("(%s): Sleeping for %d seconds.", getDescription(), MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL);
        sleepForSeconds(MYSQLXX_POOL_SLEEP_ON_CONNECT_FAIL);
--- a/base/mysqlxx/Pool.h
+++ b/base/mysqlxx/Pool.h
@ -189,7 +189,7 @@ public:
    ~Pool();

    /// Allocates connection.
-    Entry get();
+    Entry get(uint64_t wait_timeout);

    /// Allocates connection.
    /// If database is not accessible, returns empty Entry object.
--- a/base/mysqlxx/PoolWithFailover.cpp
+++ b/base/mysqlxx/PoolWithFailover.cpp
@ -21,8 +21,9 @@ PoolWithFailover::PoolWithFailover(
        const unsigned max_connections_,
        const size_t max_tries_)
    : max_tries(max_tries_)
+    , shareable(config_.getBool(config_name_ + ".share_connection", false))
+    , wait_timeout(UINT64_MAX)
 {
-    shareable = config_.getBool(config_name_ + ".share_connection", false);
    if (config_.has(config_name_ + ".replica"))
    {
        Poco::Util::AbstractConfiguration::Keys replica_keys;
@ -80,9 +81,11 @@ PoolWithFailover::PoolWithFailover(
        const std::string & password,
        unsigned default_connections_,
        unsigned max_connections_,
-        size_t max_tries_)
+        size_t max_tries_,
+        uint64_t wait_timeout_)
    : max_tries(max_tries_)
    , shareable(false)
+    , wait_timeout(wait_timeout_)
 {
    /// Replicas have the same priority, but traversed replicas are moved to the end of the queue.
    for (const auto & [host, port] : addresses)
@ -101,6 +104,7 @@ PoolWithFailover::PoolWithFailover(
 PoolWithFailover::PoolWithFailover(const PoolWithFailover & other)
    : max_tries{other.max_tries}
    , shareable{other.shareable}
+    , wait_timeout(other.wait_timeout)
 {
    if (shareable)
    {
@ -140,7 +144,7 @@ PoolWithFailover::Entry PoolWithFailover::get()

                try
                {
-                    Entry entry = shareable ? pool->get() : pool->tryGet();
+                    Entry entry = shareable ? pool->get(wait_timeout) : pool->tryGet();

                    if (!entry.isNull())
                    {
@ -172,7 +176,7 @@ PoolWithFailover::Entry PoolWithFailover::get()
    if (full_pool)
    {
        app.logger().error("All connections failed, trying to wait on a full pool " + (*full_pool)->getDescription());
-        return (*full_pool)->get();
+        return (*full_pool)->get(wait_timeout);
    }

    std::stringstream message;
--- a/base/mysqlxx/PoolWithFailover.h
+++ b/base/mysqlxx/PoolWithFailover.h
@ -80,6 +80,8 @@ namespace mysqlxx
        std::mutex mutex;
        /// Can the Pool be shared
        bool shareable;
+        /// Timeout (in seconds) for waiting for a free connection.
+        uint64_t wait_timeout = 0;

    public:
        using Entry = Pool::Entry;
@ -96,6 +98,7 @@ namespace mysqlxx
         * default_connections   Number of connection in pool to each replica at start.
         * max_connections       Maximum number of connections in pool to each replica.
         * max_tries_            Max number of connection tries.
+         * wait_timeout_         Timeout (in seconds) for waiting for a free connection.
         */
        PoolWithFailover(
            const std::string & config_name_,
@ -117,7 +120,8 @@ namespace mysqlxx
            const std::string & password,
            unsigned default_connections_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_START_CONNECTIONS,
            unsigned max_connections_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_CONNECTIONS,
-            size_t max_tries_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES);
+            size_t max_tries_ = MYSQLXX_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES,
+            uint64_t wait_timeout_ = UINT64_MAX);

        PoolWithFailover(const PoolWithFailover & other);

--- a/cmake/analysis.cmake
+++ b/cmake/analysis.cmake
@ -6,7 +6,7 @@ if (ENABLE_CLANG_TIDY)
        message(FATAL_ERROR "clang-tidy requires CMake version at least 3.6.")
    endif()

-    find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-11" "clang-tidy-10" "clang-tidy-9" "clang-tidy-8")
+    find_program (CLANG_TIDY_PATH NAMES "clang-tidy" "clang-tidy-12" "clang-tidy-11" "clang-tidy-10" "clang-tidy-9" "clang-tidy-8")

    if (CLANG_TIDY_PATH)
        message(STATUS
--- a/cmake/find/amqpcpp.cmake
+++ b/cmake/find/amqpcpp.cmake
@ -17,7 +17,7 @@ if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP/CMakeLists.txt")
 endif ()

 set (USE_AMQPCPP 1)
-set (AMQPCPP_LIBRARY amqp-cpp)
+set (AMQPCPP_LIBRARY amqp-cpp ${OPENSSL_LIBRARIES})

 set (AMQPCPP_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/AMQP-CPP/include")
 list (APPEND AMQPCPP_INCLUDE_DIR
--- a/cmake/freebsd/toolchain-x86_64.cmake
+++ b/cmake/freebsd/toolchain-x86_64.cmake
@ -10,7 +10,7 @@ set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)  # disable linkage check - it
 set (CMAKE_AR "/usr/bin/ar" CACHE FILEPATH "" FORCE)
 set (CMAKE_RANLIB "/usr/bin/ranlib" CACHE FILEPATH "" FORCE)

-set (LINKER_NAME "lld" CACHE STRING "" FORCE)
+set (LINKER_NAME "ld.lld" CACHE STRING "" FORCE)

 set (CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=lld")
 set (CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=lld")
--- a/cmake/linux/toolchain-aarch64.cmake
+++ b/cmake/linux/toolchain-aarch64.cmake
@ -13,7 +13,7 @@ set (CMAKE_C_FLAGS_INIT "${CMAKE_C_FLAGS} --gcc-toolchain=${CMAKE_CURRENT_LIST_D
 set (CMAKE_CXX_FLAGS_INIT "${CMAKE_CXX_FLAGS} --gcc-toolchain=${CMAKE_CURRENT_LIST_DIR}/../toolchain/linux-aarch64")
 set (CMAKE_ASM_FLAGS_INIT "${CMAKE_ASM_FLAGS} --gcc-toolchain=${CMAKE_CURRENT_LIST_DIR}/../toolchain/linux-aarch64")

-set (LINKER_NAME "lld" CACHE STRING "" FORCE)
+set (LINKER_NAME "ld.lld" CACHE STRING "" FORCE)

 set (CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=lld")
 set (CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=lld")
--- a/cmake/tools.cmake
+++ b/cmake/tools.cmake
@ -79,8 +79,9 @@ endif ()

 if (LINKER_NAME)
    if (COMPILER_CLANG AND (CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 12.0.0 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 12.0.0))
-        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LINKER_NAME}")
-        set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LINKER_NAME}")
+        find_program (LLD_PATH NAMES ${LINKER_NAME})
+        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_PATH}")
+        set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LLD_PATH}")
    else ()
        set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")
        set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=${LINKER_NAME}")
--- a/contrib/amqpcpp-cmake/CMakeLists.txt
+++ b/contrib/amqpcpp-cmake/CMakeLists.txt
@ -41,6 +41,4 @@ target_compile_options (amqp-cpp
 )

 target_include_directories (amqp-cpp SYSTEM PUBLIC "${LIBRARY_DIR}/include")
-
-target_link_libraries (amqp-cpp PUBLIC ssl)
-
+target_link_libraries(amqp-cpp PUBLIC ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY})
--- a/contrib/boringssl-cmake/CMakeLists.txt
+++ b/contrib/boringssl-cmake/CMakeLists.txt
@ -15,12 +15,12 @@ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
 endif()

 if(CMAKE_COMPILER_IS_GNUCXX OR CLANG)
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fvisibility=hidden -fno-common -fno-exceptions -fno-rtti")
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -fno-common -fno-exceptions -fno-rtti")
  if(APPLE AND CLANG)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
  endif()

-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fvisibility=hidden -fno-common")
+  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-common")
  if((CMAKE_C_COMPILER_VERSION VERSION_GREATER "4.8.99") OR CLANG)
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c11")
  else()
--- a/contrib/poco
+++ b/contrib/poco
@ -1 +1 @@
-Subproject commit 7351c4691b5d401f59e3959adfc5b4fa263b32da
+Subproject commit 46c80daf1b015aa10474ce82e3d24b578c6ae422
--- a/docker/builder/Dockerfile
+++ b/docker/builder/Dockerfile
@ -1,6 +1,6 @@
 FROM ubuntu:20.04

-ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11
+ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=12

 RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list

--- a/docker/builder/build.sh
+++ b/docker/builder/build.sh
@ -4,7 +4,7 @@ set -e
 #ccache -s # uncomment to display CCache statistics
 mkdir -p /server/build_docker
 cd /server/build_docker
-cmake -G Ninja /server "-DCMAKE_C_COMPILER=$(command -v clang-11)" "-DCMAKE_CXX_COMPILER=$(command -v clang++-11)"
+cmake -G Ninja /server "-DCMAKE_C_COMPILER=$(command -v clang-12)" "-DCMAKE_CXX_COMPILER=$(command -v clang++-12)"

 # Set the number of build jobs to the half of number of virtual CPU cores (rounded up).
 # By default, ninja use all virtual CPU cores, that leads to very high memory consumption without much improvement in build time.
--- a/docker/packager/binary/Dockerfile
+++ b/docker/packager/binary/Dockerfile
@ -1,7 +1,7 @@
 #  docker build -t yandex/clickhouse-binary-builder .
 FROM ubuntu:20.04

-ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11
+ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=12

 RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list

@ -39,8 +39,6 @@ RUN apt-get update \
        bash \
        build-essential \
        ccache \
-        clang-11 \
-        clang-tidy-11 \
        cmake \
        curl \
        g++-10 \
@ -50,9 +48,13 @@ RUN apt-get update \
        gperf \
        libicu-dev \
        libreadline-dev \
-        lld-11 \
-        llvm-11 \
-        llvm-11-dev \
+        clang-12 \
+        clang-tidy-12 \
+        lld-12 \
+        llvm-12 \
+        llvm-12-dev \
+        libicu-dev \
+        libreadline-dev \
        moreutils \
        ninja-build \
        pigz \
--- a/docker/packager/binary/build.sh
+++ b/docker/packager/binary/build.sh
@ -4,7 +4,6 @@ set -x -e

 mkdir -p build/cmake/toolchain/darwin-x86_64
 tar xJf MacOSX11.0.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1
-
 ln -sf darwin-x86_64 build/cmake/toolchain/darwin-aarch64

 mkdir -p build/cmake/toolchain/linux-aarch64
@ -23,6 +22,7 @@ cd build/build_docker
 rm -f CMakeCache.txt
 # Read cmake arguments into array (possibly empty)
 read -ra CMAKE_FLAGS <<< "${CMAKE_FLAGS:-}"
+env
 cmake --debug-trycompile --verbose=1 -DCMAKE_VERBOSE_MAKEFILE=1 -LA "-DCMAKE_BUILD_TYPE=$BUILD_TYPE" "-DSANITIZE=$SANITIZER" -DENABLE_CHECK_HEAVY_BUILDS=1 "${CMAKE_FLAGS[@]}" ..

 ccache --show-config ||:
--- a/docker/packager/deb/Dockerfile
+++ b/docker/packager/deb/Dockerfile
@ -1,7 +1,7 @@
 # docker build -t yandex/clickhouse-deb-builder .
 FROM ubuntu:20.04

-ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11
+ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=12

 RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list

@ -37,17 +37,17 @@ RUN curl -O https://clickhouse-datasets.s3.yandex.net/utils/1/dpkg-deb \
 RUN apt-get update \
    && apt-get install \
        alien \
-        clang-11 \
-        clang-tidy-11 \
+        clang-12 \
+        clang-tidy-12 \
        cmake \
        debhelper \
        devscripts \
        gdb  \
        git \
        gperf \
-        lld-11 \
-        llvm-11 \
-        llvm-11-dev \
+        lld-12 \
+        llvm-12 \
+        llvm-12-dev \
        moreutils \
        ninja-build \
        perl \
--- a/docker/packager/packager
+++ b/docker/packager/packager
@ -75,7 +75,7 @@ def parse_env_variables(build_type, compiler, sanitizer, package_type, image_typ
    # Explicitly use LLD with Clang by default.
    # Don't force linker for cross-compilation.
    if is_clang and not is_cross_compile:
-        cmake_flags.append("-DLINKER_NAME=lld")
+        cmake_flags.append("-DLINKER_NAME=ld.lld")

    if is_cross_darwin:
        cc = compiler[:-len(DARWIN_SUFFIX)]
@ -204,7 +204,8 @@ if __name__ == "__main__":
    parser.add_argument("--output-dir", required=True)
    parser.add_argument("--build-type", choices=("debug", ""), default="")
    parser.add_argument("--compiler", choices=("clang-11", "clang-11-darwin", "clang-11-darwin-aarch64", "clang-11-aarch64",
-                                               "clang-11-freebsd", "gcc-10"), default="clang-11")
+                                               "clang-12", "clang-12-darwin", "clang-12-darwin-aarch64", "clang-12-aarch64",
+                                               "clang-11-freebsd", "clang-12-freebsd", "gcc-10"), default="clang-12")
    parser.add_argument("--sanitizer", choices=("address", "thread", "memory", "undefined", ""), default="")
    parser.add_argument("--unbundled", action="store_true")
    parser.add_argument("--split-binary", action="store_true")
--- a/docker/test/base/Dockerfile
+++ b/docker/test/base/Dockerfile
@ -1,7 +1,7 @@
 # docker build -t yandex/clickhouse-test-base .
 FROM ubuntu:20.04

-ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11
+ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=12

 RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list

--- a/docker/test/codebrowser/Dockerfile
+++ b/docker/test/codebrowser/Dockerfile
@ -11,7 +11,7 @@ RUN apt-get update && apt-get --yes --allow-unauthenticated install clang-9 libl
 # https://github.com/ClickHouse-Extras/woboq_codebrowser/commit/37e15eaf377b920acb0b48dbe82471be9203f76b
 RUN git clone https://github.com/ClickHouse-Extras/woboq_codebrowser

-RUN cd woboq_codebrowser && cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-9 -DCMAKE_C_COMPILER=clang-9 && make -j
+RUN cd woboq_codebrowser && cmake . -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_COMPILER=clang\+\+-12 -DCMAKE_C_COMPILER=clang-12 && make -j

 ENV CODEGEN=/woboq_codebrowser/generator/codebrowser_generator
 ENV CODEINDEX=/woboq_codebrowser/indexgenerator/codebrowser_indexgenerator
@ -24,7 +24,7 @@ ENV SHA=nosha
 ENV DATA="data"

 CMD mkdir -p $BUILD_DIRECTORY && cd $BUILD_DIRECTORY && \
-    cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-11 -DCMAKE_C_COMPILER=/usr/bin/clang-11 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_S3=0 && \
+    cmake $SOURCE_DIRECTORY -DCMAKE_CXX_COMPILER=/usr/bin/clang\+\+-12 -DCMAKE_C_COMPILER=/usr/bin/clang-12 -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DENABLE_EMBEDDED_COMPILER=0 -DENABLE_S3=0 && \
    mkdir -p $HTML_RESULT_DIRECTORY && \
    $CODEGEN -b $BUILD_DIRECTORY -a -o $HTML_RESULT_DIRECTORY -p ClickHouse:$SOURCE_DIRECTORY:$SHA -d $DATA | ts '%Y-%m-%d %H:%M:%S' && \
    cp -r $STATIC_DATA $HTML_RESULT_DIRECTORY/ &&\
--- a/docker/test/fasttest/Dockerfile
+++ b/docker/test/fasttest/Dockerfile
@ -1,7 +1,7 @@
 #  docker build -t yandex/clickhouse-fasttest .
 FROM ubuntu:20.04

-ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=11
+ENV DEBIAN_FRONTEND=noninteractive LLVM_VERSION=12

 RUN sed -i 's|http://archive|http://ru.archive|g' /etc/apt/sources.list

--- a/docker/test/fasttest/run.sh
+++ b/docker/test/fasttest/run.sh
@ -9,7 +9,7 @@ trap 'kill $(jobs -pr) ||:' EXIT
 stage=${stage:-}

 # Compiler version, normally set by Dockerfile
-export LLVM_VERSION=${LLVM_VERSION:-11}
+export LLVM_VERSION=${LLVM_VERSION:-12}

 # A variable to pass additional flags to CMake.
 # Here we explicitly default it to nothing so that bash doesn't complain about
@ -401,6 +401,9 @@ function run_tests

        # depends on Go
        02013_zlib_read_after_eof
+
+        # Accesses CH via mysql table function (which is unavailable)
+        01747_system_session_log_long
    )

    time clickhouse-test --hung-check -j 8 --order=random --use-skip-list \
--- a/docker/test/fuzzer/run-fuzzer.sh
+++ b/docker/test/fuzzer/run-fuzzer.sh
@ -12,7 +12,7 @@ stage=${stage:-}
 script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
 echo "$script_dir"
 repo_dir=ch
-BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-11_debug_none_bundled_unsplitted_disable_False_binary"}
+BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-12_debug_none_bundled_unsplitted_disable_False_binary"}

 function clone
 {
--- a/docker/test/keeper-jepsen/run.sh
+++ b/docker/test/keeper-jepsen/run.sh
@ -2,7 +2,7 @@
 set -euo pipefail


-CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-11_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"}
+CLICKHOUSE_PACKAGE=${CLICKHOUSE_PACKAGE:="https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-12_relwithdebuginfo_none_bundled_unsplitted_disable_False_binary/clickhouse"}
 CLICKHOUSE_REPO_PATH=${CLICKHOUSE_REPO_PATH:=""}


--- a/docker/test/pvs/Dockerfile
+++ b/docker/test/pvs/Dockerfile
@ -28,7 +28,7 @@ RUN apt-get update --yes \
 ENV PKG_VERSION="pvs-studio-latest"

 RUN set -x \
-    && export PUBKEY_HASHSUM="686e5eb8b3c543a5c54442c39ec876b6c2d912fe8a729099e600017ae53c877dda3368fe38ed7a66024fe26df6b5892a" \
+    && export PUBKEY_HASHSUM="ad369a2e9d8b8c30f5a9f2eb131121739b79c78e03fef0f016ea51871a5f78cd4e6257b270dca0ac3be3d1f19d885516" \
    && wget -nv https://files.viva64.com/etc/pubkey.txt -O /tmp/pubkey.txt \
    && echo "${PUBKEY_HASHSUM} /tmp/pubkey.txt" | sha384sum -c \
    && apt-key add /tmp/pubkey.txt \
@ -38,7 +38,7 @@ RUN set -x \
    && dpkg -i "${PKG_VERSION}.deb"

 CMD echo "Running PVS version $PKG_VERSION" && cd /repo_folder && pvs-studio-analyzer credentials $LICENCE_NAME $LICENCE_KEY -o ./licence.lic  \
-    && cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF -D"USE_INTERNAL_PROTOBUF_LIBRARY"=OFF -D"USE_INTERNAL_GRPC_LIBRARY"=OFF \
+    && cmake . -D"ENABLE_EMBEDDED_COMPILER"=OFF -D"USE_INTERNAL_PROTOBUF_LIBRARY"=OFF -D"USE_INTERNAL_GRPC_LIBRARY"=OFF -DCMAKE_C_COMPILER=clang-12 -DCMAKE_CXX_COMPILER=clang\+\+-12 \
    && ninja re2_st clickhouse_grpc_protos \
    && pvs-studio-analyzer analyze -o pvs-studio.log -e contrib -j 4 -l ./licence.lic; \
    cp /repo_folder/pvs-studio.log /test_output; \
--- a/docs/en/development/build.md
+++ b/docs/en/development/build.md
@ -23,7 +23,7 @@ $ sudo apt-get install git cmake python ninja-build

 Or cmake3 instead of cmake on older systems.

-### Install clang-11 (recommended) {#install-clang-11}
+### Install clang-12 (recommended) {#install-clang-12}

 On Ubuntu/Debian you can use the automatic installation script (check [official webpage](https://apt.llvm.org/))

@ -33,11 +33,11 @@ sudo bash -c "$(wget -O - https://apt.llvm.org/llvm.sh)"

 For other Linux distribution - check the availability of the [prebuild packages](https://releases.llvm.org/download.html) or build clang [from sources](https://clang.llvm.org/get_started.html).

-#### Use clang-11 for Builds
+#### Use clang-12 for Builds

 ``` bash
-$ export CC=clang-11
-$ export CXX=clang++-11
+$ export CC=clang-12
+$ export CXX=clang++-12
 ```

 Gcc can also be used though it is discouraged.
--- a/docs/en/engines/database-engines/materialized-postgresql.md
+++ b/docs/en/engines/database-engines/materialized-postgresql.md
@ -31,6 +31,10 @@ ENGINE = MaterializedPostgreSQL('host:port', ['database' | database], 'user', 'p

 -   [materialized_postgresql_allow_automatic_update](../../operations/settings/settings.md#materialized-postgresql-allow-automatic-update)

+-   [materialized_postgresql_replication_slot](../../operations/settings/settings.md#materialized-postgresql-replication-slot)
+
+-   [materialized_postgresql_snapshot](../../operations/settings/settings.md#materialized-postgresql-snapshot)
+
 ``` sql
 CREATE DATABASE database1
 ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres_user', 'postgres_password')
@ -73,7 +77,7 @@ WHERE oid = 'postgres_table'::regclass;

 !!! warning "Warning"
    Replication of [**TOAST**](https://www.postgresql.org/docs/9.5/storage-toast.html) values is not supported. The default value for the data type will be used.
-	
+
 ## Example of Use {#example-of-use}

 ``` sql
@ -82,3 +86,11 @@ ENGINE = MaterializedPostgreSQL('postgres1:5432', 'postgres_database', 'postgres

 SELECT * FROM postgresql_db.postgres_table;
 ```
+
+## Notes {#notes}
+
+- Failover of the logical replication slot.
+
+Logical replication slots that exist on the primary are not available on standby replicas.
+So if there is a failover, the new primary (the old physical standby) won’t be aware of any slots that existed on the old primary. This will lead to broken replication from PostgreSQL.
+A solution to this is to manage replication slots yourself and define a permanent replication slot (some information can be found [here](https://patroni.readthedocs.io/en/latest/SETTINGS.html)). You'll need to pass the slot name via the `materialized_postgresql_replication_slot` setting, and the slot has to be exported with the `EXPORT SNAPSHOT` option. The snapshot identifier needs to be passed via the `materialized_postgresql_snapshot` setting.
--- a/docs/en/engines/table-engines/integrations/mysql.md
+++ b/docs/en/engines/table-engines/integrations/mysql.md
@ -19,6 +19,7 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
 SETTINGS
    [connection_pool_size=16, ]
    [connection_max_tries=3, ]
+    [connection_wait_timeout=5, ] /* 0 -- do not wait */
    [connection_auto_close=true ]
 ;
 ```
--- a/docs/en/engines/table-engines/integrations/rabbitmq.md
+++ b/docs/en/engines/table-engines/integrations/rabbitmq.md
@ -21,11 +21,12 @@ CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
    name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2],
    ...
 ) ENGINE = RabbitMQ SETTINGS
-    rabbitmq_host_port = 'host:port',
+    rabbitmq_host_port = 'host:port' [or rabbitmq_address = 'amqp(s)://guest:guest@localhost/vhost'],
    rabbitmq_exchange_name = 'exchange_name',
    rabbitmq_format = 'data_format'[,]
    [rabbitmq_exchange_type = 'exchange_type',]
    [rabbitmq_routing_key_list = 'key1,key2,...',]
+    [rabbitmq_secure = 0,]
    [rabbitmq_row_delimiter = 'delimiter_symbol',]
    [rabbitmq_schema = '',]
    [rabbitmq_num_consumers = N,]
@ -59,6 +60,11 @@ Optional parameters:
 -   `rabbitmq_max_block_size`
 -   `rabbitmq_flush_interval_ms`

+SSL connection:
+
+Use either `rabbitmq_secure = 1` or `amqps` in connection address: `rabbitmq_address = 'amqps://guest:guest@localhost/vhost'`.
+The default behaviour of the used library is not to check if the created TLS connection is sufficiently secure. Whether the certificate is expired, self-signed, missing or invalid: the connection is simply permitted. More strict checking of certificates can possibly be implemented in the future.
+
 Also format settings can be added along with rabbitmq-related settings.

 Example:
--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@ -1270,6 +1270,8 @@ You can insert Parquet data from a file into ClickHouse table by the following c
 $ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parquet"
 ```

+To insert data into [Nested](../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs, you must enable the [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested) setting.
+
 You can select data from a ClickHouse table and save them into some file in the Parquet format by the following command:

 ``` bash
@ -1328,6 +1330,8 @@ You can insert Arrow data from a file into ClickHouse table by the following com
 $ cat filename.arrow | clickhouse-client --query="INSERT INTO some_table FORMAT Arrow"
 ```

+To insert data into [Nested](../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs, you must enable the [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested) setting.
+
 ### Selecting Data {#selecting-data-arrow}

 You can select data from a ClickHouse table and save them into some file in the Arrow format by the following command:
@ -1384,6 +1388,8 @@ You can insert ORC data from a file into ClickHouse table by the following comma
 $ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC"
 ```

+To insert data into [Nested](../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs, you must enable the [input_format_orc_import_nested](../operations/settings/settings.md#input_format_orc_import_nested) setting.
+
 ### Selecting Data {#selecting-data-2}

 You can select data from a ClickHouse table and save them into some file in the ORC format by the following command:
--- a/docs/en/introduction/adopters.md
+++ b/docs/en/introduction/adopters.md
@ -25,7 +25,7 @@ toc_title: Adopters
 | <a href="https://badoo.com" class="favicon">Badoo</a> | Dating | Timeseries | — | — | [Slides in Russian, December 2019](https://presentations.clickhouse.tech/meetup38/forecast.pdf) |
 | <a href="https://www.benocs.com/" class="favicon">Benocs</a> | Network Telemetry and Analytics | Main Product | — | — | [Slides in English, October 2017](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup9/lpm.pdf) |
 | <a href="https://www.bigo.sg/" class="favicon">BIGO</a> | Video | Computing Platform | — | — | [Blog Article, August 2020](https://www.programmersought.com/article/44544895251/) |
-| <a href="https://www.bloomberg.com/" class="favicon">Bloomberg</a> | Finance, Media | Monitoring | 102 servers | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
+| <a href="https://www.bloomberg.com/">Bloomberg</a> | Finance, Media | Monitoring | — | — | [Slides, May 2018](https://www.slideshare.net/Altinity/http-analytics-for-6m-requests-per-second-using-clickhouse-by-alexander-bocharov) |
 | <a href="https://bloxy.info" class="favicon">Bloxy</a> | Blockchain | Analytics | — | — | [Slides in Russian, August 2018](https://github.com/ClickHouse/clickhouse-presentations/blob/master/meetup17/4_bloxy.pptx) |
 | <a href="https://www.bytedance.com" class="favicon">Bytedance</a> | Social platforms | — | — | — | [The ClickHouse Meetup East, October 2020](https://www.youtube.com/watch?v=ckChUkC3Pns) |
 | <a href="https://cardsmobile.ru/" class="favicon">CardsMobile</a> | Finance | Analytics | — | — | [VC.ru](https://vc.ru/s/cardsmobile/143449-rukovoditel-gruppy-analiza-dannyh) |
--- a/docs/en/operations/server-configuration-parameters/settings.md
+++ b/docs/en/operations/server-configuration-parameters/settings.md
@ -1253,7 +1253,7 @@ If this section is specified, the path from [users_config](../../operations/serv

 The `user_directories` section can contain any number of items, the order of the items means their precedence (the higher the item the higher the precedence).

-**Example**
+**Examples**

 ``` xml
 <user_directories>
@ -1263,13 +1263,23 @@ The `user_directories` section can contain any number of items, the order of the
    <local_directory>
        <path>/var/lib/clickhouse/access/</path>
    </local_directory>
+</user_directories>
+```
+
+Users, roles, row policies, quotas, and profiles can also be stored in ZooKeeper:
+
+``` xml
+<user_directories>
+    <users_xml>
+        <path>/etc/clickhouse-server/users.xml</path>
+    </users_xml>
    <replicated>
        <zookeeper_path>/clickhouse/access/</zookeeper_path>
    </replicated>
 </user_directories>
 ```

-You can also specify settings `memory` — means storing information only in memory, without writing to disk, and `ldap` — means storing information on an LDAP server.
+You can also define the sections `memory` (stores information only in memory, without writing to disk) and `ldap` (stores information on an LDAP server).

 To add an LDAP server as a remote user directory of users that are not defined locally, define a single `ldap` section with a following parameters:
 -   `server` — one of LDAP server names defined in `ldap_servers` config section. This parameter is mandatory and cannot be empty.
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@ -260,6 +260,39 @@ If an error occurred while reading rows but the error counter is still less than

 If both `input_format_allow_errors_num` and `input_format_allow_errors_ratio` are exceeded, ClickHouse throws an exception.

+## input_format_parquet_import_nested {#input_format_parquet_import_nested}
+
+Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Parquet](../../interfaces/formats.md#data-format-parquet) input format.
+
+Possible values:
+
+-   0 — Data can not be inserted into `Nested` columns as an array of structs.
+-   1 — Data can be inserted into `Nested` columns as an array of structs.
+
+Default value: `0`.
+
+## input_format_arrow_import_nested {#input_format_arrow_import_nested}
+
+Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [Arrow](../../interfaces/formats.md#data_types-matching-arrow) input format.
+
+Possible values:
+
+-   0 — Data can not be inserted into `Nested` columns as an array of structs.
+-   1 — Data can be inserted into `Nested` columns as an array of structs.
+
+Default value: `0`.
+
+## input_format_orc_import_nested {#input_format_orc_import_nested}
+
+Enables or disables the ability to insert the data into [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) columns as an array of structs in [ORC](../../interfaces/formats.md#data-format-orc) input format.
+
+Possible values:
+
+-   0 — Data can not be inserted into `Nested` columns as an array of structs.
+-   1 — Data can be inserted into `Nested` columns as an array of structs.
+
+Default value: `0`.
+
 ## input_format_values_interpret_expressions {#settings-input_format_values_interpret_expressions}

 Enables or disables the full SQL parser if the fast stream parser can’t parse the data. This setting is used only for the [Values](../../interfaces/formats.md#data-format-values) format at the data insertion. For more information about syntax parsing, see the [Syntax](../../sql-reference/syntax.md) section.
@ -3436,6 +3469,14 @@ Possible values:

 Default value: `0`.

+## materialized_postgresql_replication_slot {#materialized-postgresql-replication-slot}
+
+Allows the use of user-managed replication slots. Must be used together with `materialized_postgresql_snapshot`.
+
+## materialized_postgresql_snapshot {#materialized-postgresql-snapshot}
+
+A text string identifying a snapshot, from which initial dump of tables will be performed. Must be used together with `materialized_postgresql_replication_slot`.
+
 ## allow_experimental_projection_optimization {#allow-experimental-projection-optimization}

 Enables or disables [projection](../../engines/table-engines/mergetree-family/mergetree.md#projections) optimization when processing `SELECT` queries.
@ -3449,7 +3490,7 @@ Default value: `0`.

 ## force_optimize_projection {#force-optimize-projection}

-Enables or disables the obligatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md#projections) in `SELECT` queries, when projection optimization is enabled (see [allow_experimental_projection_optimization](#allow-experimental-projection-optimization) setting). 
+Enables or disables the obligatory use of [projections](../../engines/table-engines/mergetree-family/mergetree.md#projections) in `SELECT` queries, when projection optimization is enabled (see [allow_experimental_projection_optimization](#allow-experimental-projection-optimization) setting).

 Possible values:

@ -3457,3 +3498,13 @@ Possible values:
 -   1 — Projection optimization is obligatory.

 Default value: `0`.
+
+## regexp_max_matches_per_row {#regexp-max-matches-per-row}
+
+Sets the maximum number of matches for a single regular expression per row. Use it to protect against memory overload when using a greedy regular expression in the [extractAllGroupsHorizontal](../../sql-reference/functions/string-search-functions.md#extractallgroups-horizontal) function.
+
+Possible values:
+
+-   Positive integer.
+
+Default value: `1000`.
--- a/docs/en/operations/system-tables/views.md
+++ b/docs/en/operations/system-tables/views.md
@ -1,44 +0,0 @@
-# system.views {#system-views}
-
-Contains the dependencies of all views and the type to which the view belongs. The metadata of the view comes from the [system.tables](tables.md).
-
-Columns:
-
-   `database` ([String](../../sql-reference/data-types/string.md)) — The name of the database the view is in.
-
-   `name` ([String](../../sql-reference/data-types/string.md)) — Name of the view.
-
-   `main_dependency_database` ([String](../../sql-reference/data-types/string.md)) — The name of the database on which the view depends.
-
-   `main_dependency_table` ([String](../../sql-reference/data-types/string.md)) - The name of the table on which the view depends.
-
-   `view_type` ([Enum8](../../sql-reference/data-types/enum.md)) — Type of the view. Values:
-    -   `'Default' = 1` — [Default views](../../sql-reference/statements/create/view.md#normal). Should not appear in this log.
-    -   `'Materialized' = 2` — [Materialized views](../../sql-reference/statements/create/view.md#materialized).
-    -   `'Live' = 3` — [Live views](../../sql-reference/statements/create/view.md#live-view).
-
-**Example**
-
-```sql
-SELECT * FROM system.views LIMIT 2 FORMAT Vertical;
-```
-
-```text
-Row 1:
-──────
-database:                 default
-name:                     live_view
-main_dependency_database: default
-main_dependency_table:    view_source_tb
-view_type:                Live
-
-Row 2:
-──────
-database:                 default
-name:                     materialized_view
-main_dependency_database: default
-main_dependency_table:    view_source_tb
-view_type:                Materialized
-```
-
-[Original article](https://clickhouse.tech/docs/en/operations/system-tables/views) <!--hide-->
--- a/docs/en/sql-reference/data-types/nested-data-structures/nested.md
+++ b/docs/en/sql-reference/data-types/nested-data-structures/nested.md
@ -3,7 +3,9 @@ toc_priority: 57
 toc_title: Nested(Name1 Type1, Name2 Type2, ...)
 ---

-# Nested(name1 Type1, Name2 Type2, …) {#nestedname1-type1-name2-type2}
+# Nested {#nested}
+
+## Nested(name1 Type1, Name2 Type2, …) {#nestedname1-type1-name2-type2}

 A nested data structure is like a table inside a cell. The parameters of a nested data structure – the column names and types – are specified the same way as in a [CREATE TABLE](../../../sql-reference/statements/create/table.md) query. Each table row can correspond to any number of rows in a nested data structure.

--- a/docs/en/sql-reference/functions/type-conversion-functions.md
+++ b/docs/en/sql-reference/functions/type-conversion-functions.md
@ -1438,9 +1438,9 @@ Result:
 └───────────────────────────────────────────┘
 ```

-## snowflakeToDateTime {#snowflakeToDateTime}
+## snowflakeToDateTime {#snowflaketodatetime}

-Extract time from snowflake id as DateTime format.
+Extracts time from [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as [DateTime](../data-types/datetime.md) format.

 **Syntax**

@ -1450,12 +1450,12 @@ snowflakeToDateTime(value [, time_zone])

 **Parameters**

-   `value` — `snowflake id`, Int64 value.
+-   `value` — Snowflake ID. [Int64](../data-types/int-uint.md).
 -   `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md).

 **Returned value**

-  value converted to the `DateTime` data type.
+-  Input value converted to the [DateTime](../data-types/datetime.md) data type.

 **Example**

@ -1474,9 +1474,9 @@ Result:
 └──────────────────────────────────────────────────────────────────┘
 ```

-## snowflakeToDateTime64 {#snowflakeToDateTime64}
+## snowflakeToDateTime64 {#snowflaketodatetime64}

-Extract time from snowflake id as DateTime64 format.
+Extracts time from [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) as [DateTime64](../data-types/datetime64.md) format.

 **Syntax**

@ -1486,12 +1486,12 @@ snowflakeToDateTime64(value [, time_zone])

 **Parameters**

-   `value` — `snowflake id`, Int64 value.
+-   `value` — Snowflake ID. [Int64](../data-types/int-uint.md).
 -   `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md).

 **Returned value**

-  value converted to the `DateTime64` data type.
+-  Input value converted to the [DateTime64](../data-types/datetime64.md) data type.

 **Example**

@ -1510,9 +1510,9 @@ Result:
 └────────────────────────────────────────────────────────────────────┘
 ```

-## dateTimeToSnowflake {#dateTimeToSnowflake}
+## dateTimeToSnowflake {#datetimetosnowflake}

-Convert DateTime to the first snowflake id at the giving time.
+Converts [DateTime](../data-types/datetime.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the given time.

 **Syntax**

@ -1524,33 +1524,29 @@ dateTimeToSnowflake(value)

 -   `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md).

-
 **Returned value**

-   `value` converted to the `Int64` data type as the first snowflake id at that time.
+-   Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time.

 **Example**

 Query:

 ``` sql
-WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt
-SELECT dateTimeToSnowflake(dt);
+WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt SELECT dateTimeToSnowflake(dt);
 ```

 Result:

 ``` text
-
 ┌─dateTimeToSnowflake(dt)─┐
 │     1426860702823350272 │
 └─────────────────────────┘
 ```

+## dateTime64ToSnowflake {#datetime64tosnowflake}

-## dateTime64ToSnowflake {#dateTime64ToSnowflake}
-
-Convert DateTime64 to the first snowflake id at the giving time.
+Converts [DateTime64](../data-types/datetime64.md) value to the first [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) at the given time.

 **Syntax**

@ -1562,18 +1558,16 @@ dateTime64ToSnowflake(value)

 -   `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md).

-
 **Returned value**

-   `value` converted to the `Int64` data type as the first snowflake id at that time.
+-   Input value converted to the [Int64](../data-types/int-uint.md) data type as the first Snowflake ID at that time.

 **Example**

 Query:

 ``` sql
-WITH toDateTime64('2021-08-15 18:57:56.492', 3, 'Asia/Shanghai') AS dt64
-SELECT dateTime64ToSnowflake(dt64);
+WITH toDateTime64('2021-08-15 18:57:56.492', 3, 'Asia/Shanghai') AS dt64 SELECT dateTime64ToSnowflake(dt64);
 ```

 Result:
@ -1582,4 +1576,4 @@ Result:
 ┌─dateTime64ToSnowflake(dt64)─┐
 │         1426860704886947840 │
 └─────────────────────────────┘
-```
+```
--- a/docs/ru/interfaces/formats.md
+++ b/docs/ru/interfaces/formats.md
@ -1180,7 +1180,7 @@ ClickHouse поддерживает настраиваемую точность

 Типы данных столбцов в ClickHouse могут отличаться от типов данных соответствующих полей файла в формате Parquet. При вставке данных ClickHouse интерпретирует типы данных в соответствии с таблицей выше, а затем [приводит](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) данные к тому типу, который установлен для столбца таблицы.

-### Вставка и выборка данных {#vstavka-i-vyborka-dannykh}
+### Вставка и выборка данных {#inserting-and-selecting-data}

 Чтобы вставить в ClickHouse данные из файла в формате Parquet, выполните команду следующего вида:

@ -1188,6 +1188,8 @@ ClickHouse поддерживает настраиваемую точность
 $ cat {filename} | clickhouse-client --query="INSERT INTO {some_table} FORMAT Parquet"
 ```

+Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_parquet_import_nested](../operations/settings/settings.md#input_format_parquet_import_nested).
+
 Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Parquet, используйте команду следующего вида:

 ``` bash
@ -1246,6 +1248,8 @@ ClickHouse поддерживает настраиваемую точность
 $ cat filename.arrow | clickhouse-client --query="INSERT INTO some_table FORMAT Arrow"
 ```

+Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_arrow_import_nested](../operations/settings/settings.md#input_format_arrow_import_nested).
+
 ### Вывод данных {#selecting-data-arrow}

 Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Arrow, используйте команду следующего вида:
@ -1294,7 +1298,7 @@ ClickHouse поддерживает настраиваемую точность

 Типы данных столбцов в таблицах ClickHouse могут отличаться от типов данных для соответствующих полей ORC. При вставке данных ClickHouse интерпретирует типы данных ORC согласно таблице соответствия, а затем [приводит](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) данные к типу, установленному для столбца таблицы ClickHouse.

-### Вставка данных {#vstavka-dannykh-1}
+### Вставка данных {#inserting-data-2}

 Чтобы вставить в ClickHouse данные из файла в формате ORC, используйте команду следующего вида:

@ -1302,7 +1306,9 @@ ClickHouse поддерживает настраиваемую точность
 $ cat filename.orc | clickhouse-client --query="INSERT INTO some_table FORMAT ORC"
 ```

-### Вывод данных {#vyvod-dannykh-1}
+Чтобы вставить данные в колонки типа [Nested](../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур, нужно включить настройку [input_format_orc_import_nested](../operations/settings/settings.md#input_format_orc_import_nested).
+
+### Вывод данных {#selecting-data-2}

 Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата ORC, используйте команду следующего вида:

--- a/docs/ru/operations/server-configuration-parameters/settings.md
+++ b/docs/ru/operations/server-configuration-parameters/settings.md
@ -1200,12 +1200,13 @@ ClickHouse использует ZooKeeper для хранения метадан
 Секция конфигурационного файла,которая содержит настройки:
 -   Путь к конфигурационному файлу с предустановленными пользователями.
 -   Путь к файлу, в котором содержатся пользователи, созданные при помощи SQL команд.
+-   Путь к узлу ZooKeeper, где хранятся и реплицируются пользователи, созданные с помощью команд SQL (экспериментальная функциональность).

 Если эта секция определена, путь из [users_config](../../operations/server-configuration-parameters/settings.md#users-config) и [access_control_path](../../operations/server-configuration-parameters/settings.md#access_control_path) не используется.

 Секция `user_directories` может содержать любое количество элементов, порядок расположения элементов обозначает их приоритет (чем выше элемент, тем выше приоритет).

-**Пример**
+**Примеры**

 ``` xml
 <user_directories>
@ -1218,7 +1219,20 @@ ClickHouse использует ZooKeeper для хранения метадан
 </user_directories>
 ```

-Также вы можете указать настройку `memory` — означает хранение информации только в памяти, без записи на диск, и `ldap` — означает хранения информации на [LDAP-сервере](https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol).
+Пользователи, роли, политики доступа к строкам, квоты и профили могут храниться в ZooKeeper:
+
+``` xml
+<user_directories>
+    <users_xml>
+        <path>/etc/clickhouse-server/users.xml</path>
+    </users_xml>
+    <replicated>
+        <zookeeper_path>/clickhouse/access/</zookeeper_path>
+    </replicated>
+</user_directories>
+```
+
+Также вы можете добавить секции `memory` — означает хранение информации только в памяти, без записи на диск, и `ldap` — означает хранение информации на [LDAP-сервере](https://en.wikipedia.org/wiki/Lightweight_Directory_Access_Protocol).

 Чтобы добавить LDAP-сервер в качестве удаленного каталога пользователей, которые не определены локально, определите один раздел `ldap` со следующими параметрами:
 -   `server` — имя одного из LDAP-серверов, определенных в секции `ldap_servers` конфигурациионного файла. Этот параметр явялется необязательным и может быть пустым.
--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@ -237,6 +237,39 @@ ClickHouse применяет настройку в тех случаях, ко

 В случае превышения `input_format_allow_errors_ratio` ClickHouse генерирует исключение.

+## input_format_parquet_import_nested {#input_format_parquet_import_nested}
+
+Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [Parquet](../../interfaces/formats.md#data-format-parquet).
+
+Возможные значения:
+
+-   0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур.
+-   1 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур.
+
+Значение по умолчанию: `0`.
+
+## input_format_arrow_import_nested {#input_format_arrow_import_nested}
+
+Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [Arrow](../../interfaces/formats.md#data_types-matching-arrow).
+
+Возможные значения:
+
+-   0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур.
+-   1 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур.
+
+Значение по умолчанию: `0`.
+
+## input_format_orc_import_nested {#input_format_orc_import_nested}
+
+Включает или отключает возможность вставки данных в колонки типа [Nested](../../sql-reference/data-types/nested-data-structures/nested.md) в виде массива структур в формате ввода [ORC](../../interfaces/formats.md#data-format-orc).
+
+Возможные значения:
+
+-   0 — данные не могут быть вставлены в колонки типа `Nested` в виде массива структур.
+-   1 — данные могут быть вставлены в колонки типа `Nested` в виде массива структур.
+
+Значение по умолчанию: `0`.
+
 ## input_format_values_interpret_expressions {#settings-input_format_values_interpret_expressions}

 Включает или отключает парсер SQL, если потоковый парсер не может проанализировать данные. Этот параметр используется только для формата [Values](../../interfaces/formats.md#data-format-values) при вставке данных. Дополнительные сведения о парсерах читайте в разделе [Синтаксис](../../sql-reference/syntax.md).
@ -3273,4 +3306,14 @@ SETTINGS index_granularity = 8192 │
 -   0 — Проекции используются опционально.
 -   1 — Проекции обязательно используются.

-Значение по умолчанию: `0`.
+Значение по умолчанию: `0`.
+
+## regexp_max_matches_per_row {#regexp-max-matches-per-row}
+
+Задает максимальное количество совпадений для регулярного выражения. Настройка применяется для защиты памяти от перегрузки при использовании "жадных" квантификаторов в регулярном выражении для функции [extractAllGroupsHorizontal](../../sql-reference/functions/string-search-functions.md#extractallgroups-horizontal).
+
+Возможные значения:
+
+-   Положительное целое число.
+
+Значение по умолчанию: `1000`.
--- a/docs/ru/sql-reference/data-types/nested-data-structures/nested.md
+++ b/docs/ru/sql-reference/data-types/nested-data-structures/nested.md
@ -1,4 +1,6 @@
-# Nested(Name1 Type1, Name2 Type2, …) {#nestedname1-type1-name2-type2}
+# Nested {#nested}
+
+## Nested(Name1 Type1, Name2 Type2, …) {#nestedname1-type1-name2-type2}

 Вложенная структура данных - это как будто вложенная таблица. Параметры вложенной структуры данных - имена и типы столбцов, указываются так же, как у запроса CREATE. Каждой строке таблицы может соответствовать произвольное количество строк вложенной структуры данных.

@ -95,4 +97,3 @@ LIMIT 10
 При запросе DESCRIBE, столбцы вложенной структуры данных перечисляются так же по отдельности.

 Работоспособность запроса ALTER для элементов вложенных структур данных, является сильно ограниченной.
-
--- a/docs/ru/sql-reference/functions/type-conversion-functions.md
+++ b/docs/ru/sql-reference/functions/type-conversion-functions.md
@ -1436,3 +1436,144 @@ FROM numbers(3);
 │ 2,"good"                                  │
 └───────────────────────────────────────────┘
 ```
+
+## snowflakeToDateTime {#snowflaketodatetime}
+
+Извлекает время из [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) в формате [DateTime](../data-types/datetime.md).
+
+**Синтаксис**
+
+``` sql
+snowflakeToDateTime(value [, time_zone])
+```
+
+**Аргументы**
+
+-   `value` — Snowflake ID. [Int64](../data-types/int-uint.md).
+-   `time_zone` — [временная зона сервера](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). Функция распознает `time_string` в соответствии с часовым поясом. Необязательный. [String](../../sql-reference/data-types/string.md).
+
+**Возвращаемое значение**
+
+-  Значение, преобразованное в формат [DateTime](../data-types/datetime.md).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC');
+```
+
+Результат:
+
+``` text
+
+┌─snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC')─┐
+│                                              2021-08-15 10:57:56 │
+└──────────────────────────────────────────────────────────────────┘
+```
+
+## snowflakeToDateTime64 {#snowflaketodatetime64}
+
+Извлекает время из [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) в формате [DateTime64](../data-types/datetime64.md).
+
+**Синтаксис**
+
+``` sql
+snowflakeToDateTime64(value [, time_zone])
+```
+
+**Аргументы**
+
+-   `value` — Snowflake ID. [Int64](../data-types/int-uint.md).
+-   `time_zone` — [временная зона сервера](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). Функция распознает `time_string` в соответствии с часовым поясом. Необязательный. [String](../../sql-reference/data-types/string.md).
+
+**Возвращаемое значение**
+
+-  Значение, преобразованное в формат [DateTime64](../data-types/datetime64.md).
+
+**Пример**
+
+Запрос:
+
+``` sql
+SELECT snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC');
+```
+
+Результат:
+
+``` text
+
+┌─snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC')─┐
+│                                            2021-08-15 10:58:19.841 │
+└────────────────────────────────────────────────────────────────────┘
+```
+
+## dateTimeToSnowflake {#datetimetosnowflake}
+
+Преобразует значение [DateTime](../data-types/datetime.md) в первый идентификатор [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) на заданный момент времени.
+
+**Синтаксис**
+
+``` sql
+dateTimeToSnowflake(value)
+```
+
+**Аргументы**
+
+-   `value` — дата и время. [DateTime](../../sql-reference/data-types/datetime.md).
+
+**Возвращаемое значение**
+
+-   Значение, преобразованное в [Int64](../data-types/int-uint.md), как первый идентификатор Snowflake ID на этот момент времени.
+
+**Пример**
+
+Запрос:
+
+``` sql
+WITH toDateTime('2021-08-15 18:57:56', 'Asia/Shanghai') AS dt SELECT dateTimeToSnowflake(dt);
+```
+
+Результат:
+
+``` text
+┌─dateTimeToSnowflake(dt)─┐
+│     1426860702823350272 │
+└─────────────────────────┘
+```
+
+## dateTime64ToSnowflake {#datetime64tosnowflake}
+
+Преобразует значение [DateTime64](../data-types/datetime64.md) в первый идентификатор [Snowflake ID](https://en.wikipedia.org/wiki/Snowflake_ID) на заданный момент времени.
+
+**Синтаксис**
+
+``` sql
+dateTime64ToSnowflake(value)
+```
+
+**Аргументы**
+
+-   `value` — дата и время. [DateTime64](../data-types/datetime64.md).
+
+**Возвращаемое значение**
+
+-   Значение, преобразованное в [Int64](../data-types/int-uint.md), как первый идентификатор Snowflake ID на этот момент времени.
+
+
+**Пример**
+
+Запрос:
+
+``` sql
+WITH toDateTime64('2021-08-15 18:57:56.492', 3, 'Asia/Shanghai') AS dt64 SELECT dateTime64ToSnowflake(dt64);
+```
+
+Результат:
+
+``` text
+┌─dateTime64ToSnowflake(dt64)─┐
+│         1426860704886947840 │
+└─────────────────────────────┘
+```
--- a/docs/ru/sql-reference/statements/create/table.md
+++ b/docs/ru/sql-reference/statements/create/table.md
@ -247,6 +247,7 @@ CREATE TABLE codec_example
 )
 ENGINE = MergeTree()
 ```
+
 ## Временные таблицы {#temporary-tables}

 ClickHouse поддерживает временные таблицы со следующими характеристиками:
--- a/programs/local/LocalServer.cpp
+++ b/programs/local/LocalServer.cpp
@ -393,6 +393,7 @@ void LocalServer::processQueries()
    auto context = session.makeQueryContext();
    context->makeSessionContext(); /// initial_create_query requires a session context to be set.
    context->setCurrentQueryId("");
+
    applyCmdSettings(context);

    /// Use the same query_id (and thread group) for all queries
--- a/programs/server/Server.cpp
+++ b/programs/server/Server.cpp
@ -45,16 +45,14 @@
 #include <IO/UseSSL.h>
 #include <Interpreters/AsynchronousMetrics.h>
 #include <Interpreters/DDLWorker.h>
+#include <Interpreters/DNSCacheUpdater.h>
+#include <Interpreters/DatabaseCatalog.h>
 #include <Interpreters/ExternalDictionariesLoader.h>
 #include <Interpreters/ExternalModelsLoader.h>
 #include <Interpreters/ProcessList.h>
 #include <Interpreters/loadMetadata.h>
-#include <Interpreters/DatabaseCatalog.h>
-#include <Interpreters/DNSCacheUpdater.h>
-#include <Interpreters/ExternalLoaderXMLConfigRepository.h>
-#include <Interpreters/InterserverCredentials.h>
-#include <Interpreters/UserDefinedObjectsLoader.h>
 #include <Interpreters/JIT/CompiledExpressionCache.h>
+#include <Interpreters/UserDefinedObjectsLoader.h>
 #include <Access/AccessControlManager.h>
 #include <Storages/StorageReplicatedMergeTree.h>
 #include <Storages/System/attachSystemTables.h>
@ -1131,6 +1129,10 @@ if (ThreadFuzzer::instance().isEffective())
        global_context->setSystemZooKeeperLogAfterInitializationIfNeeded();
        /// After the system database is created, attach virtual system tables (in addition to query_log and part_log)
        attachSystemTablesServer(*database_catalog.getSystemDatabase(), has_zookeeper);
+        /// Firstly remove partially dropped databases, to avoid race with MaterializedMySQLSyncThread,
+        /// that may execute DROP before loadMarkedAsDroppedTables() in background,
+        /// and so loadMarkedAsDroppedTables() will find it and try to add, and UUID will overlap.
+        database_catalog.loadMarkedAsDroppedTables();
        /// Then, load remaining databases
        loadMetadata(global_context, default_database);
        database_catalog.loadDatabases();
--- a/programs/server/config.xml
+++ b/programs/server/config.xml
@ -18,6 +18,7 @@
          - information
          - debug
          - trace
+          - test (not for production usage)

            [1]: https://github.com/pocoproject/poco/blob/poco-1.9.4-release/Foundation/include/Poco/Logger.h#L105-L114
        -->
@ -964,6 +965,14 @@
        <flush_interval_milliseconds>1000</flush_interval_milliseconds>
    </crash_log>

+    <!-- Session log. Stores user log in (successful or not) and log out events. -->
+    <session_log>
+        <database>system</database>
+        <table>session_log</table>
+
+        <partition_by>toYYYYMM(event_date)</partition_by>
+        <flush_interval_milliseconds>7500</flush_interval_milliseconds>
+    </session_log>

    <!-- Parameters for embedded dictionaries, used in Yandex.Metrica.
         See https://clickhouse.yandex/docs/en/dicts/internal_dicts/
--- a/programs/server/users.d/session_log_test.xml
+++ b/programs/server/users.d/session_log_test.xml
@ -0,0 +1 @@
+../../../tests/config/users.d/session_log_test.xml
--- a/src/Access/AccessControlManager.h
+++ b/src/Access/AccessControlManager.h
@ -143,10 +143,11 @@ public:

    std::vector<QuotaUsage> getAllQuotasUsage() const;

-    std::shared_ptr<const EnabledSettings> getEnabledSettings(const UUID & user_id,
-                                                              const SettingsProfileElements & settings_from_user,
-                                                              const boost::container::flat_set<UUID> & enabled_roles,
-                                                              const SettingsProfileElements & settings_from_enabled_roles) const;
+    std::shared_ptr<const EnabledSettings> getEnabledSettings(
+        const UUID & user_id,
+        const SettingsProfileElements & settings_from_user,
+        const boost::container::flat_set<UUID> & enabled_roles,
+        const SettingsProfileElements & settings_from_enabled_roles) const;

    std::shared_ptr<const SettingsProfilesInfo> getSettingsProfileInfo(const UUID & profile_id);

--- a/src/Access/SettingsProfilesInfo.h
+++ b/src/Access/SettingsProfilesInfo.h
@ -36,6 +36,16 @@ struct SettingsProfilesInfo
    friend bool operator ==(const SettingsProfilesInfo & lhs, const SettingsProfilesInfo & rhs);
    friend bool operator !=(const SettingsProfilesInfo & lhs, const SettingsProfilesInfo & rhs) { return !(lhs == rhs); }

+    Strings getProfileNames() const
+    {
+        Strings result;
+        result.reserve(profiles.size());
+        for (const auto & profile_id : profiles)
+            result.push_back(names_of_profiles.at(profile_id));
+
+        return result;
+    }
+
 private:
    const AccessControlManager & manager;
 };
--- a/src/Common/FiberStack.h
+++ b/src/Common/FiberStack.h
@ -27,7 +27,12 @@ private:
    size_t stack_size;
    size_t page_size = 0;
 public:
-    static constexpr size_t default_stack_size = 128 * 1024; /// 64KB was not enough for tests
+    /// NOTE: If you see random segfaults in CI and the stack trace starts from boost::context::...fiber...,
+    /// it is probably worth trying to increase the stack size for coroutines.
+    ///
+    /// Current value is just enough for all tests in our CI. It's not selected in some special
+    /// way. We will have 36 pages with 4KB page size.
+    static constexpr size_t default_stack_size = 144 * 1024; /// 64KB was not enough for tests

    explicit FiberStack(size_t stack_size_ = default_stack_size) : stack_size(stack_size_)
    {
@ -43,6 +48,8 @@ public:
        if (MAP_FAILED == vp)
            DB::throwFromErrno(fmt::format("FiberStack: Cannot mmap {}.", ReadableSize(num_bytes)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY);

+        /// TODO: make reports on illegal guard page access more clear.
+        /// Currently we will see segfault and almost random stacktrace.
        if (-1 == ::mprotect(vp, page_size, PROT_NONE))
        {
            ::munmap(vp, num_bytes);
--- a/src/Common/tests/gtest_log.cpp
+++ b/src/Common/tests/gtest_log.cpp
@ -6,6 +6,8 @@
 #include <Poco/Logger.h>
 #include <Poco/AutoPtr.h>
 #include <Poco/NullChannel.h>
+#include <Poco/StreamChannel.h>
+#include <sstream>


 TEST(Logger, Log)
@ -17,3 +19,34 @@ TEST(Logger, Log)
    /// This test checks that we don't pass this string to fmtlib, because it is the only argument.
    EXPECT_NO_THROW(LOG_INFO(log, "Hello {} World"));
 }
+
+TEST(Logger, TestLog)
+{
+    {   /// Test logs visible for test level
+
+        std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
+        auto my_channel = Poco::AutoPtr<Poco::StreamChannel>(new Poco::StreamChannel(oss));
+        auto * log = &Poco::Logger::create("TestLogger", my_channel.get());
+        log->setLevel("test");
+        LOG_TEST(log, "Hello World");
+
+        EXPECT_EQ(oss.str(), "Hello World\n");
+        Poco::Logger::destroy("TestLogger");
+    }
+
+    {   /// Test logs invisible for other levels
+        for (const auto & level : {"trace", "debug", "information", "warning", "error", "fatal"})
+        {
+            std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM
+            auto my_channel = Poco::AutoPtr<Poco::StreamChannel>(new Poco::StreamChannel(oss));
+            auto * log = &Poco::Logger::create(std::string{level} + "_Logger", my_channel.get());
+            log->setLevel(level);
+            LOG_TEST(log, "Hello World");
+
+            EXPECT_EQ(oss.str(), "");
+
+            Poco::Logger::destroy(std::string{level} + "_Logger");
+        }
+    }
+
+}
--- a/src/Compression/CompressionCodecEncrypted.cpp
+++ b/src/Compression/CompressionCodecEncrypted.cpp
@ -113,7 +113,8 @@ namespace DB

    std::string CompressionCodecEncrypted::deriveKey(const std::string_view & master_key)
    {
-        std::string_view salt(""); // No salt: derive keys in a deterministic manner.
+        /// No salt: derive keys in a deterministic manner.
+        std::string_view salt(""); // NOLINT
        std::string_view info("Codec Encrypted('AES-128-GCM-SIV') key generation key");
        std::array<char, 32> result;

--- a/src/Coordination/KeeperServer.cpp
+++ b/src/Coordination/KeeperServer.cpp
@ -357,7 +357,7 @@ void KeeperServer::waitInit()
        throw Exception(ErrorCodes::RAFT_ERROR, "Failed to wait RAFT initialization");
 }

-std::unordered_set<int64_t> KeeperServer::getDeadSessions()
+std::vector<int64_t> KeeperServer::getDeadSessions()
 {
    return state_machine->getDeadSessions();
 }
--- a/src/Coordination/KeeperServer.h
+++ b/src/Coordination/KeeperServer.h
@ -71,7 +71,7 @@ public:
    RaftAppendResult putRequestBatch(const KeeperStorage::RequestsForSessions & requests);

    /// Return set of the non-active sessions
-    std::unordered_set<int64_t> getDeadSessions();
+    std::vector<int64_t> getDeadSessions();

    bool isLeader() const;

--- a/src/Coordination/KeeperStateMachine.cpp
+++ b/src/Coordination/KeeperStateMachine.cpp
@ -122,6 +122,10 @@ nuraft::ptr<nuraft::buffer> KeeperStateMachine::commit(const uint64_t log_idx, n
    }
    else
    {
+        LOG_TEST(log, "Commit request for session {} with type {}, log id {}{}",
+                 request_for_session.session_id, toString(request_for_session.request->getOpNum()), log_idx,
+                 request_for_session.request->getPath().empty() ? "" : ", path " + request_for_session.request->getPath());
+
        std::lock_guard lock(storage_and_responses_lock);
        KeeperStorage::ResponsesForSessions responses_for_sessions = storage->processRequest(request_for_session.request, request_for_session.session_id, log_idx);
        for (auto & response_for_session : responses_for_sessions)
@ -304,7 +308,7 @@ void KeeperStateMachine::processReadRequest(const KeeperStorage::RequestForSessi
        responses_queue.push(response);
 }

-std::unordered_set<int64_t> KeeperStateMachine::getDeadSessions()
+std::vector<int64_t> KeeperStateMachine::getDeadSessions()
 {
    std::lock_guard lock(storage_and_responses_lock);
    return storage->getDeadSessions();
--- a/src/Coordination/KeeperStateMachine.h
+++ b/src/Coordination/KeeperStateMachine.h
@ -71,7 +71,7 @@ public:
    /// Process local read request
    void processReadRequest(const KeeperStorage::RequestForSession & request_for_session);

-    std::unordered_set<int64_t> getDeadSessions();
+    std::vector<int64_t> getDeadSessions();

    void shutdownStorage();

--- a/src/Coordination/KeeperStorage.cpp
+++ b/src/Coordination/KeeperStorage.cpp
@ -1078,7 +1078,8 @@ KeeperStorage::ResponsesForSessions KeeperStorage::processRequest(const Coordina
        zxid = *new_last_zxid;
    }

-    session_expiry_queue.update(session_id, session_and_timeout[session_id]);
+    /// ZooKeeper updates session expiry time on each request, not only on heartbeats
+    session_expiry_queue.addNewSessionOrUpdate(session_id, session_and_timeout[session_id]);

    if (zk_request->getOpNum() == Coordination::OpNum::Close) /// Close request is special
    {
--- a/src/Coordination/KeeperStorage.h
+++ b/src/Coordination/KeeperStorage.h
@ -123,7 +123,7 @@ public:
    {
        auto result = session_id_counter++;
        session_and_timeout.emplace(result, session_timeout_ms);
-        session_expiry_queue.update(result, session_timeout_ms);
+        session_expiry_queue.addNewSessionOrUpdate(result, session_timeout_ms);
        return result;
    }

@ -131,7 +131,7 @@ public:
    void addSessionID(int64_t session_id, int64_t session_timeout_ms)
    {
        session_and_timeout.emplace(session_id, session_timeout_ms);
-        session_expiry_queue.update(session_id, session_timeout_ms);
+        session_expiry_queue.addNewSessionOrUpdate(session_id, session_timeout_ms);
    }

    /// Process user request and return response.
@ -172,7 +172,7 @@ public:
    }

    /// Get all dead sessions
-    std::unordered_set<int64_t> getDeadSessions()
+    std::vector<int64_t> getDeadSessions()
    {
        return session_expiry_queue.getExpiredSessions();
    }
--- a/src/Coordination/SessionExpiryQueue.cpp
+++ b/src/Coordination/SessionExpiryQueue.cpp
@ -1,82 +1,96 @@
 #include <Coordination/SessionExpiryQueue.h>
 #include <common/logger_useful.h>
+
 namespace DB
 {

 bool SessionExpiryQueue::remove(int64_t session_id)
 {
-    auto session_it = session_to_timeout.find(session_id);
-    if (session_it != session_to_timeout.end())
+    auto session_it = session_to_expiration_time.find(session_id); /// Unknown session: nothing to remove, return false
+    if (session_it != session_to_expiration_time.end())
    {
        auto set_it = expiry_to_sessions.find(session_it->second);
        if (set_it != expiry_to_sessions.end())
            set_it->second.erase(session_id);

+        /// Drop the bucket when it became empty. The bucket may be absent, so end() must never be dereferenced
+        if (set_it != expiry_to_sessions.end() && set_it->second.empty())
+            expiry_to_sessions.erase(set_it);
+
+        session_to_expiration_time.erase(session_it); /// Forget the session itself
+
        return true;
    }

    return false;
 }

-bool SessionExpiryQueue::update(int64_t session_id, int64_t timeout_ms)
+void SessionExpiryQueue::addNewSessionOrUpdate(int64_t session_id, int64_t timeout_ms)
 {
-    auto session_it = session_to_timeout.find(session_id);
    int64_t now = getNowMilliseconds();
+    /// Expiry time is rounded up to the next interval boundary, the result is the key of the bucket
    int64_t new_expiry_time = roundToNextInterval(now + timeout_ms);

-    if (session_it != session_to_timeout.end())
+    auto session_it = session_to_expiration_time.find(session_id);
+    /// We already registered this session
+    if (session_it != session_to_expiration_time.end())
    {
-        if (new_expiry_time == session_it->second)
-            return false;
+        int64_t prev_expiry_time = session_it->second;
+        session_it->second = new_expiry_time;
+        /// Nothing changed, session stays in the same bucket
+        if (new_expiry_time == prev_expiry_time)
+            return;

+        /// Create the new bucket if it doesn't exist yet
        auto set_it = expiry_to_sessions.find(new_expiry_time);
        if (set_it == expiry_to_sessions.end())
            std::tie(set_it, std::ignore) = expiry_to_sessions.emplace(new_expiry_time, std::unordered_set<int64_t>());

+        /// Add session to the new bucket
        set_it->second.insert(session_id);
-        int64_t prev_expiry_time = session_it->second;

-        if (prev_expiry_time != new_expiry_time)
-        {
-            auto prev_set_it = expiry_to_sessions.find(prev_expiry_time);
-            if (prev_set_it != expiry_to_sessions.end())
-                prev_set_it->second.erase(session_id);
-        }
-        session_it->second = new_expiry_time;
-        return true;
+        auto prev_set_it = expiry_to_sessions.find(prev_expiry_time);
+        /// Remove session from previous bucket
+        if (prev_set_it != expiry_to_sessions.end())
+            prev_set_it->second.erase(session_id);
+
+        /// Drop the previous bucket when it became empty. It may be absent, so end() must never be dereferenced
+        if (prev_set_it != expiry_to_sessions.end() && prev_set_it->second.empty())
+            expiry_to_sessions.erase(prev_set_it);
    }
    else
    {
-        session_to_timeout[session_id] = new_expiry_time;
+        /// New session: register it and add it to its bucket (the bucket is created on demand)
+        session_to_expiration_time[session_id] = new_expiry_time;
+
        auto set_it = expiry_to_sessions.find(new_expiry_time);
        if (set_it == expiry_to_sessions.end())
            std::tie(set_it, std::ignore) = expiry_to_sessions.emplace(new_expiry_time, std::unordered_set<int64_t>());
+
        set_it->second.insert(session_id);
-        return false;
    }
 }

-std::unordered_set<int64_t> SessionExpiryQueue::getExpiredSessions()
+std::vector<int64_t> SessionExpiryQueue::getExpiredSessions() const
 {
    int64_t now = getNowMilliseconds();
-    if (now < next_expiration_time)
-        return {};
+    std::vector<int64_t> result; /// Sessions are only reported here, this const method does not remove them from the queue

-    auto set_it = expiry_to_sessions.find(next_expiration_time);
-    int64_t new_expiration_time = next_expiration_time + expiration_interval;
-    next_expiration_time = new_expiration_time;
-    if (set_it != expiry_to_sessions.end())
+    /// Buckets are iterated in ascending order of expiry time (std::map), collect sessions from all expired buckets
+    for (const auto & [expire_time, expired_sessions] : expiry_to_sessions)
    {
-        auto result = set_it->second;
-        expiry_to_sessions.erase(set_it);
-        return result;
+        if (expire_time <= now)
+            result.insert(result.end(), expired_sessions.begin(), expired_sessions.end());
+        else
+            break; /// All following buckets expire even later
    }
-    return {};
+
+    return result;
 }

 void SessionExpiryQueue::clear()
 {
-    session_to_timeout.clear();
+    session_to_expiration_time.clear();
    expiry_to_sessions.clear();
 }

--- a/src/Coordination/SessionExpiryQueue.h
+++ b/src/Coordination/SessionExpiryQueue.h
@ -1,19 +1,32 @@
 #pragma once
+#include <map>
 #include <unordered_map>
 #include <unordered_set>
+#include <vector>
 #include <chrono>

 namespace DB
 {

+/// Simple class for checking expired sessions. The main idea is to round session
+/// expiration times and to place all sessions into buckets keyed by the rounded time.
+/// This way there are not too many different buckets and expired sessions
+/// can be found quite fast.
+/// Buckets look like this:
+/// [1630580418000] -> {1, 5, 6}
+/// [1630580418500] -> {2, 3}
+/// ...
+/// When a new session appears, it is added to an existing bucket or a new bucket is created for it.
 class SessionExpiryQueue
 {
 private:
-    std::unordered_map<int64_t, int64_t> session_to_timeout;
-    std::unordered_map<int64_t, std::unordered_set<int64_t>> expiry_to_sessions;
+    /// Session -> expiration time in ms (rounded up to the expiration interval), i.e. the key of its bucket
+    std::unordered_map<int64_t, int64_t> session_to_expiration_time;
+
+    /// Expiration time -> sessions which expire at this (rounded) time
+    std::map<int64_t, std::unordered_set<int64_t>> expiry_to_sessions;

    int64_t expiration_interval;
-    int64_t next_expiration_time;

    static int64_t getNowMilliseconds()
    {
@ -21,23 +34,30 @@ private:
        return duration_cast<milliseconds>(system_clock::now().time_since_epoch()).count();
    }

+    /// Round time to the next expiration interval. The result used as a key for
+    /// expiry_to_sessions map.
    int64_t roundToNextInterval(int64_t time) const
    {
        return (time / expiration_interval + 1) * expiration_interval;
    }

 public:
+    /// expiration_interval -- how often we will check new sessions and how small
+    /// buckets we will have. In ZooKeeper normal session timeout is around 30 seconds
+    /// and expiration_interval is about 500ms.
    explicit SessionExpiryQueue(int64_t expiration_interval_)
        : expiration_interval(expiration_interval_)
-        , next_expiration_time(roundToNextInterval(getNowMilliseconds()))
    {
    }

+    /// Remove the session from the queue. Returns true if the session was actually removed
    bool remove(int64_t session_id);

-    bool update(int64_t session_id, int64_t timeout_ms);
+    /// Add a new session or update the expiry time of an existing one (must be called on heartbeats)
+    void addNewSessionOrUpdate(int64_t session_id, int64_t timeout_ms);

-    std::unordered_set<int64_t> getExpiredSessions();
+    /// Get all expired sessions
+    std::vector<int64_t> getExpiredSessions() const;

    void clear();
 };
--- a/src/Coordination/tests/gtest_coordination.cpp
+++ b/src/Coordination/tests/gtest_coordination.cpp
@ -1318,6 +1318,7 @@ TEST(CoordinationTest, TestRotateIntervalChanges)
        }
    }

+
    EXPECT_TRUE(fs::exists("./logs/changelog_1_100.bin"));

    DB::KeeperLogStore changelog_1("./logs", 10, true);
@ -1347,6 +1348,7 @@ TEST(CoordinationTest, TestRotateIntervalChanges)
    }

    changelog_2.compact(105);
+
    EXPECT_FALSE(fs::exists("./logs/changelog_1_100.bin"));
    EXPECT_TRUE(fs::exists("./logs/changelog_101_110.bin"));
    EXPECT_TRUE(fs::exists("./logs/changelog_111_117.bin"));
@ -1375,6 +1377,23 @@ TEST(CoordinationTest, TestRotateIntervalChanges)
    EXPECT_TRUE(fs::exists("./logs/changelog_142_146.bin"));
 }

+TEST(CoordinationTest, TestSessionExpiryQueue)
+{
+    using namespace Coordination;
+    SessionExpiryQueue queue(500); /// Expiration interval (bucket width) is 500 ms
+
+    queue.addNewSessionOrUpdate(1, 1000); /// Expiry is rounded up to the next interval: more than 1000 and at most 1500 ms from now
+
+    for (size_t i = 0; i < 2; ++i)
+    {
+        EXPECT_EQ(queue.getExpiredSessions(), std::vector<int64_t>({})); /// Checked at about 0 and 400 ms: not expired yet
+        std::this_thread::sleep_for(std::chrono::milliseconds(400));
+    }
+
+    std::this_thread::sleep_for(std::chrono::milliseconds(700)); /// At least 1500 ms have passed in total, the bucket must be expired
+    EXPECT_EQ(queue.getExpiredSessions(), std::vector<int64_t>({1}));
+}
+

 int main(int argc, char ** argv)
 {
--- a/src/Core/PostgreSQLProtocol.h
+++ b/src/Core/PostgreSQLProtocol.h
@ -900,8 +900,7 @@ public:
        Messaging::MessageTransport & mt,
        const Poco::Net::SocketAddress & address)
    {
-        Authentication::Type user_auth_type = session.getAuthenticationType(user_name);
-
+        const Authentication::Type user_auth_type = session.getAuthenticationTypeOrLogInFailure(user_name);
        if (type_to_method.find(user_auth_type) != type_to_method.end())
        {
            type_to_method[user_auth_type]->authenticate(user_name, session, mt, address);
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@ -70,8 +70,8 @@ class IColumn;
    M(UInt64, idle_connection_timeout, 3600, "Close idle TCP connections after specified number of seconds.", 0) \
    M(UInt64, distributed_connections_pool_size, DBMS_DEFAULT_DISTRIBUTED_CONNECTIONS_POOL_SIZE, "Maximum number of connections with one remote server in the pool.", 0) \
    M(UInt64, connections_with_failover_max_tries, DBMS_CONNECTION_POOL_WITH_FAILOVER_DEFAULT_MAX_TRIES, "The maximum number of attempts to connect to replicas.", 0) \
-    M(UInt64, s3_min_upload_part_size, 512*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \
-    M(UInt64, s3_max_single_part_upload_size, 64*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \
+    M(UInt64, s3_min_upload_part_size, 32*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \
+    M(UInt64, s3_max_single_part_upload_size, 32*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \
    M(UInt64, s3_max_single_read_retries, 4, "The maximum number of retries during single S3 read.", 0) \
    M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \
    M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \
--- a/src/Core/SettingsEnums.cpp
+++ b/src/Core/SettingsEnums.cpp
@ -79,8 +79,8 @@ IMPLEMENT_SETTING_ENUM(LogsLevel, ErrorCodes::BAD_ARGUMENTS,
     {"warning",     LogsLevel::warning},
     {"information", LogsLevel::information},
     {"debug",       LogsLevel::debug},
-     {"trace",       LogsLevel::trace}})
-
+     {"trace",       LogsLevel::trace},
+     {"test",        LogsLevel::test}})

 IMPLEMENT_SETTING_ENUM_WITH_RENAME(LogQueriesType, ErrorCodes::BAD_ARGUMENTS,
    {{"QUERY_START",                QUERY_START},
--- a/src/Core/SettingsEnums.h
+++ b/src/Core/SettingsEnums.h
@ -94,6 +94,7 @@ enum class LogsLevel
    information,
    debug,
    trace,
+    test,
 };

 DECLARE_SETTING_ENUM(LogsLevel)
--- a/src/DataStreams/ShellCommandSource.h
+++ b/src/DataStreams/ShellCommandSource.h
@ -3,8 +3,11 @@
 #include <memory>

 #include <common/logger_useful.h>
+#include <common/BorrowedObjectPool.h>
+
 #include <Common/ShellCommand.h>
 #include <Common/ThreadPool.h>
+
 #include <IO/ReadHelpers.h>
 #include <Formats/FormatFactory.h>
 #include <Processors/ISimpleTransform.h>
@ -17,44 +20,85 @@
 namespace DB
 {

-/** A stream, that runs child process and sends data to its stdin in background thread,
-  * and receives data from its stdout.
+/** A stream that takes a child process and sends data to it using tasks in background threads.
+  * For each send data task a background thread is created. A send data task must send data to the process input pipes.
+  * ShellCommandSource receives data from the process stdout.
+  *
+  * If process_pool is passed to the constructor, then the process is returned to the pool after the source is destroyed.
  */
+
+using ProcessPool = BorrowedObjectPool<std::unique_ptr<ShellCommand>>;
+
+struct ShellCommandSourceConfiguration
+{
+    /// Read fixed number of rows from command output
+    bool read_fixed_number_of_rows = false;
+    /// Valid only if read_fixed_number_of_rows = true: number_of_rows_to_read is read from the beginning of the process output
+    bool read_number_of_rows_from_process_output = false;
+    /// Valid only if read_fixed_number_of_rows = true
+    size_t number_of_rows_to_read = 0;
+    /// Max block size in rows. Keeps the default of the constructor parameter it replaces. Not used if read_fixed_number_of_rows = true
+    size_t max_block_size = DEFAULT_BLOCK_SIZE;
+};
+
 class ShellCommandSource final : public SourceWithProgress
 {
 public:
-    using SendDataTask = std::function<void (void)>;
+
+    using SendDataTask = std::function<void(void)>;

    ShellCommandSource(
        ContextPtr context,
        const std::string & format,
        const Block & sample_block,
-        std::unique_ptr<ShellCommand> command_,
+        std::unique_ptr<ShellCommand> && command_,
        Poco::Logger * log_,
-        std::vector<SendDataTask> && send_data_tasks,
-        size_t max_block_size = DEFAULT_BLOCK_SIZE)
+        std::vector<SendDataTask> && send_data_tasks = {},
+        const ShellCommandSourceConfiguration & configuration_ = {},
+        std::shared_ptr<ProcessPool> process_pool_ = nullptr)
        : SourceWithProgress(sample_block)
        , command(std::move(command_))
+        , configuration(configuration_)
        , log(log_)
+        , process_pool(process_pool_)
    {
        for (auto && send_data_task : send_data_tasks)
-            send_data_threads.emplace_back([task = std::move(send_data_task)]() { task(); });
+        {
+            send_data_threads.emplace_back([task = std::move(send_data_task), this]()
+            {
+                try
+                {
+                    task();
+                }
+                catch (...)
+                {
+                    std::lock_guard<std::mutex> lock(send_data_lock);
+                    exception_during_send_data = std::current_exception(); /// Rethrown later from generate() or prepare()
+                }
+            });
+        }

-        pipeline.init(Pipe(FormatFactory::instance().getInput(format, command->out, sample_block, context, max_block_size)));
-        executor = std::make_unique<PullingPipelineExecutor>(pipeline);
-    }
+        size_t max_block_size = configuration.max_block_size;
+
+        if (configuration.read_fixed_number_of_rows)
+        {
+            /** Currently parallel parsing input format cannot read exactly max_block_size rows from input,
+              * so it will be blocked on ReadBufferFromFileDescriptor because this file descriptor represents a pipe that does not have eof.
+              */
+            auto context_for_reading = Context::createCopy(context);
+            context_for_reading->setSetting("input_format_parallel_parsing", false);
+            context = context_for_reading;
+
+            if (configuration.read_number_of_rows_from_process_output)
+            {
+                readText(configuration.number_of_rows_to_read, command->out);
+                char dummy; /// One character after the number of rows is skipped (presumably a line feed -- TODO confirm)
+                readChar(dummy, command->out);
+            }
+
+            max_block_size = configuration.number_of_rows_to_read; /// A block never contains more rows than are expected in total
+        }

-    ShellCommandSource(
-        ContextPtr context,
-        const std::string & format,
-        const Block & sample_block,
-        std::unique_ptr<ShellCommand> command_,
-        Poco::Logger * log_,
-        size_t max_block_size = DEFAULT_BLOCK_SIZE)
-        : SourceWithProgress(sample_block)
-        , command(std::move(command_))
-        , log(log_)
-    {
        pipeline.init(Pipe(FormatFactory::instance().getInput(format, command->out, sample_block, context, max_block_size)));
        executor = std::make_unique<PullingPipelineExecutor>(pipeline);
    }
@ -64,33 +108,50 @@ public:
        for (auto & thread : send_data_threads)
            if (thread.joinable())
                thread.join();
+
+        if (command && process_pool)
+            process_pool->returnObject(std::move(command));
    }

 protected:
+
    Chunk generate() override
    {
+        rethrowExceptionDuringSendDataIfNeeded(); /// Fail early if a background send data task has already failed
+
+        if (configuration.read_fixed_number_of_rows && configuration.number_of_rows_to_read == current_read_rows)
+            return {}; /// All expected rows were read: finish without pulling from the executor again
+
        Chunk chunk;
-        executor->pull(chunk);
+
+        try
+        {
+            if (!executor->pull(chunk))
+                return {};
+
+            current_read_rows += chunk.getNumRows();
+        }
+        catch (...)
+        {
+            tryLogCurrentException(log);
+            command = nullptr; /// Destroy the process here, so that the destructor does not return it to the pool
+            throw;
+        }
+
        return chunk;
    }
-public:
    Status prepare() override
    {
        auto status = SourceWithProgress::prepare();

        if (status == Status::Finished)
        {
-            std::string err;
-            readStringUntilEOF(err, command->err);
-            if (!err.empty())
-                LOG_ERROR(log, "Having stderr: {}", err);
-
            for (auto & thread : send_data_threads)
                if (thread.joinable())
                    thread.join();

-            command->wait();
+            rethrowExceptionDuringSendDataIfNeeded();
        }

        return status;
@ -100,11 +161,30 @@ public:

 private:

+    void rethrowExceptionDuringSendDataIfNeeded() /// Rethrows the exception stored by a send data thread, if there is one
+    {
+        std::lock_guard<std::mutex> lock(send_data_lock); /// The exception is written from the send data threads under the same lock
+        if (exception_during_send_data)
+        {
+            command = nullptr; /// Destroy the process here, so that the destructor does not return it to the pool
+            std::rethrow_exception(exception_during_send_data);
+        }
+    }
+
+    std::unique_ptr<ShellCommand> command;
+    ShellCommandSourceConfiguration configuration;
+
+    size_t current_read_rows = 0;
+
+    Poco::Logger * log;
+
+    std::shared_ptr<ProcessPool> process_pool;
+
    QueryPipeline pipeline;
    std::unique_ptr<PullingPipelineExecutor> executor;
-    std::unique_ptr<ShellCommand> command;
-    std::vector<ThreadFromGlobalPool> send_data_threads;
-    Poco::Logger * log;
-};

+    std::vector<ThreadFromGlobalPool> send_data_threads;
+    std::mutex send_data_lock;
+    std::exception_ptr exception_during_send_data;
+};
 }
--- a/src/DataTypes/NestedUtils.cpp
+++ b/src/DataTypes/NestedUtils.cpp
@ -141,7 +141,7 @@ NamesAndTypesList collect(const NamesAndTypesList & names_and_types)
    auto nested_types = getSubcolumnsOfNested(names_and_types);

    for (const auto & name_type : names_and_types)
-        if (!nested_types.count(splitName(name_type.name).first))
+        if (!isArray(name_type.type) || !nested_types.count(splitName(name_type.name).first))
            res.push_back(name_type);

    for (const auto & name_type : nested_types)
@ -157,6 +157,9 @@ NamesAndTypesList convertToSubcolumns(const NamesAndTypesList & names_and_types)

    for (auto & name_type : res)
    {
+        if (!isArray(name_type.type))
+            continue;
+
        auto split = splitName(name_type.name);
        if (name_type.isSubcolumn() || split.second.empty())
            continue;
--- a/src/DataTypes/tests/gtest_NestedUtils.cpp
+++ b/src/DataTypes/tests/gtest_NestedUtils.cpp
@ -0,0 +1,43 @@
+#include <DataTypes/NestedUtils.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeNested.h>
+#include <gtest/gtest.h>
+
+using namespace DB;
+
+GTEST_TEST(NestedUtils, collect) /// Checks that only Array columns with dotted names are treated as parts of Nested
+{
+    DataTypePtr uint_type = std::make_shared<DataTypeUInt32>();
+    DataTypePtr array_type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt32>());
+
+    const NamesAndTypesList source_columns =
+    {
+        {"id", uint_type},
+        {"arr1", array_type},
+        {"b.id", uint_type}, /// Dotted name, but not an Array: expected to stay a plain column in both results
+        {"b.arr1", array_type},
+        {"b.arr2", array_type}
+    };
+
+    auto nested_type = createNested({uint_type, uint_type}, {"arr1", "arr2"});
+    const NamesAndTypesList columns_with_subcolumns = /// Expected result of convertToSubcolumns
+    {
+        {"id", uint_type},
+        {"arr1", array_type},
+        {"b.id", uint_type},
+        {"b", "arr1", nested_type, array_type},
+        {"b", "arr2", nested_type, array_type}
+    };
+
+    const NamesAndTypesList columns_with_nested = /// Expected result of collect
+    {
+        {"id", uint_type},
+        {"arr1", array_type},
+        {"b.id", uint_type},
+        {"b", nested_type},
+    };
+
+    ASSERT_EQ(Nested::convertToSubcolumns(source_columns).toString(), columns_with_subcolumns.toString());
+    ASSERT_EQ(Nested::collect(source_columns).toString(), columns_with_nested.toString());
+}
--- a/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp
+++ b/src/Databases/MySQL/MaterializedMySQLSyncThread.cpp
@ -247,7 +247,7 @@ void MaterializedMySQLSyncThread::assertMySQLAvailable()
 {
    try
    {
-        checkMySQLVariables(pool.get(), getContext()->getSettingsRef());
+        checkMySQLVariables(pool.get(/* wait_timeout= */ UINT64_MAX), getContext()->getSettingsRef());
    }
    catch (const mysqlxx::ConnectionFailed & e)
    {
@ -729,7 +729,7 @@ void MaterializedMySQLSyncThread::onEvent(Buffers & buffers, const BinlogEventPt
        {
            /// Some behaviors(such as changing the value of "binlog_checksum") rotate the binlog file.
            /// To ensure that the synchronization continues, we need to handle these events
-            metadata.fetchMasterVariablesValue(pool.get());
+            metadata.fetchMasterVariablesValue(pool.get(/* wait_timeout= */ UINT64_MAX));
            client.setBinlogChecksum(metadata.binlog_checksum);
        }
        else if (receive_event->header.type != HEARTBEAT_EVENT)
--- a/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp
+++ b/src/Databases/PostgreSQL/DatabaseMaterializedPostgreSQL.cpp
@ -61,10 +61,8 @@ void DatabaseMaterializedPostgreSQL::startSynchronization()
            connection_info,
            getContext(),
            is_attach,
-            settings->materialized_postgresql_max_block_size.value,
-            settings->materialized_postgresql_allow_automatic_update,
-            /* is_materialized_postgresql_database = */ true,
-            settings->materialized_postgresql_tables_list.value);
+            *settings,
+            /* is_materialized_postgresql_database = */ true);

    postgres::Connection connection(connection_info);
    NameSet tables_to_replicate;
--- a/src/Dictionaries/ExecutableDictionarySource.cpp
+++ b/src/Dictionaries/ExecutableDictionarySource.cpp
@ -1,6 +1,5 @@
 #include "ExecutableDictionarySource.h"

-#include <functional>
 #include <common/logger_useful.h>
 #include <common/LocalDateTime.h>
 #include <Common/ShellCommand.h>
@ -125,8 +124,7 @@ Pipe ExecutableDictionarySource::getStreamForBlock(const Block & block)
        formatBlock(output_stream, block);
        out.close();
    }};
-
-    std::vector<ShellCommandSource::SendDataTask> tasks = {task};
+    std::vector<ShellCommandSource::SendDataTask> tasks = {std::move(task)};

    Pipe pipe(std::make_unique<ShellCommandSource>(context, configuration.format, sample_block, std::move(process), log, std::move(tasks)));

--- a/src/Dictionaries/ExecutableDictionarySource.h
+++ b/src/Dictionaries/ExecutableDictionarySource.h
@ -1,11 +1,12 @@
 #pragma once

-#include "DictionaryStructure.h"
-#include "IDictionarySource.h"
+#include <common/logger_useful.h>
+
 #include <Core/Block.h>
 #include <Interpreters/Context.h>

-namespace Poco { class Logger; }
+#include <Dictionaries/IDictionarySource.h>
+#include <Dictionaries/DictionaryStructure.h>


 namespace DB
--- a/src/Dictionaries/ExecutablePoolDictionarySource.cpp
+++ b/src/Dictionaries/ExecutablePoolDictionarySource.cpp
@ -1,24 +1,18 @@
 #include "ExecutablePoolDictionarySource.h"

-#include <functional>
-#include <common/scope_guard.h>
-#include <Processors/Sources/SourceWithProgress.h>
-#include <Processors/Executors/PullingPipelineExecutor.h>
-#include <Processors/QueryPipeline.h>
+#include <common/logger_useful.h>
+#include <common/LocalDateTime.h>
+#include <Common/ShellCommand.h>
+
 #include <DataStreams/formatBlock.h>
+
 #include <Interpreters/Context.h>
 #include <IO/WriteHelpers.h>
 #include <IO/ReadHelpers.h>
-#include <Formats/FormatFactory.h>
-#include <Processors/Formats/IInputFormat.h>
-#include <Common/ShellCommand.h>
-#include <Common/ThreadPool.h>
-#include <common/logger_useful.h>
-#include <common/LocalDateTime.h>
-#include "DictionarySourceFactory.h"
-#include "DictionarySourceHelpers.h"
-#include "DictionaryStructure.h"
-#include "registerDictionaries.h"
+
+#include <Dictionaries/DictionarySourceFactory.h>
+#include <Dictionaries/DictionarySourceHelpers.h>
+#include <Dictionaries/DictionaryStructure.h>


 namespace DB
@ -37,13 +31,13 @@ ExecutablePoolDictionarySource::ExecutablePoolDictionarySource(
    const Configuration & configuration_,
    Block & sample_block_,
    ContextPtr context_)
-    : log(&Poco::Logger::get("ExecutablePoolDictionarySource"))
-    , dict_struct{dict_struct_}
-    , configuration{configuration_}
-    , sample_block{sample_block_}
-    , context{context_}
+    : dict_struct(dict_struct_)
+    , configuration(configuration_)
+    , sample_block(sample_block_)
+    , context(context_)
    /// If pool size == 0 then there is no size restrictions. Poco max size of semaphore is integer type.
-    , process_pool{std::make_shared<ProcessPool>(configuration.pool_size == 0 ? std::numeric_limits<int>::max() : configuration.pool_size)}
+    , process_pool(std::make_shared<ProcessPool>(configuration.pool_size == 0 ? std::numeric_limits<int>::max() : configuration.pool_size))
+    , log(&Poco::Logger::get("ExecutablePoolDictionarySource"))
 {
    /// Remove keys from sample_block for implicit_key dictionary because
    /// these columns will not be returned from source
@ -62,13 +56,12 @@ ExecutablePoolDictionarySource::ExecutablePoolDictionarySource(
 }

 ExecutablePoolDictionarySource::ExecutablePoolDictionarySource(const ExecutablePoolDictionarySource & other)
-    : log(&Poco::Logger::get("ExecutablePoolDictionarySource"))
-    , update_time{other.update_time}
-    , dict_struct{other.dict_struct}
-    , configuration{other.configuration}
-    , sample_block{other.sample_block}
-    , context{Context::createCopy(other.context)}
-    , process_pool{std::make_shared<ProcessPool>(configuration.pool_size)}
+    : dict_struct(other.dict_struct)
+    , configuration(other.configuration)
+    , sample_block(other.sample_block)
+    , context(Context::createCopy(other.context)) /// The copy gets its own context and its own (empty) process pool
+    , process_pool(std::make_shared<ProcessPool>(configuration.pool_size == 0 ? std::numeric_limits<int>::max() : configuration.pool_size)) /// pool_size == 0 means no size restrictions, same as in the main constructor
+    , log(&Poco::Logger::get("ExecutablePoolDictionarySource"))
 {
 }

@ -82,123 +75,6 @@ Pipe ExecutablePoolDictionarySource::loadUpdatedAll()
    throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "ExecutablePoolDictionarySource does not support loadUpdatedAll method");
 }

-namespace
-{
-    /** A stream, that runs child process and sends data to its stdin in background thread,
-      *  and receives data from its stdout.
-      */
-    class PoolSourceWithBackgroundThread final : public SourceWithProgress
-    {
-    public:
-        PoolSourceWithBackgroundThread(
-            std::shared_ptr<ProcessPool> process_pool_,
-            std::unique_ptr<ShellCommand> && command_,
-            Pipe pipe,
-            size_t read_rows_,
-            Poco::Logger * log_,
-            std::function<void(WriteBufferFromFile &)> && send_data_)
-            : SourceWithProgress(pipe.getHeader())
-            , process_pool(process_pool_)
-            , command(std::move(command_))
-            , rows_to_read(read_rows_)
-            , log(log_)
-            , send_data(std::move(send_data_))
-            , thread([this]
-            {
-                try
-                {
-                    send_data(command->in);
-                }
-                catch (...)
-                {
-                    std::lock_guard<std::mutex> lck(exception_during_read_lock);
-                    exception_during_read = std::current_exception();
-                }
-            })
-        {
-            pipeline.init(std::move(pipe));
-            executor = std::make_unique<PullingPipelineExecutor>(pipeline);
-        }
-
-        ~PoolSourceWithBackgroundThread() override
-        {
-            if (thread.joinable())
-                thread.join();
-
-            if (command)
-                process_pool->returnObject(std::move(command));
-        }
-
-    protected:
-        Chunk generate() override
-        {
-            rethrowExceptionDuringReadIfNeeded();
-
-            if (current_read_rows == rows_to_read)
-                return {};
-
-            Chunk chunk;
-
-            try
-            {
-                if (!executor->pull(chunk))
-                    return {};
-
-                current_read_rows += chunk.getNumRows();
-            }
-            catch (...)
-            {
-                tryLogCurrentException(log);
-                command = nullptr;
-                throw;
-            }
-
-            return chunk;
-        }
-
-    public:
-        Status prepare() override
-        {
-            auto status = SourceWithProgress::prepare();
-
-            if (status == Status::Finished)
-            {
-                if (thread.joinable())
-                    thread.join();
-
-                rethrowExceptionDuringReadIfNeeded();
-            }
-
-            return status;
-        }
-
-        void rethrowExceptionDuringReadIfNeeded()
-        {
-            std::lock_guard<std::mutex> lck(exception_during_read_lock);
-            if (exception_during_read)
-            {
-                command = nullptr;
-                std::rethrow_exception(exception_during_read);
-            }
-        }
-
-        String getName() const override { return "PoolWithBackgroundThread"; }
-
-        std::shared_ptr<ProcessPool> process_pool;
-        std::unique_ptr<ShellCommand> command;
-        QueryPipeline pipeline;
-        std::unique_ptr<PullingPipelineExecutor> executor;
-        size_t rows_to_read;
-        Poco::Logger * log;
-        std::function<void(WriteBufferFromFile &)> send_data;
-        ThreadFromGlobalPool thread;
-        size_t current_read_rows = 0;
-        std::mutex exception_during_read_lock;
-        std::exception_ptr exception_during_read;
-    };
-
-}
-
 Pipe ExecutablePoolDictionarySource::loadIds(const std::vector<UInt64> & ids)
 {
    LOG_TRACE(log, "loadIds {} size = {}", toString(), ids.size());
@ -228,19 +104,23 @@ Pipe ExecutablePoolDictionarySource::getStreamForBlock(const Block & block)

    if (!result)
        throw Exception(ErrorCodes::TIMEOUT_EXCEEDED,
-            "Could not get process from pool, max command execution timeout exceeded ({}) seconds",
+            "Could not get process from pool, max command execution timeout exceeded {} seconds",
            configuration.max_command_execution_time);

    size_t rows_to_read = block.rows();
-    auto format = FormatFactory::instance().getInput(configuration.format, process->out, sample_block, context, rows_to_read);
+    auto * process_in = &process->in;
+    ShellCommandSource::SendDataTask task = [process_in, block, this]() mutable
+    {
+        auto & out = *process_in;
+        auto output_stream = context->getOutputStream(configuration.format, out, block.cloneEmpty());
+        formatBlock(output_stream, block);
+    };
+    std::vector<ShellCommandSource::SendDataTask> tasks = {std::move(task)};

-    Pipe pipe(std::make_unique<PoolSourceWithBackgroundThread>(
-        process_pool, std::move(process), Pipe(std::move(format)), rows_to_read, log,
-        [block, this](WriteBufferFromFile & out) mutable
-        {
-            auto output_stream = context->getOutputStream(configuration.format, out, block.cloneEmpty());
-            formatBlock(output_stream, block);
-        }));
+    ShellCommandSourceConfiguration command_configuration;
+    command_configuration.read_fixed_number_of_rows = true;
+    command_configuration.number_of_rows_to_read = rows_to_read;
+    Pipe pipe(std::make_unique<ShellCommandSource>(context, configuration.format, sample_block, std::move(process), log, std::move(tasks), command_configuration, process_pool));

    if (configuration.implicit_key)
        pipe.addTransform(std::make_shared<TransformWithAdditionalColumns>(block, pipe.getHeader()));
@ -294,11 +174,6 @@ void registerDictionarySourceExecutablePool(DictionarySourceFactory & factory)

        ContextMutablePtr context = copyContextAndApplySettingsFromDictionaryConfig(global_context, config, config_prefix);

-        /** Currently parallel parsing input format cannot read exactly max_block_size rows from input,
-         *  so it will be blocked on ReadBufferFromFileDescriptor because this file descriptor represent pipe that does not have eof.
-         */
-        context->setSetting("input_format_parallel_parsing", false);
-
        String settings_config_prefix = config_prefix + ".executable_pool";

        size_t max_command_execution_time = config.getUInt64(settings_config_prefix + ".max_command_execution_time", 10);
--- a/src/Dictionaries/ExecutablePoolDictionarySource.h
+++ b/src/Dictionaries/ExecutablePoolDictionarySource.h
@ -1,20 +1,18 @@
 #pragma once

-#include <common/BorrowedObjectPool.h>
+#include <common/logger_useful.h>

 #include <Core/Block.h>
 #include <Interpreters/Context.h>

-#include "IDictionarySource.h"
-#include "DictionaryStructure.h"
-
-namespace Poco { class Logger; }
+#include <Dictionaries/IDictionarySource.h>
+#include <Dictionaries/DictionaryStructure.h>
+#include <DataStreams/ShellCommandSource.h>


 namespace DB
 {

-using ProcessPool = BorrowedObjectPool<std::unique_ptr<ShellCommand>>;

 /** ExecutablePoolDictionarySource allows loading data from pool of processes.
  * When client requests ids or keys source get process from ProcessPool
@ -73,14 +71,13 @@ public:
    Pipe getStreamForBlock(const Block & block);

 private:
-    Poco::Logger * log;
-    time_t update_time = 0;
    const DictionaryStructure dict_struct;
    const Configuration configuration;

    Block sample_block;
    ContextPtr context;
    std::shared_ptr<ProcessPool> process_pool;
+    Poco::Logger * log;
 };

 }
--- a/src/Functions/FunctionBinaryArithmetic.h
+++ b/src/Functions/FunctionBinaryArithmetic.h
@ -1422,16 +1422,32 @@ public:

    Monotonicity getMonotonicityForRange(const IDataType &, const Field & left_point, const Field & right_point) const override
    {
-        // For simplicity, we treat null values as monotonicity breakers.
+        const std::string_view name_view = Name::name;
+
+        // For simplicity, we treat null values as monotonicity breakers, except for variable / non-zero constant.
        if (left_point.isNull() || right_point.isNull())
+        {
+            if (name_view == "divide" || name_view == "intDiv")
+            {
+                // variable / constant
+                if (right.column && isColumnConst(*right.column))
+                {
+                    auto constant = (*right.column)[0];
+                    if (applyVisitor(FieldVisitorAccurateEquals(), constant, Field(0)))
+                        return {false, true, false}; // variable / 0 is undefined, let's treat it as non-monotonic
+                    bool is_constant_positive = applyVisitor(FieldVisitorAccurateLess(), Field(0), constant);
+
+                    // division is saturated to `inf`, thus it doesn't have overflow issues.
+                    return {true, is_constant_positive, true};
+                }
+            }
            return {false, true, false};
+        }

        // For simplicity, we treat every single value interval as positive monotonic.
        if (applyVisitor(FieldVisitorAccurateEquals(), left_point, right_point))
            return {true, true, false};

-        const std::string_view name_view = Name::name;
-
        if (name_view == "minus" || name_view == "plus")
        {
            // const +|- variable
@ -1503,14 +1519,14 @@ public:
                    return {true, true, false}; // 0 / 0 is undefined, thus it's not always monotonic

                bool is_constant_positive = applyVisitor(FieldVisitorAccurateLess(), Field(0), constant);
-                if (applyVisitor(FieldVisitorAccurateLess(), left_point, Field(0)) &&
-                        applyVisitor(FieldVisitorAccurateLess(), right_point, Field(0)))
+                if (applyVisitor(FieldVisitorAccurateLess(), left_point, Field(0))
+                    && applyVisitor(FieldVisitorAccurateLess(), right_point, Field(0)))
                {
                    return {true, is_constant_positive, false};
                }
-                else
-                if (applyVisitor(FieldVisitorAccurateLess(), Field(0), left_point) &&
-                        applyVisitor(FieldVisitorAccurateLess(), Field(0), right_point))
+                else if (
+                    applyVisitor(FieldVisitorAccurateLess(), Field(0), left_point)
+                    && applyVisitor(FieldVisitorAccurateLess(), Field(0), right_point))
                {
                    return {true, !is_constant_positive, false};
                }
@ -1524,7 +1540,7 @@ public:

                bool is_constant_positive = applyVisitor(FieldVisitorAccurateLess(), Field(0), constant);
                // division is saturated to `inf`, thus it doesn't have overflow issues.
-                return {true, is_constant_positive, false};
+                return {true, is_constant_positive, true};
            }
        }
        return {false, true, false};
--- a/src/Functions/FunctionsExternalDictionaries.h
+++ b/src/Functions/FunctionsExternalDictionaries.h
@ -104,9 +104,11 @@ public:

        const auto * attr_name_col = checkAndGetColumnConst<ColumnString>(sample_columns.getByPosition(1).column.get());
        if (!attr_name_col)
-            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument of function dictGet... must be a constant string");
+            throw Exception(ErrorCodes::ILLEGAL_COLUMN, "Second argument of function dictGet must be a constant string");

-        return getDictionary(dict_name_col->getValue<String>())->isInjective(attr_name_col->getValue<String>());
+        const auto dictionary_name = dict_name_col->getValue<String>();
+        const auto attribute_name = attr_name_col->getValue<String>();
+        return getDictionary(dictionary_name)->isInjective(attribute_name);
    }

    DictionaryStructure getDictionaryStructure(const String & dictionary_name) const
@ -321,21 +323,32 @@ public:

        Strings attribute_names = getAttributeNamesFromColumn(arguments[1].column, arguments[1].type);

-        DataTypes types;
-
        auto dictionary_structure = helper.getDictionaryStructure(dictionary_name);

+        DataTypes attribute_types;
+        attribute_types.reserve(attribute_names.size());
        for (auto & attribute_name : attribute_names)
        {
            /// We're extracting the return type from the dictionary's config, without loading the dictionary.
-            auto attribute = dictionary_structure.getAttribute(attribute_name);
-            types.emplace_back(attribute.type);
+            const auto & attribute = dictionary_structure.getAttribute(attribute_name);
+            attribute_types.emplace_back(attribute.type);
        }

-        if (types.size() > 1)
-            return std::make_shared<DataTypeTuple>(types, attribute_names);
+        bool key_is_nullable = arguments[2].type->isNullable();
+        if (attribute_types.size() > 1)
+        {
+            if (key_is_nullable)
+                throw Exception(ErrorCodes::UNSUPPORTED_METHOD, "Function {} support nullable key only for single dictionary attribute", getName());
+
+            return std::make_shared<DataTypeTuple>(attribute_types, attribute_names);
+        }
        else
-            return types.front();
+        {
+            if (key_is_nullable)
+                return makeNullable(attribute_types.front());
+            else
+                return attribute_types.front();
+        }
    }

    ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr & result_type, size_t input_rows_count) const override
@ -418,7 +431,9 @@ public:
                default_cols = tuple_column->getColumnsCopy();
            }
            else
+            {
                default_cols.emplace_back(result);
+            }
        }
        else
        {
@ -426,7 +441,16 @@ public:
                default_cols.emplace_back(nullptr);
        }

-        const auto & key_col_with_type = arguments[2];
+        auto key_col_with_type = arguments[2];
+
+        bool key_is_only_null = key_col_with_type.type->onlyNull();
+        if (key_is_only_null)
+            return result_type->createColumnConstWithDefaultValue(input_rows_count);
+
+        bool key_is_nullable = key_col_with_type.type->isNullable();
+        if (key_is_nullable)
+            key_col_with_type = columnGetNested(key_col_with_type);
+
        auto key_column = key_col_with_type.column;

        Columns key_columns;
@ -482,7 +506,26 @@ public:
            key_types.emplace_back(range_col_type);
        }

-        return executeDictionaryRequest(dictionary, attribute_names, key_columns, key_types, result_type, default_cols);
+        DataTypePtr attribute_type = result_type;
+        if (key_is_nullable)
+        {
+            DataTypes attribute_types;
+            attribute_types.reserve(attribute_names.size());
+            for (auto & attribute_name : attribute_names)
+            {
+                const auto & attribute = dictionary->getStructure().getAttribute(attribute_name);
+                attribute_types.emplace_back(attribute.type);
+            }
+
+            attribute_type = attribute_types.front();
+        }
+
+        auto result_column = executeDictionaryRequest(dictionary, attribute_names, key_columns, key_types, attribute_type, default_cols);
+
+        if (key_is_nullable)
+            result_column = wrapInNullable(result_column, {arguments[2]}, result_type, input_rows_count);
+
+        return result_column;
    }

 private:
@ -511,12 +554,14 @@ private:
            result = ColumnTuple::create(std::move(result_columns));
        }
        else
+        {
            result = dictionary->getColumn(
                attribute_names[0],
                result_type,
                key_columns,
                key_types,
                default_cols.front());
+        }

        return result;
    }
@ -526,7 +571,9 @@ private:
        Strings attribute_names;

        if (const auto * name_col = checkAndGetColumnConst<ColumnString>(column.get()))
+        {
            attribute_names.emplace_back(name_col->getValue<String>());
+        }
        else if (const auto * tuple_col_const = checkAndGetColumnConst<ColumnTuple>(column.get()))
        {
            const ColumnTuple & tuple_col = assert_cast<const ColumnTuple &>(tuple_col_const->getDataColumn());
@ -551,10 +598,12 @@ private:
            }
        }
        else
+        {
            throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT,
                "Illegal type {} of second argument of function {}, expected a const string or const tuple of const strings.",
                type->getName(),
                getName());
+        }

        return attribute_names;
    }
--- a/src/Functions/s2RectAdd.cpp
+++ b/src/Functions/s2RectAdd.cpp
@ -41,7 +41,7 @@ public:
        return name;
    }

-    size_t getNumberOfArguments() const override { return 4; }
+    size_t getNumberOfArguments() const override { return 3; }

    bool useDefaultImplementationForConstants() const override { return true; }

--- a/src/Functions/s2RectContains.cpp
+++ b/src/Functions/s2RectContains.cpp
@ -41,7 +41,7 @@ public:
        return name;
    }

-    size_t getNumberOfArguments() const override { return 4; }
+    size_t getNumberOfArguments() const override { return 3; }

    bool useDefaultImplementationForConstants() const override { return true; }

--- a/src/IO/WriteBufferFromFile.cpp
+++ b/src/IO/WriteBufferFromFile.cpp
@ -32,7 +32,7 @@ WriteBufferFromFile::WriteBufferFromFile(
    mode_t mode,
    char * existing_memory,
    size_t alignment)
-    : WriteBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment), file_name(file_name_)
+    : WriteBufferFromFileDescriptor(-1, buf_size, existing_memory, alignment, file_name_)
 {
    ProfileEvents::increment(ProfileEvents::FileOpen);

@ -65,9 +65,7 @@ WriteBufferFromFile::WriteBufferFromFile(
    size_t buf_size,
    char * existing_memory,
    size_t alignment)
-    :
-    WriteBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment),
-    file_name(original_file_name.empty() ? "(fd = " + toString(fd_) + ")" : original_file_name)
+    : WriteBufferFromFileDescriptor(fd_, buf_size, existing_memory, alignment, original_file_name)
 {
    fd_ = -1;
 }
--- a/src/IO/WriteBufferFromFile.h
+++ b/src/IO/WriteBufferFromFile.h
@ -25,7 +25,6 @@ namespace DB
 class WriteBufferFromFile : public WriteBufferFromFileDescriptor
 {
 protected:
-    std::string file_name;
    CurrentMetrics::Increment metric_increment{CurrentMetrics::OpenFileForWrite};

 public:
--- a/src/IO/WriteBufferFromFileDescriptor.cpp
+++ b/src/IO/WriteBufferFromFileDescriptor.cpp
@ -61,7 +61,12 @@ void WriteBufferFromFileDescriptor::nextImpl()
        if ((-1 == res || 0 == res) && errno != EINTR)
        {
            ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteFailed);
-            throwFromErrnoWithPath("Cannot write to file " + getFileName(), getFileName(),
+
+            /// Don't use getFileName() here because this method can be called from destructor
+            String error_file_name = file_name;
+            if (error_file_name.empty())
+                error_file_name = "(fd = " + toString(fd) + ")";
+            throwFromErrnoWithPath("Cannot write to file " + error_file_name, error_file_name,
                                   ErrorCodes::CANNOT_WRITE_TO_FILE_DESCRIPTOR);
        }

@ -73,20 +78,20 @@ void WriteBufferFromFileDescriptor::nextImpl()
    ProfileEvents::increment(ProfileEvents::WriteBufferFromFileDescriptorWriteBytes, bytes_written);
 }

-
-/// Name or some description of file.
-std::string WriteBufferFromFileDescriptor::getFileName() const
-{
-    return "(fd = " + toString(fd) + ")";
-}
-
-
+/// NOTE: This class can be used as a very low-level building block, for example
+/// in trace collector. In such places allocations of memory can be dangerous,
+/// so don't allocate anything in this constructor.
 WriteBufferFromFileDescriptor::WriteBufferFromFileDescriptor(
    int fd_,
    size_t buf_size,
    char * existing_memory,
-    size_t alignment)
-    : WriteBufferFromFileBase(buf_size, existing_memory, alignment), fd(fd_) {}
+    size_t alignment,
+    std::string file_name_)
+    : WriteBufferFromFileBase(buf_size, existing_memory, alignment)
+    , fd(fd_)
+    , file_name(std::move(file_name_))
+{
+}


 WriteBufferFromFileDescriptor::~WriteBufferFromFileDescriptor()
@ -115,7 +120,7 @@ void WriteBufferFromFileDescriptor::sync()
 }


-off_t WriteBufferFromFileDescriptor::seek(off_t offset, int whence)
+off_t WriteBufferFromFileDescriptor::seek(off_t offset, int whence) // NOLINT
 {
    off_t res = lseek(fd, offset, whence);
    if (-1 == res)
@ -125,7 +130,7 @@ off_t WriteBufferFromFileDescriptor::seek(off_t offset, int whence)
 }


-void WriteBufferFromFileDescriptor::truncate(off_t length)
+void WriteBufferFromFileDescriptor::truncate(off_t length) // NOLINT
 {
    int res = ftruncate(fd, length);
    if (-1 == res)
@ -133,7 +138,7 @@ void WriteBufferFromFileDescriptor::truncate(off_t length)
 }


-off_t WriteBufferFromFileDescriptor::size()
+off_t WriteBufferFromFileDescriptor::size() const
 {
    struct stat buf;
    int res = fstat(fd, &buf);
@ -142,4 +147,13 @@ off_t WriteBufferFromFileDescriptor::size()
    return buf.st_size;
 }

+std::string WriteBufferFromFileDescriptor::getFileName() const
+{
+    if (file_name.empty())
+        return "(fd = " + toString(fd) + ")"; /// No name is stored: describe the buffer by its descriptor number instead.
+
+    return file_name;
+}
+
+
 }
--- a/src/IO/WriteBufferFromFileDescriptor.h
+++ b/src/IO/WriteBufferFromFileDescriptor.h
@ -13,17 +13,17 @@ class WriteBufferFromFileDescriptor : public WriteBufferFromFileBase
 protected:
    int fd;

+    /// Name of the file if one was given; otherwise empty, in which case getFileName() returns the string "(fd = ...)".
+    std::string file_name;
+
    void nextImpl() override;
-
-    /// Name or some description of file.
-    std::string getFileName() const override;
-
 public:
    WriteBufferFromFileDescriptor(
        int fd_ = -1,
        size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE,
        char * existing_memory = nullptr,
-        size_t alignment = 0);
+        size_t alignment = 0,
+        std::string file_name_ = "");

    /** Could be used before initialization if needed 'fd' was not passed to constructor.
      * It's not possible to change 'fd' during work.
@ -42,10 +42,15 @@ public:

    void sync() override;

-    off_t seek(off_t offset, int whence);
-    void truncate(off_t length);
+    /// clang-tidy wants these methods to be const, but
+    /// they are not const semantically
+    off_t seek(off_t offset, int whence); // NOLINT
+    void truncate(off_t length); // NOLINT

-    off_t size();
+    /// Name or some description of file.
+    std::string getFileName() const override;
+
+    off_t size() const;
 };

 }
--- a/src/IO/ZstdInflatingReadBuffer.cpp
+++ b/src/IO/ZstdInflatingReadBuffer.cpp
@ -28,41 +28,49 @@ ZstdInflatingReadBuffer::~ZstdInflatingReadBuffer()

 bool ZstdInflatingReadBuffer::nextImpl()
 {
-    if (eof)
-        return false;
-
-    if (input.pos >= input.size)
+    do
    {
-        in->nextIfAtEnd();
-        input.src = reinterpret_cast<unsigned char *>(in->position());
-        input.pos = 0;
-        input.size = in->buffer().end() - in->position();
-    }
+        // If it is known that end of file was reached, return false
+        if (eof)
+            return false;

-    output.dst = reinterpret_cast<unsigned char *>(internal_buffer.begin());
-    output.size = internal_buffer.size();
-    output.pos = 0;
+        /// If the current input chunk has been fully consumed, get the next part from the underlying buffer
+        if (input.pos >= input.size)
+        {
+            in->nextIfAtEnd();
+            input.src = reinterpret_cast<unsigned char *>(in->position());
+            input.pos = 0;
+            input.size = in->buffer().end() - in->position();
+        }

-    size_t ret = ZSTD_decompressStream(dctx, &output, &input);
-    if (ZSTD_isError(ret))
-        throw Exception(
-            ErrorCodes::ZSTD_DECODER_FAILED, "Zstd stream decoding failed: error code: {}; zstd version: {}", ret, ZSTD_VERSION_STRING);
+        /// Reset the output descriptor so that it covers the whole internal buffer
+        output.dst = reinterpret_cast<unsigned char *>(internal_buffer.begin());
+        output.size = internal_buffer.size();
+        output.pos = 0;

-    in->position() = in->buffer().begin() + input.pos;
-    working_buffer.resize(output.pos);
+        /// Decompress data and check errors.
+        size_t ret = ZSTD_decompressStream(dctx, &output, &input);
+        if (ZSTD_isError(ret))
+            throw Exception(
+                ErrorCodes::ZSTD_DECODER_FAILED, "Zstd stream decoding failed: error code: {}; zstd version: {}", ret, ZSTD_VERSION_STRING);

-    if (in->eof())
-    {
-        eof = true;
-        return !working_buffer.empty();
-    }
-    else if (output.pos == 0)
-    {
+        /// Check that something has changed after decompress (input or output position)
+        assert(output.pos > 0 || in->position() < in->buffer().begin() + input.pos);
+
+        /// move position to the end of read data
+        in->position() = in->buffer().begin() + input.pos;
+        working_buffer.resize(output.pos);
+
+        /// If end of file is reached, fill eof variable and return true if there is some data in buffer, otherwise return false
+        if (in->eof())
+        {
+            eof = true;
+            return !working_buffer.empty();
+        }
        /// It is possible, that input buffer is not at eof yet, but nothing was decompressed in current iteration.
        /// But there are cases, when such behaviour is not allowed - i.e. if input buffer is not eof, then
        /// it has to be guaranteed that working_buffer is not empty. So if it is empty, continue.
-        return nextImpl();
-    }
+    } while (output.pos == 0);

    return true;
 }
--- a/src/IO/ya.make
+++ b/src/IO/ya.make
@ -5,6 +5,7 @@ LIBRARY()

 ADDINCL(
    contrib/libs/zstd/include
+    contrib/libs/lz4
    contrib/restricted/fast_float/include
 )

@ -14,10 +15,10 @@ PEERDIR(
    contrib/libs/brotli/enc
    contrib/libs/poco/NetSSL_OpenSSL
    contrib/libs/zstd
+    contrib/libs/lz4
    contrib/restricted/fast_float
 )

-
 SRCS(
    AIO.cpp
    AsynchronousReadBufferFromFile.cpp
--- a/src/IO/ya.make.in
+++ b/src/IO/ya.make.in
@ -4,6 +4,7 @@ LIBRARY()

 ADDINCL(
    contrib/libs/zstd/include
+    contrib/libs/lz4
    contrib/restricted/fast_float/include
 )

@ -13,10 +14,10 @@ PEERDIR(
    contrib/libs/brotli/enc
    contrib/libs/poco/NetSSL_OpenSSL
    contrib/libs/zstd
+    contrib/libs/lz4
    contrib/restricted/fast_float
 )

-
 SRCS(
 <? find . -name '*.cpp' | grep -v -F tests | grep -v -F examples | grep -v -F fuzzers | grep -v -P 'S3|HDFS' | sed 's/^\.\//    /' | sort ?>
 )
--- a/src/Interpreters/Context.cpp
+++ b/src/Interpreters/Context.cpp
@ -57,6 +57,7 @@
 #include <Interpreters/Cluster.h>
 #include <Interpreters/InterserverIOHandler.h>
 #include <Interpreters/SystemLog.h>
+#include <Interpreters/SessionLog.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/DDLWorker.h>
 #include <Interpreters/DDLTask.h>
@ -641,7 +642,6 @@ ConfigurationPtr Context::getUsersConfig()
    return shared->users_config;
 }

-
 void Context::setUser(const UUID & user_id_)
 {
    auto lock = getLock();
@ -2071,6 +2071,16 @@ std::shared_ptr<OpenTelemetrySpanLog> Context::getOpenTelemetrySpanLog() const
    return shared->system_logs->opentelemetry_span_log;
 }

+std::shared_ptr<SessionLog> Context::getSessionLog() const
+{
+    auto lock = getLock();
+
+    if (!shared->system_logs)
+        return {}; /// There is no system logs object, so there is no session log: return an empty pointer.
+
+    return shared->system_logs->session_log;
+}
+

 std::shared_ptr<ZooKeeperLog> Context::getZooKeeperLog() const
 {
--- a/src/Interpreters/Context.h
+++ b/src/Interpreters/Context.h
@ -74,6 +74,7 @@ class MetricLog;
 class AsynchronousMetricLog;
 class OpenTelemetrySpanLog;
 class ZooKeeperLog;
+class SessionLog;
 struct MergeTreeSettings;
 class StorageS3Settings;
 class IDatabase;
@ -600,6 +601,7 @@ public:
    bool hasSessionContext() const { return !session_context.expired(); }

    ContextMutablePtr getGlobalContext() const;
+
    bool hasGlobalContext() const { return !global_context.expired(); }
    bool isGlobalContext() const
    {
@ -735,6 +737,7 @@ public:
    std::shared_ptr<AsynchronousMetricLog> getAsynchronousMetricLog() const;
    std::shared_ptr<OpenTelemetrySpanLog> getOpenTelemetrySpanLog() const;
    std::shared_ptr<ZooKeeperLog> getZooKeeperLog() const;
+    std::shared_ptr<SessionLog> getSessionLog() const;

    /// Returns an object used to log operations with parts if it possible.
    /// Provide table name to make required checks.
--- a/src/Interpreters/DatabaseCatalog.cpp
+++ b/src/Interpreters/DatabaseCatalog.cpp
@ -146,7 +146,6 @@ void DatabaseCatalog::initializeAndLoadTemporaryDatabase()

 void DatabaseCatalog::loadDatabases()
 {
-    loadMarkedAsDroppedTables();
    auto task_holder = getContext()->getSchedulePool().createTask("DatabaseCatalog", [this](){ this->dropTableDataTask(); });
    drop_task = std::make_unique<BackgroundSchedulePoolTaskHolder>(std::move(task_holder));
    (*drop_task)->activate();
@ -618,12 +617,6 @@ Dependencies DatabaseCatalog::getDependencies(const StorageID & from) const
    return Dependencies(iter->second.begin(), iter->second.end());
 }

-ViewDependencies DatabaseCatalog::getViewDependencies() const
-{
-    std::lock_guard lock{databases_mutex};
-    return ViewDependencies(view_dependencies.begin(), view_dependencies.end());
-}
-
 void
 DatabaseCatalog::updateDependency(const StorageID & old_from, const StorageID & old_where, const StorageID & new_from,
                                  const StorageID & new_where)
--- a/src/Interpreters/DatabaseCatalog.h
+++ b/src/Interpreters/DatabaseCatalog.h
@ -130,6 +130,7 @@ public:

    void initializeAndLoadTemporaryDatabase();
    void loadDatabases();
+    void loadMarkedAsDroppedTables();

    /// Get an object that protects the table from concurrently executing multiple DDL operations.
    DDLGuardPtr getDDLGuard(const String & database, const String & table);
@ -174,7 +175,6 @@ public:
    void addDependency(const StorageID & from, const StorageID & where);
    void removeDependency(const StorageID & from, const StorageID & where);
    Dependencies getDependencies(const StorageID & from) const;
-    ViewDependencies getViewDependencies() const;

    /// For Materialized and Live View
    void updateDependency(const StorageID & old_from, const StorageID & old_where,const StorageID & new_from, const StorageID & new_where);
@ -241,7 +241,6 @@ private:
    };
    using TablesMarkedAsDropped = std::list<TableMarkedAsDropped>;

-    void loadMarkedAsDroppedTables();
    void dropTableDataTask();
    void dropTableFinally(const TableMarkedAsDropped & table);

--- a/src/Interpreters/InterpreterSystemQuery.cpp
+++ b/src/Interpreters/InterpreterSystemQuery.cpp
@ -21,6 +21,7 @@
 #include <Interpreters/PartLog.h>
 #include <Interpreters/QueryThreadLog.h>
 #include <Interpreters/QueryViewsLog.h>
+#include <Interpreters/SessionLog.h>
 #include <Interpreters/TraceLog.h>
 #include <Interpreters/TextLog.h>
 #include <Interpreters/MetricLog.h>
@ -420,7 +421,8 @@ BlockIO InterpreterSystemQuery::execute()
                [&] { if (auto asynchronous_metric_log = getContext()->getAsynchronousMetricLog()) asynchronous_metric_log->flush(true); },
                [&] { if (auto opentelemetry_span_log = getContext()->getOpenTelemetrySpanLog()) opentelemetry_span_log->flush(true); },
                [&] { if (auto query_views_log = getContext()->getQueryViewsLog()) query_views_log->flush(true); },
-                [&] { if (auto zookeeper_log = getContext()->getZooKeeperLog()) zookeeper_log->flush(true); }
+                [&] { if (auto zookeeper_log = getContext()->getZooKeeperLog()) zookeeper_log->flush(true); },
+                [&] { if (auto session_log = getContext()->getSessionLog()) session_log->flush(true); }
            );
            break;
        }
--- a/src/Interpreters/JIT/CHJIT.cpp
+++ b/src/Interpreters/JIT/CHJIT.cpp
@ -2,6 +2,10 @@

 #if USE_EMBEDDED_COMPILER

+#include <sys/mman.h>
+
+#include <boost/noncopyable.hpp>
+
 #include <llvm/Analysis/TargetTransformInfo.h>
 #include <llvm/IR/BasicBlock.h>
 #include <llvm/IR/DataLayout.h>
@ -22,7 +26,10 @@
 #include <llvm/Transforms/IPO/PassManagerBuilder.h>
 #include <llvm/Support/SmallVectorMemoryBuffer.h>

+#include <common/getPageSize.h>
 #include <Common/Exception.h>
+#include <Common/formatReadable.h>
+

 namespace DB
 {
@ -31,6 +38,8 @@ namespace ErrorCodes
 {
    extern const int CANNOT_COMPILE_CODE;
    extern const int LOGICAL_ERROR;
+    extern const int CANNOT_ALLOCATE_MEMORY;
+    extern const int CANNOT_MPROTECT;
 }

 /** Simple module to object file compiler.
@ -80,6 +89,161 @@ private:
    llvm::TargetMachine & target_machine;
 };

+/** Arena that allocates all memory in blocks whose size is a multiple of the system page size.
+  * All allocated pages can be protected with protection_flags using the protect method.
+  * During destruction the protection of all allocated pages is reset to read/write before the blocks are freed.
+  */
+class PageArena : private boost::noncopyable
+{
+public:
+    PageArena() : page_size(::getPageSize()) {}
+
+    /// Returns a pointer to `size` bytes aligned to `alignment`.
+    /// The memory comes from the first page block with enough free space; a new block is allocated if none fits.
+    char * allocate(size_t size, size_t alignment)
+    {
+        /** First check if in some allocated page block there is enough free memory to make the allocation.
+          * If there is no such block, create it and then allocate from it.
+          */
+
+        for (size_t i = 0; i < page_blocks.size(); ++i)
+        {
+            char * result = tryAllocateFromPageBlockWithIndex(size, alignment, i);
+            if (result)
+                return result;
+        }
+
+        allocateNextPageBlock(size);
+        size_t allocated_page_index = page_blocks.size() - 1;
+        char * result = tryAllocateFromPageBlockWithIndex(size, alignment, allocated_page_index);
+        assert(result);
+
+        return result;
+    }
+
+    /// Total size in bytes of all page blocks obtained so far (not the sum of the sizes requested through allocate).
+    inline size_t getAllocatedSize() const { return allocated_size; }
+
+    inline size_t getPageSize() const { return page_size; }
+
+    ~PageArena()
+    {
+        /// Make the pages readable and writable again before returning them to the allocator.
+        protect(PROT_READ | PROT_WRITE);
+
+        for (auto & page_block : page_blocks)
+            free(page_block.base());
+    }
+
+    /// Applies `protection_flags` (PROT_* values for mprotect) to every page block of the arena.
+    /// Throws CANNOT_MPROTECT if mprotect fails.
+    void protect(int protection_flags)
+    {
+        /** The code is partially based on the LLVM codebase
+          * The LLVM Project is under the Apache License v2.0 with LLVM Exceptions.
+          */
+
+#    if defined(__NetBSD__) && defined(PROT_MPROTECT)
+        protection_flags |= PROT_MPROTECT(PROT_READ | PROT_WRITE | PROT_EXEC);
+#    endif
+
+        const bool protection_is_executable = (protection_flags & PROT_EXEC);
+
+        for (const auto & block : page_blocks)
+        {
+            /// Tracked per block: every block that becomes executable needs its own instruction cache invalidation,
+            /// so the flag must not leak from one loop iteration into the next.
+            bool invalidate_cache = protection_is_executable;
+
+#    if defined(__arm__) || defined(__aarch64__)
+            /// Certain ARM implementations treat icache clear instruction as a memory read,
+            /// and CPU segfaults on trying to clear cache on !PROT_READ page.
+            /// Therefore we need to temporarily add PROT_READ for the sake of flushing the instruction caches.
+            if (invalidate_cache && !(protection_flags & PROT_READ))
+            {
+                int res = mprotect(block.base(), block.blockSize(), protection_flags | PROT_READ);
+                if (res != 0)
+                    throwFromErrno("Cannot mprotect memory region", ErrorCodes::CANNOT_MPROTECT);
+
+                llvm::sys::Memory::InvalidateInstructionCache(block.base(), block.blockSize());
+                /// The cache of this block is already flushed, do not flush it again below.
+                invalidate_cache = false;
+            }
+#    endif
+            int res = mprotect(block.base(), block.blockSize(), protection_flags);
+            if (res != 0)
+                throwFromErrno("Cannot mprotect memory region", ErrorCodes::CANNOT_MPROTECT);
+
+            if (invalidate_cache)
+                llvm::sys::Memory::InvalidateInstructionCache(block.base(), block.blockSize());
+        }
+    }
+
+private:
+    /// A contiguous, page-aligned region consisting of `pages_size` pages of `page_size` bytes each.
+    struct PageBlock
+    {
+    public:
+        PageBlock(void * pages_base_, size_t pages_size_, size_t page_size_)
+            : pages_base(pages_base_), pages_size(pages_size_), page_size(page_size_)
+        {
+        }
+
+        inline void * base() const { return pages_base; }
+        inline size_t pagesSize() const { return pages_size; }
+        inline size_t pageSize() const { return page_size; }
+        inline size_t blockSize() const { return pages_size * page_size; }
+
+    private:
+        void * pages_base;
+        size_t pages_size;
+        size_t page_size;
+    };
+
+    std::vector<PageBlock> page_blocks;
+
+    /// Number of bytes already handed out from the page block with the same index in page_blocks.
+    std::vector<size_t> page_blocks_allocated_size;
+
+    size_t page_size = 0;
+
+    size_t allocated_size = 0;
+
+    /// Tries to carve `size` bytes aligned to `alignment` out of the free tail of the given page block.
+    /// Returns nullptr if the block does not have enough free space.
+    char * tryAllocateFromPageBlockWithIndex(size_t size, size_t alignment, size_t page_block_index)
+    {
+        assert(page_block_index < page_blocks.size());
+        auto & pages_block = page_blocks[page_block_index];
+
+        size_t block_size = pages_block.blockSize();
+        size_t & block_allocated_size = page_blocks_allocated_size[page_block_index];
+        size_t block_free_size = block_size - block_allocated_size;
+
+        uint8_t * pages_start = static_cast<uint8_t *>(pages_block.base());
+        void * pages_offset = pages_start + block_allocated_size;
+
+        auto * result = std::align(alignment, size, pages_offset, block_free_size);
+
+        if (result)
+        {
+            /// Account for the padding skipped by std::align as well as for the allocation itself.
+            block_allocated_size = reinterpret_cast<uint8_t *>(result) - pages_start;
+            block_allocated_size += size;
+
+            return static_cast<char *>(result);
+        }
+        else
+        {
+            return nullptr;
+        }
+    }
+
+    /// Allocates a new page-aligned block of ((size / page_size) + 1) * 2 pages and appends it to page_blocks.
+    /// Throws CANNOT_ALLOCATE_MEMORY if posix_memalign fails.
+    void allocateNextPageBlock(size_t size)
+    {
+        size_t pages_to_allocate_size = ((size / page_size) + 1) * 2;
+        size_t allocate_size = page_size * pages_to_allocate_size;
+
+        void * buf = nullptr;
+        int res = posix_memalign(&buf, page_size, allocate_size);
+
+        /// posix_memalign reports the error through its return value, so it is passed on explicitly.
+        if (res != 0)
+            throwFromErrno(
+                fmt::format("Cannot allocate memory (posix_memalign) alignment {} size {}.", page_size, ReadableSize(allocate_size)),
+                ErrorCodes::CANNOT_ALLOCATE_MEMORY,
+                res);
+
+        page_blocks.emplace_back(buf, pages_to_allocate_size, page_size);
+        page_blocks_allocated_size.emplace_back(0);
+
+        allocated_size += allocate_size;
+    }
+};
+
 // class AssemblyPrinter
 // {
 // public:
@ -104,46 +268,43 @@ private:

 /** MemoryManager for module.
  * Keep total allocated size during RuntimeDyld linker execution.
-  * Actual compiled code memory is stored in llvm::SectionMemoryManager member, we cannot use ZeroBase optimization here
-  * because it is required for llvm::SectionMemoryManager::MemoryMapper to live longer than llvm::SectionMemoryManager.
  */
-class JITModuleMemoryManager
+class JITModuleMemoryManager : public llvm::RTDyldMemoryManager
 {
-    class DefaultMMapper final : public llvm::SectionMemoryManager::MemoryMapper
-    {
-    public:
-        llvm::sys::MemoryBlock allocateMappedMemory(
-            llvm::SectionMemoryManager::AllocationPurpose Purpose [[maybe_unused]],
-            size_t NumBytes,
-            const llvm::sys::MemoryBlock * const NearBlock,
-            unsigned Flags,
-            std::error_code & EC) override
-        {
-            auto allocated_memory_block = llvm::sys::Memory::allocateMappedMemory(NumBytes, NearBlock, Flags, EC);
-            allocated_size += allocated_memory_block.allocatedSize();
-            return allocated_memory_block;
-        }
-
-        std::error_code protectMappedMemory(const llvm::sys::MemoryBlock & Block, unsigned Flags) override
-        {
-            return llvm::sys::Memory::protectMappedMemory(Block, Flags);
-        }
-
-        std::error_code releaseMappedMemory(llvm::sys::MemoryBlock & M) override { return llvm::sys::Memory::releaseMappedMemory(M); }
-
-        size_t allocated_size = 0;
-    };
-
 public:
-    JITModuleMemoryManager() : manager(&mmaper) { }

-    inline size_t getAllocatedSize() const { return mmaper.allocated_size; }
+    /// Code sections are placed into the arena that finalizeMemory makes read + execute.
+    uint8_t * allocateCodeSection(uintptr_t size, unsigned alignment, unsigned, llvm::StringRef) override
+    {
+        return reinterpret_cast<uint8_t *>(ex_page_arena.allocate(size, alignment));
+    }

-    inline llvm::SectionMemoryManager & getManager() { return manager; }
+    /// Read-only data goes into the arena that finalizeMemory makes read-only, all other data into the read-write arena.
+    uint8_t * allocateDataSection(uintptr_t size, unsigned alignment, unsigned, llvm::StringRef, bool is_read_only) override
+    {
+        if (is_read_only)
+            return reinterpret_cast<uint8_t *>(ro_page_arena.allocate(size, alignment));
+        else
+            return reinterpret_cast<uint8_t *>(rw_page_arena.allocate(size, alignment));
+    }
+
+    /// Applies the final page protections after linking. Failures are reported by exceptions thrown from protect.
+    /// The LLVM memory manager contract is "returns true if an error occurred", so success is reported as false.
+    bool finalizeMemory(std::string *) override
+    {
+        ro_page_arena.protect(PROT_READ);
+        ex_page_arena.protect(PROT_READ | PROT_EXEC);
+        return false;
+    }
+
+    /// Total size of the page blocks held by all three arenas.
+    inline size_t allocatedSize() const
+    {
+        size_t data_size = rw_page_arena.getAllocatedSize() + ro_page_arena.getAllocatedSize();
+        size_t code_size = ex_page_arena.getAllocatedSize();
+
+        return data_size + code_size;
+    }

 private:
-    DefaultMMapper mmaper;
-    llvm::SectionMemoryManager manager;
+    PageArena rw_page_arena;
+    PageArena ro_page_arena;
+    PageArena ex_page_arena;
 };

 class JITSymbolResolver : public llvm::LegacyJITSymbolResolver
@ -249,12 +410,12 @@ CHJIT::CompiledModule CHJIT::compileModule(std::unique_ptr<llvm::Module> module)
    }

    std::unique_ptr<JITModuleMemoryManager> module_memory_manager = std::make_unique<JITModuleMemoryManager>();
-    llvm::RuntimeDyld dynamic_linker = {module_memory_manager->getManager(), *symbol_resolver};
+    llvm::RuntimeDyld dynamic_linker = {*module_memory_manager, *symbol_resolver};

    std::unique_ptr<llvm::RuntimeDyld::LoadedObjectInfo> linked_object = dynamic_linker.loadObject(*object.get());

    dynamic_linker.resolveRelocations();
-    module_memory_manager->getManager().finalizeMemory();
+    module_memory_manager->finalizeMemory(nullptr);

    CompiledModule compiled_module;

@ -275,7 +436,7 @@ CHJIT::CompiledModule CHJIT::compileModule(std::unique_ptr<llvm::Module> module)
        compiled_module.function_name_to_symbol.emplace(std::move(function_name), jit_symbol_address);
    }

-    compiled_module.size = module_memory_manager->getAllocatedSize();
+    compiled_module.size = module_memory_manager->allocatedSize();
    compiled_module.identifier = current_module_key;

    module_identifier_to_memory_manager[current_module_key] = std::move(module_memory_manager);
--- a/src/Interpreters/LogicalExpressionsOptimizer.cpp
+++ b/src/Interpreters/LogicalExpressionsOptimizer.cpp
@ -225,22 +225,19 @@ void LogicalExpressionsOptimizer::addInExpression(const DisjunctiveEqualityChain

    /// 1. Create a new IN expression based on information from the OR-chain.

-    /// Construct a list of literals `x1, ..., xN` from the string `expr = x1 OR ... OR expr = xN`
-    ASTPtr value_list = std::make_shared<ASTExpressionList>();
+    /// Construct a tuple of literals `x1, ..., xN` from the string `expr = x1 OR ... OR expr = xN`
+
+    Tuple tuple;
+    tuple.reserve(equality_functions.size());
+
    for (const auto * function : equality_functions)
    {
        const auto & operands = getFunctionOperands(function);
-        value_list->children.push_back(operands[1]);
+        tuple.push_back(operands[1]->as<ASTLiteral>()->value);
    }

    /// Sort the literals so that they are specified in the same order in the IN expression.
-    /// Otherwise, they would be specified in the order of the ASTLiteral addresses, which is nondeterministic.
-    std::sort(value_list->children.begin(), value_list->children.end(), [](const DB::ASTPtr & lhs, const DB::ASTPtr & rhs)
-    {
-        const auto * val_lhs = lhs->as<ASTLiteral>();
-        const auto * val_rhs = rhs->as<ASTLiteral>();
-        return val_lhs->value < val_rhs->value;
-    });
+    std::sort(tuple.begin(), tuple.end());

    /// Get the expression `expr` from the chain `expr = x1 OR ... OR expr = xN`
    ASTPtr equals_expr_lhs;
@ -250,14 +247,11 @@ void LogicalExpressionsOptimizer::addInExpression(const DisjunctiveEqualityChain
        equals_expr_lhs = operands[0];
    }

-    auto tuple_function = std::make_shared<ASTFunction>();
-    tuple_function->name = "tuple";
-    tuple_function->arguments = value_list;
-    tuple_function->children.push_back(tuple_function->arguments);
+    auto tuple_literal = std::make_shared<ASTLiteral>(std::move(tuple));

    ASTPtr expression_list = std::make_shared<ASTExpressionList>();
    expression_list->children.push_back(equals_expr_lhs);
-    expression_list->children.push_back(tuple_function);
+    expression_list->children.push_back(tuple_literal);

    /// Construct the expression `expr IN (x1, ..., xN)`
    auto in_function = std::make_shared<ASTFunction>();
--- a/src/Interpreters/Session.cpp
+++ b/src/Interpreters/Session.cpp
@ -8,6 +8,7 @@
 #include <Common/ThreadPool.h>
 #include <Common/setThreadName.h>
 #include <Interpreters/Context.h>
+#include <Interpreters/SessionLog.h>

 #include <atomic>
 #include <condition_variable>
@ -241,7 +242,8 @@ void Session::shutdownNamedSessions()


 Session::Session(const ContextPtr & global_context_, ClientInfo::Interface interface_)
-    : global_context(global_context_)
+    : session_id(UUIDHelpers::generateV4()),
+      global_context(global_context_)
 {
    prepared_client_info.emplace();
    prepared_client_info->interface = interface_;
@ -254,6 +256,14 @@ Session::~Session()
    /// Early release a NamedSessionData.
    if (named_session)
        named_session->release();
+
+    if (notified_session_log_about_login)
+    {
+        // must have been set in makeQueryContext or makeSessionContext
+        assert(user);
+        if (auto session_log = getSessionLog())
+            session_log->addLogOut(session_id, user->getName(), getClientInfo());
+    }
 }

 Authentication::Type Session::getAuthenticationType(const String & user_name) const
@ -261,6 +271,21 @@ Authentication::Type Session::getAuthenticationType(const String & user_name) co
    return global_context->getAccessControlManager().read<User>(user_name)->authentication.getType();
 }

+/// Same as getAuthenticationType, but if the lookup throws an Exception, a LoginFailure event
+/// is added to the session log (when one is available) before the exception is rethrown.
+Authentication::Type Session::getAuthenticationTypeOrLogInFailure(const String & user_name) const
+{
+    try
+    {
+        return getAuthenticationType(user_name);
+    }
+    catch (const Exception & failure)
+    {
+        auto log = getSessionLog();
+        if (log)
+            log->addLoginFailure(session_id, getClientInfo(), user_name, failure);
+
+        throw;
+    }
+}
+
 Authentication::Digest Session::getPasswordDoubleSHA1(const String & user_name) const
 {
    return global_context->getAccessControlManager().read<User>(user_name)->authentication.getPasswordDoubleSHA1();
@ -280,7 +305,16 @@ void Session::authenticate(const Credentials & credentials_, const Poco::Net::So
    if ((address == Poco::Net::SocketAddress{}) && (prepared_client_info->interface == ClientInfo::Interface::LOCAL))
        address = Poco::Net::SocketAddress{"127.0.0.1", 0};

-    user_id = global_context->getAccessControlManager().login(credentials_, address.host());
+    try
+    {
+        user_id = global_context->getAccessControlManager().login(credentials_, address.host());
+    }
+    catch (const Exception & e)
+    {
+        if (auto session_log = getSessionLog())
+            session_log->addLoginFailure(session_id, *prepared_client_info, credentials_.getUserName(), e);
+        throw;
+    }

    prepared_client_info->current_user = credentials_.getUserName();
    prepared_client_info->current_address = address;
@ -330,7 +364,7 @@ ContextMutablePtr Session::makeSessionContext()
    return session_context;
 }

-ContextMutablePtr Session::makeSessionContext(const String & session_id_, std::chrono::steady_clock::duration timeout_, bool session_check_)
+ContextMutablePtr Session::makeSessionContext(const String & session_name_, std::chrono::steady_clock::duration timeout_, bool session_check_)
 {
    if (session_context)
        throw Exception("Session context already exists", ErrorCodes::LOGICAL_ERROR);
@ -342,7 +376,7 @@ ContextMutablePtr Session::makeSessionContext(const String & session_id_, std::c
    std::shared_ptr<NamedSessionData> new_named_session;
    bool new_named_session_created = false;
    std::tie(new_named_session, new_named_session_created)
-        = NamedSessionsStorage::instance().acquireSession(global_context, user_id.value_or(UUID{}), session_id_, timeout_, session_check_);
+        = NamedSessionsStorage::instance().acquireSession(global_context, user_id.value_or(UUID{}), session_name_, timeout_, session_check_);

    auto new_session_context = new_named_session->context;
    new_session_context->makeSessionContext();
@ -359,8 +393,7 @@ ContextMutablePtr Session::makeSessionContext(const String & session_id_, std::c
        new_session_context->setUser(*user_id);

    /// Session context is ready.
-    session_context = new_session_context;
-    session_id = session_id_;
+    session_context = std::move(new_session_context);
    named_session = new_named_session;
    named_session_created = new_named_session_created;
    user = session_context->getUser();
@ -378,6 +411,13 @@ ContextMutablePtr Session::makeQueryContext(ClientInfo && query_client_info) con
    return makeQueryContextImpl(nullptr, &query_client_info);
 }

+/// Returns the session log of the server, or nullptr when session_log is disabled on the server.
+std::shared_ptr<SessionLog> Session::getSessionLog() const
+{
+    /// The global context is the source because it outlives the Session and is always available.
+    auto log = global_context->getSessionLog();
+    return log;
+}
+
 ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_to_copy, ClientInfo * client_info_to_move) const
 {
    /// We can create a query context either from a session context or from a global context.
@ -425,7 +465,21 @@ ContextMutablePtr Session::makeQueryContextImpl(const ClientInfo * client_info_t
    query_context_created = true;
    user = query_context->getUser();

+    if (!notified_session_log_about_login)
+    {
+        if (auto session_log = getSessionLog())
+        {
+            session_log->addLoginSuccess(
+                    session_id,
+                    named_session ? std::optional<std::string>(named_session->key.second) : std::nullopt,
+                    *query_context);
+
+            notified_session_log_about_login = true;
+        }
+    }
+
    return query_context;
 }

 }
+
--- a/src/Interpreters/Session.h
+++ b/src/Interpreters/Session.h
@ -19,6 +19,7 @@ struct NamedSessionData;
 class NamedSessionsStorage;
 struct User;
 using UserPtr = std::shared_ptr<const User>;
+class SessionLog;

 /** Represents user-session from the server perspective,
 *  basically it is just a smaller subset of Context API, simplifies Context management.
@ -41,6 +42,8 @@ public:
    /// Provides information about the authentication type of a specified user.
    Authentication::Type getAuthenticationType(const String & user_name) const;
    Authentication::Digest getPasswordDoubleSHA1(const String & user_name) const;
+    /// Same as getAuthenticationType, but adds LoginFailure event in case of error.
+    Authentication::Type getAuthenticationTypeOrLogInFailure(const String & user_name) const;

    /// Sets the current user, checks the credentials and that the specified address is allowed to connect from.
    /// The function throws an exception if there is no such user or password is wrong.
@ -54,7 +57,7 @@ public:
    /// Makes a session context, can be used one or zero times.
    /// The function also assigns an user to this context.
    ContextMutablePtr makeSessionContext();
-    ContextMutablePtr makeSessionContext(const String & session_id_, std::chrono::steady_clock::duration timeout_, bool session_check_);
+    ContextMutablePtr makeSessionContext(const String & session_name_, std::chrono::steady_clock::duration timeout_, bool session_check_);
    ContextMutablePtr sessionContext() { return session_context; }
    ContextPtr sessionContext() const { return session_context; }

@ -66,8 +69,11 @@ public:
    ContextMutablePtr makeQueryContext(ClientInfo && query_client_info) const;

 private:
+    std::shared_ptr<SessionLog> getSessionLog() const;
    ContextMutablePtr makeQueryContextImpl(const ClientInfo * client_info_to_copy, ClientInfo * client_info_to_move) const;

+    mutable bool notified_session_log_about_login = false;
+    const UUID session_id;
    const ContextPtr global_context;

    /// ClientInfo that will be copied to a session context when it's created.
@ -79,9 +85,9 @@ private:
    ContextMutablePtr session_context;
    mutable bool query_context_created = false;

-    String session_id;
    std::shared_ptr<NamedSessionData> named_session;
    bool named_session_created = false;
 };

 }
+
--- a/src/Interpreters/SessionLog.cpp
+++ b/src/Interpreters/SessionLog.cpp
@ -0,0 +1,261 @@
+#include <Interpreters/SessionLog.h>
+
+#include <Access/ContextAccess.h>
+#include <Access/User.h>
+#include <Access/EnabledRolesInfo.h>
+#include <Core/Settings.h>
+#include <DataTypes/DataTypeArray.h>
+#include <DataTypes/DataTypeDateTime64.h>
+#include <DataTypes/DataTypeDate.h>
+#include <DataTypes/DataTypeNullable.h>
+#include <DataTypes/DataTypeDateTime.h>
+#include <DataTypes/DataTypeEnum.h>
+#include <DataTypes/DataTypeTuple.h>
+#include <DataTypes/DataTypeFactory.h>
+#include <DataTypes/DataTypeLowCardinality.h>
+#include <DataTypes/DataTypeString.h>
+#include <DataTypes/DataTypesNumber.h>
+#include <DataTypes/DataTypeUUID.h>
+#include <Common/IPv6ToBinary.h>
+#include <Columns/ColumnArray.h>
+#include <Columns/ColumnString.h>
+#include <Columns/ColumnTuple.h>
+#include <Access/SettingsProfilesInfo.h>
+
+#include <cassert>
+
+namespace
+{
+using namespace DB;
+
+/// Number of whole microseconds between the system_clock epoch and `timepoint`.
+inline DateTime64 time_in_microseconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
+{
+    const auto since_epoch = timepoint.time_since_epoch();
+    return std::chrono::duration_cast<std::chrono::microseconds>(since_epoch).count();
+}
+
+/// Number of whole seconds between the system_clock epoch and `timepoint`.
+inline time_t time_in_seconds(std::chrono::time_point<std::chrono::system_clock> timepoint)
+{
+    const auto since_epoch = timepoint.time_since_epoch();
+    return std::chrono::duration_cast<std::chrono::seconds>(since_epoch).count();
+}
+
+/// Current system_clock time as a pair {seconds, microseconds}; both values come from one clock reading.
+auto eventTime()
+{
+    const auto now = std::chrono::system_clock::now();
+    const auto seconds = time_in_seconds(now);
+    const auto microseconds = time_in_microseconds(now);
+
+    return std::make_pair(seconds, microseconds);
+}
+
+using AuthType = Authentication::Type;
+using Interface = ClientInfo::Interface;
+
+/// Appends one array row containing all strings of `data` to `column`.
+/// `column` is cast to ColumnArray with typeid_cast, so it has to be a ColumnArray.
+void fillColumnArray(const Strings & data, IColumn & column)
+{
+    auto & array = typeid_cast<ColumnArray &>(column);
+    auto & data_col = array.getData();
+    for (const auto & name : data)
+        data_col.insertData(name.data(), name.size());
+
+    /// The new row ends data.size() elements after the end of the previous row.
+    /// NOTE(review): offsets.back() is read even when offsets is empty - presumably valid for this offsets container; confirm.
+    auto & offsets = array.getOffsets();
+    offsets.push_back(offsets.back() + data.size());
+}
+
+}
+
+namespace DB
+{
+
+/// Creates an element of the given type for the given session and stamps it with the current time.
+SessionLogElement::SessionLogElement(const UUID & session_id_, Type type_)
+    : session_id(session_id_)
+    , type(type_)
+{
+    auto [seconds, microseconds] = eventTime();
+    event_time = seconds;
+    event_time_microseconds = microseconds;
+}
+
+/// Describes the columns of the session log table.
+/// appendToBlock fills the columns positionally, in this same order.
+NamesAndTypesList SessionLogElement::getNamesAndTypes()
+{
+    /// The locals that are handed over with std::move below are deliberately not const:
+    /// std::move applied to a const object silently degrades to a copy.
+    auto event_type = std::make_shared<DataTypeEnum8>(
+        DataTypeEnum8::Values
+        {
+            {"LoginFailure",           static_cast<Int8>(SESSION_LOGIN_FAILURE)},
+            {"LoginSuccess",           static_cast<Int8>(SESSION_LOGIN_SUCCESS)},
+            {"Logout",                 static_cast<Int8>(SESSION_LOGOUT)}
+        });
+
+#define AUTH_TYPE_NAME_AND_VALUE(v) std::make_pair(Authentication::TypeInfo::get(v).raw_name, static_cast<Int8>(v))
+    auto identified_with_column = std::make_shared<DataTypeEnum8>(
+        DataTypeEnum8::Values
+        {
+            AUTH_TYPE_NAME_AND_VALUE(AuthType::NO_PASSWORD),
+            AUTH_TYPE_NAME_AND_VALUE(AuthType::PLAINTEXT_PASSWORD),
+            AUTH_TYPE_NAME_AND_VALUE(AuthType::SHA256_PASSWORD),
+            AUTH_TYPE_NAME_AND_VALUE(AuthType::DOUBLE_SHA1_PASSWORD),
+            AUTH_TYPE_NAME_AND_VALUE(AuthType::LDAP),
+            AUTH_TYPE_NAME_AND_VALUE(AuthType::KERBEROS)
+        });
+#undef AUTH_TYPE_NAME_AND_VALUE
+
+    /// NOTE(review): ClientInfo::Interface::LOCAL is used in Session.cpp but has no name in this enum -
+    /// confirm whether sessions with the LOCAL interface can be written to this log.
+    auto interface_type_column = std::make_shared<DataTypeEnum8>(
+        DataTypeEnum8::Values
+        {
+            {"TCP",                    static_cast<Int8>(Interface::TCP)},
+            {"HTTP",                   static_cast<Int8>(Interface::HTTP)},
+            {"gRPC",                   static_cast<Int8>(Interface::GRPC)},
+            {"MySQL",                  static_cast<Int8>(Interface::MYSQL)},
+            {"PostgreSQL",             static_cast<Int8>(Interface::POSTGRESQL)}
+        });
+
+    /// Shared by several columns below, hence never moved from.
+    const auto lc_string_datatype = std::make_shared<DataTypeLowCardinality>(std::make_shared<DataTypeString>());
+
+    auto changed_settings_type_column = std::make_shared<DataTypeArray>(
+        std::make_shared<DataTypeTuple>(
+            DataTypes({
+                // setting name
+                lc_string_datatype,
+                // value
+                std::make_shared<DataTypeString>()
+            })));
+
+    return
+    {
+        {"type", std::move(event_type)},
+        {"session_id", std::make_shared<DataTypeUUID>()},
+        {"session_name", std::make_shared<DataTypeString>()},
+        {"event_date", std::make_shared<DataTypeDate>()},
+        {"event_time", std::make_shared<DataTypeDateTime>()},
+        {"event_time_microseconds", std::make_shared<DataTypeDateTime64>(6)},
+
+        {"user", std::make_shared<DataTypeString>()},
+        {"auth_type", std::move(identified_with_column)},
+
+        {"profiles", std::make_shared<DataTypeArray>(lc_string_datatype)},
+        {"roles", std::make_shared<DataTypeArray>(lc_string_datatype)},
+        {"changed_settings", std::move(changed_settings_type_column)},
+
+        {"client_address", DataTypeFactory::instance().get("IPv6")},
+        {"client_port", std::make_shared<DataTypeUInt16>()},
+        {"interface", std::move(interface_type_column)},
+
+        {"client_hostname", std::make_shared<DataTypeString>()},
+        {"client_name", std::make_shared<DataTypeString>()},
+        {"client_revision", std::make_shared<DataTypeUInt32>()},
+        {"client_version_major", std::make_shared<DataTypeUInt32>()},
+        {"client_version_minor", std::make_shared<DataTypeUInt32>()},
+        {"client_version_patch", std::make_shared<DataTypeUInt32>()},
+
+        {"failure_reason", std::make_shared<DataTypeString>()},
+    };
+}
+
+/// Appends this element as one row to `columns`.
+/// The columns are filled positionally, in the order in which getNamesAndTypes lists them.
+void SessionLogElement::appendToBlock(MutableColumns & columns) const
+{
+    assert(type >= SESSION_LOGIN_FAILURE && type <= SESSION_LOGOUT);
+    assert(user_identified_with >= Authentication::Type::NO_PASSWORD && user_identified_with <= Authentication::Type::MAX_TYPE);
+
+    size_t i = 0;
+
+    columns[i++]->insert(type);
+    columns[i++]->insert(session_id);
+    columns[i++]->insert(session_name);
+    columns[i++]->insert(static_cast<DayNum>(DateLUT::instance().toDayNum(event_time).toUnderType()));
+    columns[i++]->insert(event_time);
+    columns[i++]->insert(event_time_microseconds);
+
+    columns[i++]->insert(user);
+    columns[i++]->insert(user_identified_with);
+
+    fillColumnArray(profiles, *columns[i++]);
+    fillColumnArray(roles, *columns[i++]);
+
+    {
+        /// changed_settings is an Array(Tuple(name, value)): fill both tuple sub-columns, then close the array row.
+        auto & changed_settings_array_col = assert_cast<ColumnArray &>(*columns[i++]);
+        auto & changed_settings_tuple_col = assert_cast<ColumnTuple &>(changed_settings_array_col.getData());
+        auto & names_col = *changed_settings_tuple_col.getColumnPtr(0)->assumeMutable();
+        auto & values_col = assert_cast<ColumnString &>(*changed_settings_tuple_col.getColumnPtr(1)->assumeMutable());
+
+        for (const auto & kv : changed_settings)
+        {
+            names_col.insert(kv.first);
+            values_col.insert(kv.second);
+        }
+
+        /// The offset of the new row is the total number of tuples stored so far.
+        auto & offsets = changed_settings_array_col.getOffsets();
+        offsets.push_back(changed_settings_tuple_col.size());
+    }
+
+    columns[i++]->insertData(IPv6ToBinary(client_info.current_address.host()).data(), 16);
+    columns[i++]->insert(client_info.current_address.port());
+
+    columns[i++]->insert(client_info.interface);
+
+    columns[i++]->insertData(client_info.client_hostname.data(), client_info.client_hostname.length());
+    columns[i++]->insertData(client_info.client_name.data(), client_info.client_name.length());
+    /// The "client_revision" column is filled from client_tcp_protocol_version.
+    columns[i++]->insert(client_info.client_tcp_protocol_version);
+    columns[i++]->insert(client_info.client_version_major);
+    columns[i++]->insert(client_info.client_version_minor);
+    columns[i++]->insert(client_info.client_version_patch);
+
+    columns[i++]->insertData(auth_failure_reason.data(), auth_failure_reason.length());
+}
+
+/// Adds a LoginSuccess event for `session_id`: the user, authentication type, current roles,
+/// profiles and changed settings are taken from `login_context`.
+void SessionLog::addLoginSuccess(const UUID & session_id, std::optional<String> session_name, const Context & login_context)
+{
+    const auto access = login_context.getAccess();
+    const auto & settings = login_context.getSettingsRef();
+    const auto & client_info = login_context.getClientInfo();
+
+    DB::SessionLogElement element(session_id, SESSION_LOGIN_SUCCESS);
+    element.client_info = client_info;
+
+    {
+        const auto user = access->getUser();
+        element.user = user->getName();
+        element.user_identified_with = user->authentication.getType();
+        element.external_auth_server = user->authentication.getLDAPServerName();
+    }
+
+    if (session_name.has_value())
+    {
+        element.session_name = *session_name;
+    }
+
+    if (const auto roles_info = access->getRolesInfo())
+    {
+        element.roles = roles_info->getCurrentRolesNames();
+    }
+
+    if (const auto profile_info = access->getDefaultProfileInfo())
+    {
+        element.profiles = profile_info->getProfileNames();
+    }
+
+    for (const auto & changed : settings.allChanged())
+    {
+        element.changed_settings.emplace_back(changed.getName(), changed.getValueString());
+    }
+
+    add(element);
+}
+
+/// Adds a LoginFailure event for `session_id`; the message of `reason` is stored as the failure reason.
+/// The authentication type of the event is always recorded as NO_PASSWORD.
+void SessionLog::addLoginFailure(
+        const UUID & session_id,
+        const ClientInfo & info,
+        const String & user,
+        const Exception & reason)
+{
+    SessionLogElement element(session_id, SESSION_LOGIN_FAILURE);
+
+    element.client_info = info;
+    element.user = user;
+    element.user_identified_with = Authentication::Type::NO_PASSWORD;
+    element.auth_failure_reason = reason.message();
+
+    add(element);
+}
+
+/// Adds a Logout event for `session_id` with the given user name and client info.
+void SessionLog::addLogOut(const UUID & session_id, const String & user, const ClientInfo & client_info)
+{
+    SessionLogElement element(session_id, SESSION_LOGOUT);
+    element.client_info = client_info;
+    element.user = user;
+
+    add(element);
+}
+
+}
--- a/src/Interpreters/SessionLog.h
+++ b/src/Interpreters/SessionLog.h
@ -0,0 +1,74 @@
+#pragma once
+
+#include <Interpreters/SystemLog.h>
+#include <Interpreters/ClientInfo.h>
+#include <Access/Authentication.h>
+
+namespace DB
+{
+
+enum SessionLogElementType : int8_t /// Kind of event stored in a session log row.
+{
+    SESSION_LOGIN_FAILURE = 0, /// Written by SessionLog::addLoginFailure.
+    SESSION_LOGIN_SUCCESS = 1, /// Written by SessionLog::addLoginSuccess.
+    SESSION_LOGOUT = 2, /// Written by SessionLog::addLogOut.
+};
+
+class ContextAccess;
+
+/** A struct which will be inserted as a row into the session_log table.
+  *
+  *  Allows to log information about user sessions:
+  * - auth attempts, auth result, auth method, etc.
+  * - log out events
+  */
+struct SessionLogElement
+{
+    using Type = SessionLogElementType;
+
+    SessionLogElement() = default;
+    SessionLogElement(const UUID & session_id_, Type type_); /// Also sets event_time and event_time_microseconds to the current time.
+    SessionLogElement(const SessionLogElement &) = default;
+    SessionLogElement & operator=(const SessionLogElement &) = default;
+    SessionLogElement(SessionLogElement &&) = default;
+    SessionLogElement & operator=(SessionLogElement &&) = default;
+
+    UUID session_id;
+
+    Type type = SESSION_LOGIN_FAILURE;
+
+    String session_name; /// Left empty unless addLoginSuccess receives a session name.
+    time_t event_time{};
+    Decimal64 event_time_microseconds{};
+
+    String user;
+    Authentication::Type user_identified_with = Authentication::Type::NO_PASSWORD;
+    String external_auth_server; /// Set by addLoginSuccess from the user's LDAP server name; appendToBlock does not write it to any column.
+    Strings roles; /// Names of the current roles, filled by addLoginSuccess.
+    Strings profiles; /// Profile names, filled by addLoginSuccess.
+    std::vector<std::pair<String, String>> changed_settings; /// Pairs of {setting name, value as string}.
+
+    ClientInfo client_info;
+    String auth_failure_reason; /// Exception message, filled by addLoginFailure.
+
+    static std::string name() { return "SessionLog"; }
+
+    static NamesAndTypesList getNamesAndTypes();
+    static NamesAndAliases getNamesAndAliases() { return {}; }
+
+    void appendToBlock(MutableColumns & columns) const; /// Appends this element as one row, in the column order of getNamesAndTypes.
+};
+
+
+/// Instead of typedef - to allow forward declaration.
+class SessionLog : public SystemLog<SessionLogElement>
+{
+    using SystemLog<SessionLogElement>::SystemLog; /// Inherit the constructors of the base class.
+
+public:
+    void addLoginSuccess(const UUID & session_id, std::optional<String> session_name, const Context & login_context); /// Adds a SESSION_LOGIN_SUCCESS element described by login_context.
+    void addLoginFailure(const UUID & session_id, const ClientInfo & info, const String & user, const Exception & reason); /// Adds a SESSION_LOGIN_FAILURE element carrying the message of reason.
+    void addLogOut(const UUID & session_id, const String & user, const ClientInfo & client_info); /// Adds a SESSION_LOGOUT element.
+};
+
+}
--- a/src/Interpreters/Set.cpp
+++ b/src/Interpreters/Set.cpp
@ -402,8 +402,8 @@ void Set::checkTypesEqual(size_t set_type_idx, const DataTypePtr & other_type) c
                        + data_types[set_type_idx]->getName() + " on the right", ErrorCodes::TYPE_MISMATCH);
 }

-MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector<KeyTuplePositionMapping> && index_mapping_)
-    : indexes_mapping(std::move(index_mapping_))
+MergeTreeSetIndex::MergeTreeSetIndex(const Columns & set_elements, std::vector<KeyTuplePositionMapping> && indexes_mapping_)
+    : has_all_keys(set_elements.size() == indexes_mapping_.size()), indexes_mapping(std::move(indexes_mapping_))
 {
    std::sort(indexes_mapping.begin(), indexes_mapping.end(),
        [](const KeyTuplePositionMapping & l, const KeyTuplePositionMapping & r)
@ -548,11 +548,11 @@ BoolMask MergeTreeSetIndex::checkInRange(const std::vector<Range> & key_ranges,
            break;
        }
    }
-    if (one_element_range)
+    if (one_element_range && has_all_keys)
    {
        /// Here we know that there is one element in range.
        /// The main difference with the normal case is that we can definitely say that
-        /// condition in this range always TRUE (can_be_false = 0) xor always FALSE (can_be_true = 0).
+        /// condition in this range is always TRUE (can_be_false = 0) or always FALSE (can_be_true = 0).

        /// Check if it's an empty range
        if (!left_included || !right_included)
--- a/src/Interpreters/Set.h
+++ b/src/Interpreters/Set.h
@ -208,7 +208,7 @@ public:
        std::vector<FunctionBasePtr> functions;
    };

-    MergeTreeSetIndex(const Columns & set_elements, std::vector<KeyTuplePositionMapping> && index_mapping_);
+    MergeTreeSetIndex(const Columns & set_elements, std::vector<KeyTuplePositionMapping> && indexes_mapping_);

    size_t size() const { return ordered_set.at(0)->size(); }

@ -217,6 +217,8 @@ public:
    BoolMask checkInRange(const std::vector<Range> & key_ranges, const DataTypes & data_types) const;

 private:
+    // If all arguments in tuple are key columns, we can optimize NOT IN when there is only one element.
+    bool has_all_keys;
    Columns ordered_set;
    std::vector<KeyTuplePositionMapping> indexes_mapping;

--- a/src/Interpreters/SystemLog.cpp
+++ b/src/Interpreters/SystemLog.cpp
@ -6,7 +6,7 @@
 #include <Interpreters/QueryLog.h>
 #include <Interpreters/QueryThreadLog.h>
 #include <Interpreters/QueryViewsLog.h>
-#include <Interpreters/SystemLog.h>
+#include <Interpreters/SessionLog.h>
 #include <Interpreters/TextLog.h>
 #include <Interpreters/TraceLog.h>
 #include <Interpreters/ZooKeeperLog.h>
@ -39,7 +39,13 @@ std::shared_ptr<TSystemLog> createSystemLog(
    const String & config_prefix)
 {
    if (!config.has(config_prefix))
+    {
+        LOG_DEBUG(&Poco::Logger::get("SystemLog"),
+                "Not creating {}.{} since corresponding section '{}' is missing from config",
+                default_database_name, default_table_name, config_prefix);
+
        return {};
+    }

    String database = config.getString(config_prefix + ".database", default_database_name);
    String table = config.getString(config_prefix + ".table", default_table_name);
@ -107,6 +113,7 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf
        "opentelemetry_span_log");
    query_views_log = createSystemLog<QueryViewsLog>(global_context, "system", "query_views_log", config, "query_views_log");
    zookeeper_log = createSystemLog<ZooKeeperLog>(global_context, "system", "zookeeper_log", config, "zookeeper_log");
+    session_log = createSystemLog<SessionLog>(global_context, "system", "session_log", config, "session_log");

    if (query_log)
        logs.emplace_back(query_log.get());
@ -130,6 +137,8 @@ SystemLogs::SystemLogs(ContextPtr global_context, const Poco::Util::AbstractConf
        logs.emplace_back(query_views_log.get());
    if (zookeeper_log)
        logs.emplace_back(zookeeper_log.get());
+    if (session_log)
+        logs.emplace_back(session_log.get());

    try
    {
--- a/Show More
+++ b/Show More
				`@ -0,0 +1 @@`
				`../../../tests/config/users.d/session_log_test.xml`