Merge branch 'master' of github.com:ClickHouse/ClickHouse into insert-cluster

This commit is contained in:
feng lv 2021-01-15 15:37:56 +00:00
commit dbb3c89b50
338 changed files with 18301 additions and 13401 deletions

View File

@ -112,11 +112,13 @@ static void writeSignalIDtoSignalPipe(int sig)
/** Signal handler for HUP / USR1 */
static void closeLogsSignalHandler(int sig, siginfo_t *, void *)
{
DENY_ALLOCATIONS_IN_SCOPE;
writeSignalIDtoSignalPipe(sig);
}
static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *)
{
DENY_ALLOCATIONS_IN_SCOPE;
writeSignalIDtoSignalPipe(sig);
}
@ -125,6 +127,7 @@ static void terminateRequestedSignalHandler(int sig, siginfo_t *, void *)
*/
static void signalHandler(int sig, siginfo_t * info, void * context)
{
DENY_ALLOCATIONS_IN_SCOPE;
auto saved_errno = errno; /// We must restore previous value of errno in signal handler.
char buf[signal_pipe_buf_size];

View File

@ -1,9 +1,9 @@
# These strings are autochanged from release_lib.sh:
SET(VERSION_REVISION 54444)
SET(VERSION_MAJOR 20)
SET(VERSION_MINOR 13)
SET(VERSION_REVISION 54445)
SET(VERSION_MAJOR 21)
SET(VERSION_MINOR 1)
SET(VERSION_PATCH 1)
SET(VERSION_GITHASH e581f9ccfc5c64867b0f488cce72412fd2966471)
SET(VERSION_DESCRIBE v20.13.1.1-prestable)
SET(VERSION_STRING 20.13.1.1)
SET(VERSION_GITHASH 667dd0cf0ccecdaa6f334177b7ece2f53bd196a1)
SET(VERSION_DESCRIBE v21.1.1.5646-prestable)
SET(VERSION_STRING 21.1.1.5646)
# end of autochange

View File

@ -32,12 +32,21 @@ if (CCACHE_FOUND AND NOT COMPILER_MATCHES_CCACHE)
if (CCACHE_VERSION VERSION_GREATER "3.2.0" OR NOT CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
message(STATUS "Using ${CCACHE_FOUND} ${CCACHE_VERSION}")
# 4+ ccache respect SOURCE_DATE_EPOCH (always includes it into the hash
# of the manifest) and debian will extract these from d/changelog, and
# makes cache of ccache unusable
# debian (debhelpers) sets the SOURCE_DATE_EPOCH environment variable, which is
# filled from debian/changelog or the current time.
#
# FIXME: once sloppiness is introduced for this, this can be removed.
if (CCACHE_VERSION VERSION_GREATER "4.0")
# - 4.0+ ccache always includes this environment variable into the hash
#   of the manifest, which does not allow reusing the previous cache,
# - 4.2+ ccache ignores SOURCE_DATE_EPOCH under time_macros sloppiness.
#
# So for:
# - 4.2+ time_macros sloppiness is used,
# - 4.0+ will ignore SOURCE_DATE_EPOCH environment variable.
if (CCACHE_VERSION VERSION_GREATER_EQUAL "4.2")
message(STATUS "Use time_macros sloppiness for ccache")
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_FOUND} --set-config=sloppiness=time_macros")
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "${CCACHE_FOUND} --set-config=sloppiness=time_macros")
elseif (CCACHE_VERSION VERSION_GREATER_EQUAL "4.0")
message(STATUS "Ignore SOURCE_DATE_EPOCH for ccache")
set_property (GLOBAL PROPERTY RULE_LAUNCH_COMPILE "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}")
set_property (GLOBAL PROPERTY RULE_LAUNCH_LINK "env -u SOURCE_DATE_EPOCH ${CCACHE_FOUND}")

View File

@ -1,5 +1,4 @@
# Freebsd: contrib/cppkafka/include/cppkafka/detail/endianness.h:53:23: error: 'betoh16' was not declared in this scope
if (NOT ARCH_ARM AND NOT OS_FREEBSD AND OPENSSL_FOUND)
if (NOT ARCH_ARM AND OPENSSL_FOUND)
option (ENABLE_RDKAFKA "Enable kafka" ${ENABLE_LIBRARIES})
elseif(ENABLE_RDKAFKA AND NOT OPENSSL_FOUND)
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't use librdkafka without SSL")

View File

@ -1,2 +1,2 @@
wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz
tar xJf MacOSX10.14.sdk.tar.xz --strip-components=1
wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz
tar xJf MacOSX10.15.sdk.tar.xz --strip-components=1

2
contrib/libcxx vendored

@ -1 +1 @@
Subproject commit 95650a0db4399ee871d5fd698ad12384fe9fa964
Subproject commit 8b80a151d12b98ffe2d0c22f7cec12c3b9ff88d7

View File

@ -5,6 +5,8 @@ set(LIBCXX_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libcxx)
set(SRCS
${LIBCXX_SOURCE_DIR}/src/algorithm.cpp
${LIBCXX_SOURCE_DIR}/src/any.cpp
${LIBCXX_SOURCE_DIR}/src/atomic.cpp
${LIBCXX_SOURCE_DIR}/src/barrier.cpp
${LIBCXX_SOURCE_DIR}/src/bind.cpp
${LIBCXX_SOURCE_DIR}/src/charconv.cpp
${LIBCXX_SOURCE_DIR}/src/chrono.cpp
@ -20,6 +22,7 @@ ${LIBCXX_SOURCE_DIR}/src/functional.cpp
${LIBCXX_SOURCE_DIR}/src/future.cpp
${LIBCXX_SOURCE_DIR}/src/hash.cpp
${LIBCXX_SOURCE_DIR}/src/ios.cpp
${LIBCXX_SOURCE_DIR}/src/ios.instantiations.cpp
${LIBCXX_SOURCE_DIR}/src/iostream.cpp
${LIBCXX_SOURCE_DIR}/src/locale.cpp
${LIBCXX_SOURCE_DIR}/src/memory.cpp
@ -28,6 +31,7 @@ ${LIBCXX_SOURCE_DIR}/src/mutex_destructor.cpp
${LIBCXX_SOURCE_DIR}/src/new.cpp
${LIBCXX_SOURCE_DIR}/src/optional.cpp
${LIBCXX_SOURCE_DIR}/src/random.cpp
${LIBCXX_SOURCE_DIR}/src/random_shuffle.cpp
${LIBCXX_SOURCE_DIR}/src/regex.cpp
${LIBCXX_SOURCE_DIR}/src/shared_mutex.cpp
${LIBCXX_SOURCE_DIR}/src/stdexcept.cpp

2
contrib/libcxxabi vendored

@ -1 +1 @@
Subproject commit 1ebc83af4c06dbcd56b4d166c1314a7d4c1173f9
Subproject commit df8f1e727dbc9e2bedf2282096fa189dc3fe0076

View File

@ -11,7 +11,6 @@ ${LIBCXXABI_SOURCE_DIR}/src/cxa_personality.cpp
${LIBCXXABI_SOURCE_DIR}/src/stdlib_exception.cpp
${LIBCXXABI_SOURCE_DIR}/src/abort_message.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_demangle.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_unexpected.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_exception.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_handlers.cpp
${LIBCXXABI_SOURCE_DIR}/src/cxa_exception_storage.cpp

View File

@ -83,7 +83,8 @@
#if (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ <= 101400)
#define _TTHREAD_EMULATE_TIMESPEC_GET_
#endif
#elif defined(__FreeBSD__)
#define HAVE_PTHREAD_SETNAME_FREEBSD 1
#else
// pthread_setname_gnu
#define HAVE_PTHREAD_SETNAME_GNU 1

4
debian/changelog vendored
View File

@ -1,5 +1,5 @@
clickhouse (20.13.1.1) unstable; urgency=low
clickhouse (21.1.0) unstable; urgency=low
* Modified source code
-- clickhouse-release <clickhouse-release@yandex-team.ru> Mon, 23 Nov 2020 10:29:24 +0300
-- Alexey Milovidov <milovidov@yandex-team.ru> Mon, 11 Jan 2021 03:51:08 +0300

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.13.1.*
ARG version=21.1.0
RUN apt-get update \
&& apt-get install --yes --no-install-recommends \

View File

@ -45,7 +45,8 @@
"name": "yandex/clickhouse-stateless-test",
"dependent": [
"docker/test/stateful",
"docker/test/coverage"
"docker/test/coverage",
"docker/test/unit"
]
},
"docker/test/stateless_pytest": {
@ -134,7 +135,9 @@
"name": "yandex/clickhouse-test-base",
"dependent": [
"docker/test/stateless",
"docker/test/stateless_pytest"
"docker/test/stateless_unbundled",
"docker/test/stateless_pytest",
"docker/test/integration/base"
]
},
"docker/packager/unbundled": {
@ -151,5 +154,9 @@
"docker/test/integration/kerberized_hadoop": {
"name": "yandex/clickhouse-kerberized-hadoop",
"dependent": []
},
"docker/test/sqlancer": {
"name": "yandex/clickhouse-sqlancer-test",
"dependent": []
}
}

View File

@ -82,7 +82,7 @@ RUN git clone https://github.com/tpoechtrager/cctools-port.git \
&& rm -rf cctools-port
# Download toolchain for Darwin
RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz
RUN wget -nv https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz
# Download toolchain for ARM
# It contains all required headers and libraries. Note that it's named as "gcc" but actually we are using clang for cross compiling.

View File

@ -3,7 +3,7 @@
set -x -e
mkdir -p build/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.14.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1
tar xJf MacOSX10.15.sdk.tar.xz -C build/cmake/toolchain/darwin-x86_64 --strip-components=1
mkdir -p build/cmake/toolchain/linux-aarch64
tar xJf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C build/cmake/toolchain/linux-aarch64 --strip-components=1

View File

@ -4,5 +4,5 @@ alpine-root/install/*
# docs (looks useless)
alpine-root/usr/share/doc/*
# packages, etc. (used by prepare.sh)
alpine-root/tgz-packages/*
# packages, etc. (used by alpine-build.sh)
tgz-packages/*

View File

@ -1 +1,2 @@
alpine-root/*
alpine-root/*
tgz-packages/*

View File

@ -1,7 +1,7 @@
FROM ubuntu:20.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.13.1.*
ARG version=21.1.0
ARG gosu_ver=1.10
RUN apt-get update \

View File

@ -16,7 +16,7 @@ RUN addgroup clickhouse \
&& chown root:clickhouse /var/log/clickhouse-server \
&& chmod 775 /var/log/clickhouse-server \
&& chmod +x /entrypoint.sh \
&& apk add --no-cache su-exec
&& apk add --no-cache su-exec bash
EXPOSE 9000 8123 9009

View File

@ -4,6 +4,7 @@ set -x
REPO_CHANNEL="${REPO_CHANNEL:-stable}" # lts / testing / prestable / etc
REPO_URL="${REPO_URL:-"https://repo.yandex.ru/clickhouse/tgz/${REPO_CHANNEL}"}"
VERSION="${VERSION:-20.9.3.45}"
DOCKER_IMAGE="${DOCKER_IMAGE:-yandex/clickhouse-server}"
# where original files live
DOCKER_BUILD_FOLDER="${BASH_SOURCE%/*}"
@ -11,12 +12,12 @@ DOCKER_BUILD_FOLDER="${BASH_SOURCE%/*}"
# we will create root for our image here
CONTAINER_ROOT_FOLDER="${DOCKER_BUILD_FOLDER}/alpine-root"
# where to put downloaded tgz
TGZ_PACKAGES_FOLDER="${CONTAINER_ROOT_FOLDER}/tgz-packages"
# clean up the root from old runs
# clean up the root from old runs, it's reconstructed each time
rm -rf "$CONTAINER_ROOT_FOLDER"
mkdir -p "$CONTAINER_ROOT_FOLDER"
# where to put downloaded tgz
TGZ_PACKAGES_FOLDER="${DOCKER_BUILD_FOLDER}/tgz-packages"
mkdir -p "$TGZ_PACKAGES_FOLDER"
PACKAGES=( "clickhouse-client" "clickhouse-server" "clickhouse-common-static" )
@ -24,7 +25,7 @@ PACKAGES=( "clickhouse-client" "clickhouse-server" "clickhouse-common-static" )
# download tars from the repo
for package in "${PACKAGES[@]}"
do
wget -q --show-progress "${REPO_URL}/${package}-${VERSION}.tgz" -O "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz"
wget -c -q --show-progress "${REPO_URL}/${package}-${VERSION}.tgz" -O "${TGZ_PACKAGES_FOLDER}/${package}-${VERSION}.tgz"
done
# unpack tars
@ -42,7 +43,7 @@ mkdir -p "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/users.d" \
"${CONTAINER_ROOT_FOLDER}/lib64"
cp "${DOCKER_BUILD_FOLDER}/docker_related_config.xml" "${CONTAINER_ROOT_FOLDER}/etc/clickhouse-server/config.d/"
cp "${DOCKER_BUILD_FOLDER}/entrypoint.alpine.sh" "${CONTAINER_ROOT_FOLDER}/entrypoint.sh"
cp "${DOCKER_BUILD_FOLDER}/entrypoint.sh" "${CONTAINER_ROOT_FOLDER}/entrypoint.sh"
## get glibc components from ubuntu 20.04 and put them to expected place
docker pull ubuntu:20.04
@ -56,4 +57,5 @@ docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_dns.so.2 "${CONTAIN
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libresolv.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64"
docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "yandex/clickhouse-server:${VERSION}-alpine" --pull
docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "${DOCKER_IMAGE}:${VERSION}-alpine" --pull
rm -rf "$CONTAINER_ROOT_FOLDER"

View File

@ -1,152 +0,0 @@
#!/bin/sh
#set -x
DO_CHOWN=1
if [ "$CLICKHOUSE_DO_NOT_CHOWN" = 1 ]; then
DO_CHOWN=0
fi
CLICKHOUSE_UID="${CLICKHOUSE_UID:-"$(id -u clickhouse)"}"
CLICKHOUSE_GID="${CLICKHOUSE_GID:-"$(id -g clickhouse)"}"
# support --user
if [ "$(id -u)" = "0" ]; then
USER=$CLICKHOUSE_UID
GROUP=$CLICKHOUSE_GID
# busybox has setuidgid & chpst buildin
gosu="su-exec $USER:$GROUP"
else
USER="$(id -u)"
GROUP="$(id -g)"
gosu=""
DO_CHOWN=0
fi
# set some vars
CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}"
# port is needed to check if clickhouse-server is ready for connections
HTTP_PORT="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=http_port)"
# get CH directories locations
DATA_DIR="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=path || true)"
TMP_DIR="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=tmp_path || true)"
USER_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=user_files_path || true)"
LOG_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=logger.log || true)"
LOG_DIR="$(dirname "${LOG_PATH}" || true)"
ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=logger.errorlog || true)"
ERROR_LOG_DIR="$(dirname "${ERROR_LOG_PATH}" || true)"
FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=format_schema_path || true)"
CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}"
CLICKHOUSE_DB="${CLICKHOUSE_DB:-}"
for dir in "$DATA_DIR" \
"$ERROR_LOG_DIR" \
"$LOG_DIR" \
"$TMP_DIR" \
"$USER_PATH" \
"$FORMAT_SCHEMA_PATH"
do
# check if variable not empty
[ -z "$dir" ] && continue
# ensure directories exist
if ! mkdir -p "$dir"; then
echo "Couldn't create necessary directory: $dir"
exit 1
fi
if [ "$DO_CHOWN" = "1" ]; then
# ensure proper directories permissions
chown -R "$USER:$GROUP" "$dir"
elif [ "$(stat -c %u "$dir")" != "$USER" ]; then
echo "Necessary directory '$dir' isn't owned by user with id '$USER'"
exit 1
fi
done
# if clickhouse user is defined - create it (user "default" already exists out of box)
if [ -n "$CLICKHOUSE_USER" ] && [ "$CLICKHOUSE_USER" != "default" ] || [ -n "$CLICKHOUSE_PASSWORD" ]; then
echo "$0: create new user '$CLICKHOUSE_USER' instead 'default'"
cat <<EOT > /etc/clickhouse-server/users.d/default-user.xml
<yandex>
<!-- Docs: <https://clickhouse.tech/docs/en/operations/settings/settings_users/> -->
<users>
<!-- Remove default user -->
<default remove="remove">
</default>
<${CLICKHOUSE_USER}>
<profile>default</profile>
<networks>
<ip>::/0</ip>
</networks>
<password>${CLICKHOUSE_PASSWORD}</password>
<quota>default</quota>
</${CLICKHOUSE_USER}>
</users>
</yandex>
EOT
fi
if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
# Listen only on localhost until the initialization is done
$gosu /usr/bin/clickhouse-server --config-file="${CLICKHOUSE_CONFIG}" -- --listen_host=127.0.0.1 &
pid="$!"
# check if clickhouse is ready to accept connections
# will try to send ping clickhouse via http_port (max 6 retries, with 1 sec timeout and 1 sec delay between retries)
tries=6
while ! wget --spider -T 1 -q "http://localhost:$HTTP_PORT/ping" 2>/dev/null; do
if [ "$tries" -le "0" ]; then
echo >&2 'ClickHouse init process failed.'
exit 1
fi
tries=$(( tries-1 ))
sleep 1
done
if [ -n "$CLICKHOUSE_PASSWORD" ]; then
printf -v WITH_PASSWORD '%s %q' "--password" "$CLICKHOUSE_PASSWORD"
fi
clickhouseclient="clickhouse-client --multiquery -u $CLICKHOUSE_USER $WITH_PASSWORD "
# create default database, if defined
if [ -n "$CLICKHOUSE_DB" ]; then
echo "$0: create database '$CLICKHOUSE_DB'"
"$clickhouseclient" -q "CREATE DATABASE IF NOT EXISTS $CLICKHOUSE_DB";
fi
for f in /docker-entrypoint-initdb.d/*; do
case "$f" in
*.sh)
if [ -x "$f" ]; then
echo "$0: running $f"
"$f"
else
echo "$0: sourcing $f"
. "$f"
fi
;;
*.sql) echo "$0: running $f"; "$clickhouseclient" < "$f" ; echo ;;
*.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "$clickhouseclient"; echo ;;
*) echo "$0: ignoring $f" ;;
esac
echo
done
if ! kill -s TERM "$pid" || ! wait "$pid"; then
echo >&2 'Finishing of ClickHouse init process failed.'
exit 1
fi
fi
# if no args passed to `docker run` or first argument start with `--`, then the user is passing clickhouse-server arguments
if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
exec $gosu /usr/bin/clickhouse-server --config-file="${CLICKHOUSE_CONFIG}" "$@"
fi
# Otherwise, we assume the user want to run his own process, for example a `bash` shell to explore this image
exec "$@"

71
docker/server/entrypoint.sh Normal file → Executable file
View File

@ -1,7 +1,10 @@
#!/bin/bash
set -eo pipefail
shopt -s nullglob
DO_CHOWN=1
if [ "$CLICKHOUSE_DO_NOT_CHOWN" = 1 ]; then
if [ "${CLICKHOUSE_DO_NOT_CHOWN:-0}" = "1" ]; then
DO_CHOWN=0
fi
@ -9,10 +12,17 @@ CLICKHOUSE_UID="${CLICKHOUSE_UID:-"$(id -u clickhouse)"}"
CLICKHOUSE_GID="${CLICKHOUSE_GID:-"$(id -g clickhouse)"}"
# support --user
if [ x"$UID" == x0 ]; then
if [ "$(id -u)" = "0" ]; then
USER=$CLICKHOUSE_UID
GROUP=$CLICKHOUSE_GID
gosu="gosu $USER:$GROUP"
if command -v gosu &> /dev/null; then
gosu="gosu $USER:$GROUP"
elif command -v su-exec &> /dev/null; then
gosu="su-exec $USER:$GROUP"
else
echo "No gosu/su-exec detected!"
exit 1
fi
else
USER="$(id -u)"
GROUP="$(id -g)"
@ -23,18 +33,23 @@ fi
# set some vars
CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}"
if ! $gosu test -f "$CLICKHOUSE_CONFIG" -a -r "$CLICKHOUSE_CONFIG"; then
echo "Configuration file '$dir' isn't readable by user with id '$USER'"
exit 1
fi
# port is needed to check if clickhouse-server is ready for connections
HTTP_PORT="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=http_port)"
HTTP_PORT="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=http_port)"
# get CH directories locations
DATA_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=path || true)"
TMP_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=tmp_path || true)"
USER_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=user_files_path || true)"
LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.log || true)"
LOG_DIR="$(dirname $LOG_PATH || true)"
ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.errorlog || true)"
ERROR_LOG_DIR="$(dirname $ERROR_LOG_PATH || true)"
FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=format_schema_path || true)"
DATA_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=path || true)"
TMP_DIR="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=tmp_path || true)"
USER_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=user_files_path || true)"
LOG_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=logger.log || true)"
LOG_DIR="$(dirname "$LOG_PATH" || true)"
ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=logger.errorlog || true)"
ERROR_LOG_DIR="$(dirname "$ERROR_LOG_PATH" || true)"
FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "$CLICKHOUSE_CONFIG" --key=format_schema_path || true)"
CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}"
@ -58,8 +73,8 @@ do
if [ "$DO_CHOWN" = "1" ]; then
# ensure proper directories permissions
chown -R "$USER:$GROUP" "$dir"
elif [ "$(stat -c %u "$dir")" != "$USER" ]; then
echo "Necessary directory '$dir' isn't owned by user with id '$USER'"
elif ! $gosu test -d "$dir" -a -w "$dir" -a -r "$dir"; then
echo "Necessary directory '$dir' isn't accessible by user with id '$USER'"
exit 1
fi
done
@ -90,21 +105,22 @@ fi
if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
# Listen only on localhost until the initialization is done
$gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG -- --listen_host=127.0.0.1 &
$gosu /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" -- --listen_host=127.0.0.1 &
pid="$!"
# check if clickhouse is ready to accept connections
# will try to send ping clickhouse via http_port (max 12 retries by default, with 1 sec delay)
if ! wget --spider --quiet --prefer-family=IPv6 --tries="${CLICKHOUSE_INIT_TIMEOUT:-12}" --waitretry=1 --retry-connrefused "http://localhost:$HTTP_PORT/ping" ; then
echo >&2 'ClickHouse init process failed.'
exit 1
fi
# will try to send ping clickhouse via http_port (max 12 retries by default, with 1 sec timeout and 1 sec delay between retries)
tries=${CLICKHOUSE_INIT_TIMEOUT:-12}
while ! wget --spider -T 1 -q "http://127.0.0.1:$HTTP_PORT/ping" 2>/dev/null; do
if [ "$tries" -le "0" ]; then
echo >&2 'ClickHouse init process failed.'
exit 1
fi
tries=$(( tries-1 ))
sleep 1
done
if [ ! -z "$CLICKHOUSE_PASSWORD" ]; then
printf -v WITH_PASSWORD '%s %q' "--password" "$CLICKHOUSE_PASSWORD"
fi
clickhouseclient=( clickhouse-client --multiquery -u $CLICKHOUSE_USER $WITH_PASSWORD )
clickhouseclient=( clickhouse-client --multiquery -u "$CLICKHOUSE_USER" --password "$CLICKHOUSE_PASSWORD" )
echo
@ -122,10 +138,11 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
"$f"
else
echo "$0: sourcing $f"
# shellcheck source=/dev/null
. "$f"
fi
;;
*.sql) echo "$0: running $f"; cat "$f" | "${clickhouseclient[@]}" ; echo ;;
*.sql) echo "$0: running $f"; "${clickhouseclient[@]}" < "$f" ; echo ;;
*.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "${clickhouseclient[@]}"; echo ;;
*) echo "$0: ignoring $f" ;;
esac
@ -140,7 +157,7 @@ fi
# if no args are passed to `docker run` or the first argument starts with `--`, then the user is passing clickhouse-server arguments
if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
exec $gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG "$@"
exec $gosu /usr/bin/clickhouse-server --config-file="$CLICKHOUSE_CONFIG" "$@"
fi
# Otherwise, we assume the user want to run his own process, for example a `bash` shell to explore this image

View File

@ -1,7 +1,7 @@
FROM ubuntu:18.04
ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=20.13.1.*
ARG version=21.1.0
RUN apt-get update && \
apt-get install -y apt-transport-https dirmngr && \

View File

@ -329,6 +329,7 @@ function run_tests
# nc - command not found
01601_proxy_protocol
01622_defaults_for_url_engine
)
time clickhouse-test -j 8 --order=random --no-long --testname --shard --zookeeper --skip "${TESTS_TO_SKIP[@]}" -- "$FASTTEST_FOCUS" 2>&1 | ts '%Y-%m-%d %H:%M:%S' | tee "$FASTTEST_OUTPUT/test_log.txt"

View File

@ -72,7 +72,7 @@ function watchdog
function fuzz
{
./clickhouse-server --config-file db/config.xml -- --path db 2>&1 | tail -10000 > server.log &
./clickhouse-server --config-file db/config.xml -- --path db 2>&1 | tail -100000 > server.log &
server_pid=$!
kill -0 $server_pid
while ! ./clickhouse-client --query "select 1" && kill -0 $server_pid ; do echo . ; sleep 1 ; done
@ -85,7 +85,7 @@ function fuzz
# SC2046: Quote this to prevent word splitting. Actually I need word splitting.
# shellcheck disable=SC2012,SC2046
./clickhouse-client --query-fuzzer-runs=1000 --queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \
> >(tail -n 10000 > fuzzer.log) \
> >(tail -n 100000 > fuzzer.log) \
2>&1 \
|| fuzzer_exit_code=$?

View File

@ -30,3 +30,4 @@ RUN curl 'https://cdn.mysql.com//Downloads/Connector-ODBC/8.0/mysql-connector-od
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone

View File

@ -0,0 +1,13 @@
# docker build -t yandex/clickhouse-sqlancer-test .
FROM ubuntu:20.04
RUN apt-get update --yes && env DEBIAN_FRONTEND=noninteractive apt-get install wget unzip git openjdk-14-jdk maven --yes --no-install-recommends
RUN wget https://github.com/sqlancer/sqlancer/archive/master.zip -O /sqlancer.zip
RUN mkdir /sqlancer && \
cd /sqlancer && \
unzip /sqlancer.zip
RUN cd /sqlancer/sqlancer-master && mvn package -DskipTests
COPY run.sh /
CMD ["/bin/bash", "/run.sh"]

15
docker/test/sqlancer/run.sh Executable file
View File

@ -0,0 +1,15 @@
#!/bin/bash
set -e -x
dpkg -i package_folder/clickhouse-common-static_*.deb
dpkg -i package_folder/clickhouse-common-static-dbg_*.deb
dpkg -i package_folder/clickhouse-server_*.deb
dpkg -i package_folder/clickhouse-client_*.deb
service clickhouse-server start && sleep 5
cd /sqlancer/sqlancer-master
CLICKHOUSE_AVAILABLE=true mvn -Dtest=TestClickHouse test
cp /sqlancer/sqlancer-master/target/surefire-reports/TEST-sqlancer.dbms.TestClickHouse.xml /test_output/result.xml

View File

@ -66,3 +66,6 @@ function run_tests()
export -f run_tests
timeout "$MAX_RUN_TIME" bash -c run_tests ||:
tar -chf /test_output/text_log_dump.tar /var/lib/clickhouse/data/system/text_log ||:
tar -chf /test_output/query_log_dump.tar /var/lib/clickhouse/data/system/query_log ||:

View File

@ -86,3 +86,4 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
COPY run.sh /
CMD ["/bin/bash", "/run.sh"]

View File

@ -7,3 +7,4 @@ RUN apt-get install gdb
CMD service zookeeper start && sleep 7 && /usr/share/zookeeper/bin/zkCli.sh -server localhost:2181 -create create /clickhouse_test ''; \
gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms | tee test_output/test_result.txt

View File

@ -42,9 +42,9 @@ Also, we need to download macOS X SDK into the working tree.
``` bash
cd ClickHouse
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz'
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz'
mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
tar xJf MacOSX10.15.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
```
## Build ClickHouse {#build-clickhouse}

View File

@ -98,7 +98,9 @@ For a description of parameters, see the [CREATE query description](../../../sql
- `merge_max_block_size` — Maximum number of rows in block for merge operations. Default value: 8192.
- `storage_policy` — Storage policy. See [Using Multiple Block Devices for Data Storage](#table_engine-mergetree-multiple-volumes).
- `min_bytes_for_wide_part`, `min_rows_for_wide_part` — Minimum number of bytes/rows in a data part that can be stored in `Wide` format. You can set one, both or none of these settings. See [Data Storage](#mergetree-data-storage).
- `max_parts_in_total` — Maximum number of parts in all partitions.
- `max_parts_in_total` — Maximum number of parts in all partitions.
- `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table. You can also specify this setting in the global settings (see [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size) setting). The value specified when the table is created overrides the global value for this setting.
- `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark. You can also specify this setting in the global settings (see [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size) setting). The value specified when the table is created overrides the global value for this setting.
**Example of Sections Setting**

View File

@ -25,10 +25,27 @@ The Distributed engine accepts parameters:
- [insert_distributed_sync](../../../operations/settings/settings.md#insert_distributed_sync) setting
- [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md#table_engine-mergetree-multiple-volumes) for the examples
It also accepts the following settings:
- `fsync_after_insert` - do the `fsync` for the file data after an asynchronous insert into Distributed. Guarantees that the OS has flushed the whole inserted data to a file **on the initiator node** disk.
- `fsync_directories` - do the `fsync` for directories. Guarantees that the OS has refreshed directory metadata after operations related to asynchronous inserts on the Distributed table (after insert, after sending the data to a shard, etc).
!!! note "Note"
**Durability settings** (`fsync_...`):
- Affect only asynchronous INSERTs (i.e. `insert_distributed_sync=false`), when data is first stored on the initiator node disk and later asynchronously sent to the shards.
- May significantly decrease insert performance.
- Affect writing the data stored inside the Distributed table folder on the **node which accepted your insert**. If you need guarantees for writing data to the underlying MergeTree tables, see the durability settings (`...fsync...`) in `system.merge_tree_settings`.
Example:
``` sql
Distributed(logs, default, hits[, sharding_key[, policy_name]])
SETTINGS
fsync_after_insert=0,
fsync_directories=0;
```
Data will be read from all servers in the `logs` cluster, from the default.hits table located on every server in the cluster.

View File

@ -844,23 +844,27 @@ Higher values will lead to higher memory usage.
## max_compress_block_size {#max-compress-block-size}
The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). If the size is reduced, the compression rate is significantly reduced, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced. There usually isn't any reason to change this setting.
The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). Specifying a smaller block size generally leads to a slightly reduced compression ratio; the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced.
!!! note "Warning"
This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse.
Don't confuse blocks for compression (a chunk of memory consisting of bytes) with blocks for query processing (a set of rows from a table).
## min_compress_block_size {#min-compress-block-size}
For [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md)" tables. In order to reduce latency when processing queries, a block is compressed when writing the next mark if its size is at least min_compress_block_size. By default, 65,536.
For [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. In order to reduce latency when processing queries, a block is compressed when writing the next mark if its size is at least `min_compress_block_size`. By default, 65,536.
The actual size of the block, if the uncompressed data is less than max_compress_block_size, is no less than this value and no less than the volume of data for one mark.
The actual size of the block, if the uncompressed data is less than `max_compress_block_size`, is no less than this value and no less than the volume of data for one mark.
Let's look at an example. Assume that index_granularity was set to 8192 during table creation.
Let's look at an example. Assume that `index_granularity` was set to 8192 during table creation.
We are writing a UInt32-type column (4 bytes per value). When writing 8192 rows, the total will be 32 KB of data. Since min_compress_block_size = 65,536, a compressed block will be formed for every two marks.
We are writing a URL column with the String type (average size of 60 bytes per value). When writing 8192 rows, the average will be slightly less than 500 KB of data. Since this is more than 65,536, a compressed block will be formed for each mark. In this case, when reading data from the disk in the range of a single mark, extra data won't be decompressed.
There usually isn't any reason to change this setting.
!!! note "Warning"
This is an expert-level setting, and you shouldn't change it if you're just getting started with ClickHouse.
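For illustration only, a minimal sketch of overriding both settings at table creation (the table name and columns are hypothetical; the values shown are the documented defaults, and per-table values take precedence over the global ones as described above):

```sql
-- Hypothetical table; the values shown are the documented defaults.
CREATE TABLE compression_example
(
    `event_time` DateTime,
    `url` String
)
ENGINE = MergeTree()
ORDER BY event_time
SETTINGS min_compress_block_size = 65536, max_compress_block_size = 1048576;
```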
## max_query_size {#settings-max_query_size}
@ -2470,6 +2474,45 @@ Possible values:
Default value: `0`.
## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty}
Enables or disables rewriting all aggregate functions in a query, adding [-OrNull](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility.
It is implemented via query rewrite (similar to [count_distinct_implementation](#settings-count_distinct_implementation) setting) to get consistent results for distributed queries.
Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: 0.
**Example**
Consider the following query with aggregate functions:
```sql
SELECT
SUM(-1),
MAX(0)
FROM system.one
WHERE 0
```
With `aggregate_functions_null_for_empty = 0` it would produce:
```text
┌─SUM(-1)─┬─MAX(0)─┐
│ 0 │ 0 │
└─────────┴────────┘
```
With `aggregate_functions_null_for_empty = 1` the result would be:
```text
┌─SUMOrNull(-1)─┬─MAXOrNull(0)─┐
│ NULL │ NULL │
└───────────────┴──────────────┘
```
## union_default_mode {#union-default-mode}
Sets a mode for combining `SELECT` query results. The setting is only used together with [UNION](../../sql-reference/statements/select/union.md) when `UNION ALL` or `UNION DISTINCT` is not specified explicitly.
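As a small hedged sketch (assuming the mode accepts `'ALL'` and `'DISTINCT'` in addition to the default empty string):

```sql
SET union_default_mode = 'ALL';

-- With the mode set, a bare UNION behaves like UNION ALL and keeps duplicate rows.
SELECT 1 UNION SELECT 1;
```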
@ -2484,6 +2527,7 @@ Default value: `''`.
See examples in [UNION](../../sql-reference/statements/select/union.md).
## data_type_default_nullable {#data_type_default_nullable}
Allows data types without explicit [NULL or NOT NULL](../../sql-reference/statements/create/table.md#null-modifiers) modifiers in the column definition to be [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable).
@ -2495,6 +2539,7 @@ Possible values:
Default value: `0`.
## execute_merges_on_single_replica_time_threshold {#execute-merges-on-single-replica-time-threshold}
Enables special logic to perform merges on replicas.

View File

@ -0,0 +1,67 @@
# system.distributed_ddl_queue {#system_tables-distributed_ddl_queue}
Contains information about distributed DDL queries (`ON CLUSTER` queries) that were executed on a cluster.
Columns:
- `entry` ([String](../../sql-reference/data-types/string.md)) - Query id.
- `host_name` ([String](../../sql-reference/data-types/string.md)) - Hostname.
- `host_address` ([String](../../sql-reference/data-types/string.md)) - IP address that the hostname resolves to.
- `port` ([UInt16](../../sql-reference/data-types/int-uint.md)) - Host port.
- `status` ([Enum](../../sql-reference/data-types/enum.md)) - Status of the query.
- `cluster` ([String](../../sql-reference/data-types/string.md)) - Cluster name.
- `query` ([String](../../sql-reference/data-types/string.md)) - Query executed.
- `initiator` ([String](../../sql-reference/data-types/string.md)) - Node that executed the query.
- `query_start_time` ([Date](../../sql-reference/data-types/date.md)) — Query start time.
- `query_finish_time` ([Date](../../sql-reference/data-types/date.md)) — Query finish time.
- `query_duration_ms` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Duration of query execution in milliseconds.
- `exception_code` ([Enum](../../sql-reference/data-types/enum.md)) - Exception code from ZooKeeper.
**Example**
``` sql
SELECT *
FROM system.distributed_ddl_queue
WHERE cluster = 'test_cluster'
LIMIT 2
FORMAT Vertical
Query id: f544e72a-6641-43f1-836b-24baa1c9632a
Row 1:
──────
entry: query-0000000000
host_name: clickhouse01
host_address: 172.23.0.11
port: 9000
status: Finished
cluster: test_cluster
query: CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster
initiator: clickhouse01:9000
query_start_time: 2020-12-30 13:07:51
query_finish_time: 2020-12-30 13:07:51
query_duration_ms: 6
exception_code: ZOK
Row 2:
──────
entry: query-0000000000
host_name: clickhouse02
host_address: 172.23.0.12
port: 9000
status: Finished
cluster: test_cluster
query: CREATE DATABASE test_db UUID '4a82697e-c85e-4e5b-a01e-a36f2a758456' ON CLUSTER test_cluster
initiator: clickhouse01:9000
query_start_time: 2020-12-30 13:07:51
query_finish_time: 2020-12-30 13:07:51
query_duration_ms: 6
exception_code: ZOK
2 rows in set. Elapsed: 0.025 sec.
```
[Original article](https://clickhouse.tech/docs/en/operations/system_tables/distributed_ddl_queue.md) <!--hide-->

View File

@ -55,10 +55,10 @@ In this case, ClickHouse can reload the dictionary earlier if the dictionary con
When upgrading the dictionaries, the ClickHouse server applies different logic depending on the type of [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md):
- For a text file, it checks the time of modification. If the time differs from the previously recorded time, the dictionary is updated.
- For MyISAM tables, the time of modification is checked using a `SHOW TABLE STATUS` query.
- For a MySQL source, the time of modification is checked using a `SHOW TABLE STATUS` query (in case of MySQL 8 you need to disable meta-information caching in MySQL by `set global information_schema_stats_expiry=0`).
- Dictionaries from other sources are updated every time by default.
For MySQL (InnoDB), ODBC and ClickHouse sources, you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps:
For other sources (ODBC, ClickHouse, etc.), you can set up a query that will update the dictionaries only if they really changed, rather than each time. To do this, follow these steps (a sketch follows the list):
- The dictionary table must have a field that always changes when the source data is updated.
- The settings of the source must specify a query that retrieves the changing field. The ClickHouse server interprets the query result as a row, and if this row has changed relative to its previous state, the dictionary is updated. Specify the query in the `<invalidate_query>` field in the settings for the [source](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md).
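As an illustration only, a sketch of such a source in DDL syntax; the host, table, and the changing field `updated_at` are hypothetical, and the same query can equally be placed in the `<invalidate_query>` element of an XML-configured source:

```sql
CREATE DICTIONARY countries_dict
(
    id UInt64,
    name String
)
PRIMARY KEY id
-- The dictionary is reloaded only when the result of invalidate_query changes.
SOURCE(CLICKHOUSE(
    host 'localhost'
    port 9000
    user 'default'
    password ''
    db 'default'
    table 'countries'
    invalidate_query 'SELECT max(updated_at) FROM default.countries'
))
LAYOUT(HASHED())
LIFETIME(MIN 300 MAX 600);
```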

View File

@ -583,7 +583,7 @@ Example of settings:
or
``` sql
SOURCE(MONGO(
SOURCE(MONGODB(
host 'localhost'
port 27017
user ''

View File

@ -23,6 +23,7 @@ The following actions are supported:
- [CLEAR COLUMN](#alter_clear-column) — Resets column values.
- [COMMENT COLUMN](#alter_comment-column) — Adds a text comment to the column.
- [MODIFY COLUMN](#alter_modify-column) — Changes column's type, default expression and TTL.
- [MODIFY COLUMN REMOVE](#modify-remove) — Removes one of the column properties.
These actions are described in detail below.
@ -145,6 +146,26 @@ The `ALTER` query is atomic. For MergeTree tables it is also lock-free.
The `ALTER` query for changing columns is replicated. The instructions are saved in ZooKeeper, then each replica applies them. All `ALTER` queries are run in the same order. The query waits for the appropriate actions to be completed on the other replicas. However, a query to change columns in a replicated table can be interrupted, and all actions will be performed asynchronously.
## MODIFY COLUMN REMOVE {#modify-remove}
Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC`, `COMMENT`, `TTL`.
Syntax:
```sql
ALTER TABLE table_name MODIFY COLUMN column_name REMOVE property;
```
**Example**
```sql
ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
```
## See Also
- [REMOVE TTL](ttl.md).
## Limitations {#alter-query-limitations}
The `ALTER` query lets you create and delete separate elements (columns) in nested data structures, but not whole nested data structures. To add a nested data structure, you can add columns with a name like `name.nested_name` and the type `Array(T)`. A nested data structure is equivalent to multiple array columns with a name that has the same prefix before the dot.

View File

@ -286,7 +286,7 @@ ALTER TABLE mt DELETE IN PARTITION 2 WHERE p = 2;
You can specify the partition expression in `ALTER ... PARTITION` queries in different ways:
- As a value from the `partition` column of the `system.parts` table. For example, `ALTER TABLE visits DETACH PARTITION 201901`.
- As the expression from the table column. Constants and constant expressions are supported. For example, `ALTER TABLE visits DETACH PARTITION toYYYYMM(toDate('2019-01-25'))`.
- As a tuple of expressions or constants that matches (in types) the table partitioning keys tuple. In the case of a single element partitioning key, the expression should be wrapped in the `tuple (...)` function. For example, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`.
- Using the partition ID. The partition ID is a string identifier of the partition (human-readable, if possible) that is used as the name of the partition in the file system and in ZooKeeper. The partition ID must be specified in the `PARTITION ID` clause, in single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`.
- In the [ALTER ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) queries, to specify the name of a part, use a string literal with a value from the `name` column of the [system.detached_parts](../../../operations/system-tables/detached_parts.md#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`. Several of the variants above are collected into one sketch after this list.
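Collected into one hedged sketch, using the same illustrative `visits` table as in the list above (each statement addresses the same January 2019 partition, just expressed differently):

```sql
ALTER TABLE visits DETACH PARTITION 201901;                          -- value from system.parts
ALTER TABLE visits DETACH PARTITION toYYYYMM(toDate('2019-01-25'));  -- expression over the partitioning column
ALTER TABLE visits DETACH PARTITION ID '201901';                     -- explicit partition ID in single quotes
```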

View File

@ -3,10 +3,83 @@ toc_priority: 44
toc_title: TTL
---
### Manipulations with Table TTL {#manipulations-with-table-ttl}
# Manipulations with Table TTL {#manipulations-with-table-ttl}
## MODIFY TTL {#modify-ttl}
You can change [table TTL](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) with a request of the following form:
``` sql
ALTER TABLE table-name MODIFY TTL ttl-expression
ALTER TABLE table_name MODIFY TTL ttl_expression;
```
## REMOVE TTL {#remove-ttl}
The TTL property can be removed from the table with the following query:
```sql
ALTER TABLE table_name REMOVE TTL
```
**Example**
Consider a table with a table-level `TTL`:
```sql
CREATE TABLE table_with_ttl
(
event_time DateTime,
UserID UInt64,
Comment String
)
ENGINE MergeTree()
ORDER BY tuple()
TTL event_time + INTERVAL 3 MONTH
SETTINGS min_bytes_for_wide_part = 0;
INSERT INTO table_with_ttl VALUES (now(), 1, 'username1');
INSERT INTO table_with_ttl VALUES (now() - INTERVAL 4 MONTH, 2, 'username2');
```
Run `OPTIMIZE` to force `TTL` cleanup:
```sql
OPTIMIZE TABLE table_with_ttl FINAL;
SELECT * FROM table_with_ttl FORMAT PrettyCompact;
```
The second row was deleted from the table.
```text
┌─────────event_time────┬──UserID─┬─────Comment──┐
│ 2020-12-11 12:44:57 │ 1 │ username1 │
└───────────────────────┴─────────┴──────────────┘
```
Now remove table `TTL` with the following query:
```sql
ALTER TABLE table_with_ttl REMOVE TTL;
```
Re-insert the deleted row and force the `TTL` cleanup again with `OPTIMIZE`:
```sql
INSERT INTO table_with_ttl VALUES (now() - INTERVAL 4 MONTH, 2, 'username2');
OPTIMIZE TABLE table_with_ttl FINAL;
SELECT * FROM table_with_ttl FORMAT PrettyCompact;
```
The `TTL` is no longer there, so the second row is not deleted:
```text
┌─────────event_time────┬──UserID─┬─────Comment──┐
│ 2020-12-11 12:44:57 │ 1 │ username1 │
│ 2020-08-11 12:44:57 │ 2 │ username2 │
└───────────────────────┴─────────┴──────────────┘
```
### See Also
- More about the [TTL-expression](../../../sql-reference/statements/create/table#ttl-expression).
- Modify column [with TTL](../../../sql-reference/statements/alter/column#alter_modify-column).

View File

@ -13,9 +13,7 @@ Basic query format:
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
```
You can specify a list of columns to insert using the `(c1, c2, c3)` or `COLUMNS(c1,c2,c3)` syntax.
Instead of listing all the required columns you can use the `(* EXCEPT(column_list))` syntax.
You can specify a list of columns to insert using the `(c1, c2, c3)` syntax. You can also use an expression with a column [matcher](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#except-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier) (a matcher example follows the table definition below).
For example, consider the table:
@ -23,9 +21,8 @@ For example, consider the table:
SHOW CREATE insert_select_testtable;
```
```
┌─statement────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ CREATE TABLE insert_select_testtable
```text
CREATE TABLE insert_select_testtable
(
`a` Int8,
`b` String,
@ -33,8 +30,7 @@ SHOW CREATE insert_select_testtable;
)
ENGINE = MergeTree()
ORDER BY a
SETTINGS index_granularity = 8192 │
└──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘
SETTINGS index_granularity = 8192
```
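As a hedged illustration of the matcher syntax mentioned above, using the same table; column `b` is skipped and receives its default value:

```sql
INSERT INTO insert_select_testtable (* EXCEPT(b)) VALUES (1, 2);
```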
``` sql

View File

@ -0,0 +1,21 @@
---
toc_title: ALL
---
# ALL Clause {#select-all}
`SELECT ALL` is identical to `SELECT` without `DISTINCT`.
- If `ALL` is specified, it is ignored.
- If both `ALL` and `DISTINCT` are specified, an exception will be thrown.
`ALL` can also be specified inside an aggregate function with the same effect (noop), for instance:
```sql
SELECT sum(ALL number) FROM numbers(10);
```
which is equivalent to
```sql
SELECT sum(number) FROM numbers(10);
```

View File

@ -18,10 +18,6 @@ It is possible to obtain the same result by applying [GROUP BY](../../../sql-ref
- When [ORDER BY](../../../sql-reference/statements/select/order-by.md) is omitted and [LIMIT](../../../sql-reference/statements/select/limit.md) is defined, the query stops running immediately after the required number of different rows has been read.
- Data blocks are output as they are processed, without waiting for the entire query to finish running.
## Limitations {#limitations}
`DISTINCT` is not supported if `SELECT` has at least one array column.
## Examples {#examples}
ClickHouse supports using the `DISTINCT` and `ORDER BY` clauses for different columns in one query. The `DISTINCT` clause is executed before the `ORDER BY` clause.
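A minimal hedged sketch (hypothetical table `t1` with columns `a` and `b`): `DISTINCT` is applied to `a`, and the remaining rows are then ordered by `b`:

```sql
SELECT DISTINCT a FROM t1 ORDER BY b ASC;
```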

View File

@ -44,9 +44,9 @@ Also, we need to download macOS X SDK into the working tree.
``` bash
cd ClickHouse
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz'
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz'
mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
tar xJf MacOSX10.15.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
```
# Build ClickHouse {#build-clickhouse}

View File

@ -44,9 +44,9 @@ Also, we need to download macOS X SDK into the working tree.
``` bash
cd ClickHouse
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz'
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz'
mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
tar xJf MacOSX10.15.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
```
# Build ClickHouse {#build-clickhouse}

View File

@ -45,9 +45,9 @@ make install
``` bash
cd ClickHouse
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz'
wget 'https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz'
mkdir -p build-darwin/cmake/toolchain/darwin-x86_64
tar xJf MacOSX10.14.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
tar xJf MacOSX10.15.sdk.tar.xz -C build-darwin/cmake/toolchain/darwin-x86_64 --strip-components=1
```
# Build ClickHouse {#build-clickhouse}

View File

@ -133,7 +133,7 @@ ClickHouse has strong typing, so there is no
## Aggregate Functions {#aggregate-functions}
Aggregate functions are stateful functions. They accumulate passed values in some state and allow you to get results from that state. They are managed with the `IAggregateFunction` interface. States can be rather simple (the state for `AggregateFunctionCount` is just one person `UInt64` value) or quite complex (the state of `AggregateFunctionUniqCombined` is a combination of a linear array, a hash table, and a `HyperLogLog` probabilistic data structure).
Aggregate functions are stateful functions. They accumulate passed values in some state and allow you to get results from that state. They are managed with the `IAggregateFunction` interface. States can be rather simple (the state for `AggregateFunctionCount` is just a single variable of type `UInt64`) or quite complex (the state of `AggregateFunctionUniqCombined` is a combination of a linear array, a hash table, and a `HyperLogLog` probabilistic data structure).
States are allocated in an `Arena` (a memory pool) to handle multiple states while executing a high-cardinality `GROUP BY` query (with a large number of unique values). States can have a non-trivial constructor and destructor: for example, complex aggregation states can allocate additional memory themselves. Therefore, creating and destroying states, and correctly passing ownership and the order of destruction, require extra attention.

View File

@ -77,17 +77,19 @@ ORDER BY expr
- `SETTINGS` — Additional parameters that control the behavior of `MergeTree` (optional):
- `index_granularity` — Maximum number of data rows between index marks. Default value: 8192. See [Data Storage](#mergetree-data-storage).
- `index_granularity_bytes` — Maximum size of a data granule in bytes. Default value: 10Mb. To restrict the granule size only by the number of rows, set it to 0 (not recommended). See [Data Storage](#mergetree-data-storage).
- `index_granularity` — Maximum number of data rows between index marks. Default value: 8192. See [Data Storage](#mergetree-data-storage).
- `index_granularity_bytes` — Maximum size of a data granule in bytes. Default value: 10Mb. To restrict the granule size only by the number of rows, set it to 0 (not recommended). See [Data Storage](#mergetree-data-storage).
- `min_index_granularity_bytes` — Minimum allowed size of a data granule in bytes. Default value: 1024b. Protects against accidentally creating tables with a very low `index_granularity_bytes` value. See [Data Storage](#mergetree-data-storage).
- `enable_mixed_granularity_parts` — Enables or disables switching to limiting the granule size with the `index_granularity_bytes` setting. The `index_granularity_bytes` setting improves ClickHouse performance when selecting data from tables with big rows (tens and hundreds of megabytes). If you have tables with big rows, you can enable this setting to improve the efficiency of `SELECT` queries.
- `use_minimalistic_part_header_in_zookeeper` — Storage method of the data part headers in ZooKeeper. If `use_minimalistic_part_header_in_zookeeper = 1`, ZooKeeper stores less data. For details, see the [setting description](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) in the "Server configuration parameters" section.
- `min_merge_bytes_to_use_direct_io` — Minimum amount of data in a merge operation required for using direct (unbuffered) I/O when reading/writing to disk. When merging data parts, ClickHouse calculates the total storage volume of all the data to be merged. If the total volume of all data to be read exceeds `min_bytes_to_use_direct_io` bytes, ClickHouse uses the `O_DIRECT` flag when reading data from the disk. If `min_merge_bytes_to_use_direct_io = 0`, direct I/O is disabled. Default value: `10 * 1024 * 1024 * 1024` bytes.
- <a name="mergetree_setting-merge_with_ttl_timeout"></a>`merge_with_ttl_timeout` — Minimum time in seconds before repeating a merge with TTL. Default value: 86400 (1 day).
- `write_final_mark` — Enables or disables writing the final index mark at the end of a data part, pointing past the last byte. Default value: 1. Do not disable it.
- `merge_max_block_size` — Maximum number of rows in a block for merge operations. Default value: 8192.
- `storage_policy` — Data storage policy. See [Using Multiple Block Devices for Data Storage](#table_engine-mergetree-multiple-volumes).
- `enable_mixed_granularity_parts` — Enables or disables switching to limiting the granule size with the `index_granularity_bytes` setting. The `index_granularity_bytes` setting improves ClickHouse performance when selecting data from tables with big rows (tens and hundreds of megabytes). If you have tables with big rows, you can enable this setting to improve the efficiency of `SELECT` queries.
- `use_minimalistic_part_header_in_zookeeper` — Storage method of the data part headers in ZooKeeper. If `use_minimalistic_part_header_in_zookeeper = 1`, ZooKeeper stores less data. For details, see the [setting description](../../../operations/server-configuration-parameters/settings.md#server-settings-use_minimalistic_part_header_in_zookeeper) in the "Server configuration parameters" section.
- `min_merge_bytes_to_use_direct_io` — Minimum amount of data in a merge operation required for using direct (unbuffered) I/O when reading/writing to disk. When merging data parts, ClickHouse calculates the total storage volume of all the data to be merged. If the total volume of all data to be read exceeds `min_bytes_to_use_direct_io` bytes, ClickHouse uses the `O_DIRECT` flag when reading data from the disk. If `min_merge_bytes_to_use_direct_io = 0`, direct I/O is disabled. Default value: `10 * 1024 * 1024 * 1024` bytes.
- <a name="mergetree_setting-merge_with_ttl_timeout"></a>`merge_with_ttl_timeout` — Minimum time in seconds before repeating a merge with TTL. Default value: 86400 (1 day).
- `write_final_mark` — Enables or disables writing the final index mark at the end of a data part, pointing past the last byte. Default value: 1. Do not disable it.
- `merge_max_block_size` — Maximum number of rows in a block for merge operations. Default value: 8192.
- `storage_policy` — Data storage policy. See [Using Multiple Block Devices for Data Storage](#table_engine-mergetree-multiple-volumes).
- `min_bytes_for_wide_part`, `min_rows_for_wide_part` — Minimum number of bytes/rows in a data part that can be stored in `Wide` format. You can set one, both, or neither of these settings. For details, see [Data Storage](#mergetree-data-storage).
- `max_compress_block_size` — Maximum size of blocks of uncompressed data before compressing for writing to a table. You can also specify this setting in the global settings (see [max_compress_block_size](../../../operations/settings/settings.md#max-compress-block-size)). The value specified when the table is created overrides the global value for this setting.
- `min_compress_block_size` — Minimum size of blocks of uncompressed data required for compression when writing the next mark. You can also specify this setting in the global settings (see [min_compress_block_size](../../../operations/settings/settings.md#min-compress-block-size)). The value specified when the table is created overrides the global value for this setting.
**Example of Sections Setting**

View File

@ -811,23 +811,27 @@ log_query_threads=1
## max_compress_block_size {#max-compress-block-size}
The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). If the size is reduced, the compression ratio decreases slightly, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced. As a rule, there is no reason to change this setting.
The maximum size of blocks of uncompressed data before compressing for writing to a table. By default, 1,048,576 (1 MiB). If the size is reduced, the compression ratio decreases slightly, the compression and decompression speed increases slightly due to cache locality, and memory consumption is reduced.
!!! note "Warning"
This is an expert-level setting; do not use it if you are just getting started with ClickHouse.
Do not confuse blocks for compression (a chunk of memory consisting of bytes) with blocks for query processing (a set of rows from a table).
## min_compress_block_size {#min-compress-block-size}
For [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. In order to reduce latency when processing queries, a block is compressed when the next mark is written if its size is at least min_compress_block_size. Default: 65,536.
For [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. In order to reduce latency when processing queries, a block is compressed when the next mark is written if its size is at least `min_compress_block_size`. Default: 65,536.
The actual size of the block, if the uncompressed data is less than max_compress_block_size, is not less than this value and not less than the volume of data for one mark.
The actual size of the block, if the uncompressed data is less than `max_compress_block_size`, is not less than this value and not less than the volume of data for one mark.
Let's look at an example. Suppose that index_granularity was set to 8192 during table creation.
Let's look at an example. Suppose that `index_granularity` was set to 8192 during table creation.
Suppose we are writing a UInt32-type column (4 bytes per value). When writing 8192 rows, the total will be 32 KB of data. Since min_compress_block_size = 65,536, a compressed block will be formed for every two marks.
Suppose we are writing a UInt32-type column (4 bytes per value). When writing 8192 rows, the total will be 32 KB of data. Since `min_compress_block_size` = 65,536, a compressed block will be formed for every two marks.
Suppose we are writing a URL column of the String type (average size of 60 bytes per value). When writing 8192 rows, the average will be slightly less than 500 KB of data. Since this is more than 65,536 bytes, a compressed block will be formed for each mark. In this case, when reading data from disk in the range of a single mark, no extra data will be decompressed.
As a rule, there is no reason to change this setting.
!!! note "Warning"
    This is an expert-level setting; do not change it if you are just getting started with ClickHouse.
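As a hedged illustration (the table and column names are made up), both compression block sizes can also be overridden per table, and the table-level value then takes precedence over the global one:

```sql
CREATE TABLE compress_example
(
    URL String,
    Visits UInt32
)
ENGINE = MergeTree()
ORDER BY URL
SETTINGS min_compress_block_size = 65536, max_compress_block_size = 1048576;
```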
## max_query_size {#settings-max_query_size}
@ -2339,6 +2343,45 @@ SELECT number FROM numbers(3) FORMAT JSONEachRow;
Default value: `0`.
## aggregate_functions_null_for_empty {#aggregate_functions_null_for_empty}
Enables or disables rewriting all aggregate functions in a query by adding the [-OrNull](../../sql-reference/aggregate-functions/combinators.md#agg-functions-combinator-ornull) suffix to them. Enable it for SQL standard compatibility.
It is implemented via query rewrite (similar to the [count_distinct_implementation](#settings-count_distinct_implementation) setting) to get consistent results for distributed queries.
Possible values:
- 0 — disabled.
- 1 — enabled.
Default value: 0.
**Example**
Consider the following query with aggregate functions:
```sql
SELECT
SUM(-1),
MAX(0)
FROM system.one
WHERE 0
```
Result of the query with `aggregate_functions_null_for_empty = 0`:
```text
┌─SUM(-1)─┬─MAX(0)─┐
│ 0 │ 0 │
└─────────┴────────┘
```
Result of the query with `aggregate_functions_null_for_empty = 1`:
```text
┌─SUMOrNull(-1)─┬─MAXOrNull(0)─┐
│ NULL │ NULL │
└───────────────┴──────────────┘
```
## union_default_mode {#union-default-mode}
Sets the mode for combining the results of `SELECT` queries. The setting is only used together with [UNION](../../sql-reference/statements/select/union.md) when `UNION ALL` or `UNION DISTINCT` is not explicitly specified.
@ -2353,6 +2396,7 @@ SELECT number FROM numbers(3) FORMAT JSONEachRow;
See examples in the [UNION](../../sql-reference/statements/select/union.md) section.
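A small hedged sketch of the effect (assuming the mode is set to `ALL`): a bare `UNION` then behaves like `UNION ALL` and keeps duplicate rows:

```sql
SET union_default_mode = 'ALL';

SELECT 1 AS x
UNION
SELECT 1 AS x; -- two rows are returned, as with UNION ALL
```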
## execute_merges_on_single_replica_time_threshold {#execute-merges-on-single-replica-time-threshold}
Enables special logic for performing merges on replicas.

View File

@ -8,7 +8,7 @@
- `value` ([Int64](../../sql-reference/data-types/int-uint.md)) — metric value.
- `description` ([String](../../sql-reference/data-types/string.md)) — metric description.
For the list of supported metrics, see the [src/Common/CurrentMetrics.cpp](https://github.com/ClickHouse/ClickHouse/blob/master/src/Common/CurrentMetrics.cpp) source file.
**Example**
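A hedged sketch of a query against this table (the exact metric names and values depend on the server version and load):

```sql
SELECT metric, value, description
FROM system.metrics
LIMIT 5;
```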

View File

@ -54,10 +54,10 @@ LIFETIME(MIN 300 MAX 360)
When updating dictionaries, the ClickHouse server applies different logic depending on the type of [source](external-dicts-dict-sources.md):
> - For a text file, the modification time is checked. If it differs from the previously recorded time, the dictionary is updated.
> - For MyISAM tables, the modification time is checked using a `SHOW TABLE STATUS` query.
> - For a MySQL source, the modification time is checked using a `SHOW TABLE STATUS` query (for MySQL 8 you need to disable metadata caching in MySQL with `set global information_schema_stats_expiry=0`).
> - Dictionaries from other sources are updated every time by default.
For MySQL (InnoDB), ODBC and ClickHouse sources, you can set up a query that updates the dictionaries only if they have actually changed, rather than every time. To do this, the following conditions/actions are required:
For other sources (ODBC, ClickHouse, etc.), you can set up a query that updates the dictionaries only if they have actually changed, rather than every time. To do this, the following conditions/actions are required:
> - The dictionary table must have a field that is guaranteed to change whenever the source data is updated.
> - The source settings must specify a query that retrieves the changing field. The ClickHouse server interprets the query result as a string, and if this string has changed compared to its previous state, the dictionary is updated. Specify the query in the `<invalidate_query>` field of the [source](external-dicts-dict-sources.md) settings.
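A hedged sketch of such a configuration in the DDL dictionary syntax (the dictionary name, connection parameters, and the invalidation query are illustrative):

```sql
CREATE DICTIONARY example_dict
(
    id UInt64,
    value String
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(
    host 'localhost' port 9000 user 'default' password '' db 'default' table 'dictionary_source'
    -- The dictionary is reloaded only when the result of this query changes.
    invalidate_query 'SELECT max(update_time) FROM dictionary_source'
))
LAYOUT(FLAT())
LIFETIME(MIN 300 MAX 360);
```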

View File

@ -12,6 +12,7 @@ toc_title: "\u041c\u0430\u043d\u0438\u043f\u0443\u043b\u044f\u0446\u0438\u0438\u
- [CLEAR COLUMN](#alter_clear-column) — resets all values in a column for the specified partition;
- [COMMENT COLUMN](#alter_comment-column) — adds a comment to a column;
- [MODIFY COLUMN](#alter_modify-column) — changes the column type, the default value expression, and the TTL.
- [MODIFY COLUMN REMOVE](#modify-remove) — removes one of the column properties.
A detailed description of each action is given below.
@ -135,6 +136,28 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String)
The `ALTER` query for changing columns is replicated. The instructions are saved in ZooKeeper, and then each replica applies them. All `ALTER` queries are run in the same order. The query waits for the appropriate actions to be completed on all replicas. However, a query to change columns in a replicated table can be interrupted, and all actions will then be performed asynchronously.
## MODIFY COLUMN REMOVE {#modify-remove}
Removes one of the column properties: `DEFAULT`, `ALIAS`, `MATERIALIZED`, `CODEC`, `COMMENT`, `TTL`.
Syntax:
```sql
ALTER TABLE table_name MODIFY column_name REMOVE property;
```
**Example**
Removing the TTL property:
```sql
ALTER TABLE table_with_ttl MODIFY COLUMN column_ttl REMOVE TTL;
```
## See also
- [REMOVE TTL](ttl.md).
## Limitations of the ALTER query {#ogranicheniia-zaprosa-alter}
The `ALTER` query lets you create and delete separate elements (columns) in nested data structures, but not whole nested data structures. To add a nested data structure, you can add columns with names like `name.nested_name` and the type `Array(T)`: a nested data structure is completely equivalent to multiple array columns whose names share the same prefix before the dot.

View File

@ -288,7 +288,7 @@ ALTER TABLE mt DELETE IN PARTITION 2 WHERE p = 2;
To specify the required partition in `ALTER ... PARTITION` queries, you can use:
- The partition name. You can find the partition name in the `partition` column of the [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) system table. For example, `ALTER TABLE visits DETACH PARTITION 201901`.
- An arbitrary expression on the columns of the source table. Constants and constant expressions are also supported. For example, `ALTER TABLE visits DETACH PARTITION toYYYYMM(toDate('2019-01-25'))`.
- A tuple of expressions or constants that matches (in types) the partitioning key tuple. For a single-element partitioning key, wrap the expression in the `tuple(...)` function. For example, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`.
- The partition ID as a string. The partition ID is used for naming parts of the partition on the file system and in ZooKeeper. In `ALTER` queries, specify the partition ID in the `PARTITION ID` clause, in single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`.
- For [ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) queries: to specify the part name, use a string literal with a value from the `name` column of the [system.detached_parts](../../../operations/system-tables/detached_parts.md#system_tables-detached_parts) system table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`.
@ -306,4 +306,4 @@ OPTIMIZE TABLE table_not_partitioned PARTITION tuple() FINAL;
Examples of `ALTER ... PARTITION` queries can be found in the tests [`00502_custom_partitioning_local`](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00502_custom_partitioning_local.sql) and [`00502_custom_partitioning_replicated_zookeeper`](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00502_custom_partitioning_replicated_zookeeper.sql).
[Original article](https://clickhouse.tech/docs/ru/query_language/alter/partition/) <!--hide-->

View File

@ -5,10 +5,82 @@ toc_title: TTL
# Manipulations with Table TTL {#manipuliatsii-s-ttl-tablitsy}
## MODIFY TTL {#modify-ttl}
You can change the [table TTL](../../../engines/table-engines/mergetree-family/mergetree.md#mergetree-column-ttl) with a query of the following form:
``` sql
ALTER TABLE table-name MODIFY TTL ttl-expression
```
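For instance, a hedged sketch that sets a one-month TTL (the table and column names are illustrative and match the example further below):

```sql
ALTER TABLE table_with_ttl MODIFY TTL event_time + INTERVAL 1 MONTH;
```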
[Original article](https://clickhouse.tech/docs/ru/query_language/alter/ttl/) <!--hide-->
## REMOVE TTL {#remove-ttl}
You can remove the table TTL with a query of the following form:
```sql
ALTER TABLE table_name REMOVE TTL
```
**Example**
Let's create a table with a table-level `TTL` and fill it with data:
```sql
CREATE TABLE table_with_ttl
(
event_time DateTime,
UserID UInt64,
Comment String
)
ENGINE MergeTree()
ORDER BY tuple()
TTL event_time + INTERVAL 3 MONTH
SETTINGS min_bytes_for_wide_part = 0;
INSERT INTO table_with_ttl VALUES (now(), 1, 'username1');
INSERT INTO table_with_ttl VALUES (now() - INTERVAL 4 MONTH, 2, 'username2');
```
Run `OPTIMIZE` to force the cleanup by `TTL`:
```sql
OPTIMIZE TABLE table_with_ttl FINAL;
SELECT * FROM table_with_ttl;
```
As a result, you can see that the second row has been deleted.
```text
┌─────────event_time────┬──UserID─┬─────Comment──┐
│ 2020-12-11 12:44:57 │ 1 │ username1 │
└───────────────────────┴─────────┴──────────────┘
```
Now remove the table `TTL`:
```sql
ALTER TABLE table_with_ttl REMOVE TTL;
```
Insert the deleted row again and force the `TTL` cleanup once more with `OPTIMIZE`:
```sql
INSERT INTO table_with_ttl VALUES (now() - INTERVAL 4 MONTH, 2, 'username2');
OPTIMIZE TABLE table_with_ttl FINAL;
SELECT * FROM table_with_ttl;
```
There is no `TTL` anymore, so the data is not deleted:
```text
┌─────────event_time────┬──UserID─┬─────Comment──┐
│ 2020-12-11 12:44:57 │ 1 │ username1 │
│ 2020-08-11 12:44:57 │ 2 │ username2 │
└───────────────────────┴─────────┴──────────────┘
```
### See also
- More about the [TTL property](../../../engines/table-engines/mergetree-family/mergetree#table_engine-mergetree-ttl).
[Original article](https://clickhouse.tech/docs/ru/query_language/alter/ttl/) <!--hide-->

View File

@ -13,9 +13,7 @@ toc_title: INSERT INTO
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
```
You can specify a list of columns to insert using the following syntax: `(c1, c2, c3)` or `COLUMNS(c1,c2,c3)`.
Instead of listing all the required columns, you can use the `(* EXCEPT(column_list))` syntax.
You can specify a list of columns to insert using the `(c1, c2, c3)` syntax. You can also use an expression with an [asterisk](../../sql-reference/statements/select/index.md#asterisk) and/or modifiers such as `APPLY`, `EXCEPT`, `REPLACE`.
As an example, consider the table:
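A hedged sketch of the column-list syntax (the table, its structure, and the values are illustrative, not the table from the original example):

```sql
CREATE TABLE insert_example (a Int8, b String, c Int8) ENGINE = MergeTree() ORDER BY a;

-- Insert into every column except `b`; `b` receives its default value.
INSERT INTO insert_example (* EXCEPT(b)) VALUES (1, 2);
```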

View File

@ -18,10 +18,6 @@ toc_title: DISTINCT
- When the [ORDER BY](order-by.md) clause is omitted and the [LIMIT](limit.md) clause is present, the query stops running as soon as the required number of distinct rows has been read.
- Data blocks are output as they are processed, without waiting for the entire query to finish.
## Limitations {#limitations}
`DISTINCT` is not supported if `SELECT` has at least one array column.
## Examples {#examples}
ClickHouse supports using the `DISTINCT` and `ORDER BY` clauses for different columns in one query. The `DISTINCT` clause is executed before the `ORDER BY` clause.
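A hedged sketch (assuming a table `t1` with columns `a` and `b`): duplicates of `a` are removed first, and the remaining rows are then sorted by `b`:

```sql
SELECT DISTINCT a
FROM t1
ORDER BY b ASC;
```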

View File

@ -33,8 +33,8 @@ cd cctools-port/cctools
make install
cd ${CCTOOLS}
wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.14-beta4/MacOSX10.14.sdk.tar.xz
tar xJf MacOSX10.14.sdk.tar.xz
wget https://github.com/phracker/MacOSX-SDKs/releases/download/10.15/MacOSX10.15.sdk.tar.xz
tar xJf MacOSX10.15.sdk.tar.xz
```
# Build ClickHouse {#bian-yi-clickhouse}
@ -46,7 +46,7 @@ CC=clang-8 CXX=clang++-8 cmake . -Bbuild-osx -DCMAKE_SYSTEM_NAME=Darwin \
-DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ar \
-DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/x86_64-apple-darwin-ranlib \
-DLINKER_NAME=${CCTOOLS}/bin/x86_64-apple-darwin-ld \
-DSDK_PATH=${CCTOOLS}/MacOSX10.14.sdk
-DSDK_PATH=${CCTOOLS}/MacOSX10.15.sdk
ninja -C build-osx
```

View File

@ -29,7 +29,7 @@ SELECT 1 - 0.9
- When reading floating-point numbers row by row, the result may not be the nearest machine-representable value.
## NaN and Inf {#data_type-float-nan-inf}
Compared to standard SQL, ClickHouse supports the following categories of floating-point numbers:

View File

@ -273,11 +273,12 @@ try
global_context->setCurrentDatabase(default_database);
applyCmdOptions(*global_context);
String path = global_context->getPath();
if (!path.empty())
if (config().has("path"))
{
String path = global_context->getPath();
/// Lock path directory before read
status.emplace(global_context->getPath() + "status", StatusFile::write_full_info);
status.emplace(path + "status", StatusFile::write_full_info);
LOG_DEBUG(log, "Loading metadata from {}", path);
Poco::File(path + "data/").createDirectories();
@ -288,7 +289,7 @@ try
DatabaseCatalog::instance().loadDatabases();
LOG_DEBUG(log, "Loaded metadata.");
}
else
else if (!config().has("no-system-tables"))
{
attachSystemTables(*global_context);
}
@ -540,6 +541,7 @@ void LocalServer::init(int argc, char ** argv)
("logger.log", po::value<std::string>(), "Log file name")
("logger.level", po::value<std::string>(), "Log level")
("ignore-error", "do not stop processing if a query failed")
("no-system-tables", "do not attach system tables (better startup time)")
("version,V", "print version information and exit")
;
@ -602,6 +604,8 @@ void LocalServer::init(int argc, char ** argv)
config().setString("logger.level", options["logger.level"].as<std::string>());
if (options.count("ignore-error"))
config().setBool("ignore-error", true);
if (options.count("no-system-tables"))
config().setBool("no-system-tables", true);
std::vector<std::string> arguments;
for (int arg_num = 1; arg_num < argc; ++arg_num)

View File

@ -287,7 +287,7 @@
</div>
<div id="run_div">
<button class="shadow" id="run">Run</button>
<span class="hint">&nbsp;(Ctrl+Enter)</span>
<span class="hint">&nbsp;(Ctrl/Cmd+Enter)</span>
<span id="hourglass"></span>
<span id="check-mark"></span>
<span id="stats"></span>
@ -424,10 +424,10 @@
post();
}
document.onkeypress = function(event)
document.onkeydown = function(event)
{
/// Firefox has code 13 for Enter and Chromium has code 10.
if (event.ctrlKey && (event.charCode == 13 || event.charCode == 10)) {
if ((event.metaKey || event.ctrlKey) && (event.keyCode == 13 || event.keyCode == 10)) {
post();
}
}

View File

@ -112,7 +112,6 @@ class GroupArrayNumericImpl final
{
using Data = GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>;
static constexpr bool limit_num_elems = Trait::has_limit;
DataTypePtr & data_type;
UInt64 max_elems;
UInt64 seed;
@ -121,7 +120,6 @@ public:
const DataTypePtr & data_type_, UInt64 max_elems_ = std::numeric_limits<UInt64>::max(), UInt64 seed_ = 123456)
: IAggregateFunctionDataHelper<GroupArrayNumericData<T, Trait::sampler != Sampler::NONE>, GroupArrayNumericImpl<T, Trait>>(
{data_type_}, {})
, data_type(this->argument_types[0])
, max_elems(max_elems_)
, seed(seed_)
{
@ -129,7 +127,7 @@ public:
String getName() const override { return getNameByTrait<Trait>(); }
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(data_type); }
DataTypePtr getReturnType() const override { return std::make_shared<DataTypeArray>(this->argument_types[0]); }
void insert(Data & a, const T & v, Arena * arena) const
{

View File

@ -168,7 +168,7 @@ public:
{
for (const auto & x : small)
{
if (rb->contains(static_cast<Value>(x.getValue())))
if (r1.rb->contains(static_cast<Value>(x.getValue())))
buffer.push_back(x.getValue());
}
@ -264,7 +264,7 @@ public:
{
for (const auto & x : small)
{
if (rb->contains(static_cast<Value>(x.getValue())))
if (r1.rb->contains(static_cast<Value>(x.getValue())))
++ret;
}
}
@ -419,7 +419,7 @@ public:
if (isSmall())
return small.find(x) != small.end();
else
return rb->contains(x);
return rb->contains(static_cast<Value>(x));
}
/**
@ -613,7 +613,7 @@ public:
/**
* Replace value
*/
void rb_replace(const UInt32 * from_vals, const UInt32 * to_vals, size_t num)
void rb_replace(const UInt64 * from_vals, const UInt64 * to_vals, size_t num)
{
if (isSmall())
toLarge();
@ -622,9 +622,9 @@ public:
{
if (from_vals[i] == to_vals[i])
continue;
bool changed = rb->removeChecked(from_vals[i]);
bool changed = rb->removeChecked(static_cast<Value>(from_vals[i]));
if (changed)
rb->add(to_vals[i]);
rb->add(static_cast<Value>(to_vals[i]));
}
}
};

View File

@ -56,7 +56,7 @@ public:
DataTypePtr getReturnType() const override
{
return std::make_shared<DataTypeArray>(std::make_shared<DataTypeNumber<T>>());
return std::make_shared<DataTypeArray>(this->argument_types[0]);
}
void add(AggregateDataPtr place, const IColumn ** columns, size_t row_num, Arena *) const override

View File

@ -19,12 +19,12 @@ namespace ErrorCodes
extern const int BAD_ARGUMENTS;
}
struct ComparePairFirst final
struct ComparePair final
{
template <typename T1, typename T2>
bool operator()(const std::pair<T1, T2> & lhs, const std::pair<T1, T2> & rhs) const
{
return lhs.first < rhs.first;
return lhs.first == rhs.first ? lhs.second < rhs.second : lhs.first < rhs.first;
}
};
@ -34,7 +34,7 @@ struct AggregateFunctionWindowFunnelData
{
using TimestampEvent = std::pair<T, UInt8>;
using TimestampEvents = PODArrayWithStackMemory<TimestampEvent, 64>;
using Comparator = ComparePairFirst;
using Comparator = ComparePair;
bool sorted = true;
TimestampEvents events_list;
@ -47,8 +47,13 @@ struct AggregateFunctionWindowFunnelData
void add(T timestamp, UInt8 event)
{
// Since most events should have already been sorted by timestamp.
if (sorted && events_list.size() > 0 && events_list.back().first > timestamp)
sorted = false;
if (sorted && events_list.size() > 0)
{
if (events_list.back().first == timestamp)
sorted = events_list.back().second <= event;
else
sorted = events_list.back().first <= timestamp;
}
events_list.emplace_back(timestamp, event);
}

View File

@ -670,4 +670,32 @@ ColumnAggregateFunction::ColumnAggregateFunction(const ColumnAggregateFunction &
{
}
MutableColumnPtr ColumnAggregateFunction::cloneResized(size_t size) const
{
if (size == 0)
return cloneEmpty();
size_t from_size = data.size();
if (size <= from_size)
{
auto res = createView();
auto & res_data = res->data;
res_data.assign(data.begin(), data.begin() + size);
return res;
}
else
{
/// Create a new column to return.
MutableColumnPtr cloned_col = cloneEmpty();
auto * res = typeid_cast<ColumnAggregateFunction *>(cloned_col.get());
res->insertRangeFrom(*this, 0, from_size);
for (size_t i = from_size; i < size; ++i)
res->insertDefault();
return cloned_col;
}
}
}

View File

@ -215,7 +215,7 @@ public:
void getExtremes(Field & min, Field & max) const override;
bool structureEquals(const IColumn &) const override;
MutableColumnPtr cloneResized(size_t size) const override;
};
}

View File

@ -12,6 +12,10 @@
#include <random>
#include <cstdlib>
#ifdef MEMORY_TRACKER_DEBUG_CHECKS
thread_local bool _memory_tracker_always_throw_logical_error_on_allocation = false;
#endif
namespace
{
@ -165,6 +169,14 @@ void MemoryTracker::alloc(Int64 size)
}
}
#ifdef MEMORY_TRACKER_DEBUG_CHECKS
if (unlikely(_memory_tracker_always_throw_logical_error_on_allocation))
{
_memory_tracker_always_throw_logical_error_on_allocation = false;
throw DB::Exception(DB::ErrorCodes::LOGICAL_ERROR, "Memory tracker: allocations not allowed.");
}
#endif
std::bernoulli_distribution fault(fault_probability);
if (unlikely(fault_probability && fault(thread_local_rng)) && memoryTrackerCanThrow(level, true))
{

View File

@ -5,6 +5,28 @@
#include <Common/CurrentMetrics.h>
#include <Common/VariableContext.h>
#if !defined(NDEBUG)
#define MEMORY_TRACKER_DEBUG_CHECKS
#endif
/// DENY_ALLOCATIONS_IN_SCOPE macro makes MemoryTracker throw LOGICAL_ERROR on any allocation attempt
/// until the end of the scope. It's useful to ensure that no allocations happen in signal handlers and
/// outside of try/catch block of thread functions. ALLOW_ALLOCATIONS_IN_SCOPE cancels effect of
/// DENY_ALLOCATIONS_IN_SCOPE in the inner scope. In Release builds these macros do nothing.
#ifdef MEMORY_TRACKER_DEBUG_CHECKS
#include <ext/scope_guard.h>
extern thread_local bool _memory_tracker_always_throw_logical_error_on_allocation;
#define ALLOCATIONS_IN_SCOPE_IMPL_CONCAT(n, val) \
bool _allocations_flag_prev_val##n = _memory_tracker_always_throw_logical_error_on_allocation; \
_memory_tracker_always_throw_logical_error_on_allocation = val; \
SCOPE_EXIT({ _memory_tracker_always_throw_logical_error_on_allocation = _allocations_flag_prev_val##n; })
#define ALLOCATIONS_IN_SCOPE_IMPL(n, val) ALLOCATIONS_IN_SCOPE_IMPL_CONCAT(n, val)
#define DENY_ALLOCATIONS_IN_SCOPE ALLOCATIONS_IN_SCOPE_IMPL(__LINE__, true)
#define ALLOW_ALLOCATIONS_IN_SCOPE ALLOCATIONS_IN_SCOPE_IMPL(__LINE__, false)
#else
#define DENY_ALLOCATIONS_IN_SCOPE static_assert(true)
#define ALLOW_ALLOCATIONS_IN_SCOPE static_assert(true)
#endif
/** Tracks memory consumption.
* It throws an exception if amount of consumed memory become greater than certain limit.

View File

@ -45,9 +45,6 @@
M(CreatedReadBufferAIOFailed, "") \
M(CreatedReadBufferMMap, "") \
M(CreatedReadBufferMMapFailed, "") \
M(CreatedWriteBufferOrdinary, "") \
M(CreatedWriteBufferAIO, "") \
M(CreatedWriteBufferAIOFailed, "") \
M(DiskReadElapsedMicroseconds, "Total time spent waiting for read syscall. This includes reads from page cache.") \
M(DiskWriteElapsedMicroseconds, "Total time spent waiting for write syscall. This includes writes to page cache.") \
M(NetworkReceiveElapsedMicroseconds, "") \

View File

@ -181,6 +181,7 @@ QueryProfilerReal::QueryProfilerReal(const UInt64 thread_id, const UInt32 period
void QueryProfilerReal::signalHandler(int sig, siginfo_t * info, void * context)
{
DENY_ALLOCATIONS_IN_SCOPE;
writeTraceInfo(TraceType::Real, sig, info, context);
}
@ -190,6 +191,7 @@ QueryProfilerCpu::QueryProfilerCpu(const UInt64 thread_id, const UInt32 period)
void QueryProfilerCpu::signalHandler(int sig, siginfo_t * info, void * context)
{
DENY_ALLOCATIONS_IN_SCOPE;
writeTraceInfo(TraceType::CPU, sig, info, context);
}

View File

@ -197,6 +197,7 @@ static void injection(
void ThreadFuzzer::signalHandler(int)
{
DENY_ALLOCATIONS_IN_SCOPE;
auto saved_errno = errno;
auto & fuzzer = ThreadFuzzer::instance();

View File

@ -208,6 +208,7 @@ size_t ThreadPoolImpl<Thread>::active() const
template <typename Thread>
void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_it)
{
DENY_ALLOCATIONS_IN_SCOPE;
CurrentMetrics::Increment metric_all_threads(
std::is_same_v<Thread, std::thread> ? CurrentMetrics::GlobalThread : CurrentMetrics::LocalThread);
@ -223,7 +224,9 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
if (!jobs.empty())
{
job = std::move(jobs.top().job);
/// std::priority_queue does not provide an interface for getting a non-const reference to an element
/// in order to prevent us from modifying its priority. We have to use const_cast to force move semantics on JobWithPriority::job.
job = std::move(const_cast<Job &>(jobs.top().job));
jobs.pop();
}
else
@ -237,6 +240,7 @@ void ThreadPoolImpl<Thread>::worker(typename std::list<Thread>::iterator thread_
{
try
{
ALLOW_ALLOCATIONS_IN_SCOPE;
CurrentMetrics::Increment metric_active_threads(
std::is_same_v<Thread, std::thread> ? CurrentMetrics::GlobalThreadActive : CurrentMetrics::LocalThreadActive);

View File

@ -65,6 +65,7 @@ void ZooKeeper::init(const std::string & implementation_, const std::string & ho
Coordination::ZooKeeper::Nodes nodes;
nodes.reserve(hosts_strings.size());
bool dns_error = false;
for (auto & host_string : hosts_strings)
{
try
@ -76,14 +77,27 @@ void ZooKeeper::init(const std::string & implementation_, const std::string & ho
nodes.emplace_back(Coordination::ZooKeeper::Node{Poco::Net::SocketAddress{host_string}, secure});
}
catch (const Poco::Net::HostNotFoundException & e)
{
/// Most likely it's a misconfiguration and a wrong hostname was specified
LOG_ERROR(log, "Cannot use ZooKeeper host {}, reason: {}", host_string, e.displayText());
}
catch (const Poco::Net::DNSException & e)
{
LOG_ERROR(log, "Cannot use ZooKeeper host {}, reason: {}", host_string, e.displayText());
/// Most likely DNS is not available now
dns_error = true;
LOG_ERROR(log, "Cannot use ZooKeeper host {} due to DNS error: {}", host_string, e.displayText());
}
}
if (nodes.empty())
throw KeeperException("Cannot use any of provided ZooKeeper nodes", Coordination::Error::ZBADARGUMENTS);
{
/// For DNS errors we throw an exception with the ZCONNECTIONLOSS code, so it will be considered a hardware error, not a user error
if (dns_error)
throw KeeperException("Cannot resolve any of provided ZooKeeper hosts due to DNS error", Coordination::Error::ZCONNECTIONLOSS);
else
throw KeeperException("Cannot use any of provided ZooKeeper nodes", Coordination::Error::ZBADARGUMENTS);
}
impl = std::make_unique<Coordination::ZooKeeper>(
nodes,

View File

@ -45,7 +45,7 @@ bool CachedCompressedReadBuffer::nextImpl()
size_t size_decompressed;
size_t size_compressed_without_checksum;
owned_cell->compressed_size = readCompressedData(size_decompressed, size_compressed_without_checksum);
owned_cell->compressed_size = readCompressedData(size_decompressed, size_compressed_without_checksum, false);
if (owned_cell->compressed_size)
{

View File

@ -0,0 +1,24 @@
#include <Compression/CheckingCompressedReadBuffer.h>
namespace DB
{
bool CheckingCompressedReadBuffer::nextImpl()
{
size_t size_decompressed;
size_t size_compressed_without_checksum;
size_t size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum, true);
if (!size_compressed)
return false;
/// own_compressed_buffer also includes getAdditionalSizeAtTheEndOfBuffer()
/// which should not be accounted for here, so size_compressed is used.
///
/// And BufferBase is used over ReadBuffer, since the former resets the working_buffer.
BufferBase::set(own_compressed_buffer.data(), size_compressed, 0);
return true;
}
}

View File

@ -0,0 +1,27 @@
#pragma once
#include <Compression/CompressedReadBufferBase.h>
#include <IO/BufferWithOwnMemory.h>
#include <IO/ReadBuffer.h>
namespace DB
{
/** A buffer for reading from a compressed file with just checking checksums of
* the compressed blocks, without any decompression.
*/
class CheckingCompressedReadBuffer : public CompressedReadBufferBase, public ReadBuffer
{
protected:
bool nextImpl() override;
public:
CheckingCompressedReadBuffer(ReadBuffer & in_, bool allow_different_codecs_ = false)
: CompressedReadBufferBase(&in_, allow_different_codecs_)
, ReadBuffer(nullptr, 0)
{
}
};
}

View File

@ -9,7 +9,7 @@ bool CompressedReadBuffer::nextImpl()
{
size_t size_decompressed;
size_t size_compressed_without_checksum;
size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum);
size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum, false);
if (!size_compressed)
return false;
@ -40,7 +40,7 @@ size_t CompressedReadBuffer::readBig(char * to, size_t n)
size_t size_decompressed;
size_t size_compressed_without_checksum;
if (!readCompressedData(size_decompressed, size_compressed_without_checksum))
if (!readCompressedData(size_decompressed, size_compressed_without_checksum, false))
return bytes_read;
auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer();

View File

@ -105,19 +105,18 @@ static void validateChecksum(char * data, size_t size, const Checksum expected_c
/// Read compressed data into compressed_buffer. Get size of decompressed data from block header. Checksum if need.
/// Returns number of compressed bytes read.
size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum)
size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum, bool always_copy)
{
if (compressed_in->eof())
return 0;
Checksum checksum;
compressed_in->readStrict(reinterpret_cast<char *>(&checksum), sizeof(Checksum));
UInt8 header_size = ICompressionCodec::getHeaderSize();
own_compressed_buffer.resize(header_size);
compressed_in->readStrict(own_compressed_buffer.data(), header_size);
own_compressed_buffer.resize(header_size + sizeof(Checksum));
uint8_t method = ICompressionCodec::readMethod(own_compressed_buffer.data());
compressed_in->readStrict(own_compressed_buffer.data(), sizeof(Checksum) + header_size);
char * compressed_header = own_compressed_buffer.data() + sizeof(Checksum);
uint8_t method = ICompressionCodec::readMethod(compressed_header);
if (!codec)
{
@ -139,8 +138,8 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
}
}
size_compressed_without_checksum = ICompressionCodec::readCompressedBlockSize(own_compressed_buffer.data());
size_decompressed = ICompressionCodec::readDecompressedBlockSize(own_compressed_buffer.data());
size_compressed_without_checksum = ICompressionCodec::readCompressedBlockSize(compressed_header);
size_decompressed = ICompressionCodec::readDecompressedBlockSize(compressed_header);
/// This is for clang static analyzer.
assert(size_decompressed > 0);
@ -160,8 +159,9 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer();
/// Is whole compressed block located in 'compressed_in->' buffer?
if (compressed_in->offset() >= header_size &&
compressed_in->position() + size_compressed_without_checksum + additional_size_at_the_end_of_buffer - header_size <= compressed_in->buffer().end())
if (!always_copy &&
compressed_in->offset() >= header_size + sizeof(Checksum) &&
compressed_in->available() >= (size_compressed_without_checksum - header_size) + additional_size_at_the_end_of_buffer + sizeof(Checksum))
{
compressed_in->position() -= header_size;
compressed_buffer = compressed_in->position();
@ -169,13 +169,16 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
}
else
{
own_compressed_buffer.resize(size_compressed_without_checksum + additional_size_at_the_end_of_buffer);
compressed_buffer = own_compressed_buffer.data();
own_compressed_buffer.resize(sizeof(Checksum) + size_compressed_without_checksum + additional_size_at_the_end_of_buffer);
compressed_buffer = own_compressed_buffer.data() + sizeof(Checksum);
compressed_in->readStrict(compressed_buffer + header_size, size_compressed_without_checksum - header_size);
}
if (!disable_checksum)
{
Checksum & checksum = *reinterpret_cast<Checksum *>(own_compressed_buffer.data());
validateChecksum(compressed_buffer, size_compressed_without_checksum, checksum);
}
return size_compressed_without_checksum + sizeof(Checksum);
}

View File

@ -30,8 +30,12 @@ protected:
bool allow_different_codecs;
/// Read compressed data into compressed_buffer. Get size of decompressed data from block header. Checksum if need.
///
/// If always_copy is true then even if the compressed block is already stored in compressed_in.buffer() it will be copied into own_compressed_buffer.
/// This is required for CheckingCompressedReadBuffer, since this is just a proxy.
///
/// Returns number of compressed bytes read.
size_t readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum);
size_t readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum, bool always_copy);
void decompress(char * to, size_t size_decompressed, size_t size_compressed_without_checksum);

View File

@ -19,7 +19,7 @@ bool CompressedReadBufferFromFile::nextImpl()
{
size_t size_decompressed = 0;
size_t size_compressed_without_checksum;
size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum);
size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum, false);
if (!size_compressed)
return false;
@ -98,7 +98,7 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n)
size_t size_decompressed = 0;
size_t size_compressed_without_checksum = 0;
size_t new_size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum);
size_t new_size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum, false);
size_compressed = 0; /// file_in no longer points to the end of the block in working_buffer.
if (!new_size_compressed)
return bytes_read;

View File

@ -38,7 +38,15 @@ UInt32 CompressionCodecZSTD::getMaxCompressedDataSize(UInt32 uncompressed_size)
UInt32 CompressionCodecZSTD::doCompressData(const char * source, UInt32 source_size, char * dest) const
{
size_t compressed_size = ZSTD_compress(dest, ZSTD_compressBound(source_size), source, source_size, level);
ZSTD_CCtx * cctx = ZSTD_createCCtx();
ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, level);
if (enable_long_range)
{
ZSTD_CCtx_setParameter(cctx, ZSTD_c_enableLongDistanceMatching, 1);
ZSTD_CCtx_setParameter(cctx, ZSTD_c_windowLog, window_log); // NB zero window_log means "use default" for libzstd
}
size_t compressed_size = ZSTD_compress2(cctx, dest, ZSTD_compressBound(source_size), source, source_size);
ZSTD_freeCCtx(cctx);
if (ZSTD_isError(compressed_size))
throw Exception("Cannot compress block with ZSTD: " + std::string(ZSTD_getErrorName(compressed_size)), ErrorCodes::CANNOT_COMPRESS);
@ -55,8 +63,13 @@ void CompressionCodecZSTD::doDecompressData(const char * source, UInt32 source_s
throw Exception("Cannot ZSTD_decompress: " + std::string(ZSTD_getErrorName(res)), ErrorCodes::CANNOT_DECOMPRESS);
}
CompressionCodecZSTD::CompressionCodecZSTD(int level_)
: level(level_)
CompressionCodecZSTD::CompressionCodecZSTD(int level_, int window_log_) : level(level_), enable_long_range(true), window_log(window_log_)
{
setCodecDescription(
"ZSTD", {std::make_shared<ASTLiteral>(static_cast<UInt64>(level)), std::make_shared<ASTLiteral>(static_cast<UInt64>(window_log))});
}
CompressionCodecZSTD::CompressionCodecZSTD(int level_) : level(level_), enable_long_range(false), window_log(0)
{
setCodecDescription("ZSTD", {std::make_shared<ASTLiteral>(static_cast<UInt64>(level))});
}
@ -64,13 +77,14 @@ CompressionCodecZSTD::CompressionCodecZSTD(int level_)
void registerCodecZSTD(CompressionCodecFactory & factory)
{
UInt8 method_code = UInt8(CompressionMethodByte::ZSTD);
factory.registerCompressionCodec("ZSTD", method_code, [&](const ASTPtr & arguments) -> CompressionCodecPtr
{
factory.registerCompressionCodec("ZSTD", method_code, [&](const ASTPtr & arguments) -> CompressionCodecPtr {
int level = CompressionCodecZSTD::ZSTD_DEFAULT_LEVEL;
if (arguments && !arguments->children.empty())
{
if (arguments->children.size() > 1)
throw Exception("ZSTD codec must have 1 parameter, given " + std::to_string(arguments->children.size()), ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE);
if (arguments->children.size() > 2)
throw Exception(
"ZSTD codec must have 1 or 2 parameters, given " + std::to_string(arguments->children.size()),
ErrorCodes::ILLEGAL_SYNTAX_FOR_CODEC_TYPE);
const auto children = arguments->children;
const auto * literal = children[0]->as<ASTLiteral>();
@ -79,9 +93,32 @@ void registerCodecZSTD(CompressionCodecFactory & factory)
level = literal->value.safeGet<UInt64>();
if (level > ZSTD_maxCLevel())
throw Exception("ZSTD codec can't have level more that " + toString(ZSTD_maxCLevel()) + ", given " + toString(level), ErrorCodes::ILLEGAL_CODEC_PARAMETER);
}
throw Exception(
"ZSTD codec can't have level more than " + toString(ZSTD_maxCLevel()) + ", given " + toString(level),
ErrorCodes::ILLEGAL_CODEC_PARAMETER);
if (arguments->children.size() > 1)
{
const auto * window_literal = children[1]->as<ASTLiteral>();
if (!window_literal)
throw Exception("ZSTD codec second argument must be integer", ErrorCodes::ILLEGAL_CODEC_PARAMETER);
const int window_log = window_literal->value.safeGet<UInt64>();
ZSTD_bounds window_log_bounds = ZSTD_cParam_getBounds(ZSTD_c_windowLog);
if (ZSTD_isError(window_log_bounds.error))
throw Exception(
"ZSTD windowLog parameter is not supported " + std::string(ZSTD_getErrorName(window_log_bounds.error)),
ErrorCodes::ILLEGAL_CODEC_PARAMETER);
// 0 means "use default" for libzstd
if (window_log != 0 && (window_log > window_log_bounds.upperBound || window_log < window_log_bounds.lowerBound))
throw Exception(
"ZSTD codec can't have window log more than " + toString(window_log_bounds.upperBound) + " and lower than "
+ toString(window_log_bounds.lowerBound) + ", given " + toString(window_log),
ErrorCodes::ILLEGAL_CODEC_PARAMETER);
return std::make_shared<CompressionCodecZSTD>(level, window_log);
}
}
return std::make_shared<CompressionCodecZSTD>(level);
});
}
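The second codec argument handled above is the window log used for long-range matching. A hedged SQL sketch of how such a codec could be declared on a column (the table name and parameter values are illustrative):

```sql
CREATE TABLE zstd_long_range
(
    payload String CODEC(ZSTD(3, 24))
)
ENGINE = MergeTree()
ORDER BY tuple();
```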

View File

@ -12,9 +12,12 @@ class CompressionCodecZSTD : public ICompressionCodec
{
public:
static constexpr auto ZSTD_DEFAULT_LEVEL = 1;
static constexpr auto ZSTD_DEFAULT_LOG_WINDOW = 24;
CompressionCodecZSTD(int level_);
CompressionCodecZSTD(int level_, int window_log);
uint8_t getMethodByte() const override;
UInt32 getMaxCompressedDataSize(UInt32 uncompressed_size) const override;
@ -32,6 +35,8 @@ protected:
private:
const int level;
const bool enable_long_range;
const int window_log;
};
}

View File

@ -17,6 +17,7 @@ PEERDIR(
SRCS(
CachedCompressedReadBuffer.cpp
CheckingCompressedReadBuffer.cpp
CompressedReadBuffer.cpp
CompressedReadBufferBase.cpp
CompressedReadBufferFromFile.cpp

View File

@ -371,8 +371,9 @@ class IColumn;
M(Bool, database_atomic_wait_for_drop_and_detach_synchronously, false, "When executing DROP or DETACH TABLE in Atomic database, wait for table data to be finally dropped or detached.", 0) \
M(Bool, enable_scalar_subquery_optimization, true, "If it is set to true, prevent scalar subqueries from (de)serializing large scalar values and possibly avoid running the same subquery more than once.", 0) \
M(Bool, optimize_trivial_count_query, true, "Process trivial 'SELECT count() FROM table' query from metadata.", 0) \
M(Bool, optimize_respect_aliases, true, "If it is set to true, it will respect aliases in WHERE/GROUP BY/ORDER BY, that will help with partition pruning/secondary indexes/optimize_aggregation_in_order/optimize_read_in_order/optimize_trivial_count", 0) \
M(UInt64, mutations_sync, 0, "Wait for synchronous execution of ALTER TABLE UPDATE/DELETE queries (mutations). 0 - execute asynchronously. 1 - wait current server. 2 - wait all replicas if they exist.", 0) \
M(Bool, optimize_move_functions_out_of_any, true, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \
M(Bool, optimize_move_functions_out_of_any, false, "Move functions out of aggregate functions 'any', 'anyLast'.", 0) \
M(Bool, optimize_injective_functions_inside_uniq, true, "Delete injective functions of one argument inside uniq*() functions.", 0) \
M(Bool, optimize_arithmetic_operations_in_aggregate_functions, true, "Move arithmetic operations out of aggregation functions", 0) \
M(Bool, optimize_duplicate_order_by_and_distinct, true, "Remove duplicate ORDER BY and DISTINCT if it's possible", 0) \
@ -404,13 +405,13 @@ class IColumn;
M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \
M(Bool, enable_global_with_statement, false, "Propagate WITH statements to UNION queries and all subqueries", 0) \
M(Bool, aggregate_functions_null_for_empty, false, "Rewrite all aggregate functions in a query, adding -OrNull suffix to them", 0) \
M(Bool, asterisk_include_materialized_columns, false, "Include MATERIALIZED columns for wildcard query", 0) \
M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \
M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \
M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \
M(Bool, allow_experimental_map_type, false, "Allow data type Map", 0) \
M(Bool, allow_experimental_window_functions, false, "Allow experimental window functions", 0) \
\
M(Bool, use_antlr_parser, false, "Parse incoming queries using ANTLR-generated parser", 0) \
\
M(Bool, use_antlr_parser, false, "Parse incoming queries using ANTLR-generated experimental parser", 0) \
M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \
\
/** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \

View File

@ -61,9 +61,12 @@ Block ColumnGathererStream::readImpl()
MutableColumnPtr output_column = column.column->cloneEmpty();
output_block = Block{column.cloneEmpty()};
/// Surprisingly this call may directly change output_block, bypassing
/// output_column. See ColumnGathererStream::gather.
output_column->gather(*this);
if (!output_column->empty())
output_block.getByPosition(0).column = std::move(output_column);
return output_block;
}

View File

@ -1,10 +1,9 @@
#pragma once
#include <Databases/DatabasesCommon.h>
#include <Core/BackgroundSchedulePool.h>
#include <Databases/DatabaseOrdinary.h>
namespace DB
{

View File

@ -7,7 +7,6 @@
#if USE_MYSQL
# include <mutex>
# include <Core/BackgroundSchedulePool.h>
# include <Core/MySQL/MySQLClient.h>
# include <DataStreams/BlockIO.h>
# include <DataTypes/DataTypeString.h>

View File

@ -21,6 +21,8 @@
#include <ext/bit_cast.h>
#include <filesystem>
#include <city.h>
#include <fcntl.h>
namespace ProfileEvents
{

View File

@ -16,7 +16,6 @@
#include <Core/Block.h>
#include <Dictionaries/BucketCache.h>
#include <IO/HashingWriteBuffer.h>
#include <IO/WriteBufferAIO.h>
#include <list>
#include <pcg_random.hpp>
#include <Poco/Logger.h>

View File

@ -22,6 +22,8 @@
#include <numeric>
#include <filesystem>
#include <city.h>
#include <fcntl.h>
namespace ProfileEvents
{

View File

@ -19,7 +19,6 @@
#include <Dictionaries/BucketCache.h>
#include <ext/scope_guard.h>
#include <IO/HashingWriteBuffer.h>
#include <IO/WriteBufferAIO.h>
#include <list>
#include <pcg_random.hpp>
#include <Poco/Logger.h>

View File

@ -146,7 +146,7 @@ DiskCacheWrapper::readFile(const String & path, size_t buf_size, size_t estimate
auto tmp_path = path + ".tmp";
{
auto src_buffer = DiskDecorator::readFile(path, buf_size, estimated_size, aio_threshold, mmap_threshold);
auto dst_buffer = cache_disk->writeFile(tmp_path, buf_size, WriteMode::Rewrite, estimated_size, aio_threshold);
auto dst_buffer = cache_disk->writeFile(tmp_path, buf_size, WriteMode::Rewrite);
copyData(*src_buffer, *dst_buffer);
}
cache_disk->moveFile(tmp_path, path);
@ -175,10 +175,10 @@ DiskCacheWrapper::readFile(const String & path, size_t buf_size, size_t estimate
}
std::unique_ptr<WriteBufferFromFileBase>
DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode, size_t estimated_size, size_t aio_threshold)
DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode)
{
if (!cache_file_predicate(path))
return DiskDecorator::writeFile(path, buf_size, mode, estimated_size, aio_threshold);
return DiskDecorator::writeFile(path, buf_size, mode);
LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Write file {} to cache", backQuote(path));
@ -187,12 +187,12 @@ DiskCacheWrapper::writeFile(const String & path, size_t buf_size, WriteMode mode
cache_disk->createDirectories(dir_path);
return std::make_unique<CompletionAwareWriteBuffer>(
cache_disk->writeFile(path, buf_size, mode, estimated_size, aio_threshold),
[this, path, buf_size, mode, estimated_size, aio_threshold]()
cache_disk->writeFile(path, buf_size, mode),
[this, path, buf_size, mode]()
{
/// Copy file from cache to actual disk when cached buffer is finalized.
auto src_buffer = cache_disk->readFile(path, buf_size, estimated_size, aio_threshold, 0);
auto dst_buffer = DiskDecorator::writeFile(path, buf_size, mode, estimated_size, aio_threshold);
auto src_buffer = cache_disk->readFile(path, buf_size, 0, 0, 0);
auto dst_buffer = DiskDecorator::writeFile(path, buf_size, mode);
copyData(*src_buffer, *dst_buffer);
dst_buffer->finalize();
},

View File

@ -36,7 +36,7 @@ public:
std::unique_ptr<ReadBufferFromFileBase>
readFile(const String & path, size_t buf_size, size_t estimated_size, size_t aio_threshold, size_t mmap_threshold) const override;
std::unique_ptr<WriteBufferFromFileBase>
writeFile(const String & path, size_t buf_size, WriteMode mode, size_t estimated_size, size_t aio_threshold) override;
writeFile(const String & path, size_t buf_size, WriteMode mode) override;
void remove(const String & path) override;
void removeRecursive(const String & path) override;
void createHardLink(const String & src_path, const String & dst_path) override;

View File

@ -125,9 +125,9 @@ DiskDecorator::readFile(const String & path, size_t buf_size, size_t estimated_s
}
std::unique_ptr<WriteBufferFromFileBase>
DiskDecorator::writeFile(const String & path, size_t buf_size, WriteMode mode, size_t estimated_size, size_t aio_threshold)
DiskDecorator::writeFile(const String & path, size_t buf_size, WriteMode mode)
{
return delegate->writeFile(path, buf_size, mode, estimated_size, aio_threshold);
return delegate->writeFile(path, buf_size, mode);
}
void DiskDecorator::remove(const String & path)

View File

@ -38,7 +38,7 @@ public:
std::unique_ptr<ReadBufferFromFileBase>
readFile(const String & path, size_t buf_size, size_t estimated_size, size_t aio_threshold, size_t mmap_threshold) const override;
std::unique_ptr<WriteBufferFromFileBase>
writeFile(const String & path, size_t buf_size, WriteMode mode, size_t estimated_size, size_t aio_threshold) override;
writeFile(const String & path, size_t buf_size, WriteMode mode) override;
void remove(const String & path) override;
void removeRecursive(const String & path) override;
void setLastModified(const String & path, const Poco::Timestamp & timestamp) override;

View File

@ -7,7 +7,6 @@
#include <Common/quoteString.h>
#include <IO/createReadBufferFromFileBase.h>
#include <IO/createWriteBufferFromFileBase.h>
#include <common/logger_useful.h>
#include <unistd.h>
@ -232,10 +231,10 @@ DiskLocal::readFile(const String & path, size_t buf_size, size_t estimated_size,
}
std::unique_ptr<WriteBufferFromFileBase>
DiskLocal::writeFile(const String & path, size_t buf_size, WriteMode mode, size_t estimated_size, size_t aio_threshold)
DiskLocal::writeFile(const String & path, size_t buf_size, WriteMode mode)
{
int flags = (mode == WriteMode::Append) ? (O_APPEND | O_CREAT | O_WRONLY) : -1;
return createWriteBufferFromFileBase(disk_path + path, estimated_size, aio_threshold, buf_size, flags);
return std::make_unique<WriteBufferFromFile>(disk_path + path, buf_size, flags);
}
void DiskLocal::remove(const String & path)

View File

@ -83,9 +83,7 @@ public:
std::unique_ptr<WriteBufferFromFileBase> writeFile(
const String & path,
size_t buf_size,
WriteMode mode,
size_t estimated_size,
size_t aio_threshold) override;
WriteMode mode) override;
void remove(const String & path) override;

View File

@ -330,7 +330,7 @@ std::unique_ptr<ReadBufferFromFileBase> DiskMemory::readFile(const String & path
return std::make_unique<ReadIndirectBuffer>(path, iter->second.data);
}
std::unique_ptr<WriteBufferFromFileBase> DiskMemory::writeFile(const String & path, size_t buf_size, WriteMode mode, size_t, size_t)
std::unique_ptr<WriteBufferFromFileBase> DiskMemory::writeFile(const String & path, size_t buf_size, WriteMode mode)
{
std::lock_guard lock(mutex);

View File

@ -74,9 +74,7 @@ public:
std::unique_ptr<WriteBufferFromFileBase> writeFile(
const String & path,
size_t buf_size,
WriteMode mode,
size_t estimated_size,
size_t aio_threshold) override;
WriteMode mode) override;
void remove(const String & path) override;

Some files were not shown because too many files have changed in this diff.