Merge remote-tracking branch 'origin/master' into tmp

Alexander Kuzmenkov 2021-01-18 23:23:49 +03:00
commit d0922e2985
354 changed files with 25173 additions and 16006 deletions

6
.gitmodules vendored

@ -209,6 +209,12 @@
[submodule "contrib/fast_float"]
path = contrib/fast_float
url = https://github.com/fastfloat/fast_float
[submodule "contrib/libpqxx"]
path = contrib/libpqxx
url = https://github.com/jtv/libpqxx
[submodule "contrib/libpq"]
path = contrib/libpq
url = https://github.com/ClickHouse-Extras/libpq
[submodule "contrib/boringssl"]
path = contrib/boringssl
url = https://github.com/ClickHouse-Extras/boringssl.git

File diff suppressed because it is too large


@ -490,6 +490,7 @@ include (cmake/find/rapidjson.cmake)
include (cmake/find/fastops.cmake)
include (cmake/find/odbc.cmake)
include (cmake/find/rocksdb.cmake)
include (cmake/find/libpqxx.cmake)
include (cmake/find/nuraft.cmake)


@ -17,3 +17,4 @@ ClickHouse® is an open-source column-oriented database management system that a
## Upcoming Events
* [SF Bay Area ClickHouse Virtual Office Hours (online)](https://www.meetup.com/San-Francisco-Bay-Area-ClickHouse-Meetup/events/274273549/) on 20 January 2021.
* [Chinese ClickHouse Meetup (online)](http://hdxu.cn/8KxZE) on 6 February 2021.


@ -4,6 +4,12 @@
#include "syscall.h"
#include "atomic.h"
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
#include <sanitizer/msan_interface.h>
#endif
#endif
#ifdef VDSO_GETCPU_SYM
static void *volatile vdso_func;
@ -37,6 +43,13 @@ int sched_getcpu(void)
#endif
r = __syscall(SYS_getcpu, &cpu, 0, 0);
if (!r) return cpu;
if (!r) {
#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
__msan_unpoison(&cpu, sizeof(cpu));
#endif
#endif
return cpu;
}
return __syscall_ret(r);
}

31
cmake/find/libpqxx.cmake Normal file

@ -0,0 +1,31 @@
option(ENABLE_LIBPQXX "Enable libpqxx" ${ENABLE_LIBRARIES})
if (NOT ENABLE_LIBPQXX)
return()
endif()
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libpqxx/CMakeLists.txt")
message (WARNING "submodule contrib/libpqxx is missing. To fix, try running: \n git submodule update --init --recursive")
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libpqxx library")
set (USE_LIBPQXX 0)
return()
endif()
if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/libpq/include")
message (WARNING "submodule contrib/libpq is missing. To fix, try running: \n git submodule update --init --recursive")
message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find internal libpq needed for libpqxx")
set (USE_LIBPQXX 0)
return()
endif()
if (NOT USE_INTERNAL_SSL_LIBRARY)
set (USE_LIBPQXX 0)
else ()
set (USE_LIBPQXX 1)
set (LIBPQXX_LIBRARY libpqxx)
set (LIBPQ_LIBRARY libpq)
set (LIBPQXX_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/libpqxx/include")
set (LIBPQ_ROOT_DIR "${ClickHouse_SOURCE_DIR}/contrib/libpq")
message (STATUS "Using libpqxx=${USE_LIBPQXX}: ${LIBPQXX_INCLUDE_DIR} : ${LIBPQXX_LIBRARY}")
message (STATUS "Using libpq: ${LIBPQ_ROOT_DIR} : ${LIBPQ_INCLUDE_DIR} : ${LIBPQ_LIBRARY}")
endif()


@ -119,12 +119,6 @@ if (USE_INTERNAL_LDAP_LIBRARY)
add_subdirectory (openldap-cmake)
endif ()
# Should go before:
# - mariadb-connector-c
# - aws-s3-cmake
# - sentry-native
add_subdirectory (curl-cmake)
function(mysql_support)
set(CLIENT_PLUGIN_CACHING_SHA2_PASSWORD STATIC)
set(CLIENT_PLUGIN_SHA256_PASSWORD STATIC)
@ -142,6 +136,7 @@ function(mysql_support)
set(ZLIB_LIBRARY ${ZLIB_LIBRARIES})
set(WITH_EXTERNAL_ZLIB ON)
endif()
set(WITH_CURL OFF)
add_subdirectory (mariadb-connector-c)
endfunction()
if (ENABLE_MYSQL AND USE_INTERNAL_MYSQL_LIBRARY)
@ -288,6 +283,10 @@ if (USE_CASSANDRA)
add_subdirectory (cassandra)
endif()
# Should go before:
# - sentry-native
add_subdirectory (curl-cmake)
if (USE_SENTRY)
add_subdirectory (sentry-native)
endif()
@ -310,6 +309,11 @@ if (USE_INTERNAL_ROCKSDB_LIBRARY)
add_subdirectory(rocksdb-cmake)
endif()
if (USE_LIBPQXX)
add_subdirectory (libpq-cmake)
add_subdirectory (libpqxx-cmake)
endif()
if (USE_NURAFT)
add_subdirectory(nuraft-cmake)
endif()

2
contrib/dragonbox vendored

@ -1 +1 @@
Subproject commit b2751c65c0592c0239aec3becd53d0ea2fde9329
Subproject commit 923705af6fd953aa948fc175f6020b15f7359838

2
contrib/krb5 vendored

@ -1 +1 @@
Subproject commit 90ff6f4f8c695d6bf1aaba78a9b8942be92141c2
Subproject commit 5149dea4e2be0f67707383d2682b897c14631374

1
contrib/libpq vendored Submodule

@ -0,0 +1 @@
Subproject commit 8e7e905854714a7fbb49c124dbc45c7bd4b98e07


@ -0,0 +1,58 @@
set(LIBPQ_SOURCE_DIR ${ClickHouse_SOURCE_DIR}/contrib/libpq)
set(SRCS
${LIBPQ_SOURCE_DIR}/fe-auth.c
${LIBPQ_SOURCE_DIR}/fe-auth-scram.c
${LIBPQ_SOURCE_DIR}/fe-connect.c
${LIBPQ_SOURCE_DIR}/fe-exec.c
${LIBPQ_SOURCE_DIR}/fe-lobj.c
${LIBPQ_SOURCE_DIR}/fe-misc.c
${LIBPQ_SOURCE_DIR}/fe-print.c
${LIBPQ_SOURCE_DIR}/fe-protocol2.c
${LIBPQ_SOURCE_DIR}/fe-protocol3.c
${LIBPQ_SOURCE_DIR}/fe-secure.c
${LIBPQ_SOURCE_DIR}/fe-secure-common.c
${LIBPQ_SOURCE_DIR}/fe-secure-openssl.c
${LIBPQ_SOURCE_DIR}/legacy-pqsignal.c
${LIBPQ_SOURCE_DIR}/libpq-events.c
${LIBPQ_SOURCE_DIR}/pqexpbuffer.c
${LIBPQ_SOURCE_DIR}/common/scram-common.c
${LIBPQ_SOURCE_DIR}/common/sha2_openssl.c
${LIBPQ_SOURCE_DIR}/common/md5.c
${LIBPQ_SOURCE_DIR}/common/saslprep.c
${LIBPQ_SOURCE_DIR}/common/unicode_norm.c
${LIBPQ_SOURCE_DIR}/common/ip.c
${LIBPQ_SOURCE_DIR}/common/jsonapi.c
${LIBPQ_SOURCE_DIR}/common/wchar.c
${LIBPQ_SOURCE_DIR}/common/base64.c
${LIBPQ_SOURCE_DIR}/common/link-canary.c
${LIBPQ_SOURCE_DIR}/common/fe_memutils.c
${LIBPQ_SOURCE_DIR}/common/string.c
${LIBPQ_SOURCE_DIR}/common/pg_get_line.c
${LIBPQ_SOURCE_DIR}/common/stringinfo.c
${LIBPQ_SOURCE_DIR}/common/psprintf.c
${LIBPQ_SOURCE_DIR}/common/encnames.c
${LIBPQ_SOURCE_DIR}/common/logging.c
${LIBPQ_SOURCE_DIR}/port/snprintf.c
${LIBPQ_SOURCE_DIR}/port/strlcpy.c
${LIBPQ_SOURCE_DIR}/port/strerror.c
${LIBPQ_SOURCE_DIR}/port/inet_net_ntop.c
${LIBPQ_SOURCE_DIR}/port/getpeereid.c
${LIBPQ_SOURCE_DIR}/port/chklocale.c
${LIBPQ_SOURCE_DIR}/port/noblock.c
${LIBPQ_SOURCE_DIR}/port/pg_strong_random.c
${LIBPQ_SOURCE_DIR}/port/pgstrcasecmp.c
${LIBPQ_SOURCE_DIR}/port/thread.c
${LIBPQ_SOURCE_DIR}/port/path.c
${LIBPQ_SOURCE_DIR}/port/explicit_bzero.c
)
add_library(libpq ${SRCS})
target_include_directories (libpq PUBLIC ${LIBPQ_SOURCE_DIR})
target_include_directories (libpq PUBLIC ${LIBPQ_SOURCE_DIR}/include)
target_include_directories (libpq PRIVATE ${LIBPQ_SOURCE_DIR}/configs)
target_link_libraries (libpq PRIVATE ssl)

1
contrib/libpqxx vendored Submodule

@ -0,0 +1 @@
Subproject commit 58d2a028d1600225ac3a478d6b3a06ba2f0c01f6


@ -0,0 +1,78 @@
set (LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/libpqxx)
set (SRCS
${LIBRARY_DIR}/src/strconv.cxx
${LIBRARY_DIR}/src/array.cxx
${LIBRARY_DIR}/src/binarystring.cxx
${LIBRARY_DIR}/src/connection.cxx
${LIBRARY_DIR}/src/cursor.cxx
${LIBRARY_DIR}/src/encodings.cxx
${LIBRARY_DIR}/src/errorhandler.cxx
${LIBRARY_DIR}/src/except.cxx
${LIBRARY_DIR}/src/field.cxx
${LIBRARY_DIR}/src/largeobject.cxx
${LIBRARY_DIR}/src/notification.cxx
${LIBRARY_DIR}/src/pipeline.cxx
${LIBRARY_DIR}/src/result.cxx
${LIBRARY_DIR}/src/robusttransaction.cxx
${LIBRARY_DIR}/src/sql_cursor.cxx
${LIBRARY_DIR}/src/stream_from.cxx
${LIBRARY_DIR}/src/stream_to.cxx
${LIBRARY_DIR}/src/subtransaction.cxx
${LIBRARY_DIR}/src/transaction.cxx
${LIBRARY_DIR}/src/transaction_base.cxx
${LIBRARY_DIR}/src/row.cxx
${LIBRARY_DIR}/src/util.cxx
${LIBRARY_DIR}/src/version.cxx
)
# Need to explicitly include each header file, because the directory include/pqxx also contains files
# named like standard headers (e.g. just 'array'). So including the whole directory with `target_include_directories`
# would conflict with all includes of <array>.
set (HDRS
${LIBRARY_DIR}/include/pqxx/array.hxx
${LIBRARY_DIR}/include/pqxx/binarystring.hxx
${LIBRARY_DIR}/include/pqxx/composite.hxx
${LIBRARY_DIR}/include/pqxx/connection.hxx
${LIBRARY_DIR}/include/pqxx/cursor.hxx
${LIBRARY_DIR}/include/pqxx/dbtransaction.hxx
${LIBRARY_DIR}/include/pqxx/errorhandler.hxx
${LIBRARY_DIR}/include/pqxx/except.hxx
${LIBRARY_DIR}/include/pqxx/field.hxx
${LIBRARY_DIR}/include/pqxx/isolation.hxx
${LIBRARY_DIR}/include/pqxx/largeobject.hxx
${LIBRARY_DIR}/include/pqxx/nontransaction.hxx
${LIBRARY_DIR}/include/pqxx/notification.hxx
${LIBRARY_DIR}/include/pqxx/pipeline.hxx
${LIBRARY_DIR}/include/pqxx/prepared_statement.hxx
${LIBRARY_DIR}/include/pqxx/result.hxx
${LIBRARY_DIR}/include/pqxx/robusttransaction.hxx
${LIBRARY_DIR}/include/pqxx/row.hxx
${LIBRARY_DIR}/include/pqxx/separated_list.hxx
${LIBRARY_DIR}/include/pqxx/strconv.hxx
${LIBRARY_DIR}/include/pqxx/stream_from.hxx
${LIBRARY_DIR}/include/pqxx/stream_to.hxx
${LIBRARY_DIR}/include/pqxx/subtransaction.hxx
${LIBRARY_DIR}/include/pqxx/transaction.hxx
${LIBRARY_DIR}/include/pqxx/transaction_base.hxx
${LIBRARY_DIR}/include/pqxx/types.hxx
${LIBRARY_DIR}/include/pqxx/util.hxx
${LIBRARY_DIR}/include/pqxx/version.hxx
${LIBRARY_DIR}/include/pqxx/zview.hxx
)
add_library(libpqxx ${SRCS} ${HDRS})
target_link_libraries(libpqxx PUBLIC ${LIBPQ_LIBRARY})
target_include_directories (libpqxx PRIVATE ${LIBRARY_DIR}/include)
# crutch
set(CM_CONFIG_H_IN "${LIBRARY_DIR}/include/pqxx/config.h.in")
set(CM_CONFIG_PUB "${LIBRARY_DIR}/include/pqxx/config-public-compiler.h")
set(CM_CONFIG_INT "${LIBRARY_DIR}/include/pqxx/config-internal-compiler.h")
set(CM_CONFIG_PQ "${LIBRARY_DIR}/include/pqxx/config-internal-libpq.h")
configure_file("${CM_CONFIG_H_IN}" "${CM_CONFIG_INT}" @ONLY)
configure_file("${CM_CONFIG_H_IN}" "${CM_CONFIG_PUB}" @ONLY)
configure_file("${CM_CONFIG_H_IN}" "${CM_CONFIG_PQ}" @ONLY)


@ -45,7 +45,8 @@
"name": "yandex/clickhouse-stateless-test",
"dependent": [
"docker/test/stateful",
"docker/test/coverage"
"docker/test/coverage",
"docker/test/unit"
]
},
"docker/test/stateless_pytest": {
@ -134,7 +135,10 @@
"name": "yandex/clickhouse-test-base",
"dependent": [
"docker/test/stateless",
"docker/test/stateless_pytest"
"docker/test/stateless_unbundled",
"docker/test/stateless_pytest",
"docker/test/integration/base",
"docker/test/fuzzer"
]
},
"docker/packager/unbundled": {


@ -4,7 +4,17 @@ ARG repository="deb https://repo.clickhouse.tech/deb/stable/ main/"
ARG version=21.1.0
ARG gosu_ver=1.10
RUN apt-get update \
# user/group precreated explicitly with fixed uid/gid on purpose.
# It is especially important for rootless containers: in that case entrypoint
# can't do chown and owners of mounted volumes should be configured externally.
# We do that in advance at the beginning of the Dockerfile before any packages are
# installed, to prevent some unrelated software from picking up those uid / gid.
# The same uid / gid (101) is used both for alpine and ubuntu.
# Number 101 is used by default in openshift
RUN groupadd -r clickhouse --gid=101 \
&& useradd -r -g clickhouse --uid=101 --home-dir=/var/lib/clickhouse --shell=/bin/bash clickhouse \
&& apt-get update \
&& apt-get install --yes --no-install-recommends \
apt-transport-https \
ca-certificates \


@ -8,9 +8,16 @@ ENV LANG=en_US.UTF-8 \
COPY alpine-root/ /
# from https://github.com/ClickHouse/ClickHouse/blob/master/debian/clickhouse-server.postinst
RUN addgroup clickhouse \
&& adduser -S -H -h /nonexistent -s /bin/false -G clickhouse -g "ClickHouse server" clickhouse \
# user/group precreated explicitly with fixed uid/gid on purpose.
# It is especially important for rootless containers: in that case entrypoint
# can't do chown and owners of mounted volumes should be configured externally.
# We do that in advance at the beginning of the Dockerfile before any packages are
# installed, to prevent some unrelated software from picking up those uid / gid.
# The same uid / gid (101) is used both for alpine and ubuntu.
# Number 101 is used by default in openshift
RUN addgroup -S -g 101 clickhouse \
&& adduser -S -h /var/lib/clickhouse -s /bin/bash -G clickhouse -g "ClickHouse server" -u 101 clickhouse \
&& chown clickhouse:clickhouse /var/lib/clickhouse \
&& chmod 700 /var/lib/clickhouse \
&& chown root:clickhouse /var/log/clickhouse-server \


@ -1,5 +1,5 @@
# docker build -t yandex/clickhouse-fuzzer .
FROM ubuntu:18.04
FROM yandex/clickhouse-test-base
ENV LANG=C.UTF-8
ENV TZ=Europe/Moscow
@ -7,11 +7,7 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
RUN apt-get update \
&& DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
bash \
ca-certificates \
curl \
gdb \
git \
libc6-dbg \
moreutils \
ncdu \


@ -8,6 +8,7 @@ stage=${stage:-}
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
echo "$script_dir"
repo_dir=ch
BINARY_TO_DOWNLOAD=${BINARY_TO_DOWNLOAD:="clang-11_debug_none_bundled_unsplitted_disable_False_binary"}
function clone
{
@ -35,7 +36,7 @@ function download
# wget -O- -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/performance/performance.tgz" \
# | tar --strip-components=1 -zxv
wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/clang-11_debug_none_bundled_unsplitted_disable_False_binary/clickhouse"
wget -nv -nd -c "https://clickhouse-builds.s3.yandex.net/$PR_TO_TEST/$SHA_TO_TEST/clickhouse_build_check/$BINARY_TO_DOWNLOAD/clickhouse"
chmod +x clickhouse
ln -s ./clickhouse ./clickhouse-server
ln -s ./clickhouse ./clickhouse-client
@ -72,7 +73,7 @@ function watchdog
function fuzz
{
./clickhouse-server --config-file db/config.xml -- --path db 2>&1 | tail -10000 > server.log &
./clickhouse-server --config-file db/config.xml -- --path db 2>&1 | tail -100000 > server.log &
server_pid=$!
kill -0 $server_pid
while ! ./clickhouse-client --query "select 1" && kill -0 $server_pid ; do echo . ; sleep 1 ; done
@ -85,7 +86,7 @@ function fuzz
# SC2046: Quote this to prevent word splitting. Actually I need word splitting.
# shellcheck disable=SC2012,SC2046
./clickhouse-client --query-fuzzer-runs=1000 --queries-file $(ls -1 ch/tests/queries/0_stateless/*.sql | sort -R) \
> >(tail -n 10000 > fuzzer.log) \
> >(tail -n 100000 > fuzzer.log) \
2>&1 \
|| fuzzer_exit_code=$?


@ -30,3 +30,4 @@ RUN curl 'https://cdn.mysql.com//Downloads/Connector-ODBC/8.0/mysql-connector-od
ENV TZ=Europe/Moscow
RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone


@ -66,3 +66,6 @@ function run_tests()
export -f run_tests
timeout "$MAX_RUN_TIME" bash -c run_tests ||:
tar -chf /test_output/text_log_dump.tar /var/lib/clickhouse/data/system/text_log ||:
tar -chf /test_output/query_log_dump.tar /var/lib/clickhouse/data/system/query_log ||:


@ -86,3 +86,4 @@ RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
COPY run.sh /
CMD ["/bin/bash", "/run.sh"]


@ -7,3 +7,4 @@ RUN apt-get install gdb
CMD service zookeeper start && sleep 7 && /usr/share/zookeeper/bin/zkCli.sh -server localhost:2181 -create create /clickhouse_test ''; \
gdb -q -ex 'set print inferior-events off' -ex 'set confirm off' -ex 'set print thread-events off' -ex run -ex bt -ex quit --args ./unit_tests_dbms | tee test_output/test_result.txt


@ -29,4 +29,14 @@ toc_title: Cloud
- Cross-AZ scaling for performance and high availability
- Built-in monitoring and SQL query editor
## Tencent Cloud {#tencent-cloud}
[Tencent Managed Service for ClickHouse](https://cloud.tencent.com/product/cdwch) provides the following key features:
- Easy to deploy and manage on Tencent Cloud
- Highly scalable and available
- Integrated monitor and alert service
- High security with isolated per cluster VPCs
- On-demand pricing with no upfront costs or long-term commitments
{## [Original article](https://clickhouse.tech/docs/en/commercial/cloud/) ##}


@ -25,6 +25,7 @@ Ways to configure settings, in order of priority:
- When starting the ClickHouse console client in non-interactive mode, set the startup parameter `--setting=value`.
- When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`).
- Make settings in the [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select) clause of the SELECT query. The setting value is applied only to that query and is reset to default or previous value after the query is executed.
Settings that can only be set in the server config file are not covered in this section.


@ -9,11 +9,18 @@ toc_title: Float32, Float64
Types are equivalent to types of C:
- `Float32` - `float`
- `Float64` - `double`
- `Float32` `float`.
- `Float64` `double`.
We recommend that you store data in integer form whenever possible. For example, convert fixed precision numbers to integer values, such as monetary amounts or page load times in milliseconds.
Aliases:
- `Float32``FLOAT`.
- `Float64``DOUBLE`.
When creating tables, numeric parameters for floating point numbers can be set (e.g. `FLOAT(12)`, `FLOAT(15, 22)`, `DOUBLE(12)`, `DOUBLE(4, 18)`), but ClickHouse ignores them.
## Using Floating-point Numbers {#using-floating-point-numbers}
- Computations with floating-point numbers might produce a rounding error.
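For example, the effect is visible even in trivial arithmetic (output formatting may differ slightly between client versions):

``` sql
SELECT 1 - 0.9
```

``` text
┌───────minus(1, 0.9)─┐
│ 0.09999999999999998 │
└─────────────────────┘
```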
@ -52,7 +59,7 @@ SELECT 0.5 / 0
└────────────────┘
```
- `-Inf` Negative infinity.
- `-Inf` Negative infinity.
<!-- -->
@ -66,7 +73,7 @@ SELECT -0.5 / 0
└─────────────────┘
```
- `NaN` Not a number.
- `NaN` Not a number.
<!-- -->
@ -80,6 +87,6 @@ SELECT 0 / 0
└──────────────┘
```
See the rules for `NaN` sorting in the section [ORDER BY clause](../sql_reference/statements/select/order-by.md).
See the rules for `NaN` sorting in the section [ORDER BY clause](../../sql-reference/statements/select/order-by.md).
[Original article](https://clickhouse.tech/docs/en/data_types/float/) <!--hide-->


@ -7,23 +7,32 @@ toc_title: UInt8, UInt16, UInt32, UInt64, UInt256, Int8, Int16, Int32, Int64, In
Fixed-length integers, with or without a sign.
When creating tables, numeric parameters for integer numbers can be set (e.g. `TINYINT(8)`, `SMALLINT(16)`, `INT(32)`, `BIGINT(64)`), but ClickHouse ignores them.
## Int Ranges {#int-ranges}
- Int8 - \[-128 : 127\]
- Int16 - \[-32768 : 32767\]
- Int32 - \[-2147483648 : 2147483647\]
- Int64 - \[-9223372036854775808 : 9223372036854775807\]
- Int128 - \[-170141183460469231731687303715884105728 : 170141183460469231731687303715884105727\]
- Int256 - \[-57896044618658097711785492504343953926634992332820282019728792003956564819968 : 57896044618658097711785492504343953926634992332820282019728792003956564819967\]
- `Int8` — \[-128 : 127\]
- `Int16` — \[-32768 : 32767\]
- `Int32` — \[-2147483648 : 2147483647\]
- `Int64` — \[-9223372036854775808 : 9223372036854775807\]
- `Int128` — \[-170141183460469231731687303715884105728 : 170141183460469231731687303715884105727\]
- `Int256` — \[-57896044618658097711785492504343953926634992332820282019728792003956564819968 : 57896044618658097711785492504343953926634992332820282019728792003956564819967\]
Aliases:
- `Int8``TINYINT`, `BOOL`, `BOOLEAN`, `INT1`.
- `Int16``SMALLINT`, `INT2`.
- `Int32``INT`, `INT4`, `INTEGER`.
- `Int64``BIGINT`.
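A quick sanity check of the alias mapping (a minimal sketch; `toTypeName` reports the resolved type, assuming the server version supports these aliases):

``` sql
SELECT toTypeName(CAST(1 AS TINYINT)), toTypeName(CAST(1 AS BIGINT));
-- the aliases resolve to the native types
```

This should report `Int8` and `Int64` respectively.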
## UInt Ranges {#uint-ranges}
- UInt8 - \[0 : 255\]
- UInt16 - \[0 : 65535\]
- UInt32 - \[0 : 4294967295\]
- UInt64 - \[0 : 18446744073709551615\]
- UInt256 - \[0 : 115792089237316195423570985008687907853269984665640564039457584007913129639935\]
- `UInt8` \[0 : 255\]
- `UInt16` \[0 : 65535\]
- `UInt32` \[0 : 4294967295\]
- `UInt64` \[0 : 18446744073709551615\]
- `UInt256` \[0 : 115792089237316195423570985008687907853269984665640564039457584007913129639935\]
UInt128 is not supported yet.
`UInt128` is not supported yet.
[Original article](https://clickhouse.tech/docs/en/data_types/int_uint/) <!--hide-->


@ -8,6 +8,8 @@ toc_title: String
Strings of an arbitrary length. The length is not limited. The value can contain an arbitrary set of bytes, including null bytes.
The String type replaces the types VARCHAR, BLOB, CLOB, and others from other DBMSs.
When creating tables, numeric parameters for string fields can be set (e.g. `VARCHAR(255)`), but ClickHouse ignores them.
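For example, a minimal sketch with a hypothetical table name (the declared length is simply dropped):

``` sql
CREATE TABLE varchar_demo (s VARCHAR(255)) ENGINE = Memory;
DESCRIBE TABLE varchar_demo; -- the column is reported as plain String
```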
## Encodings {#encodings}
ClickHouse doesn't have the concept of encodings. Strings can contain an arbitrary set of bytes, which are stored and output as-is.


@ -93,7 +93,7 @@ Setting fields:
- `path` — The absolute path to the file.
- `format` — The file format. All the formats described in “[Formats](../../../interfaces/formats.md#formats)” are supported.
When dictionary with FILE source is created via DDL command (`CREATE DICTIONARY ...`), source of the dictionary have to be located in `user_files` directory, to prevent DB users accessing arbitrary file on clickhouse node.
When a dictionary with the `FILE` source is created via a DDL command (`CREATE DICTIONARY ...`), the source file needs to be located in the `user_files` directory, to prevent DB users from accessing arbitrary files on the ClickHouse node.
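A sketch of such a DDL-created dictionary (hypothetical dictionary and column names; the default `user_files` location is `/var/lib/clickhouse/user_files` unless reconfigured):

``` sql
CREATE DICTIONARY os_dict (id UInt64, os_name String)
PRIMARY KEY id
SOURCE(FILE(path '/var/lib/clickhouse/user_files/os.tsv' format 'TabSeparated')) -- file must live under user_files
LAYOUT(FLAT())
LIFETIME(300);
```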
## Executable File {#dicts-external_dicts_dict_sources-executable}
@ -115,7 +115,7 @@ Setting fields:
- `command` — The absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
- `format` — The file format. All the formats described in “[Formats](../../../interfaces/formats.md#formats)” are supported.
That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled, otherwise, the DB user would be able to execute arbitrary binary on clickhouse node.
That dictionary source can be configured only via XML configuration. Creating dictionaries with an executable source via DDL is disabled; otherwise, the DB user would be able to execute an arbitrary binary on the ClickHouse node.
## Http(s) {#dicts-external_dicts_dict_sources-http}
@ -160,14 +160,14 @@ Setting fields:
- `url` — The source URL.
- `format` — The file format. All the formats described in “[Formats](../../../interfaces/formats.md#formats)” are supported.
- `credentials` — Basic HTTP authentication. Optional parameter.
- `user` — Username required for the authentication.
- `password` — Password required for the authentication.
    - `user` — Username required for the authentication.
    - `password` — Password required for the authentication.
- `headers` — All custom HTTP header entries used for the HTTP request. Optional parameter.
- `header` — Single HTTP header entry.
- `name` — Identifiant name used for the header send on the request.
- `value` — Value set for a specific identifiant name.
    - `header` — A single HTTP header entry.
        - `name` — Identifier name used for the header sent in the request.
        - `value` — Value set for a specific identifier name.
When creating a dictionary using the DDL command (`CREATE DICTIONARY ...`) remote hosts for HTTP dictionaries checked with the `remote_url_allow_hosts` section from config to prevent database users to access arbitrary HTTP server.
When creating a dictionary using the DDL command (`CREATE DICTIONARY ...`), remote hosts for HTTP dictionaries are checked against the contents of the `remote_url_allow_hosts` section of the config, to prevent database users from accessing an arbitrary HTTP server.
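For illustration, the corresponding DDL source definition with credentials and a custom header might look like this (a sketch; the URL, user, password, and header values are placeholders):

``` sql
-- placeholder URL, credentials, and header values
SOURCE(HTTP(
    url 'http://[::1]/os.tsv'
    format 'TabSeparated'
    credentials(user 'user' password 'password')
    headers(header(name 'API-KEY' value 'key'))
))
```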
## ODBC {#dicts-external_dicts_dict_sources-odbc}


@ -622,7 +622,7 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %C | year divided by 100 and truncated to integer (00-99) | 20 |
| %d | day of the month, zero-padded (01-31) | 02 |
| %D | Short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/18 |
| %e | day of the month, space-padded ( 1-31) | 2 |
| %e | day of the month, space-padded ( 1-31) | 2 |
| %F | short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2018-01-02 |
| %G | four-digit year format for ISO week number, calculated from the week-based year [defined by the ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates) standard, normally useful only with %V | 2018 |
| %g | two-digit year format, aligned to ISO 8601, abbreviated from four-digit notation | 18 |
@ -633,6 +633,7 @@ Using replacement fields, you can define a pattern for the resulting string. “
| %M | minute (00-59) | 33 |
| %n | new-line character ('\n') | |
| %p | AM or PM designation | PM |
| %Q | Quarter (1-4) | 1 |
| %R | 24-hour HH:MM time, equivalent to %H:%M | 22:33 |
| %S | second (00-59) | 44 |
| %t | horizontal-tab character ('\t') | |
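For example, the new `%Q` specifier extracts the quarter (a minimal sketch; January falls into quarter 1):

``` sql
SELECT formatDateTime(toDate('2018-01-02'), '%Q');
```

This should return `1`.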


@ -538,11 +538,11 @@ For case-insensitive search or/and in UTF-8 format use functions `ngramSearchCas
!!! note "Note"
For the UTF-8 case we use 3-gram distance. These are not perfectly fair n-gram distances. We use 2-byte hashes to hash n-grams and then calculate the (non-)symmetric difference between these hash tables; collisions may occur. With the UTF-8 case-insensitive format we do not use a fair `tolower` function; we zero the 5th bit (starting from zero) of each codepoint byte and the first bit of the zeroth byte if there is more than one byte; this works for Latin and mostly for all Cyrillic letters.
## countSubstrings(haystack, needle) {#countSubstrings}
## countSubstrings {#countSubstrings}
Count the number of substring occurrences
Returns the number of substring occurrences.
For a case-insensitive search, use the function `countSubstringsCaseInsensitive` (or `countSubstringsCaseInsensitiveUTF8`).
For a case-insensitive search, use the [countSubstringsCaseInsensitive](../../sql-reference/functions/string-search-functions.md#countSubstringsCaseInsensitive) or [countSubstringsCaseInsensitiveUTF8](../../sql-reference/functions/string-search-functions.md#countSubstringsCaseInsensitiveUTF8) functions.
**Syntax**
@ -554,20 +554,20 @@ countSubstrings(haystack, needle[, start_pos])
- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `start_pos` — Optional parameter, position of the first character in the string to start search. [UInt](../../sql-reference/data-types/int-uint.md)
- `start_pos` — Position of the first character in the string to start the search. Optional. [UInt](../../sql-reference/data-types/int-uint.md).
**Returned values**
- Number of occurrences.
Type: `Integer`.
Type: [UInt64](../../sql-reference/data-types/int-uint.md).
**Examples**
Query:
``` sql
SELECT countSubstrings('foobar.com', '.')
SELECT countSubstrings('foobar.com', '.');
```
Result:
@ -581,7 +581,7 @@ Result:
Query:
``` sql
SELECT countSubstrings('aaaa', 'aa')
SELECT countSubstrings('aaaa', 'aa');
```
Result:
@ -592,6 +592,138 @@ Result:
└───────────────────────────────┘
```
Query:
```sql
SELECT countSubstrings('abc___abc', 'abc', 4);
```
Result:
``` text
┌─countSubstrings('abc___abc', 'abc', 4)─┐
│ 1 │
└────────────────────────────────────────┘
```
## countSubstringsCaseInsensitive {#countSubstringsCaseInsensitive}
Returns the number of substring occurrences, ignoring case.
**Syntax**
``` sql
countSubstringsCaseInsensitive(haystack, needle[, start_pos])
```
**Parameters**
- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `start_pos` — Position of the first character in the string to start the search. Optional. [UInt](../../sql-reference/data-types/int-uint.md).
**Returned values**
- Number of occurrences.
Type: [UInt64](../../sql-reference/data-types/int-uint.md).
**Examples**
Query:
``` sql
SELECT countSubstringsCaseInsensitive('aba', 'B');
```
Result:
``` text
┌─countSubstringsCaseInsensitive('aba', 'B')─┐
│ 1 │
└────────────────────────────────────────────┘
```
Query:
``` sql
SELECT countSubstringsCaseInsensitive('foobar.com', 'CoM');
```
Result:
``` text
┌─countSubstringsCaseInsensitive('foobar.com', 'CoM')─┐
│ 1 │
└─────────────────────────────────────────────────────┘
```
Query:
``` sql
SELECT countSubstringsCaseInsensitive('abC___abC', 'aBc', 2);
```
Result:
``` text
┌─countSubstringsCaseInsensitive('abC___abC', 'aBc', 2)─┐
│ 1 │
└───────────────────────────────────────────────────────┘
```
## countSubstringsCaseInsensitiveUTF8 {#countSubstringsCaseInsensitiveUTF8}
Returns the number of substring occurrences in `UTF-8` strings, ignoring case.
**Syntax**
``` sql
countSubstringsCaseInsensitiveUTF8(haystack, needle[, start_pos])
```
**Parameters**
- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `start_pos` — Position of the first character in the string to start the search. Optional. [UInt](../../sql-reference/data-types/int-uint.md).
**Returned values**
- Number of occurrences.
Type: [UInt64](../../sql-reference/data-types/int-uint.md).
**Examples**
Query:
``` sql
SELECT countSubstringsCaseInsensitiveUTF8('абв', 'A');
```
Result:
``` text
┌─countSubstringsCaseInsensitiveUTF8('абв', 'A')─┐
│ 1 │
└────────────────────────────────────────────────┘
```
Query:
```sql
SELECT countSubstringsCaseInsensitiveUTF8('аБв__АбВ__абв', 'Абв');
```
Result:
``` text
┌─countSubstringsCaseInsensitiveUTF8('аБв__АбВ__абв', 'Абв')─┐
│ 3 │
└────────────────────────────────────────────────────────────┘
```
## countMatches(haystack, pattern) {#countmatcheshaystack-pattern}
Returns the number of regular expression matches for a `pattern` in a `haystack`.
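For example (matches are counted without overlap, so `'aa'` is found twice in `'aaaa'`):

``` sql
SELECT countMatches('aaaa', 'aa');
```

``` text
┌─countMatches('aaaa', 'aa')─┐
│                          2 │
└────────────────────────────┘
```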


@ -286,7 +286,7 @@ ALTER TABLE mt DELETE IN PARTITION 2 WHERE p = 2;
You can specify the partition expression in `ALTER ... PARTITION` queries in different ways:
- As a value from the `partition` column of the `system.parts` table. For example, `ALTER TABLE visits DETACH PARTITION 201901`.
- As the expression from the table column. Constants and constant expressions are supported. For example, `ALTER TABLE visits DETACH PARTITION toYYYYMM(toDate('2019-01-25'))`.
- As a tuple of expressions or constants that matches (in types) the table partitioning keys tuple. In the case of a single element partitioning key, the expression should be wrapped in the `tuple (...)` function. For example, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`.
- Using the partition ID. The partition ID is a string identifier of the partition (human-readable, if possible) that is used as the name of the partition in the file system and in ZooKeeper. The partition ID must be specified in the `PARTITION ID` clause, in single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`.
- In the [ALTER ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) query, to specify the name of a part, use string literal with a value from the `name` column of the [system.detached_parts](../../../operations/system-tables/detached_parts.md#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`.


@ -25,6 +25,7 @@ SELECT [DISTINCT] expr_list
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
[LIMIT [offset_value, ]n BY columns]
[LIMIT [n, ]m] [WITH TIES]
[SETTINGS ...]
[UNION ...]
[INTO OUTFILE filename]
[FORMAT format]
@ -265,5 +266,17 @@ SELECT * REPLACE(i + 1 AS i) EXCEPT (j) APPLY(sum) from columns_transformers;
└─────────────────┴────────┘
```
## SETTINGS in SELECT Query {#settings-in-select}
You can specify the necessary settings right in the `SELECT` query. The setting value is applied only to this query and is reset to the default or previous value after the query is executed.
For other ways to configure settings, see [here](../../../operations/settings/index.md).
**Example**
``` sql
SELECT * FROM some_table SETTINGS optimize_read_in_order=1, cast_keep_nullable=1;
```
[Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/)
<!--hide-->


@ -14,14 +14,16 @@ Allows to access all shards in an existing cluster which configured in `remote_s
Signatures:
``` sql
cluster('cluster_name', db.table)
cluster('cluster_name', db, table)
clusterAllReplicas('cluster_name', db.table)
clusterAllReplicas('cluster_name', db, table)
cluster('cluster_name', db.table[, sharding_key])
cluster('cluster_name', db, table[, sharding_key])
clusterAllReplicas('cluster_name', db.table[, sharding_key])
clusterAllReplicas('cluster_name', db, table[, sharding_key])
```
`cluster_name` — Name of a cluster that is used to build a set of addresses and connection parameters to remote and local servers.
`sharding_key` — When inserting into a cluster function with more than one shard, `sharding_key` needs to be provided.
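For instance, a sharded insert through the table function might look like this (a sketch with a hypothetical cluster `my_cluster` and single-column table `default.t`):

``` sql
-- hypothetical cluster and table names
INSERT INTO FUNCTION cluster('my_cluster', default.t, rand()) VALUES (1), (2);
```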
Using the `cluster` and `clusterAllReplicas` table functions is less efficient than creating a `Distributed` table because in this case the server connection is re-established for every request. When processing a large number of queries, please always create the `Distributed` table ahead of time, and don't use the `cluster` and `clusterAllReplicas` table functions.
The `cluster` and `clusterAllReplicas` table functions can be useful in the following cases:


@ -10,13 +10,14 @@ Allows you to access remote servers without creating a `Distributed` table.
Signatures:
``` sql
remote('addresses_expr', db, table[, 'user'[, 'password']])
remote('addresses_expr', db.table[, 'user'[, 'password']])
remoteSecure('addresses_expr', db, table[, 'user'[, 'password']])
remoteSecure('addresses_expr', db.table[, 'user'[, 'password']])
remote('addresses_expr', db, table[, 'user'[, 'password'], sharding_key])
remote('addresses_expr', db.table[, 'user'[, 'password'], sharding_key])
remoteSecure('addresses_expr', db, table[, 'user'[, 'password'], sharding_key])
remoteSecure('addresses_expr', db.table[, 'user'[, 'password'], sharding_key])
```
`addresses_expr` — An expression that generates addresses of remote servers. This may be just one server address. The server address is `host:port`, or just `host`. The host can be specified as the server name, or as the IPv4 or IPv6 address. An IPv6 address is specified in square brackets. The port is the TCP port on the remote server. If the port is omitted, it uses `tcp_port` from the server's config file (by default, 9000).
`sharding_key` — A sharding key can be specified to support distributing data across nodes. For example: `insert into remote('127.0.0.1:9000,127.0.0.2', db, table, 'default', rand())`.
!!! important "Important"
The port is required for an IPv6 address.

File diff suppressed because it is too large


@ -1,7 +1,7 @@
---
toc_folder_title: Changelog
toc_priority: 74
toc_title: '2020'
toc_title: '2021'
---
{% include "content/changelog.md" %}


@ -24,6 +24,7 @@ toc_title: Introduction
- When starting the ClickHouse console client in non-interactive mode, set the startup parameter `--setting=value`.
- When using the HTTP API, pass CGI parameters (`URL?setting_1=value&setting_2=value...`).
- Specify the necessary settings in the [SETTINGS](../../sql-reference/statements/select/index.md#settings-in-select) clause of a SELECT query. These settings apply only to that query and are reset to the previous or default value after it is executed.
Settings that can only be set in the server config file are not covered in this section.


@ -9,8 +9,15 @@ toc_title: Float32, Float64
The types are equivalent to the C types:
- `Float32` - `float`;
- `Float64` - `double`.
- `Float32` — `float`.
- `Float64` — `double`.
Aliases:
- `Float32` — `FLOAT`.
- `Float64` — `DOUBLE`.
When creating tables, numeric parameters for floating-point numbers can be specified (e.g. `FLOAT(12)`, `FLOAT(15, 22)`, `DOUBLE(12)`, `DOUBLE(4, 18)`), but ClickHouse ignores them.
We recommend storing data in integer form whenever possible. For example, convert fixed-precision numbers to integer values, such as monetary amounts or page load times in milliseconds.
@ -38,7 +45,7 @@ SELECT 1 - 0.9
Unlike standard SQL, ClickHouse supports the following categories of floating-point numbers:
- `Inf` - infinity.
- `Inf` — infinity.
<!-- -->
@ -52,7 +59,7 @@ SELECT 0.5 / 0
└────────────────┘
```
- `-Inf` - negative infinity;
- `-Inf` — negative infinity.
<!-- -->
@ -66,7 +73,7 @@ SELECT -0.5 / 0
└─────────────────┘
```
- `NaN` - not a number.
- `NaN` — not a number.
<!-- -->
@ -80,6 +87,6 @@ SELECT 0 / 0
└──────────────┘
```
See the rules for `NaN` sorting in the section [ORDER BY clause](../sql_reference/data_types/float.md).
See the rules for `NaN` sorting in the section [ORDER BY clause](../../sql-reference/statements/select/order-by.md).
[Original article](https://clickhouse.tech/docs/ru/data_types/float/) <!--hide-->


@ -7,23 +7,32 @@ toc_title: UInt8, UInt16, UInt32, UInt64, Int8, Int16, Int32, Int64
Fixed-length integers, with or without a sign.
When creating tables, numeric parameters for integers can be specified (e.g. `TINYINT(8)`, `SMALLINT(16)`, `INT(32)`, `BIGINT(64)`), but ClickHouse ignores them.
## Int Ranges {#int-ranges}
- Int8 - \[-128 : 127\]
- Int16 - \[-32768 : 32767\]
- Int32 - \[-2147483648 : 2147483647\]
- Int64 - \[-9223372036854775808 : 9223372036854775807\]
- Int128 - \[-170141183460469231731687303715884105728 : 170141183460469231731687303715884105727\]
- Int256 - \[-57896044618658097711785492504343953926634992332820282019728792003956564819968 : 57896044618658097711785492504343953926634992332820282019728792003956564819967\]
- `Int8` — \[-128 : 127\]
- `Int16` — \[-32768 : 32767\]
- `Int32` — \[-2147483648 : 2147483647\]
- `Int64` — \[-9223372036854775808 : 9223372036854775807\]
- `Int128` — \[-170141183460469231731687303715884105728 : 170141183460469231731687303715884105727\]
- `Int256` — \[-57896044618658097711785492504343953926634992332820282019728792003956564819968 : 57896044618658097711785492504343953926634992332820282019728792003956564819967\]
Aliases:
- `Int8``TINYINT`, `BOOL`, `BOOLEAN`, `INT1`.
- `Int16``SMALLINT`, `INT2`.
- `Int32``INT`, `INT4`, `INTEGER`.
- `Int64``BIGINT`.
## UInt Ranges {#uint-ranges}
- UInt8 - \[0 : 255\]
- UInt16 - \[0 : 65535\]
- UInt32 - \[0 : 4294967295\]
- UInt64 - \[0 : 18446744073709551615\]
- UInt256 - \[0 : 115792089237316195423570985008687907853269984665640564039457584007913129639935\]
- `UInt8` \[0 : 255\]
- `UInt16` \[0 : 65535\]
- `UInt32` \[0 : 4294967295\]
- `UInt64` \[0 : 18446744073709551615\]
- `UInt256` \[0 : 115792089237316195423570985008687907853269984665640564039457584007913129639935\]
UInt128 is not implemented yet.
`UInt128` is not implemented yet.
[Original article](https://clickhouse.tech/docs/ru/data_types/int_uint/) <!--hide-->


@ -8,6 +8,8 @@ toc_title: String
Strings of arbitrary length. The length is not limited. A value can contain an arbitrary set of bytes, including null bytes.
Thus, the String type replaces the VARCHAR, BLOB, CLOB, and similar types from other DBMSs.
When creating tables, numeric parameters for strings can be specified (e.g. `VARCHAR(255)`), but ClickHouse ignores them.
## Encodings {#kodirovki}
ClickHouse has no concept of encodings. Strings can contain an arbitrary set of bytes, which are stored and output as-is.


@ -90,8 +90,10 @@ SOURCE(FILE(path '/opt/dictionaries/os.tsv' format 'TabSeparated'))
Setting fields:
- `path` — The absolute path to the file.
- `format` — The file format. All the formats described in the “[Formats](../../../interfaces/formats.md#formats)” section are supported.
- `path` — the absolute path to the file.
- `format` — the file format. All the formats described in the “[Formats](../../../interfaces/formats.md#formats)” section are supported.
If a dictionary with the `FILE` source is created via a DDL command (`CREATE DICTIONARY ...`), the dictionary source must be located in the `user_files` directory; otherwise, database users would be able to access arbitrary files on the ClickHouse node.
## Executable File {#dicts-external_dicts_dict_sources-executable}
@ -108,16 +110,12 @@ SOURCE(FILE(path '/opt/dictionaries/os.tsv' format 'TabSeparated'))
</source>
```
or
``` sql
SOURCE(EXECUTABLE(command 'cat /opt/dictionaries/os.tsv' format 'TabSeparated'))
```
Setting fields:
- `command` — The absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
- `format` — The file format. All the formats described in the “[Formats](../../../interfaces/formats.md#formats)” section are supported.
- `command` — the absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
- `format` — the file format. All the formats described in the “[Formats](../../../interfaces/formats.md#formats)” section are supported.
This dictionary source can be configured only via XML configuration. Creating dictionaries with an executable source via DDL is disabled; otherwise, a database user would be able to execute an arbitrary binary on the ClickHouse node.
## HTTP(s) {#dicts-external_dicts_dict_sources-http}
@ -160,7 +158,16 @@ SOURCE(HTTP(
Setting fields:
- `url` — The source URL.
- `format` — The file format. All the formats described in the “[Formats](../../../interfaces/formats.md#formats)” section are supported.
- `format` — the file format. All the formats described in the “[Formats](../../../interfaces/formats.md#formats)” section are supported.
- `credentials` — Basic HTTP authentication. Optional parameter.
    - `user` — Username required for the authentication.
    - `password` — Password required for the authentication.
- `headers` — All custom HTTP header entries used for the HTTP request. Optional parameter.
    - `header` — A single HTTP header entry.
        - `name` — Identifier name used for the header sent in the request.
        - `value` — Value set for a specific identifier name.
When creating a dictionary with a DDL command (`CREATE DICTIONARY ...`), remote hosts for HTTP dictionaries are checked against the `remote_url_allow_hosts` section of the server configuration; otherwise, database users would be able to access an arbitrary HTTP server.
## ODBC {#dicts-external_dicts_dict_sources-odbc}


@ -573,4 +573,190 @@ SELECT countMatches('aaaa', 'aa');
└───────────────────────────────┘
```
## countSubstrings {#countSubstrings}
Returns the number of substring occurrences.
For a case-insensitive search, use the [countSubstringsCaseInsensitive](../../sql-reference/functions/string-search-functions.md#countSubstringsCaseInsensitive) or [countSubstringsCaseInsensitiveUTF8](../../sql-reference/functions/string-search-functions.md#countSubstringsCaseInsensitiveUTF8) functions.
**Syntax**
``` sql
countSubstrings(haystack, needle[, start_pos])
```
**Parameters**
- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `start_pos` — Position of the first character in the string to start the search. Optional. [UInt](../../sql-reference/data-types/int-uint.md).
**Returned values**
- Number of occurrences.
Type: [UInt64](../../sql-reference/data-types/int-uint.md).
**Examples**
Query:
``` sql
SELECT countSubstrings('foobar.com', '.');
```
Result:
``` text
┌─countSubstrings('foobar.com', '.')─┐
│ 1 │
└────────────────────────────────────┘
```
Query:
``` sql
SELECT countSubstrings('aaaa', 'aa');
```
Result:
``` text
┌─countSubstrings('aaaa', 'aa')─┐
│ 2 │
└───────────────────────────────┘
```
Query:
```sql
SELECT countSubstrings('abc___abc', 'abc', 4);
```
Result:
``` text
┌─countSubstrings('abc___abc', 'abc', 4)─┐
│ 1 │
└────────────────────────────────────────┘
```
## countSubstringsCaseInsensitive {#countSubstringsCaseInsensitive}
Returns the number of substring occurrences, ignoring case.
**Syntax**
``` sql
countSubstringsCaseInsensitive(haystack, needle[, start_pos])
```
**Parameters**
- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `start_pos` — Position of the first character in the string to start the search. Optional. [UInt](../../sql-reference/data-types/int-uint.md).
**Returned values**
- Number of occurrences.
Type: [UInt64](../../sql-reference/data-types/int-uint.md).
**Examples**
Query:
``` sql
SELECT countSubstringsCaseInsensitive('aba', 'B');
```
Result:
``` text
┌─countSubstringsCaseInsensitive('aba', 'B')─┐
│ 1 │
└────────────────────────────────────────────┘
```
Query:
``` sql
SELECT countSubstringsCaseInsensitive('foobar.com', 'CoM');
```
Result:
``` text
┌─countSubstringsCaseInsensitive('foobar.com', 'CoM')─┐
│ 1 │
└─────────────────────────────────────────────────────┘
```
Query:
``` sql
SELECT countSubstringsCaseInsensitive('abC___abC', 'aBc', 2);
```
Result:
``` text
┌─countSubstringsCaseInsensitive('abC___abC', 'aBc', 2)─┐
│ 1 │
└───────────────────────────────────────────────────────┘
```
## countSubstringsCaseInsensitiveUTF8 {#countSubstringsCaseInsensitiveUTF8}
Returns the number of substring occurrences in `UTF-8` strings, ignoring case.
**Syntax**
``` sql
countSubstringsCaseInsensitiveUTF8(haystack, needle[, start_pos])
```
**Parameters**
- `haystack` — The string to search in. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `needle` — The substring to search for. [String](../../sql-reference/syntax.md#syntax-string-literal).
- `start_pos` — Position of the first character in the string to start the search. Optional. [UInt](../../sql-reference/data-types/int-uint.md).
**Returned values**
- Number of occurrences.
Type: [UInt64](../../sql-reference/data-types/int-uint.md).
**Examples**
Query:
``` sql
SELECT countSubstringsCaseInsensitiveUTF8('абв', 'A');
```
Result:
``` text
┌─countSubstringsCaseInsensitiveUTF8('абв', 'A')─┐
│ 1 │
└────────────────────────────────────────────────┘
```
Query:
```sql
SELECT countSubstringsCaseInsensitiveUTF8('аБв__АбВ__абв', 'Абв');
```
Result:
``` text
┌─countSubstringsCaseInsensitiveUTF8('аБв__АбВ__абв', 'Абв')─┐
│ 3 │
└────────────────────────────────────────────────────────────┘
```
[Original article](https://clickhouse.tech/docs/ru/query_language/functions/string_search_functions/) <!--hide-->


@ -288,7 +288,7 @@ ALTER TABLE mt DELETE IN PARTITION 2 WHERE p = 2;
To specify the desired partition in `ALTER ... PARTITION` queries, you can use:
- The partition name. It can be found in the `partition` column of the [system.parts](../../../operations/system-tables/parts.md#system_tables-parts) table. For example, `ALTER TABLE visits DETACH PARTITION 201901`.
- An arbitrary expression over the columns of the source table. Constants and constant expressions are also supported. For example, `ALTER TABLE visits DETACH PARTITION toYYYYMM(toDate('2019-01-25'))`.
- A tuple of expressions or constants that matches (in types) the partitioning key tuple. For a single-element partitioning key, the expression should be wrapped in the `tuple(...)` function. For example, `ALTER TABLE visits DETACH PARTITION tuple(toYYYYMM(toDate('2019-01-25')))`.
- The string partition ID. The partition ID is used to name parts of the partition on the file system and in ZooKeeper. In `ALTER` queries, the partition ID must be specified in the `PARTITION ID` clause, in single quotes. For example, `ALTER TABLE visits DETACH PARTITION ID '201901'`.
- For [ATTACH PART](#alter_attach-partition) and [DROP DETACHED PART](#alter_drop-detached) queries: to specify the name of a part, use a string literal with a value from the `name` column of the [system.detached_parts](../../../operations/system-tables/detached_parts.md#system_tables-detached_parts) table. For example, `ALTER TABLE visits ATTACH PART '201901_1_1_0'`.
@ -306,4 +306,4 @@ OPTIMIZE TABLE table_not_partitioned PARTITION tuple() FINAL;
Examples of `ALTER ... PARTITION` queries can be found in the tests [`00502_custom_partitioning_local`](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00502_custom_partitioning_local.sql) and [`00502_custom_partitioning_replicated_zookeeper`](https://github.com/ClickHouse/ClickHouse/blob/master/tests/queries/0_stateless/00502_custom_partitioning_replicated_zookeeper.sql).
[Original article](https://clickhouse.tech/docs/ru/query_language/alter/partition/) <!--hide-->
[Original article](https://clickhouse.tech/docs/ru/query_language/alter/partition/) <!--hide-->


@ -23,6 +23,7 @@ SELECT [DISTINCT] expr_list
[ORDER BY expr_list] [WITH FILL] [FROM expr] [TO expr] [STEP expr]
[LIMIT [offset_value, ]n BY columns]
[LIMIT [n, ]m] [WITH TIES]
[SETTINGS ...]
[UNION ALL ...]
[INTO OUTFILE filename]
[FORMAT format]
@ -161,4 +162,17 @@ Code: 42. DB::Exception: Received from localhost:9000. DB::Exception: Number of
For more details, see the “Settings” section. It is possible to use external sorting (with temporary data saved to disk) and external aggregation.
{## [Original article](https://clickhouse.tech/docs/en/sql-reference/statements/select/) ##}
## SETTINGS in the SELECT Query {#settings-in-select}
You can specify the necessary settings directly in the `SELECT` query, in the `SETTINGS` clause. The setting value is applied only to this query and is reset to the previous or default value after the query is executed.
Other ways to configure settings are described [here](../../../operations/settings/index.md).
**Example**
``` sql
SELECT * FROM some_table SETTINGS optimize_read_in_order=1, cast_keep_nullable=1;
```
[Original article](https://clickhouse.tech/docs/ru/sql-reference/statements/select/)
<!--hide-->


@ -1,23 +1,41 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_priority: 1
toc_title: "\u4E91"
toc_title: Cloud
---
# ClickHouse Cloud Service Providers {#clickhouse-cloud-service-providers}
# ClickHouse Cloud Service Providers {#clickhouse-cloud-service-providers}
!!! info "Info"
    If you have launched a public cloud with a managed ClickHouse service, feel free to [open a pull request](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/commercial/cloud.md) adding it to the list below.
!!! info "Note"
    If you have launched a public cloud with a managed ClickHouse service, feel free to [submit a pull request](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/commercial/cloud.md) to add it to the list below.
## Yandex Cloud {#yandex-cloud}
## Yandex Cloud {#yandex-cloud}
[Yandex Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse?utm_source=referrals&utm_medium=clickhouseofficialsite&utm_campaign=link3) provides the following key features:
[Yandex Managed Service for ClickHouse](https://cloud.yandex.com/services/managed-clickhouse?utm_source=referrals&utm_medium=clickhouseofficialsite&utm_campaign=link3) provides the following key features:
- Fully managed ZooKeeper service for [ClickHouse replication](../engines/table-engines/mergetree-family/replication.md)
- Fully managed ZooKeeper service for [ClickHouse replication](../engines/table-engines/mergetree-family/replication.md)
- Multiple storage type choices
- Replicas in different availability zones
- Encryption and isolation
- Replicas in different availability zones
- Encryption and isolation
- Automated maintenance
## Altinity.Cloud {#altinity.cloud}
[Altinity.Cloud](https://altinity.com/cloud-database/) is a fully managed ClickHouse-as-a-Service for the Amazon public cloud
- Fast deployment of ClickHouse clusters on Amazon resources
- Easy scale-out/scale-in as well as vertical scaling of nodes
- Isolated per-tenant VPCs with public endpoint or VPC peering
- Configurable storage types and volume configurations
- Cross-AZ scaling for performance and high availability
- Built-in monitoring and SQL query editor
## Tencent Cloud {#tencent-cloud}
[Tencent Managed Service for ClickHouse](https://cloud.tencent.com/product/cdwch) provides the following key features:
- Easy to deploy and manage, with integrated monitoring and alerting services
- Highly scalable and available
- Secure and reliable, with cluster-level VPC isolation
- On-demand pricing with no upfront costs or long-term commitments
{## [Original article](https://clickhouse.tech/docs/en/commercial/cloud/) ##}


@ -1,9 +1,17 @@
---
machine_translated: true
machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
toc_folder_title: "\u5546\u4E1A"
toc_folder_title: Commercial Support
toc_priority: 70
toc_title: "\u5546\u4E1A"
toc_title: Introduction
---
# ClickHouse Commercial Services {#clickhouse-commercial-services}
This section is a directory of commercial service providers specializing in ClickHouse. They are independent companies, not necessarily affiliated with Yandex.
Service categories:
- [Cloud](../commercial/cloud.md)
- [Support](../commercial/support.md)
!!! note "For service providers"
    If you happen to represent one of them, feel free to open a pull request adding your company to the respective section (or even adding a new section if the service doesn't fit the existing categories). The easiest way to open a pull request for documentation is to use the “pencil” edit button in the top-right corner. If your service is available in some local market, make sure to mention it in the localized documentation page as well (or at least point this out in the pull request description).


@ -1,18 +1,16 @@
---
machine_translated: true
machine_translated_rev: 5decc73b5dc60054f19087d3690c4eb99446a6c3
toc_priority: 3
toc_title: "\u5546\u4e1a\u652f\u6301"
toc_title: Support
---
# ClickHouse Commercial Support Service Providers {#clickhouse-commercial-support-service-providers}
# ClickHouse Commercial Support Service Providers {#clickhouse-commercial-support-service-providers}
!!! info "Info"
    If you have launched a ClickHouse commercial support service, feel free to [open a pull request](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/commercial/support.md) adding it to the list below.
!!! info "Note"
    If you have launched a ClickHouse commercial support service, feel free to [submit a pull request](https://github.com/ClickHouse/ClickHouse/edit/master/docs/en/commercial/support.md) to add it to the list below.
## Altinity {#altinity}
Altinity has been offering enterprise ClickHouse support and services since 2017. Altinity customers range from Fortune 100 enterprises to startups. Visit [www.altinity.com](https://www.altinity.com/) for more information.
Altinity has offered enterprise ClickHouse support and services since 2017. Altinity customers range from Fortune 100 enterprises to startups. Visit [www.altinity.com](https://www.altinity.com/) for more information.
## Mafiree {#mafiree}


@ -613,24 +613,25 @@ formatDateTime(Time, Format\[, Timezone\])
| %C | year divided by 100 and truncated to integer (00-99) | 20 |
| %d | day of the month, zero-padded (01-31) | 02 |
| %D | short MM/DD/YY date, equivalent to %m/%d/%y | 01/02/2018 |
| %e | day of the month, space-padded ( 1-31) | 2 |
| %e | day of the month, space-padded ( 1-31) | 2 |
| %F | short YYYY-MM-DD date, equivalent to %Y-%m-%d | 2018-01-02 |
| %G | four-digit year format for ISO week number, calculated from the week-based year [defined by ISO 8601](https://en.wikipedia.org/wiki/ISO_8601#Week_dates), normally useful only with %V | 2018 |
| %g | two-digit year format, aligned to ISO 8601, abbreviated from four-digit notation | 18 |
| %H | hour in 24h format (00-23) | 22 |
| %I | hour in 12h format (01-12) | 10 |
| %j | day of the year (001-366) | 002 |
| %I | hour in 12h format (01-12) | 10 |
| %j | day of the year (001-366) | 002 |
| %m | month as a decimal number (01-12) | 01 |
| %M | minute (00-59) | 33 |
| %n | new-line character ('\n') | |
| %p | AM or PM designation | PM |
| %Q | quarter (1-4) | 1 |
| %R | 24-hour HH:MM time, equivalent to %H:%M | 22:33 |
| %S | second (00-59) | 44 |
| %S | second (00-59) | 44 |
| %t | horizontal-tab character ('\t') | |
| %T | ISO 8601 time format (HH:MM:SS), equivalent to %H:%M:%S | 22:33:44 |
| %u | ISO 8601 weekday as number, Monday as 1 (1-7) | 2 |
| %u | ISO 8601 weekday as number, Monday as 1 (1-7) | 2 |
| %V | ISO 8601 week number (01-53) | 01 |
| %w | weekday as a decimal number, Sunday as 0 (0-6) | 2 |
| %w | weekday as a decimal number, Sunday as 0 (0-6) | 2 |
| %y | last two digits of year (00-99) | 18 |
| %Y | year | 2018 |
| %% | % sign | % |


@ -1,6 +1,6 @@
## INSERT {#insert}
## The INSERT INTO Statement {#insert}
The INSERT query is mainly used to add data to the system.
The INSERT INTO statement is mainly used to add data to the system.
Basic query format:
@ -8,7 +8,52 @@ The INSERT query is mainly used to add data to the system.
INSERT INTO [db.]table [(c1, c2, c3)] VALUES (v11, v12, v13), (v21, v22, v23), ...
```
You can specify a list of columns to insert in the query, e.g. `[(c1, c2, c3)]`. For columns that exist in the table structure but are missing from the insert list, data is filled in as follows:
You can specify a list of columns to insert in the query, e.g. `[(c1, c2, c3)]`. You can also use an expression with column [matchers](../../sql-reference/statements/select/index.md#asterisk) such as `*` and/or [modifiers](../../sql-reference/statements/select/index.md#select-modifiers) such as [APPLY](../../sql-reference/statements/select/index.md#apply-modifier), [EXCEPT](../../sql-reference/statements/select/index.md#apply-modifier), [REPLACE](../../sql-reference/statements/select/index.md#replace-modifier).
For example, consider the table:
``` sql
SHOW CREATE insert_select_testtable;
```
```text
CREATE TABLE insert_select_testtable
(
`a` Int8,
`b` String,
`c` Int8
)
ENGINE = MergeTree()
ORDER BY a
SETTINGS index_granularity = 8192
```
``` sql
INSERT INTO insert_select_testtable (*) VALUES (1, 'a', 1) ;
```
If you want to insert data into all of the columns except 'b', you need to pass as many values as the number of columns selected in parentheses:
``` sql
INSERT INTO insert_select_testtable (* EXCEPT(b)) Values (2, 2);
```
``` sql
SELECT * FROM insert_select_testtable;
```
```
┌─a─┬─b─┬─c─┐
│ 2 │ │ 2 │
└───┴───┴───┘
┌─a─┬─b─┬─c─┐
│ 1 │ a │ 1 │
└───┴───┴───┘
```
In this example, the second inserted row has its `a` and `c` columns filled with the passed values, while `b` is filled with the default value.
For columns that exist in the table structure but are missing from the insert list, data is filled in as follows:
- If a `DEFAULT` expression is defined, the inserted value is computed from the `DEFAULT` expression.
- If no `DEFAULT` expression is defined, zeros or empty strings are inserted.


@ -0,0 +1,17 @@
# ALL Clause {#select-all}
`SELECT ALL` is identical to `SELECT` without `DISTINCT`.
- If `ALL` is specified, it is ignored.
- If both `ALL` and `DISTINCT` are specified, an exception will be thrown.
`ALL` can also be specified inside an aggregate function, with the same (no-op) effect. For example:
```sql
SELECT sum(ALL number) FROM numbers(10);
```
is equivalent to
```sql
SELECT sum(number) FROM numbers(10);
```


@ -692,6 +692,37 @@ int Server::main(const std::vector<std::string> & /*args*/)
{
Settings::checkNoSettingNamesAtTopLevel(*config, config_path);
/// Limit on total memory usage
size_t max_server_memory_usage = config->getUInt64("max_server_memory_usage", 0);
double max_server_memory_usage_to_ram_ratio = config->getDouble("max_server_memory_usage_to_ram_ratio", 0.9);
size_t default_max_server_memory_usage = memory_amount * max_server_memory_usage_to_ram_ratio;
if (max_server_memory_usage == 0)
{
max_server_memory_usage = default_max_server_memory_usage;
LOG_INFO(log, "Setting max_server_memory_usage was set to {}"
" ({} available * {:.2f} max_server_memory_usage_to_ram_ratio)",
formatReadableSizeWithBinarySuffix(max_server_memory_usage),
formatReadableSizeWithBinarySuffix(memory_amount),
max_server_memory_usage_to_ram_ratio);
}
else if (max_server_memory_usage > default_max_server_memory_usage)
{
max_server_memory_usage = default_max_server_memory_usage;
LOG_INFO(log, "Setting max_server_memory_usage was lowered to {}"
" because the system has low amount of memory. The amount was"
" calculated as {} available"
" * {:.2f} max_server_memory_usage_to_ram_ratio",
formatReadableSizeWithBinarySuffix(max_server_memory_usage),
formatReadableSizeWithBinarySuffix(memory_amount),
max_server_memory_usage_to_ram_ratio);
}
total_memory_tracker.setHardLimit(max_server_memory_usage);
total_memory_tracker.setDescription("(total)");
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
// FIXME logging-related things need synchronization -- see the 'Logger * log' saved
// in a lot of places. For now, disable updating log configuration without server restart.
//setTextLog(global_context->getTextLog());
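The clamping above reduces to a simple rule: a configured value of 0 means "use the default of memory * ratio", and any explicit value is capped at that same default. A condensed sketch of the computation (`effectiveMemoryLimit` is a hypothetical helper, not part of the source):

```cpp
#include <algorithm>
#include <cstdint>

/// Hypothetical restatement of the limit computation above.
uint64_t effectiveMemoryLimit(uint64_t configured, uint64_t memory_amount, double ratio)
{
    const uint64_t default_limit = static_cast<uint64_t>(memory_amount * ratio);
    if (configured == 0)
        return default_limit;                       /// unset: fall back to memory * ratio
    return std::min(configured, default_limit);     /// never exceed memory * ratio
}
```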
@ -780,37 +811,6 @@ int Server::main(const std::vector<std::string> & /*args*/)
global_context->getMergeTreeSettings().sanityCheck(settings);
global_context->getReplicatedMergeTreeSettings().sanityCheck(settings);
/// Limit on total memory usage
size_t max_server_memory_usage = config().getUInt64("max_server_memory_usage", 0);
double max_server_memory_usage_to_ram_ratio = config().getDouble("max_server_memory_usage_to_ram_ratio", 0.9);
size_t default_max_server_memory_usage = memory_amount * max_server_memory_usage_to_ram_ratio;
if (max_server_memory_usage == 0)
{
max_server_memory_usage = default_max_server_memory_usage;
LOG_INFO(log, "Setting max_server_memory_usage was set to {}"
" ({} available * {:.2f} max_server_memory_usage_to_ram_ratio)",
formatReadableSizeWithBinarySuffix(max_server_memory_usage),
formatReadableSizeWithBinarySuffix(memory_amount),
max_server_memory_usage_to_ram_ratio);
}
else if (max_server_memory_usage > default_max_server_memory_usage)
{
max_server_memory_usage = default_max_server_memory_usage;
LOG_INFO(log, "Setting max_server_memory_usage was lowered to {}"
" because the system has low amount of memory. The amount was"
" calculated as {} available"
" * {:.2f} max_server_memory_usage_to_ram_ratio",
formatReadableSizeWithBinarySuffix(max_server_memory_usage),
formatReadableSizeWithBinarySuffix(memory_amount),
max_server_memory_usage_to_ram_ratio);
}
total_memory_tracker.setOrRaiseHardLimit(max_server_memory_usage);
total_memory_tracker.setDescription("(total)");
total_memory_tracker.setMetric(CurrentMetrics::MemoryTracking);
Poco::Timespan keep_alive_timeout(config().getUInt("keep_alive_timeout", 10), 0);
Poco::ThreadPool server_pool(3, config().getUInt("max_connections", 1024));

View File

@ -159,6 +159,7 @@ enum class AccessType
M(REMOTE, "", GLOBAL, SOURCES) \
M(MONGO, "", GLOBAL, SOURCES) \
M(MYSQL, "", GLOBAL, SOURCES) \
M(POSTGRES, "", GLOBAL, SOURCES) \
M(ODBC, "", GLOBAL, SOURCES) \
M(JDBC, "", GLOBAL, SOURCES) \
M(HDFS, "", GLOBAL, SOURCES) \

View File

@ -29,61 +29,74 @@ struct AggregateFunctionSumData
template <typename Value>
void NO_INLINE addMany(const Value * __restrict ptr, size_t count)
{
/// Compiler cannot unroll this loop, do it manually.
/// (at least for floats, most likely due to the lack of -fassociative-math)
/// Something around the number of SSE registers * the number of elements fit in register.
constexpr size_t unroll_count = 128 / sizeof(T);
T partial_sums[unroll_count]{};
const auto * end = ptr + count;
const auto * unrolled_end = ptr + (count / unroll_count * unroll_count);
while (ptr < unrolled_end)
if constexpr (std::is_floating_point_v<T>)
{
/// Compiler cannot unroll this loop, do it manually.
/// (at least for floats, most likely due to the lack of -fassociative-math)
/// Something around the number of SSE registers * the number of elements fit in register.
constexpr size_t unroll_count = 128 / sizeof(T);
T partial_sums[unroll_count]{};
const auto * unrolled_end = ptr + (count / unroll_count * unroll_count);
while (ptr < unrolled_end)
{
for (size_t i = 0; i < unroll_count; ++i)
partial_sums[i] += ptr[i];
ptr += unroll_count;
}
for (size_t i = 0; i < unroll_count; ++i)
partial_sums[i] += ptr[i];
ptr += unroll_count;
sum += partial_sums[i];
}
for (size_t i = 0; i < unroll_count; ++i)
sum += partial_sums[i];
/// clang cannot vectorize the loop if accumulator is class member instead of local variable.
T local_sum{};
while (ptr < end)
{
sum += *ptr;
local_sum += *ptr;
++ptr;
}
sum += local_sum;
}
template <typename Value>
void NO_INLINE addManyNotNull(const Value * __restrict ptr, const UInt8 * __restrict null_map, size_t count)
{
constexpr size_t unroll_count = 128 / sizeof(T);
T partial_sums[unroll_count]{};
const auto * end = ptr + count;
const auto * unrolled_end = ptr + (count / unroll_count * unroll_count);
while (ptr < unrolled_end)
if constexpr (std::is_floating_point_v<T>)
{
constexpr size_t unroll_count = 128 / sizeof(T);
T partial_sums[unroll_count]{};
const auto * unrolled_end = ptr + (count / unroll_count * unroll_count);
while (ptr < unrolled_end)
{
for (size_t i = 0; i < unroll_count; ++i)
if (!null_map[i])
partial_sums[i] += ptr[i];
ptr += unroll_count;
null_map += unroll_count;
}
for (size_t i = 0; i < unroll_count; ++i)
if (!null_map[i])
partial_sums[i] += ptr[i];
ptr += unroll_count;
null_map += unroll_count;
sum += partial_sums[i];
}
for (size_t i = 0; i < unroll_count; ++i)
sum += partial_sums[i];
T local_sum{};
while (ptr < end)
{
if (!*null_map)
sum += *ptr;
local_sum += *ptr;
++ptr;
++null_map;
}
sum += local_sum;
}
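The `local_sum` change above relies on the constraint stated in the comment: clang can keep a local accumulator in registers and vectorize the loop, while accumulating directly into a class member defeats auto-vectorization. A minimal standalone illustration of the pattern (a sketch, not the actual ClickHouse type):

```cpp
#include <cstddef>

struct Sum
{
    double sum = 0;

    void addMany(const double * __restrict ptr, size_t count)
    {
        /// Local accumulator: vectorizable. Writing to the member `sum`
        /// inside the loop would force a memory dependence on every iteration.
        double local_sum = 0;
        for (size_t i = 0; i < count; ++i)
            local_sum += ptr[i];
        sum += local_sum;
    }
};
```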
void merge(const AggregateFunctionSumData & rhs)

View File

@ -79,6 +79,11 @@ if (USE_AMQPCPP)
add_headers_and_sources(dbms Storages/RabbitMQ)
endif()
if (USE_LIBPQXX)
add_headers_and_sources(dbms Databases/PostgreSQL)
add_headers_and_sources(dbms Storages/PostgreSQL)
endif()
if (USE_ROCKSDB)
add_headers_and_sources(dbms Storages/RocksDB)
endif()
@ -439,6 +444,11 @@ if (USE_ROCKSDB)
dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${ROCKSDB_INCLUDE_DIR})
endif()
if (USE_LIBPQXX)
dbms_target_link_libraries(PUBLIC ${LIBPQXX_LIBRARY})
dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${LIBPQXX_INCLUDE_DIR})
endif()
dbms_target_link_libraries(PRIVATE _boost_context)
if (ENABLE_TESTS AND USE_GTEST)

View File

@ -106,7 +106,7 @@ private:
/// in separate thread.
mutable std::mutex cancel_mutex;
friend class RemoteQueryExecutorReadContext;
friend struct RemoteQueryExecutorRoutine;
};
}

View File

@ -670,4 +670,32 @@ ColumnAggregateFunction::ColumnAggregateFunction(const ColumnAggregateFunction &
{
}
MutableColumnPtr ColumnAggregateFunction::cloneResized(size_t size) const
{
if (size == 0)
return cloneEmpty();
size_t from_size = data.size();
if (size <= from_size)
{
auto res = createView();
auto & res_data = res->data;
res_data.assign(data.begin(), data.begin() + size);
return res;
}
else
{
/// Create a new column to return.
MutableColumnPtr cloned_col = cloneEmpty();
auto * res = typeid_cast<ColumnAggregateFunction *>(cloned_col.get());
res->insertRangeFrom(*this, 0, from_size);
for (size_t i = from_size; i < size; ++i)
res->insertDefault();
return cloned_col;
}
}
}

View File

@ -215,7 +215,7 @@ public:
void getExtremes(Field & min, Field & max) const override;
bool structureEquals(const IColumn &) const override;
MutableColumnPtr cloneResized(size_t size) const override;
};
}

View File

@ -86,6 +86,9 @@ public:
const ColumnArray & getNestedColumn() const { return assert_cast<const ColumnArray &>(*nested); }
ColumnArray & getNestedColumn() { return assert_cast<ColumnArray &>(*nested); }
const ColumnPtr & getNestedColumnPtr() const { return nested; }
ColumnPtr & getNestedColumnPtr() { return nested; }
const ColumnTuple & getNestedData() const { return assert_cast<const ColumnTuple &>(getNestedColumn().getData()); }
ColumnTuple & getNestedData() { return assert_cast<ColumnTuple &>(getNestedColumn().getData()); }
};

View File

@ -143,9 +143,11 @@ public:
const IColumn & getNestedColumn() const { return *nested_column; }
const ColumnPtr & getNestedColumnPtr() const { return nested_column; }
ColumnPtr & getNestedColumnPtr() { return nested_column; }
/// Return the column that represents the byte map.
const ColumnPtr & getNullMapColumnPtr() const { return null_map; }
ColumnPtr & getNullMapColumnPtr() { return null_map; }
ColumnUInt8 & getNullMapColumn() { return assert_cast<ColumnUInt8 &>(*null_map); }
const ColumnUInt8 & getNullMapColumn() const { return assert_cast<const ColumnUInt8 &>(*null_map); }

View File

@ -99,6 +99,7 @@ public:
Columns getColumnsCopy() const { return {columns.begin(), columns.end()}; }
const ColumnPtr & getColumnPtr(size_t idx) const { return columns[idx]; }
ColumnPtr & getColumnPtr(size_t idx) { return columns[idx]; }
private:
int compareAtImpl(size_t n, size_t m, const IColumn & rhs, int nan_direction_hint, const Collator * collator=nullptr) const;

View File

@ -56,7 +56,13 @@
M(LocalThreadActive, "Number of threads in local thread pools running a task.") \
M(DistributedFilesToInsert, "Number of pending files to process for asynchronous insertion into Distributed tables. Number of files for every shard is summed.") \
M(TablesToDropQueueSize, "Number of dropped tables, that are waiting for background data removal.") \
M(MaxDDLEntryID, "Max processed DDL entry of DDLWorker.") \
M(PartsTemporary, "The part is being generated now; it is not in the data_parts list.") \
M(PartsPreCommitted, "The part is in data_parts, but not used for SELECTs.") \
M(PartsCommitted, "Active data part, used by current and upcoming SELECTs.") \
M(PartsOutdated, "Inactive data part that could still be used by current SELECTs; it can be deleted after those SELECTs finish.") \
M(PartsDeleting, "Inactive data part with an identity refcounter; it is currently being deleted by a cleaner.") \
M(PartsDeleteOnDestroy, "The part was moved to another disk and should be deleted in its own destructor.") \
namespace CurrentMetrics
{

View File

@ -532,6 +532,7 @@
M(563, CANNOT_READ_MAP_FROM_TEXT) \
M(564, INTERSERVER_SCHEME_DOESNT_MATCH) \
M(565, TOO_MANY_PARTITIONS) \
M(566, CANNOT_RMDIR) \
\
M(999, KEEPER_EXCEPTION) \
M(1000, POCO_EXCEPTION) \

View File

@ -3,6 +3,7 @@
#include <boost/context/stack_context.hpp>
#include <Common/formatReadable.h>
#include <Common/CurrentMemoryTracker.h>
#include <Common/Exception.h>
#include <sys/time.h>
#include <sys/resource.h>

View File

@ -145,8 +145,9 @@ void MemoryTracker::alloc(Int64 size)
*/
Int64 will_be = size + amount.fetch_add(size, std::memory_order_relaxed);
if (metric != CurrentMetrics::end())
CurrentMetrics::add(metric, size);
auto metric_loaded = metric.load(std::memory_order_relaxed);
if (metric_loaded != CurrentMetrics::end())
CurrentMetrics::add(metric_loaded, size);
Int64 current_hard_limit = hard_limit.load(std::memory_order_relaxed);
Int64 current_profiler_limit = profiler_limit.load(std::memory_order_relaxed);
@ -181,7 +182,7 @@ void MemoryTracker::alloc(Int64 size)
if (unlikely(fault_probability && fault(thread_local_rng)) && memoryTrackerCanThrow(level, true))
{
/// Prevent recursion. Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc
BlockerInThread untrack_lock;
BlockerInThread untrack_lock(VariableContext::Global);
ProfileEvents::increment(ProfileEvents::QueryMemoryLimitExceeded);
const auto * description = description_ptr.load(std::memory_order_relaxed);
@ -195,7 +196,7 @@ void MemoryTracker::alloc(Int64 size)
if (unlikely(current_profiler_limit && will_be > current_profiler_limit))
{
BlockerInThread untrack_lock;
BlockerInThread untrack_lock(VariableContext::Global);
DB::TraceCollector::collect(DB::TraceType::Memory, StackTrace(), size);
setOrRaiseProfilerLimit((will_be + profiler_step - 1) / profiler_step * profiler_step);
}
@ -203,14 +204,14 @@ void MemoryTracker::alloc(Int64 size)
std::bernoulli_distribution sample(sample_probability);
if (unlikely(sample_probability && sample(thread_local_rng)))
{
BlockerInThread untrack_lock;
BlockerInThread untrack_lock(VariableContext::Global);
DB::TraceCollector::collect(DB::TraceType::MemorySample, StackTrace(), size);
}
if (unlikely(current_hard_limit && will_be > current_hard_limit) && memoryTrackerCanThrow(level, false))
{
/// Prevent recursion. Exception::ctor -> std::string -> new[] -> MemoryTracker::alloc
BlockerInThread untrack_lock;
BlockerInThread untrack_lock(VariableContext::Global);
ProfileEvents::increment(ProfileEvents::QueryMemoryLimitExceeded);
const auto * description = description_ptr.load(std::memory_order_relaxed);
@ -256,7 +257,7 @@ void MemoryTracker::free(Int64 size)
std::bernoulli_distribution sample(sample_probability);
if (unlikely(sample_probability && sample(thread_local_rng)))
{
BlockerInThread untrack_lock;
BlockerInThread untrack_lock(VariableContext::Global);
DB::TraceCollector::collect(DB::TraceType::MemorySample, StackTrace(), -size);
}
@ -286,8 +287,9 @@ void MemoryTracker::free(Int64 size)
if (auto * loaded_next = parent.load(std::memory_order_relaxed))
loaded_next->free(size);
if (metric != CurrentMetrics::end())
CurrentMetrics::sub(metric, accounted_size);
auto metric_loaded = metric.load(std::memory_order_relaxed);
if (metric_loaded != CurrentMetrics::end())
CurrentMetrics::sub(metric_loaded, accounted_size);
}
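The recurring `metric_loaded` pattern above is the standard fix for a check-then-use race: load the atomic member once, then test and use that single snapshot. A reduced sketch, with an `int` standing in for `CurrentMetrics::Metric`:

```cpp
#include <atomic>

struct Tracker
{
    static constexpr int no_metric = -1;    /// stand-in for CurrentMetrics::end()
    std::atomic<int> metric{no_metric};

    void add(long size)
    {
        /// One load, then compare and use the same value: a concurrent
        /// setMetric() can no longer be observed between check and use.
        const int metric_loaded = metric.load(std::memory_order_relaxed);
        if (metric_loaded != no_metric)
            increment(metric_loaded, size);
    }

    static void increment(int /*metric*/, long /*size*/) {}
};
```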
@ -302,8 +304,9 @@ void MemoryTracker::resetCounters()
void MemoryTracker::reset()
{
if (metric != CurrentMetrics::end())
CurrentMetrics::sub(metric, amount.load(std::memory_order_relaxed));
auto metric_loaded = metric.load(std::memory_order_relaxed);
if (metric_loaded != CurrentMetrics::end())
CurrentMetrics::sub(metric_loaded, amount.load(std::memory_order_relaxed));
resetCounters();
}
@ -316,6 +319,12 @@ void MemoryTracker::set(Int64 to)
}
void MemoryTracker::setHardLimit(Int64 value)
{
hard_limit.store(value, std::memory_order_relaxed);
}
void MemoryTracker::setOrRaiseHardLimit(Int64 value)
{
/// This is just atomic set to maximum.

View File

@ -53,7 +53,7 @@ private:
std::atomic<MemoryTracker *> parent {};
/// You could specify custom metric to track memory usage.
CurrentMetrics::Metric metric = CurrentMetrics::end();
std::atomic<CurrentMetrics::Metric> metric = CurrentMetrics::end();
/// This description will be used as prefix into log messages (if isn't nullptr)
std::atomic<const char *> description_ptr = nullptr;
@ -96,6 +96,8 @@ public:
return peak.load(std::memory_order_relaxed);
}
void setHardLimit(Int64 value);
/** Set limit if it was not set.
* Otherwise, set limit to new value, if new value is greater than previous limit.
*/
@ -132,7 +134,7 @@ public:
/// The memory consumption could be shown in realtime via CurrentMetrics counter
void setMetric(CurrentMetrics::Metric metric_)
{
metric = metric_;
metric.store(metric_, std::memory_order_relaxed);
}
void setDescription(const char * description)
@ -165,7 +167,7 @@ public:
VariableContext previous_level;
public:
/// level_ - block in level and above
BlockerInThread(VariableContext level_ = VariableContext::Global);
BlockerInThread(VariableContext level_ = VariableContext::User);
~BlockerInThread();
static bool isBlocked(VariableContext current_level)
@ -201,7 +203,7 @@ public:
public:
/// level_ - block in level and above
/// block_fault_injections_ - block in fault injection too
LockExceptionInThread(VariableContext level_ = VariableContext::Global, bool block_fault_injections_ = true);
LockExceptionInThread(VariableContext level_ = VariableContext::User, bool block_fault_injections_ = true);
~LockExceptionInThread();
static bool isBlocked(VariableContext current_level, bool fault_injection)

View File

@ -14,7 +14,7 @@ namespace UnicodeBar
{
double getWidth(double x, double min, double max, double max_width)
{
if (isNaN(x))
if (isNaN(x) || isNaN(min) || isNaN(max))
return 0;
if (x <= min)

View File

@ -45,7 +45,7 @@ bool CachedCompressedReadBuffer::nextImpl()
size_t size_decompressed;
size_t size_compressed_without_checksum;
owned_cell->compressed_size = readCompressedData(size_decompressed, size_compressed_without_checksum);
owned_cell->compressed_size = readCompressedData(size_decompressed, size_compressed_without_checksum, false);
if (owned_cell->compressed_size)
{

View File

@ -0,0 +1,24 @@
#include <Compression/CheckingCompressedReadBuffer.h>
namespace DB
{
bool CheckingCompressedReadBuffer::nextImpl()
{
size_t size_decompressed;
size_t size_compressed_without_checksum;
size_t size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum, true);
if (!size_compressed)
return false;
/// own_compressed_buffer also includes getAdditionalSizeAtTheEndOfBuffer()
/// which should not be accounted here, so size_compressed is used.
///
/// And BufferBase::set() is used instead of ReadBuffer's, since the latter resets the working_buffer.
BufferBase::set(own_compressed_buffer.data(), size_compressed, 0);
return true;
}
}

View File

@ -0,0 +1,27 @@
#pragma once
#include <Compression/CompressedReadBufferBase.h>
#include <IO/BufferWithOwnMemory.h>
#include <IO/ReadBuffer.h>
namespace DB
{
/** A buffer for reading from a compressed file with just checking checksums of
* the compressed blocks, without any decompression.
*/
class CheckingCompressedReadBuffer : public CompressedReadBufferBase, public ReadBuffer
{
protected:
bool nextImpl() override;
public:
CheckingCompressedReadBuffer(ReadBuffer & in_, bool allow_different_codecs_ = false)
: CompressedReadBufferBase(&in_, allow_different_codecs_)
, ReadBuffer(nullptr, 0)
{
}
};
}
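A hypothetical usage sketch: since the class only verifies checksums in nextImpl(), driving next() over the whole stream validates every block without decompressing anything (`validateCompressedData` and its file handling are illustrative, not from the source):

```cpp
#include <string>

#include <Compression/CheckingCompressedReadBuffer.h>
#include <IO/ReadBufferFromFile.h>

/// Hypothetical helper: throws on the first checksum mismatch.
void validateCompressedData(const std::string & path)
{
    DB::ReadBufferFromFile in(path);
    DB::CheckingCompressedReadBuffer checking(in);

    /// Each next() reads one compressed block and validates its checksum.
    while (checking.next())
        ;
}
```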

View File

@ -9,7 +9,7 @@ bool CompressedReadBuffer::nextImpl()
{
size_t size_decompressed;
size_t size_compressed_without_checksum;
size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum);
size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum, false);
if (!size_compressed)
return false;
@ -40,7 +40,7 @@ size_t CompressedReadBuffer::readBig(char * to, size_t n)
size_t size_decompressed;
size_t size_compressed_without_checksum;
if (!readCompressedData(size_decompressed, size_compressed_without_checksum))
if (!readCompressedData(size_decompressed, size_compressed_without_checksum, false))
return bytes_read;
auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer();

View File

@ -105,19 +105,18 @@ static void validateChecksum(char * data, size_t size, const Checksum expected_c
/// Read compressed data into compressed_buffer. Get the size of the decompressed data from the block header. Validate the checksum if needed.
/// Returns number of compressed bytes read.
size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum)
size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum, bool always_copy)
{
if (compressed_in->eof())
return 0;
Checksum checksum;
compressed_in->readStrict(reinterpret_cast<char *>(&checksum), sizeof(Checksum));
UInt8 header_size = ICompressionCodec::getHeaderSize();
own_compressed_buffer.resize(header_size);
compressed_in->readStrict(own_compressed_buffer.data(), header_size);
own_compressed_buffer.resize(header_size + sizeof(Checksum));
uint8_t method = ICompressionCodec::readMethod(own_compressed_buffer.data());
compressed_in->readStrict(own_compressed_buffer.data(), sizeof(Checksum) + header_size);
char * compressed_header = own_compressed_buffer.data() + sizeof(Checksum);
uint8_t method = ICompressionCodec::readMethod(compressed_header);
if (!codec)
{
@ -139,8 +138,8 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
}
}
size_compressed_without_checksum = ICompressionCodec::readCompressedBlockSize(own_compressed_buffer.data());
size_decompressed = ICompressionCodec::readDecompressedBlockSize(own_compressed_buffer.data());
size_compressed_without_checksum = ICompressionCodec::readCompressedBlockSize(compressed_header);
size_decompressed = ICompressionCodec::readDecompressedBlockSize(compressed_header);
/// This is for clang static analyzer.
assert(size_decompressed > 0);
@ -160,8 +159,9 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
auto additional_size_at_the_end_of_buffer = codec->getAdditionalSizeAtTheEndOfBuffer();
/// Is whole compressed block located in 'compressed_in->' buffer?
if (compressed_in->offset() >= header_size &&
compressed_in->position() + size_compressed_without_checksum + additional_size_at_the_end_of_buffer - header_size <= compressed_in->buffer().end())
if (!always_copy &&
compressed_in->offset() >= header_size + sizeof(Checksum) &&
compressed_in->available() >= (size_compressed_without_checksum - header_size) + additional_size_at_the_end_of_buffer + sizeof(Checksum))
{
compressed_in->position() -= header_size;
compressed_buffer = compressed_in->position();
@ -169,13 +169,16 @@ size_t CompressedReadBufferBase::readCompressedData(size_t & size_decompressed,
}
else
{
own_compressed_buffer.resize(size_compressed_without_checksum + additional_size_at_the_end_of_buffer);
compressed_buffer = own_compressed_buffer.data();
own_compressed_buffer.resize(sizeof(Checksum) + size_compressed_without_checksum + additional_size_at_the_end_of_buffer);
compressed_buffer = own_compressed_buffer.data() + sizeof(Checksum);
compressed_in->readStrict(compressed_buffer + header_size, size_compressed_without_checksum - header_size);
}
if (!disable_checksum)
{
Checksum & checksum = *reinterpret_cast<Checksum *>(own_compressed_buffer.data());
validateChecksum(compressed_buffer, size_compressed_without_checksum, checksum);
}
return size_compressed_without_checksum + sizeof(Checksum);
}

View File

@ -30,8 +30,12 @@ protected:
bool allow_different_codecs;
/// Read compressed data into compressed_buffer. Get the size of the decompressed data from the block header. Validate the checksum if needed.
///
/// If always_copy is true, the compressed block is copied into own_compressed_buffer even if it is already fully contained in compressed_in.buffer().
/// This is required for CheckingCompressedReadBuffer, since it is just a proxy.
///
/// Returns number of compressed bytes read.
size_t readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum);
size_t readCompressedData(size_t & size_decompressed, size_t & size_compressed_without_checksum, bool always_copy);
void decompress(char * to, size_t size_decompressed, size_t size_compressed_without_checksum);

View File

@ -19,7 +19,7 @@ bool CompressedReadBufferFromFile::nextImpl()
{
size_t size_decompressed = 0;
size_t size_compressed_without_checksum;
size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum);
size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum, false);
if (!size_compressed)
return false;
@ -98,7 +98,7 @@ size_t CompressedReadBufferFromFile::readBig(char * to, size_t n)
size_t size_decompressed = 0;
size_t size_compressed_without_checksum = 0;
size_t new_size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum);
size_t new_size_compressed = readCompressedData(size_decompressed, size_compressed_without_checksum, false);
size_compressed = 0; /// file_in no longer points to the end of the block in working_buffer.
if (!new_size_compressed)
return bytes_read;

View File

@ -17,6 +17,7 @@ PEERDIR(
SRCS(
CachedCompressedReadBuffer.cpp
CheckingCompressedReadBuffer.cpp
CompressedReadBuffer.cpp
CompressedReadBufferBase.cpp
CompressedReadBufferFromFile.cpp

View File

@ -250,7 +250,16 @@ void BackgroundSchedulePool::threadFunction()
while (!shutdown)
{
if (Poco::AutoPtr<Poco::Notification> notification = queue.waitDequeueNotification())
/// We have to wait with a timeout to prevent a very rare deadlock caused by the following race condition:
/// 1. Background thread N: threadFunction(): checks for shutdown (it's false)
/// 2. Main thread: ~BackgroundSchedulePool(): sets shutdown to true and calls queue.wakeUpAll(), which triggers
///    all existing Poco::Events inside Poco::NotificationQueue that background threads are waiting on.
/// 3. Background thread N: threadFunction(): calls queue.waitDequeueNotification(), which creates
///    a new Poco::Event inside Poco::NotificationQueue and starts to wait on it.
/// Background thread N will never be woken up.
/// TODO Do we really need Poco::NotificationQueue? Why not use std::queue + mutex + condvar, or maybe even DB::ThreadPool?
constexpr size_t wait_timeout_ms = 500;
if (Poco::AutoPtr<Poco::Notification> notification = queue.waitDequeueNotification(wait_timeout_ms))
{
TaskNotification & task_notification = static_cast<TaskNotification &>(*notification);
task_notification.execute();

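The mitigation above, a bounded wait plus re-checking the flag on every iteration, is the generic defensive pattern for shutdown races around blocking waits. A sketch of the same idea with the standard library instead of Poco (illustrative only, not the actual pool code):

```cpp
#include <chrono>
#include <condition_variable>
#include <mutex>

std::mutex mutex;
std::condition_variable cv;
bool shutdown = false;

void threadFunction()
{
    std::unique_lock lock(mutex);
    while (!shutdown)
    {
        /// The bounded wait guarantees the shutdown flag is re-checked
        /// periodically even if a wake-up is missed.
        cv.wait_for(lock, std::chrono::milliseconds(500));
    }
}
```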
View File

@ -4,6 +4,7 @@
#include <DataTypes/DataTypeDateTime64.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeFixedString.h>
#include <DataTypes/DataTypeUUID.h>
#include <DataTypes/DataTypesDecimal.h>
@ -35,49 +36,53 @@ void ExternalResultDescription::init(const Block & sample_block_)
DataTypePtr type_not_nullable = removeNullable(elem.type);
const IDataType * type = type_not_nullable.get();
if (typeid_cast<const DataTypeUInt8 *>(type))
WhichDataType which(type);
if (which.isUInt8())
types.emplace_back(ValueType::vtUInt8, is_nullable);
else if (typeid_cast<const DataTypeUInt16 *>(type))
else if (which.isUInt16())
types.emplace_back(ValueType::vtUInt16, is_nullable);
else if (typeid_cast<const DataTypeUInt32 *>(type))
else if (which.isUInt32())
types.emplace_back(ValueType::vtUInt32, is_nullable);
else if (typeid_cast<const DataTypeUInt64 *>(type))
else if (which.isUInt64())
types.emplace_back(ValueType::vtUInt64, is_nullable);
else if (typeid_cast<const DataTypeInt8 *>(type))
else if (which.isInt8())
types.emplace_back(ValueType::vtInt8, is_nullable);
else if (typeid_cast<const DataTypeInt16 *>(type))
else if (which.isInt16())
types.emplace_back(ValueType::vtInt16, is_nullable);
else if (typeid_cast<const DataTypeInt32 *>(type))
else if (which.isInt32())
types.emplace_back(ValueType::vtInt32, is_nullable);
else if (typeid_cast<const DataTypeInt64 *>(type))
else if (which.isInt64())
types.emplace_back(ValueType::vtInt64, is_nullable);
else if (typeid_cast<const DataTypeFloat32 *>(type))
else if (which.isFloat32())
types.emplace_back(ValueType::vtFloat32, is_nullable);
else if (typeid_cast<const DataTypeFloat64 *>(type))
else if (which.isFloat64())
types.emplace_back(ValueType::vtFloat64, is_nullable);
else if (typeid_cast<const DataTypeString *>(type))
else if (which.isString())
types.emplace_back(ValueType::vtString, is_nullable);
else if (typeid_cast<const DataTypeDate *>(type))
else if (which.isDate())
types.emplace_back(ValueType::vtDate, is_nullable);
else if (typeid_cast<const DataTypeDateTime *>(type))
else if (which.isDateTime())
types.emplace_back(ValueType::vtDateTime, is_nullable);
else if (typeid_cast<const DataTypeUUID *>(type))
else if (which.isUUID())
types.emplace_back(ValueType::vtUUID, is_nullable);
else if (typeid_cast<const DataTypeEnum8 *>(type))
else if (which.isEnum8())
types.emplace_back(ValueType::vtString, is_nullable);
else if (typeid_cast<const DataTypeEnum16 *>(type))
else if (which.isEnum16())
types.emplace_back(ValueType::vtString, is_nullable);
else if (typeid_cast<const DataTypeDateTime64 *>(type))
else if (which.isDateTime64())
types.emplace_back(ValueType::vtDateTime64, is_nullable);
else if (typeid_cast<const DataTypeDecimal<Decimal32> *>(type))
else if (which.isDecimal32())
types.emplace_back(ValueType::vtDecimal32, is_nullable);
else if (typeid_cast<const DataTypeDecimal<Decimal64> *>(type))
else if (which.isDecimal64())
types.emplace_back(ValueType::vtDecimal64, is_nullable);
else if (typeid_cast<const DataTypeDecimal<Decimal128> *>(type))
else if (which.isDecimal128())
types.emplace_back(ValueType::vtDecimal128, is_nullable);
else if (typeid_cast<const DataTypeDecimal<Decimal256> *>(type))
else if (which.isDecimal256())
types.emplace_back(ValueType::vtDecimal256, is_nullable);
else if (typeid_cast<const DataTypeFixedString *>(type))
else if (which.isArray())
types.emplace_back(ValueType::vtArray, is_nullable);
else if (which.isFixedString())
types.emplace_back(ValueType::vtFixedString, is_nullable);
else
throw Exception{"Unsupported type " + type->getName(), ErrorCodes::UNKNOWN_TYPE};

View File

@ -31,6 +31,7 @@ struct ExternalResultDescription
vtDecimal64,
vtDecimal128,
vtDecimal256,
vtArray,
vtFixedString
};

View File

@ -17,6 +17,29 @@ namespace ErrorCodes
extern const int THERE_IS_NO_COLUMN;
}
NameAndTypePair::NameAndTypePair(
const String & name_in_storage_, const String & subcolumn_name_,
const DataTypePtr & type_in_storage_, const DataTypePtr & subcolumn_type_)
: name(name_in_storage_ + (subcolumn_name_.empty() ? "" : "." + subcolumn_name_))
, type(subcolumn_type_)
, type_in_storage(type_in_storage_)
, subcolumn_delimiter_position(name_in_storage_.size()) {}
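/// Example: NameAndTypePair("point", "x", tuple_type, float_type) yields
/// name == "point.x", getNameInStorage() == "point", getSubcolumnName() == "x".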
String NameAndTypePair::getNameInStorage() const
{
if (!subcolumn_delimiter_position)
return name;
return name.substr(0, *subcolumn_delimiter_position);
}
String NameAndTypePair::getSubcolumnName() const
{
if (!subcolumn_delimiter_position)
return "";
return name.substr(*subcolumn_delimiter_position + 1, name.size() - *subcolumn_delimiter_position);
}
void NamesAndTypesList::readText(ReadBuffer & buf)
{
@ -137,25 +160,20 @@ NamesAndTypesList NamesAndTypesList::filter(const Names & names) const
NamesAndTypesList NamesAndTypesList::addTypes(const Names & names) const
{
/// NOTE: It's better to make a map in `IStorage` than to create it here every time.
#if !defined(ARCADIA_BUILD)
google::dense_hash_map<StringRef, const DataTypePtr *, StringRefHash> types;
#else
google::sparsehash::dense_hash_map<StringRef, const DataTypePtr *, StringRefHash> types;
#endif
types.set_empty_key(StringRef());
std::unordered_map<std::string_view, const NameAndTypePair *> self_columns;
for (const NameAndTypePair & column : *this)
types[column.name] = &column.type;
for (const auto & column : *this)
self_columns[column.name] = &column;
NamesAndTypesList res;
for (const String & name : names)
{
auto it = types.find(name);
if (it == types.end())
auto it = self_columns.find(name);
if (it == self_columns.end())
throw Exception("No column " + name, ErrorCodes::THERE_IS_NO_COLUMN);
res.emplace_back(name, *it->second);
res.emplace_back(*it->second);
}
return res;
}

View File

@ -15,11 +15,19 @@ namespace DB
struct NameAndTypePair
{
String name;
DataTypePtr type;
public:
NameAndTypePair() = default;
NameAndTypePair(const String & name_, const DataTypePtr & type_)
: name(name_), type(type_), type_in_storage(type_) {}
NameAndTypePair() {}
NameAndTypePair(const String & name_, const DataTypePtr & type_) : name(name_), type(type_) {}
NameAndTypePair(const String & name_in_storage_, const String & subcolumn_name_,
const DataTypePtr & type_in_storage_, const DataTypePtr & subcolumn_type_);
String getNameInStorage() const;
String getSubcolumnName() const;
bool isSubcolumn() const { return subcolumn_delimiter_position != std::nullopt; }
DataTypePtr getTypeInStorage() const { return type_in_storage; }
bool operator<(const NameAndTypePair & rhs) const
{
@ -30,8 +38,26 @@ struct NameAndTypePair
{
return name == rhs.name && type->equals(*rhs.type);
}
String name;
DataTypePtr type;
private:
DataTypePtr type_in_storage;
std::optional<size_t> subcolumn_delimiter_position;
};
/// This is needed to support structured bindings for NameAndTypePair:
/// const auto & [name, type] = name_and_type
template <int I>
decltype(auto) get(const NameAndTypePair & name_and_type)
{
if constexpr (I == 0)
return name_and_type.name;
else if constexpr (I == 1)
return name_and_type.type;
}
using NamesAndTypes = std::vector<NameAndTypePair>;
class NamesAndTypesList : public std::list<NameAndTypePair>
@ -81,3 +107,10 @@ public:
};
}
namespace std
{
template <> struct tuple_size<DB::NameAndTypePair> : std::integral_constant<size_t, 2> {};
template <> struct tuple_element<0, DB::NameAndTypePair> { using type = DB::String; };
template <> struct tuple_element<1, DB::NameAndTypePair> { using type = DB::DataTypePtr; };
}
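The `std::tuple_size`/`std::tuple_element` specializations and the `get<I>` overload above are exactly what the structured-bindings protocol requires for a non-aggregate type. A self-contained sketch of the same mechanism, with placeholder types standing in for the ClickHouse ones:

```cpp
#include <string>
#include <tuple>

struct Pair
{
    std::string name;
    int type = 0;    /// placeholder for DataTypePtr
};

template <int I>
decltype(auto) get(const Pair & p)
{
    if constexpr (I == 0)
        return (p.name);    /// parentheses make decltype(auto) deduce a reference
    else
        return (p.type);
}

namespace std
{
    template <> struct tuple_size<Pair> : integral_constant<size_t, 2> {};
    template <> struct tuple_element<0, Pair> { using type = string; };
    template <> struct tuple_element<1, Pair> { using type = int; };
}

int main()
{
    Pair p{"x", 42};
    const auto & [name, type] = p;    /// calls get<0>(p) and get<1>(p) via ADL
    return (name == "x" && type == 42) ? 0 : 1;
}
```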

View File

@ -67,6 +67,7 @@ class IColumn;
M(UInt64, s3_min_upload_part_size, 512*1024*1024, "The minimum size of part to upload during multipart upload to S3.", 0) \
M(UInt64, s3_max_single_part_upload_size, 64*1024*1024, "The maximum size of object to upload using singlepart upload to S3.", 0) \
M(UInt64, s3_max_redirects, 10, "Max number of S3 redirects hops allowed.", 0) \
M(UInt64, s3_max_connections, 1024, "The maximum number of connections per server.", 0) \
M(Bool, extremes, false, "Calculate minimums and maximums of the result columns. They can be output in JSON-formats.", IMPORTANT) \
M(Bool, use_uncompressed_cache, true, "Whether to use the cache of uncompressed blocks.", 0) \
M(Bool, replace_running_query, false, "Whether the running request should be canceled with the same id as the new one.", 0) \
@ -405,6 +406,7 @@ class IColumn;
M(Bool, allow_non_metadata_alters, true, "Allow to execute alters which affects not only tables metadata, but also data on disk", 0) \
M(Bool, enable_global_with_statement, false, "Propagate WITH statements to UNION queries and all subqueries", 0) \
M(Bool, aggregate_functions_null_for_empty, false, "Rewrite all aggregate functions in a query, adding -OrNull suffix to them", 0) \
M(Bool, flatten_nested, true, "If true, columns of type Nested will be flattened into separate array columns instead of one array of tuples", 0) \
M(Bool, asterisk_include_materialized_columns, false, "Include MATERIALIZED columns for wildcard query", 0) \
M(Bool, asterisk_include_alias_columns, false, "Include ALIAS columns for wildcard query", 0) \
M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \

View File

@ -12,4 +12,4 @@
#cmakedefine01 USE_OPENCL
#cmakedefine01 USE_LDAP
#cmakedefine01 USE_ROCKSDB
#cmakedefine01 USE_LIBPQXX

View File

@ -71,7 +71,7 @@ void NativeBlockInputStream::resetParser()
is_killed.store(false);
}
void NativeBlockInputStream::readData(const IDataType & type, IColumn & column, ReadBuffer & istr, size_t rows, double avg_value_size_hint)
void NativeBlockInputStream::readData(const IDataType & type, ColumnPtr & column, ReadBuffer & istr, size_t rows, double avg_value_size_hint)
{
IDataType::DeserializeBinaryBulkSettings settings;
settings.getter = [&](IDataType::SubstreamPath) -> ReadBuffer * { return &istr; };
@ -82,8 +82,8 @@ void NativeBlockInputStream::readData(const IDataType & type, IColumn & column,
type.deserializeBinaryBulkStatePrefix(settings, state);
type.deserializeBinaryBulkWithMultipleStreams(column, rows, settings, state);
if (column.size() != rows)
throw Exception("Cannot read all data in NativeBlockInputStream. Rows read: " + toString(column.size()) + ". Rows expected: " + toString(rows) + ".",
if (column->size() != rows)
throw Exception("Cannot read all data in NativeBlockInputStream. Rows read: " + toString(column->size()) + ". Rows expected: " + toString(rows) + ".",
ErrorCodes::CANNOT_READ_ALL_DATA);
}
@ -158,11 +158,11 @@ Block NativeBlockInputStream::readImpl()
}
/// Data
MutableColumnPtr read_column = column.type->createColumn();
ColumnPtr read_column = column.type->createColumn();
double avg_value_size_hint = avg_value_size_hints.empty() ? 0 : avg_value_size_hints[i];
if (rows) /// If no rows, nothing to read.
readData(*column.type, *read_column, istr, rows, avg_value_size_hint);
readData(*column.type, read_column, istr, rows, avg_value_size_hint);
column.column = std::move(read_column);

View File

@ -74,7 +74,7 @@ public:
String getName() const override { return "Native"; }
static void readData(const IDataType & type, IColumn & column, ReadBuffer & istr, size_t rows, double avg_value_size_hint);
static void readData(const IDataType & type, ColumnPtr & column, ReadBuffer & istr, size_t rows, double avg_value_size_hint);
Block getHeader() const override;

View File

@ -0,0 +1,297 @@
#include "PostgreSQLBlockInputStream.h"
#if USE_LIBPQXX
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnArray.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/ColumnDecimal.h>
#include <DataTypes/IDataType.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypesDecimal.h>
#include <Interpreters/convertFieldToType.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
#include <IO/ReadBufferFromString.h>
#include <Common/assert_cast.h>
#include <ext/range.h>
#include <common/logger_useful.h>
namespace DB
{
namespace ErrorCodes
{
extern const int BAD_ARGUMENTS;
}
PostgreSQLBlockInputStream::PostgreSQLBlockInputStream(
ConnectionPtr connection_,
const std::string & query_str_,
const Block & sample_block,
const UInt64 max_block_size_)
: query_str(query_str_)
, max_block_size(max_block_size_)
, connection(connection_)
{
description.init(sample_block);
for (const auto idx : ext::range(0, description.sample_block.columns()))
if (description.types[idx].first == ValueType::vtArray)
prepareArrayInfo(idx, description.sample_block.getByPosition(idx).type);
/// pqxx::stream_from uses the COPY command and fails if ';' is present
if (query_str.ends_with(';'))
query_str.resize(query_str.size() - 1);
}
void PostgreSQLBlockInputStream::readPrefix()
{
tx = std::make_unique<pqxx::read_transaction>(*connection);
stream = std::make_unique<pqxx::stream_from>(*tx, pqxx::from_query, std::string_view(query_str));
}
Block PostgreSQLBlockInputStream::readImpl()
{
/// Check if pqxx::stream_from is finished
if (!stream || !(*stream))
return Block();
MutableColumns columns = description.sample_block.cloneEmptyColumns();
size_t num_rows = 0;
while (true)
{
const std::vector<pqxx::zview> * row{stream->read_row()};
/// row is nullptr if pqxx::stream_from is finished
if (!row)
break;
for (const auto idx : ext::range(0, row->size()))
{
const auto & sample = description.sample_block.getByPosition(idx);
/// If the value is NULL, pqxx::zview returns nullptr from c_str()
if ((*row)[idx].c_str())
{
if (description.types[idx].second)
{
ColumnNullable & column_nullable = assert_cast<ColumnNullable &>(*columns[idx]);
const auto & data_type = assert_cast<const DataTypeNullable &>(*sample.type);
insertValue(column_nullable.getNestedColumn(), (*row)[idx], description.types[idx].first, data_type.getNestedType(), idx);
column_nullable.getNullMapData().emplace_back(0);
}
else
{
insertValue(*columns[idx], (*row)[idx], description.types[idx].first, sample.type, idx);
}
}
else
{
insertDefaultValue(*columns[idx], *sample.column);
}
}
if (++num_rows == max_block_size)
break;
}
return description.sample_block.cloneWithColumns(std::move(columns));
}
void PostgreSQLBlockInputStream::readSuffix()
{
if (stream)
{
stream->complete();
tx->commit();
}
}
void PostgreSQLBlockInputStream::insertValue(IColumn & column, std::string_view value,
const ExternalResultDescription::ValueType type, const DataTypePtr data_type, size_t idx)
{
switch (type)
{
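/// Note: the 8-bit cases below parse through the 16-bit from_string
/// overloads, presumably because libpqxx treats 8-bit integer types as
/// characters rather than numbers.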
case ValueType::vtUInt8:
assert_cast<ColumnUInt8 &>(column).insertValue(pqxx::from_string<uint16_t>(value));
break;
case ValueType::vtUInt16:
assert_cast<ColumnUInt16 &>(column).insertValue(pqxx::from_string<uint16_t>(value));
break;
case ValueType::vtUInt32:
assert_cast<ColumnUInt32 &>(column).insertValue(pqxx::from_string<uint32_t>(value));
break;
case ValueType::vtUInt64:
assert_cast<ColumnUInt64 &>(column).insertValue(pqxx::from_string<uint64_t>(value));
break;
case ValueType::vtInt8:
assert_cast<ColumnInt8 &>(column).insertValue(pqxx::from_string<int16_t>(value));
break;
case ValueType::vtInt16:
assert_cast<ColumnInt16 &>(column).insertValue(pqxx::from_string<int16_t>(value));
break;
case ValueType::vtInt32:
assert_cast<ColumnInt32 &>(column).insertValue(pqxx::from_string<int32_t>(value));
break;
case ValueType::vtInt64:
assert_cast<ColumnInt64 &>(column).insertValue(pqxx::from_string<int64_t>(value));
break;
case ValueType::vtFloat32:
assert_cast<ColumnFloat32 &>(column).insertValue(pqxx::from_string<float>(value));
break;
case ValueType::vtFloat64:
assert_cast<ColumnFloat64 &>(column).insertValue(pqxx::from_string<double>(value));
break;
case ValueType::vtFixedString:[[fallthrough]];
case ValueType::vtString:
assert_cast<ColumnString &>(column).insertData(value.data(), value.size());
break;
case ValueType::vtUUID:
assert_cast<ColumnUInt128 &>(column).insert(parse<UUID>(value.data(), value.size()));
break;
case ValueType::vtDate:
assert_cast<ColumnUInt16 &>(column).insertValue(UInt16{LocalDate{std::string(value)}.getDayNum()});
break;
case ValueType::vtDateTime:
assert_cast<ColumnUInt32 &>(column).insertValue(time_t{LocalDateTime{std::string(value)}});
break;
case ValueType::vtDateTime64:[[fallthrough]];
case ValueType::vtDecimal32: [[fallthrough]];
case ValueType::vtDecimal64: [[fallthrough]];
case ValueType::vtDecimal128: [[fallthrough]];
case ValueType::vtDecimal256:
{
ReadBufferFromString istr(value);
data_type->deserializeAsWholeText(column, istr, FormatSettings{});
break;
}
case ValueType::vtArray:
{
pqxx::array_parser parser{value};
std::pair<pqxx::array_parser::juncture, std::string> parsed = parser.get_next();
size_t dimension = 0, max_dimension = 0, expected_dimensions = array_info[idx].num_dimensions;
const auto parse_value = array_info[idx].pqxx_parser;
std::vector<std::vector<Field>> dimensions(expected_dimensions + 1);
while (parsed.first != pqxx::array_parser::juncture::done)
{
if ((parsed.first == pqxx::array_parser::juncture::row_start) && (++dimension > expected_dimensions))
throw Exception("Got more dimensions than expected", ErrorCodes::BAD_ARGUMENTS);
else if (parsed.first == pqxx::array_parser::juncture::string_value)
dimensions[dimension].emplace_back(parse_value(parsed.second));
else if (parsed.first == pqxx::array_parser::juncture::null_value)
dimensions[dimension].emplace_back(array_info[idx].default_value);
else if (parsed.first == pqxx::array_parser::juncture::row_end)
{
max_dimension = std::max(max_dimension, dimension);
if (--dimension == 0)
break;
dimensions[dimension].emplace_back(Array(dimensions[dimension + 1].begin(), dimensions[dimension + 1].end()));
dimensions[dimension + 1].clear();
}
parsed = parser.get_next();
}
if (max_dimension < expected_dimensions)
throw Exception(ErrorCodes::BAD_ARGUMENTS,
"Got fewer dimensions than expected ({} instead of {})", max_dimension, expected_dimensions);
assert_cast<ColumnArray &>(column).insert(Array(dimensions[1].begin(), dimensions[1].end()));
break;
}
}
}
void PostgreSQLBlockInputStream::prepareArrayInfo(size_t column_idx, const DataTypePtr data_type)
{
const auto * array_type = typeid_cast<const DataTypeArray *>(data_type.get());
auto nested = array_type->getNestedType();
size_t count_dimensions = 1;
while (isArray(nested))
{
++count_dimensions;
nested = typeid_cast<const DataTypeArray *>(nested.get())->getNestedType();
}
Field default_value = nested->getDefault();
if (nested->isNullable())
nested = static_cast<const DataTypeNullable *>(nested.get())->getNestedType();
WhichDataType which(nested);
std::function<Field(std::string & fields)> parser;
if (which.isUInt8() || which.isUInt16())
parser = [](std::string & field) -> Field { return pqxx::from_string<uint16_t>(field); };
else if (which.isInt8() || which.isInt16())
parser = [](std::string & field) -> Field { return pqxx::from_string<int16_t>(field); };
else if (which.isUInt32())
parser = [](std::string & field) -> Field { return pqxx::from_string<uint32_t>(field); };
else if (which.isInt32())
parser = [](std::string & field) -> Field { return pqxx::from_string<int32_t>(field); };
else if (which.isUInt64())
parser = [](std::string & field) -> Field { return pqxx::from_string<uint64_t>(field); };
else if (which.isInt64())
parser = [](std::string & field) -> Field { return pqxx::from_string<int64_t>(field); };
else if (which.isFloat32())
parser = [](std::string & field) -> Field { return pqxx::from_string<float>(field); };
else if (which.isFloat64())
parser = [](std::string & field) -> Field { return pqxx::from_string<double>(field); };
else if (which.isString() || which.isFixedString())
parser = [](std::string & field) -> Field { return field; };
else if (which.isDate())
parser = [](std::string & field) -> Field { return UInt16{LocalDate{field}.getDayNum()}; };
else if (which.isDateTime())
parser = [](std::string & field) -> Field { return time_t{LocalDateTime{field}}; };
else if (which.isDecimal32())
parser = [nested](std::string & field) -> Field
{
const auto & type = typeid_cast<const DataTypeDecimal<Decimal32> *>(nested.get());
DataTypeDecimal<Decimal32> res(getDecimalPrecision(*type), getDecimalScale(*type));
return convertFieldToType(field, res);
};
else if (which.isDecimal64())
parser = [nested](std::string & field) -> Field
{
const auto & type = typeid_cast<const DataTypeDecimal<Decimal64> *>(nested.get());
DataTypeDecimal<Decimal64> res(getDecimalPrecision(*type), getDecimalScale(*type));
return convertFieldToType(field, res);
};
else if (which.isDecimal128())
parser = [nested](std::string & field) -> Field
{
const auto & type = typeid_cast<const DataTypeDecimal<Decimal128> *>(nested.get());
DataTypeDecimal<Decimal128> res(getDecimalPrecision(*type), getDecimalScale(*type));
return convertFieldToType(field, res);
};
else if (which.isDecimal256())
parser = [nested](std::string & field) -> Field
{
const auto & type = typeid_cast<const DataTypeDecimal<Decimal256> *>(nested.get());
DataTypeDecimal<Decimal256> res(getDecimalPrecision(*type), getDecimalScale(*type));
return convertFieldToType(field, res);
};
else
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Type conversion to {} is not supported", nested->getName());
array_info[column_idx] = {count_dimensions, default_value, parser};
}
}
#endif

View File

@ -0,0 +1,65 @@
#pragma once
#if !defined(ARCADIA_BUILD)
#include "config_core.h"
#endif
#if USE_LIBPQXX
#include <Core/Block.h>
#include <DataStreams/IBlockInputStream.h>
#include <Core/ExternalResultDescription.h>
#include <Core/Field.h>
#include <pqxx/pqxx>
namespace DB
{
using ConnectionPtr = std::shared_ptr<pqxx::connection>;
class PostgreSQLBlockInputStream : public IBlockInputStream
{
public:
PostgreSQLBlockInputStream(
ConnectionPtr connection_,
const std::string & query_str,
const Block & sample_block,
const UInt64 max_block_size_);
String getName() const override { return "PostgreSQL"; }
Block getHeader() const override { return description.sample_block.cloneEmpty(); }
private:
using ValueType = ExternalResultDescription::ValueType;
void readPrefix() override;
Block readImpl() override;
void readSuffix() override;
void insertValue(IColumn & column, std::string_view value,
const ExternalResultDescription::ValueType type, const DataTypePtr data_type, size_t idx);
void insertDefaultValue(IColumn & column, const IColumn & sample_column)
{
column.insertFrom(sample_column, 0);
}
void prepareArrayInfo(size_t column_idx, const DataTypePtr data_type);
String query_str;
const UInt64 max_block_size;
ExternalResultDescription description;
ConnectionPtr connection;
std::unique_ptr<pqxx::read_transaction> tx;
std::unique_ptr<pqxx::stream_from> stream;
struct ArrayInfo
{
size_t num_dimensions;
Field default_value;
std::function<Field(std::string & field)> pqxx_parser;
};
std::unordered_map<size_t, ArrayInfo> array_info;
};
}
#endif

View File

@ -0,0 +1,238 @@
#if defined(OS_LINUX)
#include <DataStreams/RemoteQueryExecutorReadContext.h>
#include <Common/Exception.h>
#include <Common/NetException.h>
#include <Client/MultiplexedConnections.h>
#include <sys/epoll.h>
namespace DB
{
struct RemoteQueryExecutorRoutine
{
MultiplexedConnections & connections;
RemoteQueryExecutorReadContext & read_context;
struct ReadCallback
{
RemoteQueryExecutorReadContext & read_context;
Fiber & fiber;
void operator()(Poco::Net::Socket & socket)
{
try
{
read_context.setSocket(socket);
}
catch (DB::Exception & e)
{
e.addMessage(" while reading from socket ({})", socket.peerAddress().toString());
throw;
}
read_context.is_read_in_progress.store(true, std::memory_order_relaxed);
fiber = std::move(fiber).resume();
read_context.is_read_in_progress.store(false, std::memory_order_relaxed);
}
};
Fiber operator()(Fiber && sink) const
{
try
{
while (true)
{
read_context.packet = connections.receivePacketUnlocked(ReadCallback{read_context, sink});
sink = std::move(sink).resume();
}
}
catch (const boost::context::detail::forced_unwind &)
{
/// This exception is thrown by the fiber implementation when the fiber is being deleted but hasn't exited.
/// It should not be caught, or it will segfault.
/// All other exceptions must be caught.
throw;
}
catch (...)
{
read_context.exception = std::current_exception();
}
return std::move(sink);
}
};
namespace ErrorCodes
{
extern const int CANNOT_READ_FROM_SOCKET;
extern const int CANNOT_OPEN_FILE;
extern const int SOCKET_TIMEOUT;
}
RemoteQueryExecutorReadContext::RemoteQueryExecutorReadContext(MultiplexedConnections & connections_)
: connections(connections_)
{
epoll_fd = epoll_create(2);
if (-1 == epoll_fd)
throwFromErrno("Cannot create epoll descriptor", ErrorCodes::CANNOT_OPEN_FILE);
if (-1 == pipe2(pipe_fd, O_NONBLOCK))
throwFromErrno("Cannot create pipe", ErrorCodes::CANNOT_OPEN_FILE);
{
epoll_event socket_event;
socket_event.events = EPOLLIN | EPOLLPRI;
socket_event.data.fd = pipe_fd[0];
if (-1 == epoll_ctl(epoll_fd, EPOLL_CTL_ADD, pipe_fd[0], &socket_event))
throwFromErrno("Cannot add pipe descriptor to epoll", ErrorCodes::CANNOT_OPEN_FILE);
}
{
epoll_event timer_event;
timer_event.events = EPOLLIN | EPOLLPRI;
timer_event.data.fd = timer.getDescriptor();
if (-1 == epoll_ctl(epoll_fd, EPOLL_CTL_ADD, timer_event.data.fd, &timer_event))
throwFromErrno("Cannot add timer descriptor to epoll", ErrorCodes::CANNOT_OPEN_FILE);
}
auto routine = RemoteQueryExecutorRoutine{connections, *this};
fiber = boost::context::fiber(std::allocator_arg_t(), stack, std::move(routine));
}
void RemoteQueryExecutorReadContext::setSocket(Poco::Net::Socket & socket)
{
int fd = socket.impl()->sockfd();
if (fd == socket_fd)
return;
epoll_event socket_event;
socket_event.events = EPOLLIN | EPOLLPRI;
socket_event.data.fd = fd;
if (socket_fd != -1)
{
if (-1 == epoll_ctl(epoll_fd, EPOLL_CTL_DEL, socket_fd, &socket_event))
throwFromErrno("Cannot remove socket descriptor from epoll", ErrorCodes::CANNOT_OPEN_FILE);
}
socket_fd = fd;
if (-1 == epoll_ctl(epoll_fd, EPOLL_CTL_ADD, socket_fd, &socket_event))
throwFromErrno("Cannot add socket descriptor to epoll", ErrorCodes::CANNOT_OPEN_FILE);
receive_timeout = socket.impl()->getReceiveTimeout();
}
bool RemoteQueryExecutorReadContext::checkTimeout() const
{
try
{
return checkTimeoutImpl();
}
catch (DB::Exception & e)
{
if (last_used_socket)
e.addMessage(" while reading from socket ({})", last_used_socket->peerAddress().toString());
throw;
}
}
bool RemoteQueryExecutorReadContext::checkTimeoutImpl() const
{
epoll_event events[3];
events[0].data.fd = events[1].data.fd = events[2].data.fd = -1;
/// Waiting on epoll_fd will not block if it was polled externally.
int num_events = epoll_wait(epoll_fd, events, 3, 0);
if (num_events == -1)
throwFromErrno("Failed to epoll_wait", ErrorCodes::CANNOT_READ_FROM_SOCKET);
bool is_socket_ready = false;
bool is_pipe_alarmed = false;
bool has_timer_alarm = false;
for (int i = 0; i < num_events; ++i)
{
if (events[i].data.fd == socket_fd)
is_socket_ready = true;
if (events[i].data.fd == timer.getDescriptor())
has_timer_alarm = true;
if (events[i].data.fd == pipe_fd[0])
is_pipe_alarmed = true;
}
if (is_pipe_alarmed)
return false;
if (has_timer_alarm && !is_socket_ready)
{
/// Socket receive timeout. Drain it in case of error, or it may be hidden by the timeout exception.
timer.drain();
throw NetException("Timeout exceeded", ErrorCodes::SOCKET_TIMEOUT);
}
return true;
}
void RemoteQueryExecutorReadContext::setTimer() const
{
/// Did not get packet yet. Init timeout for the next async reading.
timer.reset();
if (receive_timeout.totalMicroseconds())
timer.setRelative(receive_timeout);
}
bool RemoteQueryExecutorReadContext::resumeRoutine()
{
if (is_read_in_progress.load(std::memory_order_relaxed) && !checkTimeout())
return false;
{
std::lock_guard guard(fiber_lock);
if (!fiber)
return false;
fiber = std::move(fiber).resume();
}
if (exception)
std::rethrow_exception(std::move(exception));
return true;
}
void RemoteQueryExecutorReadContext::cancel()
{
std::lock_guard guard(fiber_lock);
/// It is safe to just destroy fiber - we are not in the process of reading from socket.
boost::context::fiber to_destroy = std::move(fiber);
/// Send something to pipe to cancel executor waiting.
uint64_t buf = 0;
while (-1 == write(pipe_fd[1], &buf, sizeof(buf)))
{
if (errno == EAGAIN)
break;
if (errno != EINTR)
throwFromErrno("Cannot write to pipe", ErrorCodes::CANNOT_READ_FROM_SOCKET);
}
}
RemoteQueryExecutorReadContext::~RemoteQueryExecutorReadContext()
{
/// socket_fd is closed by Poco::Net::Socket
if (epoll_fd != -1)
close(epoll_fd);
if (pipe_fd[0] != -1)
close(pipe_fd[0]);
if (pipe_fd[1] != -1)
close(pipe_fd[1]);
}
}
#endif

View File

@ -2,26 +2,27 @@
#if defined(OS_LINUX)
#include <sys/epoll.h>
#include <mutex>
#include <atomic>
#include <Common/Fiber.h>
#include <Common/FiberStack.h>
#include <Common/TimerDescriptor.h>
#include <Client/Connection.h>
#include <Poco/Timespan.h>
namespace Poco::Net
{
class Socket;
}
namespace DB
{
namespace ErrorCodes
{
extern const int CANNOT_READ_FROM_SOCKET;
extern const int CANNOT_OPEN_FILE;
extern const int SOCKET_TIMEOUT;
}
class MultiplexedConnections;
class RemoteQueryExecutorReadContext
{
public:
using Self = RemoteQueryExecutorReadContext;
std::atomic_bool is_read_in_progress = false;
Packet packet;
@ -42,224 +43,26 @@ public:
/// We put those descriptors into our own epoll_fd which is used by external executor.
TimerDescriptor timer{CLOCK_MONOTONIC, 0};
int socket_fd = -1;
int epoll_fd;
int pipe_fd[2];
int epoll_fd = -1;
int pipe_fd[2] = { -1, -1 };
explicit RemoteQueryExecutorReadContext(MultiplexedConnections & connections_) : connections(connections_)
{
epoll_fd = epoll_create(2);
if (-1 == epoll_fd)
throwFromErrno("Cannot create epoll descriptor", ErrorCodes::CANNOT_OPEN_FILE);
explicit RemoteQueryExecutorReadContext(MultiplexedConnections & connections_);
~RemoteQueryExecutorReadContext();
if (-1 == pipe2(pipe_fd, O_NONBLOCK))
throwFromErrno("Cannot create pipe", ErrorCodes::CANNOT_OPEN_FILE);
bool checkTimeout() const;
bool checkTimeoutImpl() const;
{
epoll_event socket_event;
socket_event.events = EPOLLIN | EPOLLPRI;
socket_event.data.fd = pipe_fd[0];
void setSocket(Poco::Net::Socket & socket);
void setTimer() const;
if (-1 == epoll_ctl(epoll_fd, EPOLL_CTL_ADD, pipe_fd[0], &socket_event))
throwFromErrno("Cannot add pipe descriptor to epoll", ErrorCodes::CANNOT_OPEN_FILE);
}
{
epoll_event timer_event;
timer_event.events = EPOLLIN | EPOLLPRI;
timer_event.data.fd = timer.getDescriptor();
if (-1 == epoll_ctl(epoll_fd, EPOLL_CTL_ADD, timer_event.data.fd, &timer_event))
throwFromErrno("Cannot add timer descriptor to epoll", ErrorCodes::CANNOT_OPEN_FILE);
}
auto routine = Routine{connections, *this};
fiber = boost::context::fiber(std::allocator_arg_t(), stack, std::move(routine));
}
void setSocket(Poco::Net::Socket & socket)
{
int fd = socket.impl()->sockfd();
if (fd == socket_fd)
return;
epoll_event socket_event;
socket_event.events = EPOLLIN | EPOLLPRI;
socket_event.data.fd = fd;
if (socket_fd != -1)
{
if (-1 == epoll_ctl(epoll_fd, EPOLL_CTL_DEL, socket_fd, &socket_event))
throwFromErrno("Cannot remove socket descriptor from epoll", ErrorCodes::CANNOT_OPEN_FILE);
}
socket_fd = fd;
if (-1 == epoll_ctl(epoll_fd, EPOLL_CTL_ADD, socket_fd, &socket_event))
throwFromErrno("Cannot add socket descriptor to epoll", ErrorCodes::CANNOT_OPEN_FILE);
receive_timeout = socket.impl()->getReceiveTimeout();
}
bool checkTimeout() const
{
try
{
return checkTimeoutImpl();
}
catch (DB::Exception & e)
{
if (last_used_socket)
e.addMessage(" while reading from socket ({})", last_used_socket->peerAddress().toString());
throw;
}
}
bool checkTimeoutImpl() const
{
epoll_event events[3];
events[0].data.fd = events[1].data.fd = events[2].data.fd = -1;
/// Waiting on epoll_fd will not block if it was polled externally.
int num_events = epoll_wait(epoll_fd, events, 3, 0);
if (num_events == -1)
throwFromErrno("Failed to epoll_wait", ErrorCodes::CANNOT_READ_FROM_SOCKET);
bool is_socket_ready = false;
bool is_pipe_alarmed = false;
bool has_timer_alarm = false;
for (int i = 0; i < num_events; ++i)
{
if (events[i].data.fd == socket_fd)
is_socket_ready = true;
if (events[i].data.fd == timer.getDescriptor())
has_timer_alarm = true;
if (events[i].data.fd == pipe_fd[0])
is_pipe_alarmed = true;
}
if (is_pipe_alarmed)
return false;
if (has_timer_alarm && !is_socket_ready)
{
/// Socket receive timeout. Drain it in case of error, or it may be hidden by the timeout exception.
timer.drain();
throw NetException("Timeout exceeded", ErrorCodes::SOCKET_TIMEOUT);
}
return true;
}
void setTimer() const
{
/// Did not get packet yet. Init timeout for the next async reading.
timer.reset();
if (receive_timeout.totalMicroseconds())
timer.setRelative(receive_timeout);
}
bool resumeRoutine()
{
if (is_read_in_progress.load(std::memory_order_relaxed) && !checkTimeout())
return false;
{
std::lock_guard guard(fiber_lock);
if (!fiber)
return false;
fiber = std::move(fiber).resume();
}
if (exception)
std::rethrow_exception(std::move(exception));
return true;
}
void cancel()
{
std::lock_guard guard(fiber_lock);
/// It is safe to just destroy the fiber - we are not in the process of reading from the socket.
boost::context::fiber to_destroy = std::move(fiber);
/// Write something to the pipe to wake up the executor and cancel its waiting.
uint64_t buf = 0;
while (-1 == write(pipe_fd[1], &buf, sizeof(buf)))
{
if (errno == EAGAIN)
break;
if (errno != EINTR)
throwFromErrno("Cannot write to pipe", ErrorCodes::CANNOT_READ_FROM_SOCKET);
}
}
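The write above is the classic self-pipe wakeup trick. For symmetry, a hedged sketch of the reader side (not shown in this diff): once epoll reports pipe_fd[0] as readable, the executor should drain it so later epoll_wait calls do not wake up spuriously; because the pipe was created with O_NONBLOCK, draining ends with EAGAIN:
void drainPipe(int read_fd)
{
    uint64_t buf;
    while (true)
    {
        ssize_t bytes_read = read(read_fd, &buf, sizeof(buf));
        if (bytes_read > 0)
            continue;   /// keep draining
        if (bytes_read == -1 && errno == EINTR)
            continue;   /// interrupted by a signal, retry
        break;          /// EAGAIN (pipe is empty) or another terminal condition
    }
}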
~RemoteQueryExecutorReadContext()
{
/// socket_fd is closed by Poco::Net::Socket
/// timer_fd is closed by TimerDescriptor
close(epoll_fd);
}
struct Routine
{
MultiplexedConnections & connections;
Self & read_context;
struct ReadCallback
{
Self & read_context;
Fiber & fiber;
void operator()(Poco::Net::Socket & socket)
{
try
{
read_context.setSocket(socket);
}
catch (DB::Exception & e)
{
e.addMessage(" while reading from socket ({})", socket.peerAddress().toString());
throw;
}
read_context.is_read_in_progress.store(true, std::memory_order_relaxed);
fiber = std::move(fiber).resume();
read_context.is_read_in_progress.store(false, std::memory_order_relaxed);
}
};
Fiber operator()(Fiber && sink) const
{
try
{
while (true)
{
read_context.packet = connections.receivePacketUnlocked(ReadCallback{read_context, sink});
sink = std::move(sink).resume();
}
}
catch (const boost::context::detail::forced_unwind &)
{
/// This exception is thrown by the fiber implementation when the fiber is being deleted but hasn't exited.
/// It must not be caught, or it will segfault.
/// All other exceptions must be caught.
throw;
}
catch (...)
{
read_context.exception = std::current_exception();
}
return std::move(sink);
}
};
bool resumeRoutine();
void cancel();
};
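For readers unfamiliar with boost::context, here is a minimal standalone sketch of the suspend/resume ping-pong that Routine and resumeRoutine() build on (it assumes only that boost::context is available; no custom stack or allocator, unlike the real code):
#include <boost/context/fiber.hpp>
#include <iostream>
int main()
{
    namespace ctx = boost::context;
    ctx::fiber worker{[](ctx::fiber && sink)
    {
        std::cout << "inside fiber: step 1\n";
        sink = std::move(sink).resume();   /// suspend, giving control back to main
        std::cout << "inside fiber: step 2\n";
        return std::move(sink);            /// returning ends the fiber
    }};
    worker = std::move(worker).resume();   /// run until the first suspension
    std::cout << "back in main\n";
    worker = std::move(worker).resume();   /// run the fiber to completion
}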
}
#else
namespace DB
{
class RemoteQueryExecutorReadContext

View File

@ -39,6 +39,7 @@ SRCS(
RemoteBlockInputStream.cpp
RemoteBlockOutputStream.cpp
RemoteQueryExecutor.cpp
RemoteQueryExecutorReadContext.cpp
SizeLimits.cpp
SquashingBlockInputStream.cpp
SquashingBlockOutputStream.cpp

View File

@ -12,7 +12,7 @@ NO_COMPILER_WARNINGS()
SRCS(
<? find . -name '*.cpp' | grep -v -F tests | sed 's/^\.\// /' | sort ?>
<? find . -name '*.cpp' | grep -v -P 'tests|PostgreSQL' | sed 's/^\.\// /' | sort ?>
)
END()

View File

@ -10,12 +10,15 @@
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeOneElementTuple.h>
#include <Parsers/IAST.h>
#include <Common/typeid_cast.h>
#include <Common/assert_cast.h>
#include <Core/NamesAndTypes.h>
namespace DB
{
@ -145,10 +148,57 @@ namespace
offset_values.resize(i);
}
ColumnPtr arrayOffsetsToSizes(const IColumn & column)
{
const auto & column_offsets = assert_cast<const ColumnArray::ColumnOffsets &>(column);
MutableColumnPtr column_sizes = column_offsets.cloneEmpty();
if (column_offsets.empty())
return column_sizes;
const auto & offsets_data = column_offsets.getData();
auto & sizes_data = assert_cast<ColumnArray::ColumnOffsets &>(*column_sizes).getData();
sizes_data.resize(offsets_data.size());
IColumn::Offset prev_offset = 0;
for (size_t i = 0, size = offsets_data.size(); i < size; ++i)
{
auto current_offset = offsets_data[i];
sizes_data[i] = current_offset - prev_offset;
prev_offset = current_offset;
}
return column_sizes;
}
ColumnPtr arraySizesToOffsets(const IColumn & column)
{
const auto & column_sizes = assert_cast<const ColumnArray::ColumnOffsets &>(column);
MutableColumnPtr column_offsets = column_sizes.cloneEmpty();
if (column_sizes.empty())
return column_offsets;
const auto & sizes_data = column_sizes.getData();
auto & offsets_data = assert_cast<ColumnArray::ColumnOffsets &>(*column_offsets).getData();
offsets_data.resize(sizes_data.size());
IColumn::Offset prev_offset = 0;
for (size_t i = 0, size = sizes_data.size(); i < size; ++i)
{
prev_offset += sizes_data[i];
offsets_data[i] = prev_offset;
}
return column_offsets;
}
}
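A worked illustration of the two helpers above (not part of the diff): for an Array column holding the rows [[1,2], [3,4,5], [6]], the offsets column stores running totals and the sizes column is its first difference, so the two functions are exact inverses of each other:
/// rows:    [[1, 2], [3, 4, 5], [6]]
/// offsets: [2, 5, 6]  --arrayOffsetsToSizes-->  sizes:   [2, 3, 1]
/// sizes:   [2, 3, 1]  --arraySizesToOffsets-->  offsets: [2, 5, 6]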
void DataTypeArray::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const
void DataTypeArray::enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const
{
path.push_back(Substream::ArraySizes);
callback(path, *this);
@ -158,7 +208,7 @@ void DataTypeArray::enumerateStreams(const StreamCallback & callback, SubstreamP
}
void DataTypeArray::serializeBinaryBulkStatePrefix(
void DataTypeArray::serializeBinaryBulkStatePrefixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
@ -168,7 +218,7 @@ void DataTypeArray::serializeBinaryBulkStatePrefix(
}
void DataTypeArray::serializeBinaryBulkStateSuffix(
void DataTypeArray::serializeBinaryBulkStateSuffixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
@ -178,7 +228,7 @@ void DataTypeArray::serializeBinaryBulkStateSuffix(
}
void DataTypeArray::deserializeBinaryBulkStatePrefix(
void DataTypeArray::deserializeBinaryBulkStatePrefixImpl(
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const
{
@ -188,7 +238,7 @@ void DataTypeArray::deserializeBinaryBulkStatePrefix(
}
void DataTypeArray::serializeBinaryBulkWithMultipleStreams(
void DataTypeArray::serializeBinaryBulkWithMultipleStreamsImpl(
const IColumn & column,
size_t offset,
size_t limit,
@ -235,44 +285,52 @@ void DataTypeArray::serializeBinaryBulkWithMultipleStreams(
}
void DataTypeArray::deserializeBinaryBulkWithMultipleStreams(
void DataTypeArray::deserializeBinaryBulkWithMultipleStreamsImpl(
IColumn & column,
size_t limit,
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const
DeserializeBinaryBulkStatePtr & state,
SubstreamsCache * cache) const
{
ColumnArray & column_array = typeid_cast<ColumnArray &>(column);
settings.path.push_back(Substream::ArraySizes);
if (auto * stream = settings.getter(settings.path))
if (auto cached_column = getFromSubstreamsCache(cache, settings.path))
{
column_array.getOffsetsPtr() = arraySizesToOffsets(*cached_column);
}
else if (auto * stream = settings.getter(settings.path))
{
if (settings.position_independent_encoding)
deserializeArraySizesPositionIndependent(column, *stream, limit);
else
DataTypeNumber<ColumnArray::Offset>().deserializeBinaryBulk(column_array.getOffsetsColumn(), *stream, limit, 0);
addToSubstreamsCache(cache, settings.path, arrayOffsetsToSizes(column_array.getOffsetsColumn()));
}
settings.path.back() = Substream::ArrayElements;
ColumnArray::Offsets & offset_values = column_array.getOffsets();
IColumn & nested_column = column_array.getData();
ColumnPtr & nested_column = column_array.getDataPtr();
/// The number of values corresponding to `offset_values` must be read.
size_t last_offset = offset_values.back();
if (last_offset < nested_column.size())
if (last_offset < nested_column->size())
throw Exception("Nested column is longer than last offset", ErrorCodes::LOGICAL_ERROR);
size_t nested_limit = last_offset - nested_column.size();
size_t nested_limit = last_offset - nested_column->size();
/// Adjust value size hint. Divide it to the average array size.
settings.avg_value_size_hint = nested_limit ? settings.avg_value_size_hint / nested_limit * offset_values.size() : 0;
nested->deserializeBinaryBulkWithMultipleStreams(nested_column, nested_limit, settings, state);
nested->deserializeBinaryBulkWithMultipleStreams(nested_column, nested_limit, settings, state, cache);
settings.path.pop_back();
/// Check consistency between the offsets and elements subcolumns.
/// But if the elements column is empty, it's ok for columns of Nested types that were added by ALTER.
if (!nested_column.empty() && nested_column.size() != last_offset)
throw ParsingException("Cannot read all array values: read just " + toString(nested_column.size()) + " of " + toString(last_offset),
if (!nested_column->empty() && nested_column->size() != last_offset)
throw ParsingException("Cannot read all array values: read just " + toString(nested_column->size()) + " of " + toString(last_offset),
ErrorCodes::CANNOT_READ_ALL_DATA);
}
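/// Note on the cache interplay above (a hedged reading, not part of the diff):
/// when a query reads both the full Array column and its size0 subcolumn,
/// whichever is deserialized first stores the sizes under the ArraySizes path
/// via addToSubstreamsCache(); the other read then finds them through
/// getFromSubstreamsCache() and rebuilds offsets with arraySizesToOffsets()
/// instead of re-reading the stream. Presumably sizes rather than offsets are
/// cached because sizes, unlike running offsets, do not depend on where the
/// read started.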
@ -530,6 +588,44 @@ bool DataTypeArray::equals(const IDataType & rhs) const
return typeid(rhs) == typeid(*this) && nested->equals(*static_cast<const DataTypeArray &>(rhs).nested);
}
DataTypePtr DataTypeArray::tryGetSubcolumnType(const String & subcolumn_name) const
{
return tryGetSubcolumnTypeImpl(subcolumn_name, 0);
}
DataTypePtr DataTypeArray::tryGetSubcolumnTypeImpl(const String & subcolumn_name, size_t level) const
{
if (subcolumn_name == "size" + std::to_string(level))
return createOneElementTuple(std::make_shared<DataTypeUInt64>(), subcolumn_name, false);
DataTypePtr subcolumn;
if (const auto * nested_array = typeid_cast<const DataTypeArray *>(nested.get()))
subcolumn = nested_array->tryGetSubcolumnTypeImpl(subcolumn_name, level + 1);
else
subcolumn = nested->tryGetSubcolumnType(subcolumn_name);
return (subcolumn ? std::make_shared<DataTypeArray>(std::move(subcolumn)) : subcolumn);
}
ColumnPtr DataTypeArray::getSubcolumn(const String & subcolumn_name, const IColumn & column) const
{
return getSubcolumnImpl(subcolumn_name, column, 0);
}
ColumnPtr DataTypeArray::getSubcolumnImpl(const String & subcolumn_name, const IColumn & column, size_t level) const
{
const auto & column_array = assert_cast<const ColumnArray &>(column);
if (subcolumn_name == "size" + std::to_string(level))
return arrayOffsetsToSizes(column_array.getOffsetsColumn());
ColumnPtr subcolumn;
if (const auto * nested_array = typeid_cast<const DataTypeArray *>(nested.get()))
subcolumn = nested_array->getSubcolumnImpl(subcolumn_name, column_array.getData(), level + 1);
else
subcolumn = nested->getSubcolumn(subcolumn_name, column_array.getData());
return ColumnArray::create(subcolumn, column_array.getOffsetsPtr());
}
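A hedged usage sketch of the level-indexed size subcolumns introduced here (illustrative only, not from the diff): for a two-level array, "size0" resolves at the outer level, while "size1" is found one level down and wrapped back into an array as the recursion unwinds:
auto inner = std::make_shared<DataTypeArray>(std::make_shared<DataTypeUInt8>());
auto type = std::make_shared<DataTypeArray>(inner);   /// Array(Array(UInt8))
auto size0 = type->tryGetSubcolumnType("size0");   /// outer sizes: UInt64 (wrapped via createOneElementTuple)
auto size1 = type->tryGetSubcolumnType("size1");   /// inner sizes: an Array of the above, one entry per outer row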
size_t DataTypeArray::getNumberOfDimensions() const
{

View File

@ -57,32 +57,33 @@ public:
* This is necessary, because when implementing nested structures, several arrays can have common sizes.
*/
void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const override;
void enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const override;
void serializeBinaryBulkStatePrefix(
void serializeBinaryBulkStatePrefixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void serializeBinaryBulkStateSuffix(
void serializeBinaryBulkStateSuffixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void deserializeBinaryBulkStatePrefix(
void deserializeBinaryBulkStatePrefixImpl(
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const override;
void serializeBinaryBulkWithMultipleStreams(
void serializeBinaryBulkWithMultipleStreamsImpl(
const IColumn & column,
size_t offset,
size_t limit,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void deserializeBinaryBulkWithMultipleStreams(
void deserializeBinaryBulkWithMultipleStreamsImpl(
IColumn & column,
size_t limit,
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const override;
DeserializeBinaryBulkStatePtr & state,
SubstreamsCache * cache) const override;
void serializeProtobuf(const IColumn & column,
size_t row_num,
@ -111,10 +112,17 @@ public:
return nested->isValueUnambiguouslyRepresentedInFixedSizeContiguousMemoryRegion();
}
DataTypePtr tryGetSubcolumnType(const String & subcolumn_name) const override;
ColumnPtr getSubcolumn(const String & subcolumn_name, const IColumn & column) const override;
const DataTypePtr & getNestedType() const { return nested; }
/// 1 for plain array, 2 for array of arrays and so on.
size_t getNumberOfDimensions() const;
private:
ColumnPtr getSubcolumnImpl(const String & subcolumn_name, const IColumn & column, size_t level) const;
DataTypePtr tryGetSubcolumnTypeImpl(const String & subcolumn_name, size_t level) const;
};
}

View File

@ -3,6 +3,7 @@
#include <memory>
#include <cstddef>
#include <Core/Types.h>
#include <DataTypes/IDataType.h>
namespace DB
{
@ -62,8 +63,51 @@ public:
virtual void serializeTextXML(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const = 0;
};
/** Allows customizing the representation of an existing data type with custom substreams.
 * A customized data type will be serialized/deserialized to files with different names than the base type,
 * but its binary and text representation will be unchanged.
 * E.g. it can be used for reading single subcolumns of complex types.
 */
class IDataTypeCustomStreams
{
public:
virtual ~IDataTypeCustomStreams() = default;
virtual void enumerateStreams(
const IDataType::StreamCallback & callback,
IDataType::SubstreamPath & path) const = 0;
virtual void serializeBinaryBulkStatePrefix(
IDataType::SerializeBinaryBulkSettings & settings,
IDataType::SerializeBinaryBulkStatePtr & state) const = 0;
virtual void serializeBinaryBulkStateSuffix(
IDataType::SerializeBinaryBulkSettings & settings,
IDataType::SerializeBinaryBulkStatePtr & state) const = 0;
virtual void deserializeBinaryBulkStatePrefix(
IDataType::DeserializeBinaryBulkSettings & settings,
IDataType::DeserializeBinaryBulkStatePtr & state) const = 0;
virtual void serializeBinaryBulkWithMultipleStreams(
const IColumn & column,
size_t offset,
size_t limit,
IDataType::SerializeBinaryBulkSettings & settings,
IDataType::SerializeBinaryBulkStatePtr & state) const = 0;
virtual void deserializeBinaryBulkWithMultipleStreams(
ColumnPtr & column,
size_t limit,
IDataType::DeserializeBinaryBulkSettings & settings,
IDataType::DeserializeBinaryBulkStatePtr & state,
IDataType::SubstreamsCache * cache) const = 0;
};
using DataTypeCustomNamePtr = std::unique_ptr<const IDataTypeCustomName>;
using DataTypeCustomTextSerializationPtr = std::unique_ptr<const IDataTypeCustomTextSerialization>;
using DataTypeCustomStreamsPtr = std::unique_ptr<const IDataTypeCustomStreams>;
/** Describe a data type customization
*/
@ -71,9 +115,15 @@ struct DataTypeCustomDesc
{
DataTypeCustomNamePtr name;
DataTypeCustomTextSerializationPtr text_serialization;
DataTypeCustomStreamsPtr streams;
DataTypeCustomDesc(DataTypeCustomNamePtr name_, DataTypeCustomTextSerializationPtr text_serialization_)
: name(std::move(name_)), text_serialization(std::move(text_serialization_)) {}
DataTypeCustomDesc(
DataTypeCustomNamePtr name_,
DataTypeCustomTextSerializationPtr text_serialization_ = nullptr,
DataTypeCustomStreamsPtr streams_ = nullptr)
: name(std::move(name_))
, text_serialization(std::move(text_serialization_))
, streams(std::move(streams_)) {}
};
using DataTypeCustomDescPtr = std::unique_ptr<DataTypeCustomDesc>;

View File

@ -0,0 +1,18 @@
#pragma once
#include <memory>
namespace DB
{
class IDataTypeCustomName;
class IDataTypeCustomTextSerialization;
class IDataTypeCustomStreams;
struct DataTypeCustomDesc;
using DataTypeCustomNamePtr = std::unique_ptr<const IDataTypeCustomName>;
using DataTypeCustomTextSerializationPtr = std::unique_ptr<const IDataTypeCustomTextSerialization>;
using DataTypeCustomStreamsPtr = std::unique_ptr<const IDataTypeCustomStreams>;
using DataTypeCustomDescPtr = std::unique_ptr<DataTypeCustomDesc>;
}

View File

@ -79,6 +79,16 @@ DataTypePtr DataTypeFactory::get(const String & family_name_param, const ASTPtr
return findCreatorByName(family_name)(parameters);
}
DataTypePtr DataTypeFactory::getCustom(DataTypeCustomDescPtr customization) const
{
if (!customization->name)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot create custom type without name");
auto type = get(customization->name->getName());
type->setCustomization(std::move(customization));
return type;
}
void DataTypeFactory::registerDataType(const String & family_name, Value creator, CaseSensitiveness case_sensitiveness)
{

View File

@ -3,6 +3,7 @@
#include <DataTypes/IDataType.h>
#include <Parsers/IAST_fwd.h>
#include <Common/IFactoryWithAliases.h>
#include <DataTypes/DataTypeCustom_fwd.h>
#include <functional>
@ -33,6 +34,7 @@ public:
DataTypePtr get(const String & full_name) const;
DataTypePtr get(const String & family_name, const ASTPtr & parameters) const;
DataTypePtr get(const ASTPtr & ast) const;
DataTypePtr getCustom(DataTypeCustomDescPtr customization) const;
/// Register a type family by its name.
void registerDataType(const String & family_name, Value creator, CaseSensitiveness case_sensitiveness = CaseSensitive);
@ -84,5 +86,6 @@ void registerDataTypeLowCardinality(DataTypeFactory & factory);
void registerDataTypeDomainIPv4AndIPv6(DataTypeFactory & factory);
void registerDataTypeDomainSimpleAggregateFunction(DataTypeFactory & factory);
void registerDataTypeDomainGeo(DataTypeFactory & factory);
void registerDataTypeOneElementTuple(DataTypeFactory & factory);
}

View File

@ -50,7 +50,7 @@ DataTypeLowCardinality::DataTypeLowCardinality(DataTypePtr dictionary_type_)
+ dictionary_type->getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
void DataTypeLowCardinality::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const
void DataTypeLowCardinality::enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const
{
path.push_back(Substream::DictionaryKeys);
dictionary_type->enumerateStreams(callback, path);
@ -243,7 +243,7 @@ static DeserializeStateLowCardinality * checkAndGetLowCardinalityDeserializeStat
return low_cardinality_state;
}
void DataTypeLowCardinality::serializeBinaryBulkStatePrefix(
void DataTypeLowCardinality::serializeBinaryBulkStatePrefixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
@ -263,7 +263,7 @@ void DataTypeLowCardinality::serializeBinaryBulkStatePrefix(
state = std::make_shared<SerializeStateLowCardinality>(key_version);
}
void DataTypeLowCardinality::serializeBinaryBulkStateSuffix(
void DataTypeLowCardinality::serializeBinaryBulkStateSuffixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
@ -289,7 +289,7 @@ void DataTypeLowCardinality::serializeBinaryBulkStateSuffix(
}
}
void DataTypeLowCardinality::deserializeBinaryBulkStatePrefix(
void DataTypeLowCardinality::deserializeBinaryBulkStatePrefixImpl(
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const
{
@ -482,7 +482,7 @@ namespace
}
}
void DataTypeLowCardinality::serializeBinaryBulkWithMultipleStreams(
void DataTypeLowCardinality::serializeBinaryBulkWithMultipleStreamsImpl(
const IColumn & column,
size_t offset,
size_t limit,
@ -579,11 +579,12 @@ void DataTypeLowCardinality::serializeBinaryBulkWithMultipleStreams(
index_version.getDataType()->serializeBinaryBulk(*positions, *indexes_stream, 0, num_rows);
}
void DataTypeLowCardinality::deserializeBinaryBulkWithMultipleStreams(
void DataTypeLowCardinality::deserializeBinaryBulkWithMultipleStreamsImpl(
IColumn & column,
size_t limit,
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const
DeserializeBinaryBulkStatePtr & state,
SubstreamsCache * /* cache */) const
{
ColumnLowCardinality & low_cardinality_column = typeid_cast<ColumnLowCardinality &>(column);

View File

@ -22,32 +22,33 @@ public:
const char * getFamilyName() const override { return "LowCardinality"; }
TypeIndex getTypeId() const override { return TypeIndex::LowCardinality; }
void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const override;
void enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const override;
void serializeBinaryBulkStatePrefix(
void serializeBinaryBulkStatePrefixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void serializeBinaryBulkStateSuffix(
void serializeBinaryBulkStateSuffixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void deserializeBinaryBulkStatePrefix(
void deserializeBinaryBulkStatePrefixImpl(
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const override;
void serializeBinaryBulkWithMultipleStreams(
void serializeBinaryBulkWithMultipleStreamsImpl(
const IColumn & column,
size_t offset,
size_t limit,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void deserializeBinaryBulkWithMultipleStreams(
void deserializeBinaryBulkWithMultipleStreamsImpl(
IColumn & column,
size_t limit,
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const override;
DeserializeBinaryBulkStatePtr & state,
SubstreamsCache * cache) const override;
void serializeBinary(const Field & field, WriteBuffer & ostr) const override;
void deserializeBinary(Field & field, ReadBuffer & istr) const override;

View File

@ -278,34 +278,34 @@ void DataTypeMap::deserializeTextCSV(IColumn & column, ReadBuffer & istr, const
}
void DataTypeMap::enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const
void DataTypeMap::enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const
{
nested->enumerateStreams(callback, path);
}
void DataTypeMap::serializeBinaryBulkStatePrefix(
void DataTypeMap::serializeBinaryBulkStatePrefixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
nested->serializeBinaryBulkStatePrefix(settings, state);
}
void DataTypeMap::serializeBinaryBulkStateSuffix(
void DataTypeMap::serializeBinaryBulkStateSuffixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const
{
nested->serializeBinaryBulkStateSuffix(settings, state);
}
void DataTypeMap::deserializeBinaryBulkStatePrefix(
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const
void DataTypeMap::deserializeBinaryBulkStatePrefixImpl(
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const
{
nested->deserializeBinaryBulkStatePrefix(settings, state);
}
void DataTypeMap::serializeBinaryBulkWithMultipleStreams(
void DataTypeMap::serializeBinaryBulkWithMultipleStreamsImpl(
const IColumn & column,
size_t offset,
size_t limit,
@ -315,13 +315,15 @@ void DataTypeMap::serializeBinaryBulkWithMultipleStreams(
nested->serializeBinaryBulkWithMultipleStreams(extractNestedColumn(column), offset, limit, settings, state);
}
void DataTypeMap::deserializeBinaryBulkWithMultipleStreams(
void DataTypeMap::deserializeBinaryBulkWithMultipleStreamsImpl(
IColumn & column,
size_t limit,
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const
DeserializeBinaryBulkStatePtr & state,
SubstreamsCache * cache) const
{
nested->deserializeBinaryBulkWithMultipleStreams(extractNestedColumn(column), limit, settings, state);
auto & column_map = assert_cast<ColumnMap &>(column);
nested->deserializeBinaryBulkWithMultipleStreams(column_map.getNestedColumnPtr(), limit, settings, state, cache);
}
void DataTypeMap::serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const

View File

@ -46,34 +46,33 @@ public:
void deserializeTextCSV(IColumn & column, ReadBuffer & istr, const FormatSettings &) const override;
/** Each sub-column in a map is serialized in a separate stream.
*/
void enumerateStreams(const StreamCallback & callback, SubstreamPath & path) const override;
void enumerateStreamsImpl(const StreamCallback & callback, SubstreamPath & path) const override;
void serializeBinaryBulkStatePrefix(
void serializeBinaryBulkStatePrefixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void serializeBinaryBulkStateSuffix(
void serializeBinaryBulkStateSuffixImpl(
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void deserializeBinaryBulkStatePrefix(
void deserializeBinaryBulkStatePrefixImpl(
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const override;
void serializeBinaryBulkWithMultipleStreams(
void serializeBinaryBulkWithMultipleStreamsImpl(
const IColumn & column,
size_t offset,
size_t limit,
SerializeBinaryBulkSettings & settings,
SerializeBinaryBulkStatePtr & state) const override;
void deserializeBinaryBulkWithMultipleStreams(
void deserializeBinaryBulkWithMultipleStreamsImpl(
IColumn & column,
size_t limit,
DeserializeBinaryBulkSettings & settings,
DeserializeBinaryBulkStatePtr & state) const override;
DeserializeBinaryBulkStatePtr & state,
SubstreamsCache * cache) const override;
void serializeProtobuf(const IColumn & column, size_t row_num, ProtobufWriter & protobuf, size_t & value_index) const override;
void deserializeProtobuf(IColumn & column, ProtobufReader & protobuf, bool allow_add_row, bool & row_added) const override;

View File

@ -0,0 +1,76 @@
#include <DataTypes/DataTypeNested.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeTuple.h>
#include <IO/WriteBufferFromString.h>
#include <IO/Operators.h>
#include <Common/quoteString.h>
#include <Parsers/ASTNameTypePair.h>
namespace DB
{
namespace ErrorCodes
{
extern const int EMPTY_DATA_PASSED;
extern const int BAD_ARGUMENTS;
}
String DataTypeNestedCustomName::getName() const
{
WriteBufferFromOwnString s;
s << "Nested(";
for (size_t i = 0; i < elems.size(); ++i)
{
if (i != 0)
s << ", ";
s << backQuoteIfNeed(names[i]) << ' ';
s << elems[i]->getName();
}
s << ")";
return s.str();
}
static std::pair<DataTypePtr, DataTypeCustomDescPtr> create(const ASTPtr & arguments)
{
if (!arguments || arguments->children.empty())
throw Exception("Nested cannot be empty", ErrorCodes::EMPTY_DATA_PASSED);
DataTypes nested_types;
Strings nested_names;
nested_types.reserve(arguments->children.size());
nested_names.reserve(arguments->children.size());
for (const auto & child : arguments->children)
{
const auto * name_type = child->as<ASTNameTypePair>();
if (!name_type)
throw Exception(ErrorCodes::BAD_ARGUMENTS, "Data type Nested accepts only pairs with name and type");
auto nested_type = DataTypeFactory::instance().get(name_type->type);
nested_types.push_back(std::move(nested_type));
nested_names.push_back(name_type->name);
}
auto data_type = std::make_shared<DataTypeArray>(std::make_shared<DataTypeTuple>(nested_types, nested_names));
auto custom_name = std::make_unique<DataTypeNestedCustomName>(nested_types, nested_names);
return std::make_pair(std::move(data_type), std::make_unique<DataTypeCustomDesc>(std::move(custom_name), nullptr));
}
void registerDataTypeNested(DataTypeFactory & factory)
{
return factory.registerDataTypeCustom("Nested", create);
}
DataTypePtr createNested(const DataTypes & types, const Names & names)
{
auto custom_desc = std::make_unique<DataTypeCustomDesc>(
std::make_unique<DataTypeNestedCustomName>(types, names));
return DataTypeFactory::instance().getCustom(std::move(custom_desc));
}
}
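A hedged usage sketch of createNested() (illustrative, not from the diff): the factory resolves the customized name back through the registered "Nested" creator, so the returned type stores data as Array(Tuple(...)) while reporting the Nested name:
auto nested = createNested(
    {std::make_shared<DataTypeUInt32>(), std::make_shared<DataTypeString>()},
    {"x", "y"});
/// nested->getName() == "Nested(x UInt32, y String)"  (from DataTypeNestedCustomName)
/// The underlying storage type is Array(Tuple(UInt32, String)).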

Some files were not shown because too many files have changed in this diff.