Merge branch 'master' of https://github.com/ClickHouse/ClickHouse into gyuton-DOCSUP-10114-Document-the-quantileExact-functions

2024-11-25 17:12:03 +00:00 · 2021-06-22 16:39:25 +03:00 · 2021-06-22 16:39:25 +03:00 · 22b5e0e6c2
commit 22b5e0e6c2
parent 9f36eb6210 2987d11268
886 changed files with 15891 additions and 9777 deletions
--- a/.gitmodules
+++ b/.gitmodules
@ -103,7 +103,7 @@
 	url = https://github.com/ClickHouse-Extras/fastops
 [submodule "contrib/orc"]
 	path = contrib/orc
-	url = https://github.com/apache/orc
+	url = https://github.com/ClickHouse-Extras/orc
 [submodule "contrib/sparsehash-c11"]
 	path = contrib/sparsehash-c11
 	url = https://github.com/sparsehash/sparsehash-c11.git
@ -210,9 +210,6 @@
 [submodule "contrib/fast_float"]
 	path = contrib/fast_float
 	url = https://github.com/fastfloat/fast_float
-[submodule "contrib/libpqxx"]
-	path = contrib/libpqxx
-	url = https://github.com/jtv/libpqxx
 [submodule "contrib/libpq"]
 	path = contrib/libpq
 	url = https://github.com/ClickHouse-Extras/libpq
@ -231,3 +228,6 @@
 [submodule "contrib/yaml-cpp"]
 	path = contrib/yaml-cpp
 	url = https://github.com/ClickHouse-Extras/yaml-cpp.git
+[submodule "contrib/libpqxx"]
+	path = contrib/libpqxx
+	url = https://github.com/ClickHouse-Extras/libpqxx.git
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -2,8 +2,6 @@

 #### Upgrade Notes

-* One bug has been found after release: [#25187](https://github.com/ClickHouse/ClickHouse/issues/25187).
-* Do not upgrade if you have partition key with `UUID`.
 * `zstd` compression library is updated to v1.5.0. You may get messages about "checksum does not match" in replication. These messages are expected due to update of compression algorithm and you can ignore them. These messages are informational and do not indicate any kinds of undesired behaviour.
 * The setting `compile_expressions` is enabled by default. Although it has been heavily tested on variety of scenarios, if you find some undesired behaviour on your servers, you can try turning this setting off.
 * Values of `UUID` type cannot be compared with integer. For example, instead of writing `uuid != 0` type `uuid != '00000000-0000-0000-0000-000000000000'`.
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -183,24 +183,20 @@ endif ()
 # Make sure the final executable has symbols exported
 set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -rdynamic")

-if (OS_LINUX)
 find_program (OBJCOPY_PATH NAMES "llvm-objcopy" "llvm-objcopy-12" "llvm-objcopy-11" "llvm-objcopy-10" "llvm-objcopy-9" "llvm-objcopy-8" "objcopy")
 if (OBJCOPY_PATH)
   message(STATUS "Using objcopy: ${OBJCOPY_PATH}.")
-
-        if (ARCH_AMD64)
-            set(OBJCOPY_ARCH_OPTIONS -O elf64-x86-64 -B i386)
-        elseif (ARCH_AARCH64)
-            set(OBJCOPY_ARCH_OPTIONS -O elf64-aarch64 -B aarch64)
-        endif ()
 else ()
  message(FATAL_ERROR "Cannot find objcopy.")
 endif ()
-endif ()

 if (OS_DARWIN)
-    set(WHOLE_ARCHIVE -all_load)
-    set(NO_WHOLE_ARCHIVE -noall_load)
+    # The `-all_load` flag forces loading of all symbols from all libraries,
+    # and leads to multiply-defined symbols. The `-force_load` flag force-loads
+    # only a _specific_ library, which is what we need.
+    set(WHOLE_ARCHIVE -force_load)
+    # The `-noall_load` flag is the default and now obsolete.
+    set(NO_WHOLE_ARCHIVE "")
 else ()
    set(WHOLE_ARCHIVE --whole-archive)
    set(NO_WHOLE_ARCHIVE --no-whole-archive)
--- a/README.md
+++ b/README.md
@ -15,4 +15,4 @@ ClickHouse® is an open-source column-oriented database management system that a
 * You can also [fill this form](https://clickhouse.tech/#meet) to meet Yandex ClickHouse team in person.

 ## Upcoming Events
-* [SF Bay Area ClickHouse Community Meetup (online)](https://www.meetup.com/San-Francisco-Bay-Area-ClickHouse-Meetup/events/278144089/) on 16 June 2021.
+* [China ClickHouse Community Meetup (online)](http://hdxu.cn/rhbfZ) on 26 June 2021.
--- a/base/bridge/IBridge.cpp
+++ b/base/bridge/IBridge.cpp
@ -1,14 +1,22 @@
 #include "IBridge.h"

-#include <IO/ReadHelpers.h>
 #include <boost/program_options.hpp>
 #include <Poco/Net/NetException.h>
 #include <Poco/Util/HelpFormatter.h>
-#include <Common/StringUtils/StringUtils.h>
-#include <Formats/registerFormats.h>
+
 #include <common/logger_useful.h>
+#include <common/range.h>
+
+#include <Common/StringUtils/StringUtils.h>
 #include <Common/SensitiveDataMasker.h>
+#include <common/errnoToString.h>
+#include <IO/ReadHelpers.h>
+#include <Formats/registerFormats.h>
 #include <Server/HTTP/HTTPServer.h>
+#include <IO/WriteBufferFromFile.h>
+#include <IO/WriteHelpers.h>
+#include <sys/time.h>
+#include <sys/resource.h>

 #if USE_ODBC
 #    include <Poco/Data/ODBC/Connector.h>
@ -163,6 +171,31 @@ void IBridge::initialize(Application & self)
    max_server_connections = config().getUInt("max-server-connections", 1024);
    keep_alive_timeout = config().getUInt64("keep-alive-timeout", 10);

+    struct rlimit limit;
+    const UInt64 gb = 1024 * 1024 * 1024;
+
+    /// Set maximum RSS to 1 GiB.
+    limit.rlim_max = limit.rlim_cur = gb;
+    if (setrlimit(RLIMIT_RSS, &limit))
+        LOG_WARNING(log, "Unable to set maximum RSS to 1GB: {} (current rlim_cur={}, rlim_max={})",
+                    errnoToString(errno), limit.rlim_cur, limit.rlim_max);
+
+    if (!getrlimit(RLIMIT_RSS, &limit))
+        LOG_INFO(log, "RSS limit: cur={}, max={}", limit.rlim_cur, limit.rlim_max);
+
+    try
+    {
+        const auto oom_score = toString(config().getUInt64("bridge_oom_score", 500));
+        WriteBufferFromFile buf("/proc/self/oom_score_adj");
+        buf.write(oom_score.data(), oom_score.size());
+        buf.close();
+        LOG_INFO(log, "OOM score is set to {}", oom_score);
+    }
+    catch (const Exception & e)
+    {
+        LOG_WARNING(log, "Failed to set OOM score, error: {}", e.what());
+    }
+
    initializeTerminationAndSignalProcessing();

    ServerApplication::initialize(self); // NOLINT
@ -214,7 +247,7 @@ int IBridge::main(const std::vector<std::string> & /*args*/)

        server.stop();

-        for (size_t count : ext::range(1, 6))
+        for (size_t count : collections::range(1, 6))
        {
            if (server.currentConnections() == 0)
                break;
--- a/base/common/DecomposedFloat.h
+++ b/base/common/DecomposedFloat.h
@ -91,10 +91,12 @@ struct DecomposedFloat


    /// Compare float with integer of arbitrary width (both signed and unsigned are supported). Assuming two's complement arithmetic.
+    /// This function is generic, big integers (128, 256 bit) are supported as well.
    /// Infinities are compared correctly. NaNs are treat similarly to infinities, so they can be less than all numbers.
    /// (note that we need total order)
+    /// Returns -1, 0 or 1.
    template <typename Int>
-    int compare(Int rhs)
+    int compare(Int rhs) const
    {
        if (rhs == 0)
            return sign();
@ -137,10 +139,11 @@ struct DecomposedFloat
        if (normalized_exponent() >= static_cast<int16_t>(8 * sizeof(Int) - is_signed_v<Int>))
            return is_negative() ? -1 : 1;

-        using UInt = make_unsigned_t<Int>;
+        using UInt = std::conditional_t<(sizeof(Int) > sizeof(typename Traits::UInt)), make_unsigned_t<Int>, typename Traits::UInt>;
        UInt uint_rhs = rhs < 0 ? -rhs : rhs;

        /// Smaller octave: abs(rhs) < abs(float)
+        /// FYI, TIL: octave is also called "binade", https://en.wikipedia.org/wiki/Binade
        if (uint_rhs < (static_cast<UInt>(1) << normalized_exponent()))
            return is_negative() ? -1 : 1;

@ -154,11 +157,11 @@ struct DecomposedFloat

        bool large_and_always_integer = normalized_exponent() >= static_cast<int16_t>(Traits::mantissa_bits);

-        typename Traits::UInt a = large_and_always_integer
-            ? mantissa() << (normalized_exponent() - Traits::mantissa_bits)
-            : mantissa() >> (Traits::mantissa_bits - normalized_exponent());
+        UInt a = large_and_always_integer
+            ? static_cast<UInt>(mantissa()) << (normalized_exponent() - Traits::mantissa_bits)
+            : static_cast<UInt>(mantissa()) >> (Traits::mantissa_bits - normalized_exponent());

-        typename Traits::UInt b = uint_rhs - (static_cast<UInt>(1) << normalized_exponent());
+        UInt b = uint_rhs - (static_cast<UInt>(1) << normalized_exponent());

        if (a < b)
            return is_negative() ? 1 : -1;
@ -175,37 +178,37 @@ struct DecomposedFloat


    template <typename Int>
-    bool equals(Int rhs)
+    bool equals(Int rhs) const
    {
        return compare(rhs) == 0;
    }

    template <typename Int>
-    bool notEquals(Int rhs)
+    bool notEquals(Int rhs) const
    {
        return compare(rhs) != 0;
    }

    template <typename Int>
-    bool less(Int rhs)
+    bool less(Int rhs) const
    {
        return compare(rhs) < 0;
    }

    template <typename Int>
-    bool greater(Int rhs)
+    bool greater(Int rhs) const
    {
        return compare(rhs) > 0;
    }

    template <typename Int>
-    bool lessOrEquals(Int rhs)
+    bool lessOrEquals(Int rhs) const
    {
        return compare(rhs) <= 0;
    }

    template <typename Int>
-    bool greaterOrEquals(Int rhs)
+    bool greaterOrEquals(Int rhs) const
    {
        return compare(rhs) >= 0;
    }
--- a/base/common/ReadlineLineReader.cpp
+++ b/base/common/ReadlineLineReader.cpp
@ -1,6 +1,6 @@
 #include <common/ReadlineLineReader.h>
 #include <common/errnoToString.h>
-#include <ext/scope_guard.h>
+#include <common/scope_guard.h>

 #include <errno.h>
 #include <signal.h>
--- a/base/common/SimpleCache.h
+++ b/base/common/SimpleCache.h
@ -3,7 +3,7 @@
 #include <map>
 #include <tuple>
 #include <mutex>
-#include <ext/function_traits.h>
+#include <common/function_traits.h>


 /** The simplest cache for a free function.
@ -32,10 +32,11 @@ public:
    template <typename... Args>
    Result operator() (Args &&... args)
    {
+        Key key{std::forward<Args>(args)...};
+
        {
            std::lock_guard lock(mutex);

-            Key key{std::forward<Args>(args)...};
            auto it = cache.find(key);

            if (cache.end() != it)
@ -43,7 +44,7 @@ public:
        }

        /// The calculations themselves are not done under mutex.
-        Result res = f(std::forward<Args>(args)...);
+        Result res = std::apply(f, key);

        {
            std::lock_guard lock(mutex);
@ -57,11 +58,12 @@ public:
    template <typename... Args>
    void update(Args &&... args)
    {
-        Result res = f(std::forward<Args>(args)...);
+        Key key{std::forward<Args>(args)...};
+
+        Result res = std::apply(f, key);
+
        {
            std::lock_guard lock(mutex);
-
-            Key key{std::forward<Args>(args)...};
            cache[key] = std::move(res);
        }
    }
--- a/base/common/arraySize.h
+++ b/base/common/arraySize.h
@ -0,0 +1,7 @@
+#pragma once
+
+#include <cstdlib>
+
+/** \brief Returns the number of elements N of a built-in array `T[N]`; N is deduced from the argument's type at compile time. */
+template <typename T, std::size_t N>
+constexpr size_t arraySize(const T (&)[N]) noexcept { return N; }
--- a/base/common/bit_cast.h
+++ b/base/common/bit_cast.h
@ -0,0 +1,27 @@
+#pragma once
+
+#include <string.h>
+#include <algorithm>
+#include <type_traits>
+
+
+/** \brief Returns value `from` converted to type `To` while retaining bit representation.
+  *    `To` and `From` must satisfy `CopyConstructible`; the result is value-initialized and only min(sizeof(To), sizeof(From)) bytes of `from` are copied into it.
+  */
+template <typename To, typename From>
+std::decay_t<To> bit_cast(const From & from)
+{
+    To res {};
+    memcpy(static_cast<void*>(&res), &from, std::min(sizeof(res), sizeof(from)));
+    return res;
+}
+
+/** \brief Returns value `from` converted to type `To` while retaining bit representation.
+  *    `To` and `From` must satisfy `CopyConstructible` and have equal size (enforced by `static_assert`); the copy itself is delegated to `bit_cast`.
+  */
+template <typename To, typename From>
+std::decay_t<To> safe_bit_cast(const From & from)
+{
+    static_assert(sizeof(To) == sizeof(From), "bit cast on types of different width");
+    return bit_cast<To, From>(from);
+}
--- a/base/common/chrono_io.h
+++ b/base/common/chrono_io.h
@ -0,0 +1,46 @@
+#pragma once
+
+#include <chrono>
+#include <string>
+#include <sstream>
+#include <cctz/time_zone.h>
+
+
+inline std::string to_string(const std::time_t & time)
+{
+    return cctz::format("%Y-%m-%d %H:%M:%S", std::chrono::system_clock::from_time_t(time), cctz::local_time_zone());
+}
+
+template <typename Clock, typename Duration = typename Clock::duration>
+std::string to_string(const std::chrono::time_point<Clock, Duration> & tp)
+{
+    // Don't use DateLUT because it shows weird characters for
+    // TimePoint::max(). I wish we could use C++20 format, but it's not
+    // there yet.
+    // return DateLUT::instance().timeToString(std::chrono::system_clock::to_time_t(tp));
+
+    auto in_time_t = std::chrono::system_clock::to_time_t(tp);
+    return to_string(in_time_t);
+}
+
+template <typename Rep, typename Period = std::ratio<1>>
+std::string to_string(const std::chrono::duration<Rep, Period> & duration)
+{
+    auto seconds_as_int = std::chrono::duration_cast<std::chrono::seconds>(duration);
+    if (seconds_as_int == duration)
+        return std::to_string(seconds_as_int.count()) + "s";
+    auto seconds_as_double = std::chrono::duration_cast<std::chrono::duration<double>>(duration);
+    return std::to_string(seconds_as_double.count()) + "s";
+}
+
+template <typename Clock, typename Duration = typename Clock::duration>
+std::ostream & operator<<(std::ostream & o, const std::chrono::time_point<Clock, Duration> & tp)
+{
+    return o << to_string(tp);
+}
+
+template <typename Rep, typename Period = std::ratio<1>>
+std::ostream & operator<<(std::ostream & o, const std::chrono::duration<Rep, Period> & duration)
+{
+    return o << to_string(duration);
+}
--- a/base/common/function_traits.h
+++ b/base/common/function_traits.h
--- a/base/common/getResource.cpp
+++ b/base/common/getResource.cpp
@ -4,23 +4,42 @@
 #include <string>
 #include <boost/algorithm/string/replace.hpp>

-
 std::string_view getResource(std::string_view name)
 {
+    // Convert the resource file name into the form generated by `ld -r -b binary`.
    std::string name_replaced(name);
    std::replace(name_replaced.begin(), name_replaced.end(), '/', '_');
    std::replace(name_replaced.begin(), name_replaced.end(), '-', '_');
    std::replace(name_replaced.begin(), name_replaced.end(), '.', '_');
    boost::replace_all(name_replaced, "+", "_PLUS_");

-    /// These are the names that are generated by "ld -r -b binary"
-    std::string symbol_name_data = "_binary_" + name_replaced + "_start";
-    std::string symbol_name_size = "_binary_" + name_replaced + "_size";
+    // In most `dlsym(3)` APIs, one passes the symbol name as it appears via
+    // something like `nm` or `objdump -t`. For example, a symbol `_foo` would be
+    // looked up with the string `"_foo"`.
+    //
+    // Apple's linker is confusingly different. The NOTES on the man page for
+    // `dlsym(3)` claim that one looks up the symbol with "the name used in C
+    // source code". In this example, that would mean using the string `"foo"`.
+    // This apparently applies even in the case where the symbol did not originate
+    // from C source, such as the embedded binary resource files used here. So
+    // the symbol name must not have a leading `_` on Apple platforms. It's not
+    // clear how this applies to other symbols, such as those which _have_ a leading
+    // underscore in them by design, many leading underscores, etc.
+#if defined OS_DARWIN
+    std::string prefix = "binary_";
+#else
+    std::string prefix = "_binary_";
+#endif
+    std::string symbol_name_start = prefix + name_replaced + "_start";
+    std::string symbol_name_end = prefix + name_replaced + "_end";

-    const void * sym_data = dlsym(RTLD_DEFAULT, symbol_name_data.c_str());
-    const void * sym_size = dlsym(RTLD_DEFAULT, symbol_name_size.c_str());
+    const char* sym_start = reinterpret_cast<const char*>(dlsym(RTLD_DEFAULT, symbol_name_start.c_str()));
+    const char* sym_end = reinterpret_cast<const char*>(dlsym(RTLD_DEFAULT, symbol_name_end.c_str()));

-    if (sym_data && sym_size)
-        return { static_cast<const char *>(sym_data), unalignedLoad<size_t>(&sym_size) };
+    if (sym_start && sym_end)
+    {
+        auto resource_size = static_cast<size_t>(std::distance(sym_start, sym_end));
+        return { sym_start, resource_size };
+    }
    return {};
 }
--- a/base/common/map.h
+++ b/base/common/map.h
@ -0,0 +1,52 @@
+#pragma once
+
+#include <type_traits>
+#include <boost/iterator/transform_iterator.hpp>
+
+namespace collections
+{
+
+/// \brief Strip type off top level reference and cv-qualifiers thus allowing storage in containers
+template <typename T>
+using unqualified_t = std::remove_cv_t<std::remove_reference_t<T>>;
+
+/** \brief Returns collection of the same container-type as the input collection,
+  *    with each element transformed by the application of `mapper`.
+  *    `mapper` is copied into both iterators; forwarding it twice could leave one with a moved-from functor. */
+template <template <typename...> class Collection, typename... Params, typename Mapper>
+auto map(const Collection<Params...> & collection, Mapper && mapper)
+{
+    using value_type = unqualified_t<decltype(mapper(*std::begin(collection)))>;
+
+    return Collection<value_type>(
+        boost::make_transform_iterator(std::begin(collection), mapper),
+        boost::make_transform_iterator(std::end(collection), mapper));
+}
+
+/** \brief Returns collection of specified container-type,
+  *    with each element transformed by the application of `mapper`.
+  *    Allows conversion between different container-types, e.g. std::vector to std::list
+  *    `mapper` is copied into both iterators; forwarding it twice could leave one with a moved-from functor. */
+template <template <typename...> class ResultCollection, typename Collection, typename Mapper>
+auto map(const Collection & collection, Mapper && mapper)
+{
+    using value_type = unqualified_t<decltype(mapper(*std::begin(collection)))>;
+
+    return ResultCollection<value_type>(
+        boost::make_transform_iterator(std::begin(collection), mapper),
+        boost::make_transform_iterator(std::end(collection), mapper));
+}
+
+/** \brief Returns collection of specified type,
+  *    with each element transformed by the application of `mapper`.
+  *    Allows leveraging implicit conversion between the result of applying `mapper` and R::value_type.
+  *    `mapper` is copied into both iterators; forwarding it twice could leave one with a moved-from functor. */
+template <typename ResultCollection, typename Collection, typename Mapper>
+auto map(const Collection & collection, Mapper && mapper)
+{
+    return ResultCollection(
+        boost::make_transform_iterator(std::begin(collection), mapper),
+        boost::make_transform_iterator(std::end(collection), mapper));
+}
+
+}
--- a/base/common/range.h
+++ b/base/common/range.h
@ -4,9 +4,9 @@
 #include <boost/range/adaptor/transformed.hpp>
 #include <type_traits>

-
-namespace ext
+namespace collections
 {
+
 namespace internal
 {
    template <typename ResultType, typename CountingType, typename BeginType, typename EndType>
@ -59,4 +59,5 @@ inline auto range(Type end)
    else
        return internal::rangeImpl<Type, std::underlying_type_t<Type>>(0, end);
 }
+
 }
--- a/base/common/scope_guard.h
+++ b/base/common/scope_guard.h
@ -4,9 +4,6 @@
 #include <memory>
 #include <utility>

-
-namespace ext
-{
 template <class F>
 class [[nodiscard]] basic_scope_guard
 {
@ -105,10 +102,9 @@ using scope_guard = basic_scope_guard<std::function<void(void)>>;

 template <class F>
 inline basic_scope_guard<F> make_scope_guard(F && function_) { return std::forward<F>(function_); }
-}

 #define SCOPE_EXIT_CONCAT(n, ...) \
-const auto scope_exit##n = ext::make_scope_guard([&] { __VA_ARGS__; })
+const auto scope_exit##n = make_scope_guard([&] { __VA_ARGS__; })
 #define SCOPE_EXIT_FWD(n, ...) SCOPE_EXIT_CONCAT(n, __VA_ARGS__)
 #define SCOPE_EXIT(...) SCOPE_EXIT_FWD(__LINE__, __VA_ARGS__)

--- a/base/common/scope_guard_safe.h
+++ b/base/common/scope_guard_safe.h
@ -1,6 +1,6 @@
 #pragma once

-#include <ext/scope_guard.h>
+#include <common/scope_guard.h>
 #include <common/logger_useful.h>
 #include <Common/MemoryTracker.h>

--- a/base/common/shared_ptr_helper.h
+++ b/base/common/shared_ptr_helper.h
@ -2,8 +2,6 @@

 #include <memory>

-namespace ext
-{

 /** Allows to make std::shared_ptr from T with protected constructor.
  *
@ -36,4 +34,3 @@ struct is_shared_ptr<std::shared_ptr<T>>

 template <typename T>
 inline constexpr bool is_shared_ptr_v = is_shared_ptr<T>::value;
-}
--- a/base/common/wide_integer.h
+++ b/base/common/wide_integer.h
@ -109,10 +109,7 @@ public:

    constexpr explicit operator bool() const noexcept;

-    template <class T>
-    using _integral_not_wide_integer_class = typename std::enable_if<std::is_arithmetic<T>::value, T>::type;
-
-    template <class T, class = _integral_not_wide_integer_class<T>>
+    template <typename T, typename = std::enable_if_t<std::is_arithmetic_v<T>, T>>
    constexpr operator T() const noexcept;

    constexpr operator long double() const noexcept;
--- a/base/common/wide_integer_impl.h
+++ b/base/common/wide_integer_impl.h
@ -255,13 +255,13 @@ struct integer<Bits, Signed>::_impl
            set_multiplier<double>(self, alpha);

        self *= max_int;
-        self += static_cast<uint64_t>(t - alpha * static_cast<T>(max_int)); // += b_i
+        self += static_cast<uint64_t>(t - floor(alpha) * static_cast<T>(max_int)); // += b_i
    }

    constexpr static void wide_integer_from_builtin(integer<Bits, Signed> & self, double rhs) noexcept
    {
        constexpr int64_t max_int = std::numeric_limits<int64_t>::max();
-        constexpr int64_t min_int = std::numeric_limits<int64_t>::min();
+        constexpr int64_t min_int = std::numeric_limits<int64_t>::lowest();

        /// There are values in int64 that have more than 53 significant bits (in terms of double
        /// representation). Such values, being promoted to double, are rounded up or down. If they are rounded up,
@ -278,7 +278,7 @@ struct integer<Bits, Signed>::_impl
            "which may result in UB when initializing double from int64_t");
 #endif

-        if ((rhs > 0 && rhs < static_cast<long double>(max_int)) || (rhs < 0 && rhs > static_cast<long double>(min_int)))
+        if (rhs > static_cast<long double>(min_int) && rhs < static_cast<long double>(max_int))
        {
            self = static_cast<int64_t>(rhs);
            return;
--- a/base/daemon/BaseDaemon.cpp
+++ b/base/daemon/BaseDaemon.cpp
@ -21,7 +21,7 @@
 #include <fstream>
 #include <sstream>
 #include <memory>
-#include <ext/scope_guard.h>
+#include <common/scope_guard.h>

 #include <Poco/Observer.h>
 #include <Poco/AutoPtr.h>
--- a/base/ext/bit_cast.h
+++ b/base/ext/bit_cast.h
@ -1,30 +0,0 @@
-#pragma once
-
-#include <string.h>
-#include <algorithm>
-#include <type_traits>
-
-
-namespace ext
-{
-    /** \brief Returns value `from` converted to type `To` while retaining bit representation.
-      *    `To` and `From` must satisfy `CopyConstructible`.
-      */
-    template <typename To, typename From>
-    std::decay_t<To> bit_cast(const From & from)
-    {
-        To res {};
-        memcpy(static_cast<void*>(&res), &from, std::min(sizeof(res), sizeof(from)));
-        return res;
-    }
-
-    /** \brief Returns value `from` converted to type `To` while retaining bit representation.
-      *    `To` and `From` must satisfy `CopyConstructible`.
-      */
-    template <typename To, typename From>
-    std::decay_t<To> safe_bit_cast(const From & from)
-    {
-        static_assert(sizeof(To) == sizeof(From), "bit cast on types of different width");
-        return bit_cast<To, From>(from);
-    }
-}
--- a/base/ext/chrono_io.h
+++ b/base/ext/chrono_io.h
@ -1,49 +0,0 @@
-#pragma once
-
-#include <chrono>
-#include <string>
-#include <sstream>
-#include <cctz/time_zone.h>
-
-
-namespace ext
-{
-    inline std::string to_string(const std::time_t & time)
-    {
-        return cctz::format("%Y-%m-%d %H:%M:%S", std::chrono::system_clock::from_time_t(time), cctz::local_time_zone());
-    }
-
-    template <typename Clock, typename Duration = typename Clock::duration>
-    std::string to_string(const std::chrono::time_point<Clock, Duration> & tp)
-    {
-        // Don't use DateLUT because it shows weird characters for
-        // TimePoint::max(). I wish we could use C++20 format, but it's not
-        // there yet.
-        // return DateLUT::instance().timeToString(std::chrono::system_clock::to_time_t(tp));
-
-        auto in_time_t = std::chrono::system_clock::to_time_t(tp);
-        return to_string(in_time_t);
-    }
-
-    template <typename Rep, typename Period = std::ratio<1>>
-    std::string to_string(const std::chrono::duration<Rep, Period> & duration)
-    {
-        auto seconds_as_int = std::chrono::duration_cast<std::chrono::seconds>(duration);
-        if (seconds_as_int == duration)
-            return std::to_string(seconds_as_int.count()) + "s";
-        auto seconds_as_double = std::chrono::duration_cast<std::chrono::duration<double>>(duration);
-        return std::to_string(seconds_as_double.count()) + "s";
-    }
-
-    template <typename Clock, typename Duration = typename Clock::duration>
-    std::ostream & operator<<(std::ostream & o, const std::chrono::time_point<Clock, Duration> & tp)
-    {
-        return o << to_string(tp);
-    }
-
-    template <typename Rep, typename Period = std::ratio<1>>
-    std::ostream & operator<<(std::ostream & o, const std::chrono::duration<Rep, Period> & duration)
-    {
-        return o << to_string(duration);
-    }
-}
--- a/base/ext/collection_cast.h
+++ b/base/ext/collection_cast.h
@ -1,24 +0,0 @@
-#pragma once
-
-#include <iterator>
-
-namespace ext
-{
-    /** \brief Returns collection of specified container-type.
-     *    Retains stored value_type, constructs resulting collection using iterator range. */
-    template <template <typename...> class ResultCollection, typename Collection>
-    auto collection_cast(const Collection & collection)
-    {
-        using value_type = typename Collection::value_type;
-
-        return ResultCollection<value_type>(std::begin(collection), std::end(collection));
-    }
-
-    /** \brief Returns collection of specified type.
-     *    Performs implicit conversion of between source and result value_type, if available and required. */
-    template <typename ResultCollection, typename Collection>
-    auto collection_cast(const Collection & collection)
-    {
-        return ResultCollection(std::begin(collection), std::end(collection));
-    }
-}
--- a/base/ext/enumerate.h
+++ b/base/ext/enumerate.h
@ -1,60 +0,0 @@
-#pragma once
-
-#include <ext/size.h>
-#include <type_traits>
-#include <utility>
-#include <iterator>
-
-
-/** \brief Provides a wrapper view around a container, allowing to iterate over it's elements and indices.
-  *    Allow writing code like shown below:
-  *
-  *        std::vector<T> v = getVector();
-  *        for (const std::pair<const std::size_t, T &> index_and_value : ext::enumerate(v))
-  *            std::cout << "element " << index_and_value.first << " is " << index_and_value.second << std::endl;
-  */
-namespace ext
-{
-    template <typename It> struct enumerate_iterator
-    {
-        using traits = typename std::iterator_traits<It>;
-        using iterator_category = typename traits::iterator_category;
-        using value_type = std::pair<const std::size_t, typename traits::value_type>;
-        using difference_type = typename traits::difference_type;
-        using reference = std::pair<const std::size_t, typename traits::reference>;
-
-        std::size_t idx;
-        It it;
-
-        enumerate_iterator(const std::size_t idx_, It it_) : idx{idx_}, it{it_} {}
-
-        auto operator*() const { return reference(idx, *it); }
-
-        bool operator!=(const enumerate_iterator & other) const { return it != other.it; }
-
-        enumerate_iterator & operator++() { return ++idx, ++it, *this; }
-    };
-
-    template <typename Collection> struct enumerate_wrapper
-    {
-        using underlying_iterator = decltype(std::begin(std::declval<Collection &>()));
-        using iterator = enumerate_iterator<underlying_iterator>;
-
-        Collection & collection;
-
-        enumerate_wrapper(Collection & collection_) : collection(collection_) {}
-
-        auto begin() { return iterator(0, std::begin(collection)); }
-        auto end() { return iterator(ext::size(collection), std::end(collection)); }
-    };
-
-    template <typename Collection> auto enumerate(Collection & collection)
-    {
-        return enumerate_wrapper<Collection>{collection};
-    }
-
-    template <typename Collection> auto enumerate(const Collection & collection)
-    {
-        return enumerate_wrapper<const Collection>{collection};
-    }
-}
--- a/base/ext/identity.h
+++ b/base/ext/identity.h
@ -1,24 +0,0 @@
-#pragma once
-
-#include <utility>
-
-namespace ext
-{
-    /// \brief Identity function for use with other algorithms as a pass-through.
-    class identity
-    {
-        /** \brief Function pointer type template for converting identity to a function pointer.
-         *    Presumably useless, provided for completeness. */
-        template <typename T> using function_ptr_t = T &&(*)(T &&);
-
-        /** \brief Implementation of identity as a non-instance member function for taking function pointer. */
-        template <typename T> static T && invoke(T && t) { return std::forward<T>(t); }
-
-    public:
-        /** \brief Returns the value passed as a sole argument using perfect forwarding. */
-        template <typename T> T && operator()(T && t) const { return std::forward<T>(t); }
-
-        /** \brief Allows conversion of identity instance to a function pointer. */
-        template <typename T> operator function_ptr_t<T>() const { return &invoke; };
-    };
-}
--- a/base/ext/make_array_n.h
+++ b/base/ext/make_array_n.h
@ -1,43 +0,0 @@
-#pragma once
-
-#include <utility>
-#include <type_traits>
-#include <array>
-
-
-/** \brief Produces std::array of specified size, containing copies of provided object.
-  *    Copy is performed N-1 times, and the last element is being moved.
-  * This helper allows to initialize std::array in place.
-  */
-namespace ext
-{
-    namespace detail
-    {
-
-        template<std::size_t size, typename T, std::size_t... indexes>
-        constexpr auto make_array_n_impl(T && value, std::index_sequence<indexes...>)
-        {
-            /// Comma is used to make N-1 copies of value
-            return std::array<std::decay_t<T>, size>{ (static_cast<void>(indexes), value)..., std::forward<T>(value) };
-        }
-
-    }
-
-    template<typename T>
-    constexpr auto make_array_n(std::integral_constant<std::size_t, 0>, T &&)
-    {
-        return std::array<std::decay_t<T>, 0>{};
-    }
-
-    template<std::size_t size, typename T>
-    constexpr auto make_array_n(std::integral_constant<std::size_t, size>, T && value)
-    {
-        return detail::make_array_n_impl<size>(std::forward<T>(value), std::make_index_sequence<size - 1>{});
-    }
-
-    template<std::size_t size, typename T>
-    constexpr auto make_array_n(T && value)
-    {
-        return make_array_n(std::integral_constant<std::size_t, size>{}, std::forward<T>(value));
-    }
-}
--- a/base/ext/map.h
+++ b/base/ext/map.h
@ -1,51 +0,0 @@
-#pragma once
-
-#include <type_traits>
-#include <boost/iterator/transform_iterator.hpp>
-
-
-namespace ext
-{
-    /// \brief Strip type off top level reference and cv-qualifiers thus allowing storage in containers
-    template <typename T>
-    using unqualified_t = std::remove_cv_t<std::remove_reference_t<T>>;
-
-    /** \brief Returns collection of the same container-type as the input collection,
-      *    with each element transformed by the application of `mapper`.
-      */
-    template <template <typename...> class Collection, typename... Params, typename Mapper>
-    auto map(const Collection<Params...> & collection, const Mapper mapper)
-    {
-        using value_type = unqualified_t<decltype(mapper(*std::begin(collection)))>;
-
-        return Collection<value_type>(
-            boost::make_transform_iterator(std::begin(collection), mapper),
-            boost::make_transform_iterator(std::end(collection), mapper));
-    }
-
-    /** \brief Returns collection of specified container-type,
-      *    with each element transformed by the application of `mapper`.
-      *    Allows conversion between different container-types, e.g. std::vector to std::list
-      */
-    template <template <typename...> class ResultCollection, typename Collection, typename Mapper>
-    auto map(const Collection & collection, const Mapper mapper)
-    {
-        using value_type = unqualified_t<decltype(mapper(*std::begin(collection)))>;
-
-        return ResultCollection<value_type>(
-            boost::make_transform_iterator(std::begin(collection), mapper),
-            boost::make_transform_iterator(std::end(collection), mapper));
-    }
-
-    /** \brief Returns collection of specified type,
-      *    with each element transformed by the application of `mapper`.
-      *    Allows leveraging implicit conversion between the result of applying `mapper` and R::value_type.
-      */
-    template <typename ResultCollection, typename Collection, typename Mapper>
-    auto map(const Collection & collection, const Mapper mapper)
-    {
-        return ResultCollection(
-            boost::make_transform_iterator(std::begin(collection), mapper),
-            boost::make_transform_iterator(std::end(collection), mapper));
-    }
-}
--- a/base/ext/push_back.h
+++ b/base/ext/push_back.h
@ -1,25 +0,0 @@
-#pragma once
-
-#include <vector>
-
-namespace ext
-{
-
-/// Moves all arguments starting from the second to the end of the vector.
-/// For example, `push_back(vec, a1, a2, a3)` is a more compact way to write
-/// `vec.push_back(a1); vec.push_back(a2); vec.push_back(a3);`
-/// This function is like boost::range::push_back() but works for noncopyable types too.
-template <typename T>
-void push_back(std::vector<T> &)
-{
-}
-
-template <typename T, typename FirstArg, typename... OtherArgs>
-void push_back(std::vector<T> & vec, FirstArg && first, OtherArgs &&... other)
-{
-    vec.reserve(vec.size() + sizeof...(other) + 1);
-    vec.emplace_back(std::move(first));
-    push_back(vec, std::move(other)...);
-}
-
-}
--- a/base/ext/size.h
+++ b/base/ext/size.h
@ -1,14 +0,0 @@
-#pragma once
-
-#include <cstdlib>
-
-
-namespace ext
-{
-    /** \brief Returns number of elements in an automatic array. */
-    template <typename T, std::size_t N>
-    constexpr std::size_t size(const T (&)[N]) noexcept { return N; }
-
-    /** \brief Returns number of in a container providing size() member function. */
-    template <typename T> constexpr auto size(const T & t) { return t.size(); }
-}
--- a/base/ext/unlock_guard.h
+++ b/base/ext/unlock_guard.h
@ -1,27 +0,0 @@
-#pragma once
-
-namespace ext
-{
-
-template <typename T>
-class unlock_guard
-{
-public:
-    unlock_guard(T & mutex_) : mutex(mutex_)
-    {
-        mutex.unlock();
-    }
-
-    ~unlock_guard()
-    {
-        mutex.lock();
-    }
-
-    unlock_guard(const unlock_guard &) = delete;
-    unlock_guard & operator=(const unlock_guard &) = delete;
-
-private:
-    T & mutex;
-};
-
-}
--- a/base/glibc-compatibility/glibc-compatibility.c
+++ b/base/glibc-compatibility/glibc-compatibility.c
@ -8,13 +8,6 @@
 extern "C" {
 #endif

-#include <pthread.h>
-
-size_t __pthread_get_minstack(const pthread_attr_t * attr)
-{
-    return 1048576;        /// This is a guess. Don't sure it is correct.
-}
-
 #include <signal.h>
 #include <unistd.h>
 #include <string.h>
@ -141,6 +134,8 @@ int __open_2(const char *path, int oflag)
 }


+#include <pthread.h>
+
 /// No-ops.
 int pthread_setname_np(pthread_t thread, const char *name) { return 0; }
 int pthread_getname_np(pthread_t thread, char *name, size_t len) { name[0] = '\0'; return 0; };
--- a/base/mysqlxx/Query.cpp
+++ b/base/mysqlxx/Query.cpp
@ -2,7 +2,7 @@
 #include <errmsg.h>
 #include <mysql.h>
 #else
-#include <mysql/errmsg.h>
+#include <mysql/errmsg.h> //Y_IGNORE
 #include <mysql/mysql.h>
 #endif

--- a/base/mysqlxx/ya.make
+++ b/base/mysqlxx/ya.make
@ -0,0 +1,39 @@
+# This file is generated automatically, do not edit. See 'ya.make.in' and use 'utils/generate-ya-make' to regenerate it.
+LIBRARY()
+
+OWNER(g:clickhouse)
+
+CFLAGS(-g0)
+
+PEERDIR(
+    contrib/restricted/boost/libs
+    contrib/libs/libmysql_r
+    contrib/libs/poco/Foundation
+    contrib/libs/poco/Util
+)
+
+ADDINCL(
+    GLOBAL clickhouse/base
+    clickhouse/base
+    contrib/libs/libmysql_r
+)
+
+NO_COMPILER_WARNINGS()
+
+NO_UTIL()
+
+SRCS(
+    Connection.cpp
+    Exception.cpp
+    Pool.cpp
+    PoolFactory.cpp
+    PoolWithFailover.cpp
+    Query.cpp
+    ResultBase.cpp
+    Row.cpp
+    UseQueryResult.cpp
+    Value.cpp
+
+)
+
+END()
--- a/base/mysqlxx/ya.make.in
+++ b/base/mysqlxx/ya.make.in
@ -0,0 +1,28 @@
+LIBRARY()
+
+OWNER(g:clickhouse)
+
+CFLAGS(-g0)
+
+PEERDIR(
+    contrib/restricted/boost/libs
+    contrib/libs/libmysql_r
+    contrib/libs/poco/Foundation
+    contrib/libs/poco/Util
+)
+
+ADDINCL(
+    GLOBAL clickhouse/base
+    clickhouse/base
+    contrib/libs/libmysql_r
+)
+
+NO_COMPILER_WARNINGS()
+
+NO_UTIL()
+
+SRCS(
+<? find . -name '*.cpp' | grep -v -F tests/ | grep -v -F examples | sed 's/^\.\//    /' | sort ?>
+)
+
+END()
--- a/base/ya.make
+++ b/base/ya.make
@ -4,6 +4,7 @@ RECURSE(
    common
    daemon
    loggers
+    mysqlxx
    pcg-random
    widechar_width
    readpassphrase
--- a/cmake/embed_binary.cmake
+++ b/cmake/embed_binary.cmake
@ -0,0 +1,76 @@
+# Embed a set of resource files into a resulting object file.
+#
+# Signature: `clickhouse_embed_binaries(TARGET <target> RESOURCE_DIR <dir> RESOURCES <resource> ...)`
+#
+# This will generate a static library target named `<target>`, which contains the contents of
+# each `<resource>` file. The files should be located in `<dir>`. <dir> defaults to
+# ${CMAKE_CURRENT_SOURCE_DIR}, and the resources may not be empty.
+#
+# Each resource will result in three symbols in the final archive, based on the name `<resource>`.
+# These are:
+#   1. `_binary_<name>_start`: Points to the start of the binary data from `<resource>`.
+#   2. `_binary_<name>_end`: Points to the end of the binary data from `<resource>`.
+#   3. `_binary_<name>_size`: Points to the size of the binary data from `<resource>`.
+#
+# `<name>` is a normalized name derived from `<resource>`, by replacing the characters "./-" with
+# the character "_", and the character "+" with "_PLUS_". This scheme is similar to those generated
+# by `ld -r -b binary`, and matches the expectations in `./base/common/getResource.cpp`.
+macro(clickhouse_embed_binaries)
+    # Parse the keyword arguments into EMBED_TARGET, EMBED_RESOURCE_DIR and EMBED_RESOURCES.
+    # NOTE: this is a macro, so every variable set below is visible in the caller's scope.
+    set(one_value_args TARGET RESOURCE_DIR)
+    set(resources RESOURCES)
+    cmake_parse_arguments(EMBED "" "${one_value_args}" ${resources} ${ARGN})
+
+    if (NOT DEFINED EMBED_TARGET)
+        message(FATAL_ERROR "A target name must be provided for embedding binary resources into")
+    endif()
+
+    if (NOT DEFINED EMBED_RESOURCE_DIR)
+        set(EMBED_RESOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
+    endif()
+
+    list(LENGTH EMBED_RESOURCES N_RESOURCES)
+    if (N_RESOURCES LESS 1)
+        message(FATAL_ERROR "The list of binary resources to embed may not be empty")
+    endif()
+
+    # If cross-compiling, ensure we use the toolchain file and target the
+    # actual target architecture. The flags are stored as a CMake list and
+    # expanded unquoted in the compile command below, so that every flag is
+    # passed to the compiler as a separate argument (and no argument at all
+    # is added when building natively).
+    if (CMAKE_CROSSCOMPILING)
+        set(CROSS_COMPILE_FLAGS "--target=${CMAKE_C_COMPILER_TARGET}" "--gcc-toolchain=${TOOLCHAIN_FILE}")
+    else()
+        set(CROSS_COMPILE_FLAGS "")
+    endif()
+
+    set(EMBED_TEMPLATE_FILE "${PROJECT_SOURCE_DIR}/programs/embed_binary.S.in")
+    set(RESOURCE_OBJS)
+    foreach(RESOURCE_FILE ${EMBED_RESOURCES})
+        set(RESOURCE_OBJ "${RESOURCE_FILE}.o")
+        list(APPEND RESOURCE_OBJS "${RESOURCE_OBJ}")
+
+        # Normalize the name of the resource: "./-" become "_" and "+" becomes "_PLUS_".
+        # BINARY_FILE_NAME and SYMBOL_NAME are not used again in this macro; presumably
+        # they are substituted into the assembly template by configure_file() below
+        # (the template is not part of this file -- verify against embed_binary.S.in).
+        set(BINARY_FILE_NAME "${RESOURCE_FILE}")
+        string(REGEX REPLACE "[\./-]" "_" SYMBOL_NAME "${RESOURCE_FILE}") # - must be last in regex
+        string(REPLACE "+" "_PLUS_" SYMBOL_NAME "${SYMBOL_NAME}")
+        set(ASSEMBLY_FILE_NAME "${RESOURCE_FILE}.S")
+
+        # Put the configured assembly file in the output directory.
+        # This is so we can clean it up as usual.
+        configure_file("${EMBED_TEMPLATE_FILE}" "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}" @ONLY)
+
+        # Generate the output object file by compiling the assembly. The compiler is
+        # run from the resource directory so that the assembly `.incbin` directive
+        # can find the resource file. The object is rebuilt whenever the configured
+        # assembly or the resource file itself changes.
+        add_custom_command(
+            OUTPUT "${RESOURCE_OBJ}"
+            COMMAND ${CMAKE_C_COMPILER} ${CROSS_COMPILE_FLAGS} -c -o
+                    "${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}"
+                    "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}"
+            DEPENDS
+                    "${CMAKE_CURRENT_BINARY_DIR}/${ASSEMBLY_FILE_NAME}"
+                    "${EMBED_RESOURCE_DIR}/${RESOURCE_FILE}"
+            WORKING_DIRECTORY "${EMBED_RESOURCE_DIR}"
+            VERBATIM
+        )
+        set_source_files_properties("${RESOURCE_OBJ}" PROPERTIES EXTERNAL_OBJECT true GENERATED true)
+    endforeach()
+
+    # Bundle all generated objects into a static library; there are no real sources,
+    # so the linker language has to be set explicitly.
+    add_library("${EMBED_TARGET}" STATIC ${RESOURCE_OBJS})
+    set_target_properties("${EMBED_TARGET}" PROPERTIES LINKER_LANGUAGE C)
+endmacro()
--- a/cmake/linux/toolchain-aarch64.cmake
+++ b/cmake/linux/toolchain-aarch64.cmake
@ -4,6 +4,7 @@ set (CMAKE_C_COMPILER_TARGET "aarch64-linux-gnu")
 set (CMAKE_CXX_COMPILER_TARGET "aarch64-linux-gnu")
 set (CMAKE_ASM_COMPILER_TARGET "aarch64-linux-gnu")
 set (CMAKE_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../toolchain/linux-aarch64/aarch64-linux-gnu/libc")
+get_filename_component (TOOLCHAIN_FILE "${CMAKE_TOOLCHAIN_FILE}" REALPATH)

 # We don't use compiler from toolchain because it's gcc-8, and we provide support only for gcc-9.
 set (CMAKE_AR "${CMAKE_CURRENT_LIST_DIR}/../toolchain/linux-aarch64/bin/aarch64-linux-gnu-ar" CACHE FILEPATH "" FORCE)
--- a/contrib/NuRaft
+++ b/contrib/NuRaft
@ -1 +1 @@
-Subproject commit 2a1bf7d87b4a03561fc66fbb49cee8a288983c5d
+Subproject commit 976874b7aa7f422bf4ea595bb7d1166c617b1c26
--- a/contrib/arrow
+++ b/contrib/arrow
@ -1 +1 @@
-Subproject commit 616b3dc76a0c8450b4027ded8a78e9619d7c845f
+Subproject commit debf751a129bdda9ff4d1e895e08957ff77000a1
--- a/contrib/arrow-cmake/CMakeLists.txt
+++ b/contrib/arrow-cmake/CMakeLists.txt
@ -188,6 +188,7 @@ set(ARROW_SRCS
        "${LIBRARY_DIR}/array/util.cc"
        "${LIBRARY_DIR}/array/validate.cc"

+        "${LIBRARY_DIR}/compute/api_aggregate.cc"
        "${LIBRARY_DIR}/compute/api_scalar.cc"
        "${LIBRARY_DIR}/compute/api_vector.cc"
        "${LIBRARY_DIR}/compute/cast.cc"
@ -198,8 +199,11 @@ set(ARROW_SRCS

        "${LIBRARY_DIR}/compute/kernels/aggregate_basic.cc"
        "${LIBRARY_DIR}/compute/kernels/aggregate_mode.cc"
+        "${LIBRARY_DIR}/compute/kernels/aggregate_quantile.cc"
+        "${LIBRARY_DIR}/compute/kernels/aggregate_tdigest.cc"
        "${LIBRARY_DIR}/compute/kernels/aggregate_var_std.cc"
        "${LIBRARY_DIR}/compute/kernels/codegen_internal.cc"
+        "${LIBRARY_DIR}/compute/kernels/hash_aggregate.cc"
        "${LIBRARY_DIR}/compute/kernels/scalar_arithmetic.cc"
        "${LIBRARY_DIR}/compute/kernels/scalar_boolean.cc"
        "${LIBRARY_DIR}/compute/kernels/scalar_cast_boolean.cc"
@ -243,6 +247,7 @@ set(ARROW_SRCS
        "${LIBRARY_DIR}/io/interfaces.cc"
        "${LIBRARY_DIR}/io/memory.cc"
        "${LIBRARY_DIR}/io/slow.cc"
+        "${LIBRARY_DIR}/io/transform.cc"

        "${LIBRARY_DIR}/tensor/coo_converter.cc"
        "${LIBRARY_DIR}/tensor/csf_converter.cc"
@ -256,11 +261,8 @@ set(ARROW_SRCS
        "${LIBRARY_DIR}/util/bitmap_builders.cc"
        "${LIBRARY_DIR}/util/bitmap_ops.cc"
        "${LIBRARY_DIR}/util/bpacking.cc"
+        "${LIBRARY_DIR}/util/cancel.cc"
        "${LIBRARY_DIR}/util/compression.cc"
-        "${LIBRARY_DIR}/util/compression_lz4.cc"
-        "${LIBRARY_DIR}/util/compression_snappy.cc"
-        "${LIBRARY_DIR}/util/compression_zlib.cc"
-        "${LIBRARY_DIR}/util/compression_zstd.cc"
        "${LIBRARY_DIR}/util/cpu_info.cc"
        "${LIBRARY_DIR}/util/decimal.cc"
        "${LIBRARY_DIR}/util/delimiting.cc"
@ -268,13 +270,14 @@ set(ARROW_SRCS
        "${LIBRARY_DIR}/util/future.cc"
        "${LIBRARY_DIR}/util/int_util.cc"
        "${LIBRARY_DIR}/util/io_util.cc"
-        "${LIBRARY_DIR}/util/iterator.cc"
        "${LIBRARY_DIR}/util/key_value_metadata.cc"
        "${LIBRARY_DIR}/util/logging.cc"
        "${LIBRARY_DIR}/util/memory.cc"
+        "${LIBRARY_DIR}/util/mutex.cc"
        "${LIBRARY_DIR}/util/string_builder.cc"
        "${LIBRARY_DIR}/util/string.cc"
        "${LIBRARY_DIR}/util/task_group.cc"
+        "${LIBRARY_DIR}/util/tdigest.cc"
        "${LIBRARY_DIR}/util/thread_pool.cc"
        "${LIBRARY_DIR}/util/time.cc"
        "${LIBRARY_DIR}/util/trie.cc"
@ -368,14 +371,14 @@ set(PARQUET_SRCS
        "${LIBRARY_DIR}/column_reader.cc"
        "${LIBRARY_DIR}/column_scanner.cc"
        "${LIBRARY_DIR}/column_writer.cc"
-        "${LIBRARY_DIR}/deprecated_io.cc"
        "${LIBRARY_DIR}/encoding.cc"
-        "${LIBRARY_DIR}/encryption.cc"
-        "${LIBRARY_DIR}/encryption_internal.cc"
+        "${LIBRARY_DIR}/encryption/encryption.cc"
+        "${LIBRARY_DIR}/encryption/encryption_internal.cc"
+        "${LIBRARY_DIR}/encryption/internal_file_decryptor.cc"
+        "${LIBRARY_DIR}/encryption/internal_file_encryptor.cc"
+        "${LIBRARY_DIR}/exception.cc"
        "${LIBRARY_DIR}/file_reader.cc"
        "${LIBRARY_DIR}/file_writer.cc"
-        "${LIBRARY_DIR}/internal_file_decryptor.cc"
-        "${LIBRARY_DIR}/internal_file_encryptor.cc"
        "${LIBRARY_DIR}/level_conversion.cc"
        "${LIBRARY_DIR}/level_comparison.cc"
        "${LIBRARY_DIR}/metadata.cc"
@ -385,6 +388,8 @@ set(PARQUET_SRCS
        "${LIBRARY_DIR}/properties.cc"
        "${LIBRARY_DIR}/schema.cc"
        "${LIBRARY_DIR}/statistics.cc"
+        "${LIBRARY_DIR}/stream_reader.cc"
+        "${LIBRARY_DIR}/stream_writer.cc"
        "${LIBRARY_DIR}/types.cc"

        "${GEN_LIBRARY_DIR}/parquet_constants.cpp"
--- a/contrib/cctz-cmake/CMakeLists.txt
+++ b/contrib/cctz-cmake/CMakeLists.txt
@ -39,6 +39,7 @@ if (NOT USE_INTERNAL_CCTZ_LIBRARY)
 endif()

 if (NOT EXTERNAL_CCTZ_LIBRARY_FOUND OR NOT EXTERNAL_CCTZ_LIBRARY_WORKS)
+    include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
    set(USE_INTERNAL_CCTZ_LIBRARY 1)
    set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/cctz")

@ -70,19 +71,18 @@ if (NOT EXTERNAL_CCTZ_LIBRARY_FOUND OR NOT EXTERNAL_CCTZ_LIBRARY_WORKS)
    set(SYSTEM_STORAGE_TZ_FILE "${CMAKE_BINARY_DIR}/src/Storages/System/StorageSystemTimeZones.generated.cpp")
    # remove existing copies so that it's generated fresh on each build.
    file(REMOVE ${SYSTEM_STORAGE_TZ_FILE})
-    # Build a libray with embedded tzdata
-    if (OS_LINUX)
+
    # get the list of timezones from tzdata shipped with cctz
    set(TZDIR "${LIBRARY_DIR}/testdata/zoneinfo")
    file(STRINGS "${LIBRARY_DIR}/testdata/version" TZDATA_VERSION)
    set_property(GLOBAL PROPERTY TZDATA_VERSION_PROP "${TZDATA_VERSION}")
    message(STATUS "Packaging with tzdata version: ${TZDATA_VERSION}")

-        set(TZ_OBJS)
+    set(TIMEZONE_RESOURCE_FILES)

    # each file in that dir (except for *.tab and localtime) stores the info about a timezone
    execute_process(COMMAND
-            bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | sort | paste -sd ';'"
+        bash -c "cd ${TZDIR} && find * -type f -and ! -name '*.tab' -and ! -name 'localtime' | sort | paste -sd ';' -"
        OUTPUT_STRIP_TRAILING_WHITESPACE
        OUTPUT_VARIABLE TIMEZONES)

@ -91,42 +91,16 @@ if (NOT EXTERNAL_CCTZ_LIBRARY_FOUND OR NOT EXTERNAL_CCTZ_LIBRARY_WORKS)

    foreach(TIMEZONE ${TIMEZONES})
        file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "    \"${TIMEZONE}\",\n")
-            string(REPLACE "/" "_" TIMEZONE_ID ${TIMEZONE})
-            string(REPLACE "+" "_PLUS_" TIMEZONE_ID ${TIMEZONE_ID})
-            set(TZ_OBJ ${TIMEZONE_ID}.o)
-            set(TZ_OBJS ${TZ_OBJS} ${TZ_OBJ})
-
-            # https://stackoverflow.com/questions/14776463/compile-and-add-an-object-file-from-a-binary-with-cmake
-            # PPC64LE fails to do this with objcopy, use ld or lld instead
-            if (ARCH_PPC64LE)
-                add_custom_command(OUTPUT ${TZ_OBJ}
-                    COMMAND cp "${TZDIR}/${TIMEZONE}" "${CMAKE_CURRENT_BINARY_DIR}/${TIMEZONE_ID}"
-                    COMMAND cd ${CMAKE_CURRENT_BINARY_DIR} && ${CMAKE_LINKER} -m elf64lppc -r -b binary -o ${TZ_OBJ} ${TIMEZONE_ID}
-                    COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/${TIMEZONE_ID}")
-            else()
-                add_custom_command(OUTPUT ${TZ_OBJ}
-                    COMMAND cp "${TZDIR}/${TIMEZONE}" "${CMAKE_CURRENT_BINARY_DIR}/${TIMEZONE_ID}"
-                    COMMAND cd ${CMAKE_CURRENT_BINARY_DIR} && ${OBJCOPY_PATH} -I binary ${OBJCOPY_ARCH_OPTIONS}
-                            --rename-section .data=.rodata,alloc,load,readonly,data,contents ${TIMEZONE_ID} ${TZ_OBJ}
-                    COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/${TIMEZONE_ID}")
-            endif()
-            set_source_files_properties(${TZ_OBJ} PROPERTIES EXTERNAL_OBJECT true GENERATED true)
+        list(APPEND TIMEZONE_RESOURCE_FILES "${TIMEZONE}")
    endforeach(TIMEZONE)
-
    file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "    nullptr};\n")
-
-        add_library(tzdata STATIC ${TZ_OBJS})
-        set_target_properties(tzdata PROPERTIES LINKER_LANGUAGE C)
-        # whole-archive prevents symbols from being discarded for unknown reason
-        # CMake can shuffle each of target_link_libraries arguments with other
-        # libraries in linker command. To avoid this we hardcode whole-archive
-        # library into single string.
+    clickhouse_embed_binaries(
+        TARGET tzdata
+        RESOURCE_DIR "${TZDIR}"
+        RESOURCES ${TIMEZONE_RESOURCE_FILES}
+    )
    add_dependencies(cctz tzdata)
    target_link_libraries(cctz INTERFACE "-Wl,${WHOLE_ARCHIVE} $<TARGET_FILE:tzdata> -Wl,${NO_WHOLE_ARCHIVE}")
-    else ()
-        file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "// autogenerated by ClickHouse/contrib/cctz-cmake/CMakeLists.txt\n")
-        file(APPEND ${SYSTEM_STORAGE_TZ_FILE} "const char * auto_time_zones[] {nullptr};\n" )
-    endif ()
 endif ()

 message (STATUS "Using cctz")
--- a/contrib/cppkafka
+++ b/contrib/cppkafka
@ -1 +1 @@
-Subproject commit 57a599d99c540e647bcd0eb9ea77c523cca011b3
+Subproject commit 5a119f689f8a4d90d10a9635e7ee2bee5c127de1
--- a/contrib/flatbuffers
+++ b/contrib/flatbuffers
@ -1 +1 @@
-Subproject commit 22e3ffc66d2d7d72d1414390aa0f04ffd114a5a1
+Subproject commit eb3f827948241ce0e701516f16cd67324802bce9
--- a/contrib/h3
+++ b/contrib/h3
@ -1 +1 @@
-Subproject commit e209086ae1b5477307f545a0f6111780edc59940
+Subproject commit 5c44b06c406613b7792a60b11d04b871116f6e30
--- a/contrib/libpqxx
+++ b/contrib/libpqxx
@ -1 +1 @@
-Subproject commit 58d2a028d1600225ac3a478d6b3a06ba2f0c01f6
+Subproject commit 357608d11b7a1961c3fb7db2ef9a5dbb2e87da77
--- a/contrib/libpqxx-cmake/CMakeLists.txt
+++ b/contrib/libpqxx-cmake/CMakeLists.txt
@ -64,7 +64,7 @@ set (HDRS
 add_library(libpqxx ${SRCS} ${HDRS})

 target_link_libraries(libpqxx PUBLIC ${LIBPQ_LIBRARY})
-target_include_directories (libpqxx PRIVATE "${LIBRARY_DIR}/include")
+target_include_directories (libpqxx SYSTEM PRIVATE "${LIBRARY_DIR}/include")

 # crutch
 set(CM_CONFIG_H_IN "${LIBRARY_DIR}/include/pqxx/config.h.in")
--- a/contrib/orc
+++ b/contrib/orc
@ -1 +1 @@
-Subproject commit 5981208e39447df84827f6a961d1da76bacb6078
+Subproject commit 0a936f6bbdb9303308973073f8623b5a8d82eae1
--- a/contrib/replxx
+++ b/contrib/replxx
@ -1 +1 @@
-Subproject commit 2b24f14594d7606792b92544bb112a6322ba34d7
+Subproject commit c81be6c68b146f15f2096b7ef80e3f21fe27004c
--- a/docker/test/fuzzer/run-fuzzer.sh
+++ b/docker/test/fuzzer/run-fuzzer.sh
@ -200,7 +200,7 @@ continue
        # The server has died.
        task_exit_code=210
        echo "failure" > status.txt
-        if ! grep -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: AddressSanitizer:.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*\|.*_LIBCPP_ASSERT.*" server.log > description.txt
+        if ! grep --text -ao "Received signal.*\|Logical error.*\|Assertion.*failed\|Failed assertion.*\|.*runtime error: .*\|.*is located.*\|SUMMARY: AddressSanitizer:.*\|SUMMARY: MemorySanitizer:.*\|SUMMARY: ThreadSanitizer:.*\|.*_LIBCPP_ASSERT.*" server.log > description.txt
        then
            echo "Lost connection to server. See the logs." > description.txt
        fi
@ -220,8 +220,8 @@ continue
        # which is confusing.
        task_exit_code=$fuzzer_exit_code
        echo "failure" > status.txt
-        { grep -o "Found error:.*" fuzzer.log \
-            || grep -o "Exception.*" fuzzer.log \
+        { grep --text -o "Found error:.*" fuzzer.log \
+            || grep --text -o "Exception.*" fuzzer.log \
            || echo "Fuzzer failed ($fuzzer_exit_code). See the logs." ; } \
            | tail -1 > description.txt
    fi
--- a/docker/test/performance-comparison/report.py
+++ b/docker/test/performance-comparison/report.py
@ -489,7 +489,7 @@ if args.report == 'main':
        text = tableStart('Test Times')
        text += tableHeader(columns, attrs)

-        allowed_average_run_time = 1.6 # 30 seconds per test at 7 runs
+        allowed_average_run_time = 3.75 # 60 seconds per test at (7 + 1) * 2 runs
        for r in rows:
            anchor = f'{currentTableAnchor()}.{r[0]}'
            total_runs = (int(r[7]) + 1) * 2  # one prewarm run, two servers
--- a/docker/test/stateful/run.sh
+++ b/docker/test/stateful/run.sh
@ -112,12 +112,15 @@ timeout "$MAX_RUN_TIME" bash -c run_tests ||:

 ./process_functional_tests_result.py || echo -e "failure\tCannot parse results" > /test_output/check_status.tsv

+grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||:
 pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.gz ||:
 mv /var/log/clickhouse-server/stderr.log /test_output/ ||:
 if [[ -n "$WITH_COVERAGE" ]] && [[ "$WITH_COVERAGE" -eq 1 ]]; then
    tar -chf /test_output/clickhouse_coverage.tar.gz /profraw ||:
 fi
 if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
+    grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server1.log ||:
+    grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server2.log ||:
    pigz < /var/log/clickhouse-server/clickhouse-server1.log > /test_output/clickhouse-server1.log.gz ||:
    pigz < /var/log/clickhouse-server/clickhouse-server2.log > /test_output/clickhouse-server2.log.gz ||:
    mv /var/log/clickhouse-server/stderr1.log /test_output/ ||:
--- a/docker/test/stateless/process_functional_tests_result.py
+++ b/docker/test/stateless/process_functional_tests_result.py
@ -6,14 +6,16 @@ import argparse
 import csv

 OK_SIGN = "[ OK "
-FAIL_SING = "[ FAIL "
-TIMEOUT_SING = "[ Timeout! "
+FAIL_SIGN = "[ FAIL "
+TIMEOUT_SIGN = "[ Timeout! "
 UNKNOWN_SIGN = "[ UNKNOWN "
 SKIPPED_SIGN = "[ SKIPPED "
 HUNG_SIGN = "Found hung queries in processlist"

 NO_TASK_TIMEOUT_SIGN = "All tests have finished"

+RETRIES_SIGN = "Some tests were restarted"
+
 def process_test_log(log_path):
    total = 0
    skipped = 0
@ -21,6 +23,7 @@ def process_test_log(log_path):
    failed = 0
    success = 0
    hung = False
+    retries = False
    task_timeout = True
    test_results = []
    with open(log_path, 'r') as test_file:
@ -30,7 +33,9 @@ def process_test_log(log_path):
                task_timeout = False
            if HUNG_SIGN in line:
                hung = True
-            if any(sign in line for sign in (OK_SIGN, FAIL_SING, UNKNOWN_SIGN, SKIPPED_SIGN)):
+            if RETRIES_SIGN in line:
+                retries = True
+            if any(sign in line for sign in (OK_SIGN, FAIL_SIGN, UNKNOWN_SIGN, SKIPPED_SIGN)):
                test_name = line.split(' ')[2].split(':')[0]

                test_time = ''
@ -42,10 +47,10 @@ def process_test_log(log_path):
                    pass

                total += 1
-                if TIMEOUT_SING in line:
+                if TIMEOUT_SIGN in line:
                    failed += 1
                    test_results.append((test_name, "Timeout", test_time))
-                elif FAIL_SING in line:
+                elif FAIL_SIGN in line:
                    failed += 1
                    test_results.append((test_name, "FAIL", test_time))
                elif UNKNOWN_SIGN in line:
@ -57,7 +62,7 @@ def process_test_log(log_path):
                else:
                    success += int(OK_SIGN in line)
                    test_results.append((test_name, "OK", test_time))
-    return total, skipped, unknown, failed, success, hung, task_timeout, test_results
+    return total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results

 def process_result(result_path):
    test_results = []
@ -73,7 +78,7 @@ def process_result(result_path):
        state = "error"

    if result_path and os.path.exists(result_path):
-        total, skipped, unknown, failed, success, hung, task_timeout, test_results = process_test_log(result_path)
+        total, skipped, unknown, failed, success, hung, task_timeout, retries, test_results = process_test_log(result_path)
        is_flacky_check = 1 < int(os.environ.get('NUM_TRIES', 1))
        # If no tests were run (success == 0) it indicates an error (e.g. server did not start or crashed immediately)
        # But it's Ok for "flaky checks" - they can contain just one test for check which is marked as skipped.
@ -83,9 +88,14 @@ def process_result(result_path):
        if hung:
            description = "Some queries hung, "
            state = "failure"
+            test_results.append(("Some queries hung", "FAIL", "0"))
        elif task_timeout:
            description = "Timeout, "
            state = "failure"
+            test_results.append(("Timeout", "FAIL", "0"))
+        elif retries:
+            description = "Some tests restarted, "
+            test_results.append(("Some tests restarted", "SKIPPED", "0"))
        else:
            description = ""

--- a/docker/test/stateless/run.sh
+++ b/docker/test/stateless/run.sh
@ -80,6 +80,8 @@ function run_tests()

    if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
        ADDITIONAL_OPTIONS+=('--replicated-database')
+        ADDITIONAL_OPTIONS+=('--jobs')
+        ADDITIONAL_OPTIONS+=('2')
    else
        # Too many tests fail for DatabaseReplicated in parallel. All other
        # configurations are OK.
@ -101,6 +103,7 @@ timeout "$MAX_RUN_TIME" bash -c run_tests ||:

 clickhouse-client -q "system flush logs" ||:

+grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server.log ||:
 pigz < /var/log/clickhouse-server/clickhouse-server.log > /test_output/clickhouse-server.log.gz &
 clickhouse-client -q "select * from system.query_log format TSVWithNamesAndTypes" | pigz > /test_output/query-log.tsv.gz &
 clickhouse-client -q "select * from system.query_thread_log format TSVWithNamesAndTypes" | pigz > /test_output/query-thread-log.tsv.gz &
@ -138,6 +141,8 @@ tar -chf /test_output/query_log_dump.tar /var/lib/clickhouse/data/system/query_l
 tar -chf /test_output/coordination.tar /var/lib/clickhouse/coordination ||:

 if [[ -n "$USE_DATABASE_REPLICATED" ]] && [[ "$USE_DATABASE_REPLICATED" -eq 1 ]]; then
+  grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server1.log ||:
+  grep -Fa "Fatal" /var/log/clickhouse-server/clickhouse-server2.log ||:
    pigz < /var/log/clickhouse-server/clickhouse-server1.log > /test_output/clickhouse-server1.log.gz ||:
    pigz < /var/log/clickhouse-server/clickhouse-server2.log > /test_output/clickhouse-server2.log.gz ||:
    mv /var/log/clickhouse-server/stderr1.log /test_output/ ||:
--- a/docs/clean
+++ b/docs/clean
--- a/docs/en/development/architecture.md
+++ b/docs/en/development/architecture.md
@ -112,7 +112,7 @@ A hand-written recursive descent parser parses a query. For example, `ParserSele

 Interpreters are responsible for creating the query execution pipeline from an `AST`. There are simple interpreters, such as `InterpreterExistsQuery` and `InterpreterDropQuery`, or the more sophisticated `InterpreterSelectQuery`. The query execution pipeline is a combination of block input or output streams. For example, the result of interpreting the `SELECT` query is the `IBlockInputStream` to read the result set from; the result of the INSERT query is the `IBlockOutputStream` to write data for insertion to, and the result of interpreting the `INSERT SELECT` query is the `IBlockInputStream` that returns an empty result set on the first read, but that copies data from `SELECT` to `INSERT` at the same time.

-`InterpreterSelectQuery` uses `ExpressionAnalyzer` and `ExpressionActions` machinery for query analysis and transformations. This is where most rule-based query optimizations are done. `ExpressionAnalyzer` is quite messy and should be rewritten: various query transformations and optimizations should be extracted to separate classes to allow modular transformations or query.
+`InterpreterSelectQuery` uses `ExpressionAnalyzer` and `ExpressionActions` machinery for query analysis and transformations. This is where most rule-based query optimizations are done. `ExpressionAnalyzer` is quite messy and should be rewritten: various query transformations and optimizations should be extracted to separate classes to allow modular transformations of query.

 ## Functions {#functions}

@ -169,7 +169,7 @@ There is no global query plan for distributed query execution. Each node has its

 `MergeTree` is a family of storage engines that supports indexing by primary key. The primary key can be an arbitrary tuple of columns or expressions. Data in a `MergeTree` table is stored in “parts”. Each part stores data in the primary key order, so data is ordered lexicographically by the primary key tuple. All the table columns are stored in separate `column.bin` files in these parts. The files consist of compressed blocks. Each block is usually from 64 KB to 1 MB of uncompressed data, depending on the average value size. The blocks consist of column values placed contiguously one after the other. Column values are in the same order for each column (the primary key defines the order), so when you iterate by many columns, you get values for the corresponding rows.

-The primary key itself is “sparse”. It does not address every single row, but only some ranges of data. A separate `primary.idx` file has the value of the primary key for each N-th row, where N is called `index_granularity` (usually, N = 8192). Also, for each column, we have `column.mrk` files with “marks,” which are offsets to each N-th row in the data file. Each mark is a pair: the offset in the file to the beginning of the compressed block, and the offset in the decompressed block to the beginning of data. Usually, compressed blocks are aligned by marks, and the offset in the decompressed block is zero. Data for `primary.idx` always resides in memory, and data for `column.mrk` files is cached.
+The primary key itself is “sparse”. It does not address every single row, but only some ranges of data. A separate `primary.idx` file has the value of the primary key for each N-th row, where N is called `index_granularity` (usually, N = 8192). Also, for each column, we have `column.mrk` files with “marks”, which are offsets to each N-th row in the data file. Each mark is a pair: the offset in the file to the beginning of the compressed block, and the offset in the decompressed block to the beginning of data. Usually, compressed blocks are aligned by marks, and the offset in the decompressed block is zero. Data for `primary.idx` always resides in memory, and data for `column.mrk` files is cached.

 When we are going to read something from a part in `MergeTree`, we look at `primary.idx` data and locate ranges that could contain requested data, then look at `column.mrk` data and calculate offsets for where to start reading those ranges. Because of sparseness, excess data may be read. ClickHouse is not suitable for a high load of simple point queries, because the entire range with `index_granularity` rows must be read for each key, and the entire compressed block must be decompressed for each column. We made the index sparse because we must be able to maintain trillions of rows per single server without noticeable memory consumption for the index. Also, because the primary key is sparse, it is not unique: it cannot check the existence of the key in the table at INSERT time. You could have many rows with the same key in a table.

--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@ -17,7 +17,7 @@ Main features:

 -   Partitions can be used if the [partitioning key](../../../engines/table-engines/mergetree-family/custom-partitioning-key.md) is specified.

-    ClickHouse supports certain operations with partitions that are more effective than general operations on the same data with the same result. ClickHouse also automatically cuts off the partition data where the partitioning key is specified in the query.
+    ClickHouse supports certain operations with partitions that are more efficient than general operations on the same data with the same result. ClickHouse also automatically cuts off the partition data where the partitioning key is specified in the query.

 -   Data replication support.

@ -83,7 +83,7 @@ For a description of parameters, see the [CREATE query description](../../../sql
    Expression must have one `Date` or `DateTime` column as a result. Example:
    `TTL date + INTERVAL 1 DAY`

-    Type of the rule `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'|GROUP BY` specifies an action to be done with the part if the expression is satisfied (reaches current time): removal of expired rows, moving a part (if expression is satisfied for all rows in a part) to specified disk (`TO DISK 'xxx'`) or to volume (`TO VOLUME 'xxx'`), or aggregating values in expired rows. Default type of the rule is removal (`DELETE`). List of multiple rules can specified, but there should be no more than one `DELETE` rule.
+    Type of the rule `DELETE|TO DISK 'xxx'|TO VOLUME 'xxx'|GROUP BY` specifies an action to be done with the part if the expression is satisfied (reaches current time): removal of expired rows, moving a part (if expression is satisfied for all rows in a part) to specified disk (`TO DISK 'xxx'`) or to volume (`TO VOLUME 'xxx'`), or aggregating values in expired rows. Default type of the rule is removal (`DELETE`). List of multiple rules can be specified, but there should be no more than one `DELETE` rule.

    For more details, see [TTL for columns and tables](#table_engine-mergetree-ttl)

@ -474,7 +474,7 @@ With `WHERE` clause you may specify which of the expired rows to delete or aggre

 `GROUP BY` expression must be a prefix of the table primary key.

-If a column is not part of the `GROUP BY` expression and is not set explicitely in the `SET` clause, in result row it contains an occasional value from the grouped rows (as if aggregate function `any` is applied to it).
+If a column is not part of the `GROUP BY` expression and is not set explicitly in the `SET` clause, in result row it contains an occasional value from the grouped rows (as if aggregate function `any` is applied to it).

 **Examples**

@ -695,7 +695,8 @@ PARTITION BY toYYYYMM(EventDate)
 SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
 ```

-The `default` storage policy implies using only one volume, which consists of only one disk given in `<path>`. Once a table is created, its storage policy cannot be changed.
+The `default` storage policy implies using only one volume, which consists of only one disk given in `<path>`.
+You can change the storage policy after table creation with the [ALTER TABLE ... MODIFY SETTING] query; the new policy should include all old disks and volumes with the same names.

 The number of threads performing background moves of data parts can be changed by [background_move_pool_size](../../../operations/settings/settings.md#background_move_pool_size) setting.

--- a/docs/en/engines/table-engines/mergetree-family/summingmergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/summingmergetree.md
@ -96,7 +96,7 @@ SELECT key, sum(value) FROM summtt GROUP BY key

 When data are inserted into a table, they are saved as-is. ClickHouse merges the inserted parts of data periodically and this is when rows with the same primary key are summed and replaced with one for each resulting part of data.

-ClickHouse can merge the data parts so that different resulting parts of data cat consist rows with the same primary key, i.e. the summation will be incomplete. Therefore (`SELECT`) an aggregate function [sum()](../../../sql-reference/aggregate-functions/reference/sum.md#agg_function-sum) and `GROUP BY` clause should be used in a query as described in the example above.
+ClickHouse can merge the data parts so that different resulting parts of data can contain rows with the same primary key, i.e. the summation will be incomplete. Therefore (`SELECT`) an aggregate function [sum()](../../../sql-reference/aggregate-functions/reference/sum.md#agg_function-sum) and `GROUP BY` clause should be used in a query as described in the example above.

 ### Common Rules for Summation {#common-rules-for-summation}

--- a/docs/en/interfaces/formats.md
+++ b/docs/en/interfaces/formats.md
@ -1249,10 +1249,13 @@ The table below shows supported data types and how they match ClickHouse [data t
 | `STRING`, `BINARY`           | [String](../sql-reference/data-types/string.md)           | `STRING`                     |
 | —                            | [FixedString](../sql-reference/data-types/fixedstring.md) | `STRING`                     |
 | `DECIMAL`                    | [Decimal](../sql-reference/data-types/decimal.md)         | `DECIMAL`                    |
+| `LIST`                       | [Array](../sql-reference/data-types/array.md)             | `LIST`                       |
+
+Arrays can be nested and can have a value of the `Nullable` type as an argument.

 ClickHouse supports configurable precision of `Decimal` type. The `INSERT` query treats the Parquet `DECIMAL` type as the ClickHouse `Decimal128` type.

-Unsupported Parquet data types: `DATE32`, `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
+Unsupported Parquet data types: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.

 Data types of ClickHouse table columns can differ from the corresponding fields of the Parquet data inserted. When inserting data, ClickHouse interprets data types according to the table above and then [cast](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) the data to that data type which is set for the ClickHouse table column.

@ -1276,7 +1279,54 @@ To exchange data with Hadoop, you can use [HDFS table engine](../engines/table-e

 [Apache Arrow](https://arrow.apache.org/) comes with two built-in columnar storage formats. ClickHouse supports read and write operations for these formats.

-`Arrow` is Apache Arrow’s “file mode” format. It is designed for in-memory random access.
+`Arrow` is Apache Arrow’s "file mode" format. It is designed for in-memory random access.
+
+### Data Types Matching {#data_types-matching-arrow}
+
+The table below shows supported data types and how they match ClickHouse [data types](../sql-reference/data-types/index.md) in `INSERT` and `SELECT` queries.
+
+| Arrow data type (`INSERT`) | ClickHouse data type                                | Arrow data type (`SELECT`) |
+|----------------------------|-----------------------------------------------------|----------------------------|
+| `UINT8`, `BOOL`            | [UInt8](../sql-reference/data-types/int-uint.md)    | `UINT8`                    |
+| `INT8`                     | [Int8](../sql-reference/data-types/int-uint.md)     | `INT8`                     |
+| `UINT16`                   | [UInt16](../sql-reference/data-types/int-uint.md)   | `UINT16`                   |
+| `INT16`                    | [Int16](../sql-reference/data-types/int-uint.md)    | `INT16`                    |
+| `UINT32`                   | [UInt32](../sql-reference/data-types/int-uint.md)   | `UINT32`                   |
+| `INT32`                    | [Int32](../sql-reference/data-types/int-uint.md)    | `INT32`                    |
+| `UINT64`                   | [UInt64](../sql-reference/data-types/int-uint.md)   | `UINT64`                   |
+| `INT64`                    | [Int64](../sql-reference/data-types/int-uint.md)    | `INT64`                    |
+| `FLOAT`, `HALF_FLOAT`      | [Float32](../sql-reference/data-types/float.md)     | `FLOAT32`                  |
+| `DOUBLE`                   | [Float64](../sql-reference/data-types/float.md)     | `FLOAT64`                  |
+| `DATE32`                   | [Date](../sql-reference/data-types/date.md)         | `UINT16`                   |
+| `DATE64`, `TIMESTAMP`      | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32`                   |
+| `STRING`, `BINARY`         | [String](../sql-reference/data-types/string.md)     | `UTF8`                     |
+| `STRING`, `BINARY`         | [FixedString](../sql-reference/data-types/fixedstring.md)   | `UTF8`                        |
+| `DECIMAL`                  | [Decimal](../sql-reference/data-types/decimal.md)   | `DECIMAL`                  |
+| `LIST`                     | [Array](../sql-reference/data-types/array.md)       | `LIST`                     |
+
+Arrays can be nested and can have a value of the `Nullable` type as an argument.
+
+ClickHouse supports configurable precision of the `Decimal` type. The `INSERT` query treats the Arrow `DECIMAL` type as the ClickHouse `Decimal128` type.
+
+Unsupported Arrow data types: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
+
+The data types of ClickHouse table columns do not have to match the corresponding Arrow data fields. When inserting data, ClickHouse interprets data types according to the table above and then [casts](../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) the data to the data type set for the ClickHouse table column.
+
+### Inserting Data {#inserting-data-arrow}
+
+You can insert Arrow data from a file into a ClickHouse table by the following command:
+
+``` bash
+$ cat filename.arrow | clickhouse-client --query="INSERT INTO some_table FORMAT Arrow"
+```
+
+### Selecting Data {#selecting-data-arrow}
+
+You can select data from a ClickHouse table and save them into some file in the Arrow format by the following command:
+
+``` bash
+$ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filename.arrow}
+```

 ## ArrowStream {#data-format-arrow-stream}

@ -1306,7 +1356,9 @@ The table below shows supported data types and how they match ClickHouse [data t
 | `DATE64`, `TIMESTAMP`    | [DateTime](../sql-reference/data-types/datetime.md) | `TIMESTAMP`              |
 | `STRING`, `BINARY`       | [String](../sql-reference/data-types/string.md)     | `BINARY`                 |
 | `DECIMAL`                | [Decimal](../sql-reference/data-types/decimal.md)   | `DECIMAL`                |
-| `-`                      | [Array](../sql-reference/data-types/array.md)       | `LIST`                   |
+| `LIST`                   | [Array](../sql-reference/data-types/array.md)       | `LIST`                   |
+
+Arrays can be nested and can have a value of the `Nullable` type as an argument.

 ClickHouse supports configurable precision of the `Decimal` type. The `INSERT` query treats the ORC `DECIMAL` type as the ClickHouse `Decimal128` type.

--- a/docs/en/introduction/adopters.md
+++ b/docs/en/introduction/adopters.md
@ -148,5 +148,10 @@ toc_title: Adopters
 | <a href="https://www.kakaocorp.com/" class="favicon">kakaocorp</a> | Internet company | — | — | — | [if(kakao)2020 conference](https://if.kakao.com/session/117) |
 | <a href="https://shop.okraina.ru/" class="favicon">ООО «МПЗ Богородский»</a> | Agriculture | — | — | — | [Article in Russian, November 2020](https://cloud.yandex.ru/cases/okraina) |
 | <a href="https://www.tesla.com/" class="favicon">Tesla</a> | Electric vehicle and clean energy company | — | — | — | [Vacancy description, March 2021](https://news.ycombinator.com/item?id=26306170) |
+| <a href="https://www.kgk-global.com/en/" class="favicon">KGK Global</a> | Vehicle monitoring | — | — | — | [Press release, June 2021](https://zoom.cnews.ru/news/item/530921) |
+| <a href="https://www.bilibili.com/" class="favicon">BiliBili</a> | Video sharing | — | — | — | [Blog post, June 2021](https://chowdera.com/2021/06/20210622012241476b.html) |
+| <a href="https://gigapipe.com/" class="favicon">Gigapipe</a> | Managed ClickHouse | Main product | — | — | [Official website](https://gigapipe.com/) |
+| <a href="https://www.hydrolix.io/" class="favicon">Hydrolix</a> | Cloud data platform | Main product | — | — | [Documentation](https://docs.hydrolix.io/guide/query) |
+| <a href="https://www.argedor.com/en/clickhouse/" class="favicon">Argedor</a> | ClickHouse support | — | — | — | [Official website](https://www.argedor.com/en/clickhouse/) |

 [Original article](https://clickhouse.tech/docs/en/introduction/adopters/) <!--hide-->
--- a/docs/en/operations/settings/merge-tree-settings.md
+++ b/docs/en/operations/settings/merge-tree-settings.md
@ -191,10 +191,12 @@ Possible values:

 Default value: 480.

-`fsync` is not called for new parts, so for some time new parts exist only in the server's RAM (OS cache). If the server is rebooted spontaneously, new parts can be lost or damaged.
-To protect data parts created by merges source parts are not deleted immediately. After merging several parts into a new part, ClickHouse marks the original parts as inactive and deletes them only after `old_parts_lifetime` seconds.
+After merging several parts into a new part, ClickHouse marks the original parts as inactive and deletes them only after `old_parts_lifetime` seconds.
 Inactive parts are removed if they are not used by current queries, i.e. if the `refcount` of the part is zero.

+`fsync` is not called for new parts, so for some time new parts exist only in the server's RAM (OS cache). If the server is rebooted spontaneously, new parts can be lost or damaged.
+To protect data, inactive parts are not deleted immediately.
+
 During startup ClickHouse checks the integrity of the parts.
 If the merged part is damaged ClickHouse returns the inactive parts to the active list, and later merges them again. Then the damaged part is renamed (the `broken_` prefix is added) and moved to the `detached` folder.
 If the merged part is not damaged, then the original inactive parts are renamed (the `ignored_` prefix is added) and moved to the `detached` folder.
@ -214,7 +216,7 @@ Default value: 161061273600 (150 GB).

 The merge scheduler periodically analyzes the sizes and number of parts in partitions, and if there is enough free resources in the pool, it starts background merges. Merges occur until the total size of the source parts is less than `max_bytes_to_merge_at_max_space_in_pool`.

-Merges initiated by `optimize final` ignore `max_bytes_to_merge_at_max_space_in_pool` and merge parts only taking into account available resources (free disk's space) until one part remains in the partition.
+Merges initiated by [OPTIMIZE FINAL](../../sql-reference/statements/optimize.md) ignore `max_bytes_to_merge_at_max_space_in_pool` and merge parts only taking into account available resources (free disk's space) until one part remains in the partition.

 ## max_bytes_to_merge_at_min_space_in_pool {#max-bytes-to-merge-at-min-space-in-pool}

@ -252,6 +254,7 @@ Possible values:
 Default value: auto (number of CPU cores).

 During startup ClickHouse reads all parts of all tables (reads files with metadata of parts) to build a list of all parts in memory. In some systems with a large number of parts this process can take a long time, and this time might be shortened by increasing `max_part_loading_threads` (if this process is not CPU and disk I/O bound).
+
 ## max_partitions_to_read {#max-partitions-to-read}

 Limits the maximum number of partitions that can be accessed in one query.
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@ -1591,6 +1591,18 @@ FORMAT PrettyCompactMonoBlock

 Default value: 0

+## distributed_push_down_limit {#distributed-push-down-limit}
+
+LIMIT will be applied on each shard separately. Usually you don't need to use it, since this will be done automatically if it is possible, i.e. for simple query `SELECT FROM LIMIT`.
+
+Possible values:
+
+-  0 - Disabled
+-  1 - Enabled
+
+!!! note "Note"
+    With this setting the result of the query may be inaccurate.
+
 ## optimize_skip_unused_shards_limit {#optimize-skip-unused-shards-limit}

 Limit for number of sharding key values, turns off `optimize_skip_unused_shards` if the limit is reached.
@ -2069,7 +2081,7 @@ Possible values:

 -   Any positive integer.

-Default value: 16.
+Default value: 128.

 ## background_fetches_pool_size {#background_fetches_pool_size}

@ -2549,17 +2561,6 @@ Result
 └──────────────────────────┴───────┴───────────────────────────────────────────────────────┘
 ```

-## allow_experimental_bigint_types {#allow_experimental_bigint_types}
-
-Enables or disables integer values exceeding the range that is supported by the int data type.
-
-Possible values:
-
-   1 — The bigint data type is enabled.
-   0 — The bigint data type is disabled.
-
-Default value: `0`.
-
 ## persistent {#persistent}

 Disables persistency for the [Set](../../engines/table-engines/special/set.md#set) and [Join](../../engines/table-engines/special/join.md#join) table engines.
--- a/docs/en/sql-reference/aggregate-functions/parametric-functions.md
+++ b/docs/en/sql-reference/aggregate-functions/parametric-functions.md
@ -116,7 +116,7 @@ Type: `UInt8`.

 -   `.*` — Matches any number of events. You do not need conditional arguments to match this element of the pattern.

-   `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` matches events that occur more than 1800 seconds from each other. An arbitrary number of any events can lay between these events. You can use the `>=`, `>`, `<`, `<=` operators.
+-   `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` matches events that occur more than 1800 seconds from each other. An arbitrary number of any events can lie between these events. You can use the `>=`, `>`, `<`, `<=`, `==` operators.

 **Examples**

@ -509,7 +509,7 @@ Same behavior as [sumMap](../../sql-reference/aggregate-functions/reference/summ

 ## sequenceNextNode {#sequenceNextNode}

-Returns a value of next event that matched an event chain.
+Returns a value of the next event that matched an event chain.

 _Experimental function, `SET allow_experimental_funnel_functions = 1` to enable it._

@ -520,31 +520,34 @@ sequenceNextNode(direction, base)(timestamp, event_column, base_condition, event
 ```

 **Parameters**
-   `direction` - Used to navigate to directions.
-    - forward : Moving forward
-    - backward: Moving backward

-   `base` - Used to set the base point.
-    - head : Set the base point to the first event
-    - tail : Set the base point to the last event
-    - first_match : Set the base point to the first matched event1
-    - last_match : Set the base point to the last matched event1
+-   `direction` — Used to navigate to directions.
+    - forward — Moving forward.
+    - backward — Moving backward.
+
+-   `base` — Used to set the base point.
+    - head — Set the base point to the first event.
+    - tail — Set the base point to the last event.
+    - first_match — Set the base point to the first matched `event1`.
+    - last_match — Set the base point to the last matched `event1`.
    
 **Arguments**
-   `timestamp` — Name of the column containing the timestamp. Data types supported: `Date`, `DateTime` and other unsigned integer types.
-   `event_column` — Name of the column containing the value of the next event to be returned. Data types supported: `String` and `Nullable(String)`
+
+-   `timestamp` — Name of the column containing the timestamp. Data types supported: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) and other unsigned integer types.
+-   `event_column` — Name of the column containing the value of the next event to be returned. Data types supported: [String](../../sql-reference/data-types/string.md) and [Nullable(String)](../../sql-reference/data-types/nullable.md).
 -   `base_condition` — Condition that the base point must fulfill.
-   `cond` — Conditions describing the chain of events. `UInt8`
+-   `event1`, `event2`, ... — Conditions describing the chain of events. [UInt8](../../sql-reference/data-types/int-uint.md).

-**Returned value**
-  `event_column[next_index]` - if the pattern is matched and next value exists.
-  `NULL` - if the pattern isn’t matched or next value doesn't exist.
+**Returned values**

-Type: `Nullable(String)`.
+-  `event_column[next_index]` — If the pattern is matched and next value exists.
+-  `NULL` — If the pattern isn’t matched or next value doesn't exist.
+
+Type: [Nullable(String)](../../sql-reference/data-types/nullable.md).

 **Example**

-It can be used when events are A->B->C->E->F and you want to know the event following B->C, which is E.
+It can be used when events are A->B->C->D->E and you want to know the event following B->C, which is D.

 The query statement searching the event following A->B:

@ -557,7 +560,7 @@ ENGINE = MergeTree()
 PARTITION BY toYYYYMMDD(dt) 
 ORDER BY id;

-INSERT INTO test_flow VALUES (1, 1, 'A') (2, 1, 'B') (3, 1, 'C') (4, 1, 'E') (5, 1, 'F');
+INSERT INTO test_flow VALUES (1, 1, 'A') (2, 1, 'B') (3, 1, 'C') (4, 1, 'D') (5, 1, 'E');

 SELECT id, sequenceNextNode('forward', 'head')(dt, page, page = 'A', page = 'A', page = 'B') as next_flow FROM test_flow GROUP BY id;
 ```
@ -572,7 +575,7 @@ Result:

 **Behavior for `forward` and `head`**

-```SQL
+``` sql
 ALTER TABLE test_flow DELETE WHERE 1 = 1 settings mutations_sync = 1;

 INSERT INTO test_flow VALUES (1, 1, 'Home') (2, 1, 'Gift') (3, 1, 'Exit');
@ -580,7 +583,7 @@ INSERT INTO test_flow VALUES (1, 2, 'Home') (2, 2, 'Home') (3, 2, 'Gift') (4, 2,
 INSERT INTO test_flow VALUES (1, 3, 'Gift') (2, 3, 'Home') (3, 3, 'Gift') (4, 3, 'Basket');
 ```

-```SQL
+``` sql
 SELECT id, sequenceNextNode('forward', 'head')(dt, page, page = 'Home', page = 'Home', page = 'Gift') FROM test_flow GROUP BY id;
 
                  dt   id   page
@ -601,7 +604,7 @@ SELECT id, sequenceNextNode('forward', 'head')(dt, page, page = 'Home', page = '

 **Behavior for `backward` and `tail`**

-```SQL
+``` sql
 SELECT id, sequenceNextNode('backward', 'tail')(dt, page, page = 'Basket', page = 'Basket', page = 'Gift') FROM test_flow GROUP BY id;

                 dt   id   page
@ -623,7 +626,7 @@ SELECT id, sequenceNextNode('backward', 'tail')(dt, page, page = 'Basket', page

 **Behavior for `forward` and `first_match`**

-```SQL
+``` sql
 SELECT id, sequenceNextNode('forward', 'first_match')(dt, page, page = 'Gift', page = 'Gift') FROM test_flow GROUP BY id;

                 dt   id   page
@ -637,12 +640,12 @@ SELECT id, sequenceNextNode('forward', 'first_match')(dt, page, page = 'Gift', p
 1970-01-01 09:00:04    2   Basket  The result
                                     
 1970-01-01 09:00:01    3   Gift // Base point
-1970-01-01 09:00:02    3   Home // Thre result
+1970-01-01 09:00:02    3   Home // The result
 1970-01-01 09:00:03    3   Gift   
 1970-01-01 09:00:04    3   Basket    
 ```

-```SQL
+``` sql
 SELECT id, sequenceNextNode('forward', 'first_match')(dt, page, page = 'Gift', page = 'Gift', page = 'Home') FROM test_flow GROUP BY id;

                 dt   id   page
@ -664,7 +667,7 @@ SELECT id, sequenceNextNode('forward', 'first_match')(dt, page, page = 'Gift', p

 **Behavior for `backward` and `last_match`**

-```SQL
+``` sql
 SELECT id, sequenceNextNode('backward', 'last_match')(dt, page, page = 'Gift', page = 'Gift') FROM test_flow GROUP BY id;

                 dt   id   page
@ -683,7 +686,7 @@ SELECT id, sequenceNextNode('backward', 'last_match')(dt, page, page = 'Gift', p
 1970-01-01 09:00:04    3   Basket    
 ```

-```SQL
+``` sql
 SELECT id, sequenceNextNode('backward', 'last_match')(dt, page, page = 'Gift', page = 'Gift', page = 'Home') FROM test_flow GROUP BY id;

                 dt   id   page
@ -705,7 +708,7 @@ SELECT id, sequenceNextNode('backward', 'last_match')(dt, page, page = 'Gift', p

 **Behavior for `base_condition`**

-```SQL
+``` sql
 CREATE TABLE test_flow_basecond
 (
    `dt` DateTime,
@ -715,47 +718,47 @@ CREATE TABLE test_flow_basecond
 )
 ENGINE = MergeTree
 PARTITION BY toYYYYMMDD(dt)
-ORDER BY id
+ORDER BY id;

 INSERT INTO test_flow_basecond VALUES (1, 1, 'A', 'ref4') (2, 1, 'A', 'ref3') (3, 1, 'B', 'ref2') (4, 1, 'B', 'ref1');
 ```

-```SQL
+``` sql
 SELECT id, sequenceNextNode('forward', 'head')(dt, page, ref = 'ref1', page = 'A') FROM test_flow_basecond GROUP BY id;

                  dt   id   page   ref 
- 1970-01-01 09:00:01    1   A      ref4 // The head can't be base point becasue the ref column of the head unmatched with 'ref1'.
+ 1970-01-01 09:00:01    1   A      ref4 // The head can not be base point because the ref column of the head unmatched with 'ref1'.
 1970-01-01 09:00:02    1   A      ref3 
 1970-01-01 09:00:03    1   B      ref2 
 1970-01-01 09:00:04    1   B      ref1 
 ```

-```SQL
+``` sql
 SELECT id, sequenceNextNode('backward', 'tail')(dt, page, ref = 'ref4', page = 'B') FROM test_flow_basecond GROUP BY id;

                  dt   id   page   ref 
 1970-01-01 09:00:01    1   A      ref4
 1970-01-01 09:00:02    1   A      ref3 
 1970-01-01 09:00:03    1   B      ref2 
- 1970-01-01 09:00:04    1   B      ref1 // The tail can't be base point becasue the ref column of the tail unmatched with 'ref4'.
+ 1970-01-01 09:00:04    1   B      ref1 // The tail can not be base point because the ref column of the tail unmatched with 'ref4'.
 ```

-```SQL
+``` sql
 SELECT id, sequenceNextNode('forward', 'first_match')(dt, page, ref = 'ref3', page = 'A') FROM test_flow_basecond GROUP BY id;

                  dt   id   page   ref 
- 1970-01-01 09:00:01    1   A      ref4 // This row can't be base point becasue the ref column unmatched with 'ref3'.
+ 1970-01-01 09:00:01    1   A      ref4 // This row can not be base point because the ref column unmatched with 'ref3'.
 1970-01-01 09:00:02    1   A      ref3 // Base point
 1970-01-01 09:00:03    1   B      ref2 // The result
 1970-01-01 09:00:04    1   B      ref1 
 ```

-```SQL
+``` sql
 SELECT id, sequenceNextNode('backward', 'last_match')(dt, page, ref = 'ref2', page = 'B') FROM test_flow_basecond GROUP BY id;

                  dt   id   page   ref 
 1970-01-01 09:00:01    1   A      ref4
 1970-01-01 09:00:02    1   A      ref3 // The result
 1970-01-01 09:00:03    1   B      ref2 // Base point
- 1970-01-01 09:00:04    1   B      ref1 // This row can't be base point becasue the ref column unmatched with 'ref2'. 
+ 1970-01-01 09:00:04    1   B      ref1 // This row can not be base point because the ref column unmatched with 'ref2'. 
 ```
--- a/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md
+++ b/docs/en/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md
@ -60,7 +60,8 @@ SETTINGS(format_csv_allow_single_quotes = 0)
 Types of sources (`source_type`):

 -   [Local file](#dicts-external_dicts_dict_sources-local_file)
-   [Executable file](#dicts-external_dicts_dict_sources-executable)
+-   [Executable File](#dicts-external_dicts_dict_sources-executable)
+-   [Executable Pool](#dicts-external_dicts_dict_sources-executable_pool)
 -   [HTTP(s)](#dicts-external_dicts_dict_sources-http)
 -   DBMS
    -   [ODBC](#dicts-external_dicts_dict_sources-odbc)
@ -94,7 +95,7 @@ SOURCE(FILE(path './user_files/os.tsv' format 'TabSeparated'))
 Setting fields:

 -   `path` – The absolute path to the file.
-   `format` – The file format. All the formats described in “[Formats](../../../interfaces/formats.md#formats)” are supported.
+-   `format` – The file format. All the formats described in [Formats](../../../interfaces/formats.md#formats) are supported.

 When dictionary with source `FILE` is created via DDL command (`CREATE DICTIONARY ...`), the source file needs to be located in `user_files` directory, to prevent DB users accessing arbitrary file on ClickHouse node.

@ -113,21 +114,24 @@ Example of settings:
    <executable>
        <command>cat /opt/dictionaries/os.tsv</command>
        <format>TabSeparated</format>
+        <implicit_key>false</implicit_key>
    </executable>
 </source>
 ```

 Setting fields:

-   `command` – The absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
-   `format` – The file format. All the formats described in “[Formats](../../../interfaces/formats.md#formats)” are supported.
-   `implicit_key` - The executable source file can return only values, and the correspondence to the requested keys is determined implicitly - by the order of rows in the result. Default value is false.
+-   `command` — The absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
+-   `format` — The file format. All the formats described in [Formats](../../../interfaces/formats.md#formats) are supported.
+-   `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false.

 That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled, otherwise, the DB user would be able to execute arbitrary binary on ClickHouse node.

 ## Executable Pool {#dicts-external_dicts_dict_sources-executable_pool}

-Executable pool allows loading data from pool of processes. This source does not work with dictionary layouts that need to load all data from source. Executable pool works if the dictionary is stored using `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, `complex_key_direct` layouts. Executable pool will spawn pool of processes with specified command and keep them running until they exit. The program should read data from STDIN while it is available and output result to STDOUT, and it can wait for next block of data on stdin. ClickHouse will not close STDIN after processing a block of data but will pipe another chunk of data when needed. The executable script should be ready for this way of data processing - it should poll STDIN and flush data to STDOUT early.
+Executable pool allows loading data from pool of processes. This source does not work with dictionary layouts that need to load all data from source. Executable pool works if the dictionary [is stored](external-dicts-dict-layout.md#ways-to-store-dictionaries-in-memory) using `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, `complex_key_direct` layouts. 
+
+Executable pool will spawn pool of processes with specified command and keep them running until they exit. The program should read data from STDIN while it is available and output result to STDOUT, and it can wait for next block of data on STDIN. ClickHouse will not close STDIN after processing a block of data but will pipe another chunk of data when needed. The executable script should be ready for this way of data processing — it should poll STDIN and flush data to STDOUT early.

 Example of settings:

@ -145,12 +149,12 @@ Example of settings:

 Setting fields:

-   `command` – The absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
-   `format` – The file format. All the formats described in “[Formats](../../../interfaces/formats.md#formats)” are supported.
-   `pool_size` - Size of pool. If 0 is specified as `pool_size` then there is no pool size restrictions.
-   `command_termination_timeout` - Executable pool script, should contain main read-write loop. After dictionary is destroyed, pipe is closed, and executable file will have command_termination_timeout seconds to shutdown, before ClickHouse will send SIGTERM signal to child process. Specified in seconds. Default value is 10. Optional parameter.
-   `max_command_execution_time` - Maximum executable script command execution time for processing block of data. Specified in seconds. Default value is 10. Optional parameter.
-   `implicit_key` - The executable source file can return only values, and the correspondence to the requested keys is determined implicitly - by the order of rows in the result. Default value is false. Optional parameter.
+-   `command` — The absolute path to the executable file, or the file name (if the program directory is written to `PATH`).
+-   `format` — The file format. All the formats described in [Formats](../../../interfaces/formats.md#formats) are supported.
+-   `pool_size` — Size of pool. If 0 is specified as `pool_size` then there are no pool size restrictions.
+-   `command_termination_timeout` — Executable pool script should contain main read-write loop. After the dictionary is destroyed, the pipe is closed, and the executable file will have `command_termination_timeout` seconds to shut down before ClickHouse sends a SIGTERM signal to the child process. Specified in seconds. Default value is 10. Optional parameter.
+-   `max_command_execution_time` — Maximum executable script command execution time for processing block of data. Specified in seconds. Default value is 10. Optional parameter.
+-   `implicit_key` — The executable source file can return only values, and the correspondence to the requested keys is determined implicitly — by the order of rows in the result. Default value is false. Optional parameter.

 That dictionary source can be configured only via XML configuration. Creating dictionaries with executable source via DDL is disabled, otherwise, the DB user would be able to execute arbitrary binary on ClickHouse node.

--- a/docs/en/sql-reference/functions/date-time-functions.md
+++ b/docs/en/sql-reference/functions/date-time-functions.md
@ -963,7 +963,7 @@ formatDateTime(Time, Format\[, Timezone\])

 **Returned value(s)**

-Returnes time and date values according to the determined format.
+Returns time and date values according to the determined format.

 **Replacement fields**
 Using replacement fields, you can define a pattern for the resulting string. “Example” column shows formatting result for `2018-01-02 22:33:44`.
@ -1012,6 +1012,45 @@ Result:
 └────────────────────────────────────────────┘
 ```

+## dateName {#datename}
+
+Returns the specified part of the date.
+
+**Syntax**
+
+``` sql
+dateName(date_part, date)
+```
+
+**Arguments**
+
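+-   `date_part` — Date part. Possible values: 'year', 'quarter', 'month', 'week', 'dayofyear', 'day', 'weekday', 'hour', 'minute', 'second'. [String](../../sql-reference/data-types/string.md).
+-   `date` — Date. [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md) or [DateTime64](../../sql-reference/data-types/datetime64.md).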
+
+
+**Returned value**
+
+-   Specified date part of date.
+
+Type: [String](../../sql-reference/data-types/string.md#string)
+
+**Example**
+
+Query:
+
+```sql
+WITH toDateTime('2021-04-14 11:22:33') AS date_value
+SELECT dateName('year', date_value), dateName('month', date_value), dateName('day', date_value);
+```
+
+Result:
+
+```text
+┌─dateName('year', date_value)─┬─dateName('month', date_value)─┬─dateName('day', date_value)─┐
+│ 2021                         │ April                         │ 14                          │
+└──────────────────────────────┴───────────────────────────────┴─────────────────────────────┘
+```
+
 ## FROM\_UNIXTIME {#fromunixfime}

 Function converts Unix timestamp to a calendar date and a time of a day. When there is only a single argument of [Integer](../../sql-reference/data-types/int-uint.md) type, it acts in the same way as [toDateTime](../../sql-reference/functions/type-conversion-functions.md#todatetime) and return [DateTime](../../sql-reference/data-types/datetime.md) type.
--- a/docs/en/sql-reference/functions/encoding-functions.md
+++ b/docs/en/sql-reference/functions/encoding-functions.md
@ -221,3 +221,51 @@ Accepts an integer. Returns a string containing the list of powers of two that t
 ## bitmaskToArray(num) {#bitmasktoarraynum}

 Accepts an integer. Returns an array of UInt64 numbers containing the list of powers of two that total the source number when summed. Numbers in the array are in ascending order.
+
+## bitPositionsToArray(num) {#bitpositionstoarraynum}
+
+Accepts an integer and converts it to an unsigned integer type. Returns an array of UInt64 numbers containing the list of positions of bits that are equal to 1. Numbers in the array are in ascending order.
+
+**Syntax**
+
+```sql
+bitPositionsToArray(arg)
+```
+
+**Arguments**
+
+-   `arg` — Integer value. Types: [Int/UInt](../../sql-reference/data-types/int-uint.md)
+
+**Returned value**
+
+An array of UInt64 numbers containing the list of positions of bits that are equal to 1. Numbers in the array are in ascending order.
+
+**Example**
+
+Query:
+
+``` sql
+SELECT bitPositionsToArray(toInt8(1)) AS bit_positions;
+```
+
+Result:
+
+``` text
+┌─bit_positions─┐
+│ [0]           │
+└───────────────┘
+```
+
+Query:
+
+``` sql
+select bitPositionsToArray(toInt8(-1)) as bit_positions;
+```
+
+Result:
+
+``` text
+┌─bit_positions─────┐
+│ [0,1,2,3,4,5,6,7] │
+└───────────────────┘
+```
--- a/docs/en/sql-reference/functions/geo/h3.md
+++ b/docs/en/sql-reference/functions/geo/h3.md
@ -195,6 +195,41 @@ Result:
 └────────────────────┘
 ```

+## h3ToGeo {#h3togeo}
+
+Returns `(lon, lat)` that corresponds to the provided H3 index.
+
+**Syntax**
+
+``` sql
+h3ToGeo(h3Index)
+```
+
+**Arguments**
+
+-   `h3Index` — H3 Index. Type: [UInt64](../../../sql-reference/data-types/int-uint.md).
+
+**Returned values**
+
+-   `lon` — Longitude. Type: [Float64](../../../sql-reference/data-types/float.md).
+-   `lat` — Latitude. Type: [Float64](../../../sql-reference/data-types/float.md).
+
+
+**Example**
+
+Query:
+
+``` sql
+SELECT h3ToGeo(644325524701193974) coordinates;
+```
+
+Result:
+
+``` text
+┌─coordinates───────────────────────────┐
+│ (37.79506616830252,55.71290243145668) │
+└───────────────────────────────────────┘
+```
 ## h3kRing {#h3kring}

 Lists all the [H3](#h3index) hexagons in the raduis of `k` from the given hexagon in random order.
--- a/docs/en/sql-reference/operators/index.md
+++ b/docs/en/sql-reference/operators/index.md
@ -188,6 +188,24 @@ SELECT now() AS current_date_time, current_date_time + INTERVAL '4' day + INTERV
 └─────────────────────┴────────────────────────────────────────────────────────────┘
 ```

+You can work with dates without using `INTERVAL`, just by adding or subtracting seconds, minutes, and hours. For example, an interval of one day can be set by adding `60*60*24`.
+
+!!! note "Note"
+    The `INTERVAL` syntax or the `addDays` function is always preferred. Simple addition or subtraction (syntax like `now() + ...`) does not take time settings into account, for example, daylight saving time.
+
+
+Examples:
+
+``` sql
+SELECT toDateTime('2014-10-26 00:00:00', 'Europe/Moscow') AS time, time + 60 * 60 * 24 AS time_plus_24_hours, time + toIntervalDay(1) AS time_plus_1_day;
+```
+
+``` text
+┌────────────────time─┬──time_plus_24_hours─┬─────time_plus_1_day─┐
+│ 2014-10-26 00:00:00 │ 2014-10-26 23:00:00 │ 2014-10-27 00:00:00 │
+└─────────────────────┴─────────────────────┴─────────────────────┘
+```
+
 **See Also**

 -   [Interval](../../sql-reference/data-types/special-data-types/interval.md) data type
@ -295,4 +313,3 @@ SELECT * FROM t_null WHERE y IS NOT NULL
 │ 2 │ 3 │
 └───┴───┘
 ```
-
--- a/docs/en/sql-reference/statements/show.md
+++ b/docs/en/sql-reference/statements/show.md
@ -366,9 +366,9 @@ Returns a list of clusters. All available clusters are listed in the [system.clu

 ``` sql
 SHOW CLUSTER '<name>'
-SWOW CLUSTERS [LIKE|NOT LIKE '<pattern>'] [LIMIT <N>]
+SHOW CLUSTERS [LIKE|NOT LIKE '<pattern>'] [LIMIT <N>]
 ```
-### Examples 
+### Examples {#show-cluster-examples}

 Query:

--- a/docs/en/sql-reference/statements/system.md
+++ b/docs/en/sql-reference/statements/system.md
@ -38,6 +38,7 @@ The list of available `SYSTEM` statements:
 -   [START REPLICATION QUEUES](#query_language-system-start-replication-queues)
 -   [SYNC REPLICA](#query_language-system-sync-replica)
 -   [RESTART REPLICA](#query_language-system-restart-replica)
+-   [RESTORE REPLICA](#query_language-system-restore-replica)
 -   [RESTART REPLICAS](#query_language-system-restart-replicas)

 ## RELOAD EMBEDDED DICTIONARIES {#query_language-system-reload-emdedded-dictionaries}
@ -290,13 +291,60 @@ After running this statement the `[db.]replicated_merge_tree_family_table_name`

 ### RESTART REPLICA {#query_language-system-restart-replica}

-Provides possibility to reinitialize Zookeeper sessions state for `ReplicatedMergeTree` table, will compare current state with Zookeeper as source of true and add tasks to Zookeeper queue if needed
-Initialization replication quene based on ZooKeeper date happens in the same way as `ATTACH TABLE` statement. For a short time the table will be unavailable for any operations.
+Provides the possibility to reinitialize the ZooKeeper session state for a `ReplicatedMergeTree` table. Compares the current state with ZooKeeper as the source of truth and adds tasks to the ZooKeeper queue if needed.
+Initialization of the replication queue based on ZooKeeper data happens in the same way as for the `ATTACH TABLE` statement. For a short time, the table will be unavailable for any operations.

 ``` sql
 SYSTEM RESTART REPLICA [db.]replicated_merge_tree_family_table_name
 ```

+### RESTORE REPLICA {#query_language-system-restore-replica}
+
+Restores a replica if data is [possibly] present but Zookeeper metadata is lost.
+
+Works only on readonly `ReplicatedMergeTree` tables.
+
+One may execute query after:
+
+  - ZooKeeper root `/` loss.
+  - Replicas path `/replicas` loss.
+  - Individual replica path `/replicas/replica_name/` loss.
+
+Replica attaches locally found parts and sends info about them to Zookeeper.
+Parts present on a replica before metadata loss are not re-fetched from other replicas if they are not outdated
+(so replica restoration does not mean re-downloading all data over the network).
+
+Caveat: parts in all states are moved to `detached/` folder. Parts active before data loss (Committed) are attached.
+
+#### Syntax
+
+```sql
+SYSTEM RESTORE REPLICA [db.]replicated_merge_tree_family_table_name [ON CLUSTER cluster_name]
+```
+
+Alternative syntax:
+
+```sql
+SYSTEM RESTORE REPLICA [ON CLUSTER cluster_name] [db.]replicated_merge_tree_family_table_name
+```
+
+#### Example
+
+```sql
+-- Creating table on multiple servers
+
+CREATE TABLE test(n UInt32)
+ENGINE = ReplicatedMergeTree('/clickhouse/tables/test/', '{replica}')
+ORDER BY n PARTITION BY n % 10;
+
+INSERT INTO test SELECT * FROM numbers(1000);
+
+-- zookeeper_delete_path("/clickhouse/tables/test", recursive=True) <- root loss.
+
+SYSTEM RESTART REPLICA test; -- Table will attach as readonly as metadata is missing.
+SYSTEM RESTORE REPLICA test; -- Needs to be executed on every replica. Alternatively: SYSTEM RESTORE REPLICA test ON CLUSTER cluster
+```
+
 ### RESTART REPLICAS {#query_language-system-restart-replicas}

 Provides possibility to reinitialize Zookeeper sessions state for all `ReplicatedMergeTree` tables, will compare current state with Zookeeper as source of true and add tasks to Zookeeper queue if needed
--- a/docs/ja/sql-reference/aggregate-functions/parametric-functions.md
+++ b/docs/ja/sql-reference/aggregate-functions/parametric-functions.md
@ -113,7 +113,7 @@ sequenceMatch(pattern)(timestamp, cond1, cond2, ...)

 -   `.*` — Matches any number of events. You don't need conditional arguments to match this element of the pattern.

-   `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` お互いから1800秒以上発生するイベントと一致します。 これらのイベントの間に任意の数のイベントを配置できます。 を使用することができます `>=`, `>`, `<`, `<=` 演算子。
+-   `(?t operator value)` — Sets the time in seconds that should separate two events. For example, pattern `(?1)(?t>1800)(?2)` お互いから1800秒以上発生するイベントと一致します。 これらのイベントの間に任意の数のイベントを配置できます。 を使用することができます `>=`, `>`, `<`, `<=`, `==` 演算子。

 **例**

--- a/docs/ru/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/ru/engines/table-engines/mergetree-family/mergetree.md
@ -685,7 +685,9 @@ PARTITION BY toYYYYMM(EventDate)
 SETTINGS storage_policy = 'moving_from_ssd_to_hdd'
 ```

-По умолчанию используется политика хранения `default` в которой есть один том и один диск, указанный в `<path>`. В данный момент менять политику хранения после создания таблицы нельзя.
+По умолчанию используется политика хранения `default`, в которой есть один том и один диск, указанный в `<path>`.
+Изменить политику хранения после создания таблицы можно при помощи запроса `ALTER TABLE ... MODIFY SETTING`. При этом необходимо учесть, что новая политика должна содержать все тома и диски предыдущей политики с теми же именами.
+

 Количество потоков для фоновых перемещений кусков между дисками можно изменить с помощью настройки [background_move_pool_size](../../../operations/settings/settings.md#background_move_pool_size)

--- a/docs/ru/interfaces/formats.md
+++ b/docs/ru/interfaces/formats.md
@ -1168,12 +1168,15 @@ SELECT * FROM topic1_stream;
 | `STRING`, `BINARY`            | [String](../sql-reference/data-types/string.md)           | `STRING`                      |
 | —                             | [FixedString](../sql-reference/data-types/fixedstring.md) | `STRING`                      |
 | `DECIMAL`                     | [Decimal](../sql-reference/data-types/decimal.md)         | `DECIMAL`                     |
+| `LIST`                        | [Array](../sql-reference/data-types/array.md)             | `LIST`                        |

-ClickHouse поддерживает настраиваемую точность для формата `Decimal`. При обработке запроса `INSERT`, ClickHouse обрабатывает тип данных Parquet `DECIMAL` как `Decimal128`.
+Массивы могут быть вложенными и иметь в качестве аргумента значение типа `Nullable`.

-Неподдержанные типы данных Parquet: `DATE32`, `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
+ClickHouse поддерживает настраиваемую точность для формата `Decimal`. При выполнении запроса `INSERT` ClickHouse обрабатывает тип данных Parquet `DECIMAL` как `Decimal128`.

-Типы данных столбцов в ClickHouse могут отличаться от типов данных соответствующих полей файла в формате Parquet. При вставке данных, ClickHouse интерпретирует типы данных в соответствии с таблицей выше, а затем [приводит](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) данные к тому типу, который установлен для столбца таблицы.
+Неподдерживаемые типы данных Parquet: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
+
+Типы данных столбцов в ClickHouse могут отличаться от типов данных соответствующих полей файла в формате Parquet. При вставке данных ClickHouse интерпретирует типы данных в соответствии с таблицей выше, а затем [приводит](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) данные к тому типу, который установлен для столбца таблицы.

 ### Вставка и выборка данных {#vstavka-i-vyborka-dannykh}

@ -1197,6 +1200,53 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_

 `Arrow` — это Apache Arrow's "file mode" формат. Он предназначен для произвольного доступа в памяти.

+### Соответствие типов данных {#data_types-matching-arrow}
+
+Таблица ниже содержит поддерживаемые типы данных и их соответствие [типам данных](../sql-reference/data-types/index.md) ClickHouse для запросов `INSERT` и `SELECT`.
+
+| Тип данных Arrow (`INSERT`) | Тип данных ClickHouse                               | Тип данных Arrow (`SELECT`) |
+|-----------------------------|-----------------------------------------------------|-----------------------------|
+| `UINT8`, `BOOL`             | [UInt8](../sql-reference/data-types/int-uint.md)    | `UINT8`                     |
+| `INT8`                      | [Int8](../sql-reference/data-types/int-uint.md)     | `INT8`                      |
+| `UINT16`                    | [UInt16](../sql-reference/data-types/int-uint.md)   | `UINT16`                    |
+| `INT16`                     | [Int16](../sql-reference/data-types/int-uint.md)    | `INT16`                     |
+| `UINT32`                    | [UInt32](../sql-reference/data-types/int-uint.md)   | `UINT32`                    |
+| `INT32`                     | [Int32](../sql-reference/data-types/int-uint.md)    | `INT32`                     |
+| `UINT64`                    | [UInt64](../sql-reference/data-types/int-uint.md)   | `UINT64`                    |
+| `INT64`                     | [Int64](../sql-reference/data-types/int-uint.md)    | `INT64`                     |
+| `FLOAT`, `HALF_FLOAT`       | [Float32](../sql-reference/data-types/float.md)     | `FLOAT32`                   |
+| `DOUBLE`                    | [Float64](../sql-reference/data-types/float.md)     | `FLOAT64`                   |
+| `DATE32`                    | [Date](../sql-reference/data-types/date.md)         | `UINT16`                    |
+| `DATE64`, `TIMESTAMP`       | [DateTime](../sql-reference/data-types/datetime.md) | `UINT32`                    |
+| `STRING`, `BINARY`          | [String](../sql-reference/data-types/string.md)     | `UTF8`                      |
+| `STRING`, `BINARY`          | [FixedString](../sql-reference/data-types/fixedstring.md)   | `UTF8`                        |
+| `DECIMAL`                   | [Decimal](../sql-reference/data-types/decimal.md)   | `DECIMAL`                   |
+| `LIST`                      | [Array](../sql-reference/data-types/array.md)       | `LIST`                      |
+
+Массивы могут быть вложенными и иметь в качестве аргумента значение типа `Nullable`.
+
+ClickHouse поддерживает настраиваемую точность для формата `Decimal`. При выполнении запроса `INSERT` ClickHouse обрабатывает тип данных Arrow `DECIMAL` как `Decimal128`.
+
+Неподдерживаемые типы данных Arrow: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.
+
+Типы данных столбцов в ClickHouse могут отличаться от типов данных соответствующих полей файла в формате Arrow. При вставке данных ClickHouse интерпретирует типы данных в соответствии с таблицей выше, а затем [приводит](../sql-reference/functions/type-conversion-functions/#type_conversion_function-cast) данные к тому типу, который установлен для столбца таблицы.
+
+### Вставка данных {#inserting-data-arrow}
+
+Чтобы вставить в ClickHouse данные из файла в формате Arrow, используйте команду следующего вида:
+
+``` bash
+$ cat filename.arrow | clickhouse-client --query="INSERT INTO some_table FORMAT Arrow"
+```
+
+### Вывод данных {#selecting-data-arrow}
+
+Чтобы получить данные из таблицы ClickHouse и сохранить их в файл формата Arrow, используйте команду следующего вида:
+
+``` bash
+$ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Arrow" > {filename.arrow}
+```
+
 ## ArrowStream {#data-format-arrow-stream}

 `ArrowStream` — это Apache Arrow's "stream mode" формат. Он предназначен для обработки потоков в памяти.
@ -1225,9 +1275,11 @@ $ clickhouse-client --query="SELECT * FROM {some_table} FORMAT Parquet" > {some_
 | `DATE64`, `TIMESTAMP`     | [DateTime](../sql-reference/data-types/datetime.md) | `TIMESTAMP`               |
 | `STRING`, `BINARY`        | [String](../sql-reference/data-types/string.md)     | `BINARY`                  |
 | `DECIMAL`                 | [Decimal](../sql-reference/data-types/decimal.md)   | `DECIMAL`                 |
-| `-`                       | [Array](../sql-reference/data-types/array.md)       | `LIST`                    |
+| `LIST`                    | [Array](../sql-reference/data-types/array.md)       | `LIST`                    |

-ClickHouse поддерживает настраиваемую точность для формата `Decimal`. При обработке запроса `INSERT`, ClickHouse обрабатывает тип данных ORC `DECIMAL` как `Decimal128`.
+Массивы могут быть вложенными и иметь в качестве аргумента значение типа `Nullable`.
+
+ClickHouse поддерживает настраиваемую точность для формата `Decimal`. При выполнении запроса `INSERT` ClickHouse обрабатывает тип данных ORC `DECIMAL` как `Decimal128`.

 Неподдерживаемые типы данных ORC: `TIME32`, `FIXED_SIZE_BINARY`, `JSON`, `UUID`, `ENUM`.

@ -1397,4 +1449,3 @@ $ clickhouse-client --query "SELECT * FROM {some_table} FORMAT RawBLOB" | md5sum
 ``` text
 f9725a22f9191e064120d718e26862a9  -
 ```
-
--- a/docs/ru/interfaces/third-party/client-libraries.md
+++ b/docs/ru/interfaces/third-party/client-libraries.md
@ -38,6 +38,8 @@ toc_title: "Клиентские библиотеки от сторонних р
 -   Ruby
    -   [ClickHouse (Ruby)](https://github.com/shlima/click_house)
    -   [clickhouse-activerecord](https://github.com/PNixx/clickhouse-activerecord)
+-   Rust
+    -   [Klickhouse](https://github.com/Protryon/klickhouse)
 -   R
    -   [clickhouse-r](https://github.com/hannesmuehleisen/clickhouse-r)
    -   [RClickhouse](https://github.com/IMSMWU/RClickhouse)
--- a/docs/ru/operations/settings/merge-tree-settings.md
+++ b/docs/ru/operations/settings/merge-tree-settings.md
@ -1,6 +1,6 @@
 # Настройки MergeTree таблиц {#merge-tree-settings}

-Значения настроек для всех MergeTree таблиц можно посмотреть в таблице `system.merge_tree_settings`, их можно переопределить в `config.xml` в секции `merge_tree`, или задать в секции `SETTINGS` у каждой таблицы.
+Значения настроек всех MergeTree таблиц собраны в таблице `system.merge_tree_settings`. Их можно переопределить в разделе `merge_tree` файла `config.xml` или задать в секции `SETTINGS` каждой таблицы.

 Пример переопределения в `config.xml`:

@ -10,7 +10,7 @@
 </merge_tree>
 ```

-Пример для определения в `SETTINGS` у конкретной таблицы:
+Пример установки `SETTINGS` для конкретной таблицы:

 ``` sql
 CREATE TABLE foo
@ -22,7 +22,7 @@ ORDER BY tuple()
 SETTINGS max_suspicious_broken_parts = 500;
 ```

-Пример изменения настроек у конкретной таблицы командой `ALTER TABLE ... MODIFY SETTING`:
+Пример изменения настроек для конкретной таблицы при помощи команды `ALTER TABLE ... MODIFY SETTING`:

 ``` sql
 ALTER TABLE foo
@ -31,7 +31,7 @@ ALTER TABLE foo

 ## parts_to_throw_insert {#parts-to-throw-insert}

-Eсли число кусков в партиции превышает значение `parts_to_throw_insert`, INSERT прерывается с исключением `Too many parts (N). Merges are processing significantly slower than inserts`.
+Если число активных кусков в партиции больше значения `parts_to_throw_insert`, то INSERT прерывается с исключением: `Too many parts (N). Merges are processing significantly slower than inserts`.

 Возможные значения:

@ -39,13 +39,13 @@ Eсли число кусков в партиции превышает знач

 Значение по умолчанию: 300.

-Для достижения максимальной производительности запросов `SELECT` необходимо минимизировать количество обрабатываемых кусков, см. [Дизайн MergeTree](../../development/architecture.md#merge-tree).
+Чтобы производительность запросов `SELECT` стала максимальной, необходимо минимизировать количество обрабатываемых кусков, см. [Дизайн MergeTree](../../development/architecture.md#merge-tree).

-Можно установить большее значение 600 (1200), это уменьшит вероятность возникновения ошибки `Too many parts`, но в тоже время вы позже обнаружите возможную проблему со слияниями (например, из-за недостатка места на диске) и деградацию производительности `SELECT`.
+Можно установить значение больше — 600 (1200) кусков. Тогда ошибка `Too many parts` будет появляться реже, но при этом могут возникнуть проблемы с фоновыми слияниями и производительностью `SELECT`-запросов.

 ## parts_to_delay_insert {#parts-to-delay-insert}

-Eсли число кусков в партиции превышает значение `parts_to_delay_insert`, `INSERT` искусственно замедляется.
+Если число кусков в партиции больше значения `parts_to_delay_insert`, то `INSERT` искусственно замедляется.

 Возможные значения:

@ -53,31 +53,31 @@ Eсли число кусков в партиции превышает знач

 Значение по умолчанию: 150.

-ClickHouse искусственно выполняет `INSERT` дольше (добавляет ‘sleep’), чтобы фоновый механизм слияния успевал слиять куски быстрее, чем они добавляются.
+ClickHouse искусственно выполняет `INSERT` дольше (добавляет ‘sleep’) так, чтобы куски сливались в фоновом процессе быстрее, чем добавляются.

 ## inactive_parts_to_throw_insert {#inactive-parts-to-throw-insert}

-Если число неактивных кусков в партиции превышает значение `inactive_parts_to_throw_insert`, `INSERT` прерывается с исключением «Too many inactive parts (N). Parts cleaning are processing significantly slower than inserts».
+Если число неактивных кусков в партиции больше значения `inactive_parts_to_throw_insert`, то `INSERT` прерывается с исключением `Too many inactive parts (N). Parts cleaning are processing significantly slower than inserts`.

 Возможные значения:

 -   Положительное целое число.

-Значение по умолчанию: 0 (не ограничено).
+Значение по умолчанию: 0 (без ограничений).

 ## inactive_parts_to_delay_insert {#inactive-parts-to-delay-insert}

-Если число неактивных кусков в партиции больше или равно значению `inactive_parts_to_delay_insert`, `INSERT` искусственно замедляется. Это полезно, когда сервер не может быстро очистить неактивные куски.
+Если число неактивных кусков в партиции больше или равно значению `inactive_parts_to_delay_insert`, то `INSERT` искусственно замедляется. Это помогает, когда сервер не может быстро очистить неактивные куски.

 Возможные значения:

 -   Положительное целое число.

-Значение по умолчанию: 0 (не ограничено).
+Значение по умолчанию: 0 (без ограничений).

 ## max_delay_to_insert {#max-delay-to-insert}

-Величина в секундах, которая используется для расчета задержки `INSERT`, если число кусков в партиции превышает значение [parts_to_delay_insert](#parts-to-delay-insert).
+Величина в секундах, которая используется для расчета задержки `INSERT` в случаях, когда число кусков в партиции больше значения [parts_to_delay_insert](#parts-to-delay-insert).

 Возможные значения:

@ -93,11 +93,11 @@ k = 1 + parts_count_in_partition - parts_to_delay_insert
 delay_milliseconds = pow(max_delay_to_insert * 1000, k / max_k)
 ```

-Т.е. если в партиции уже 299 кусков и parts_to_throw_insert = 300, parts_to_delay_insert = 150, max_delay_to_insert = 1, `INSERT` замедлится на `pow( 1 * 1000, (1 + 299 - 150) / (300 - 150) ) = 1000` миллисекунд.
+Т.е. если в партиции уже 299 кусков и parts_to_throw_insert = 300, parts_to_delay_insert = 150, а max_delay_to_insert = 1, то `INSERT` замедлится на `pow( 1 * 1000, (1 + 299 - 150) / (300 - 150) ) = 1000` миллисекунд.

 ## max_parts_in_total {#max-parts-in-total}

-Eсли суммарное число активных кусков во всех партициях таблицы превышает значение `max_parts_in_total`, INSERT прерывается с исключением `Too many parts (N)`.
+Если суммарное число активных кусков во всех партициях таблицы больше значения `max_parts_in_total`, то INSERT прерывается с исключением `Too many parts (N)`.

 Возможные значения:

@ -105,20 +105,22 @@ Eсли суммарное число активных кусков во все

 Значение по умолчанию: 100000.

-Большое число кусков в таблице снижает производительность запросов ClickHouse и увеличивает время старта ClickHouse. Чаще всего это следствие неправильного дизайна (ошибки при выборе стратегии партиционирования -- слишком мелкие партиции).
+С большим числом кусков в таблице производительность запросов ClickHouse снижается, а время старта ClickHouse — увеличивается. Чаще всего это следствие неправильного дизайна (ошибки выбора стратегии партиционирования, например, слишком мелкие партиции).

 ## replicated_deduplication_window {#replicated-deduplication-window}

-Количество хеш-сумм последних вставленных блоков, хранящихся в Zookeeper.
+Количество хеш-сумм последних вставленных блоков, которые хранятся в Zookeeper.

 Возможные значения:

 -   Положительное целое число.
+-   0 (без ограничений).

 Значение по умолчанию: 100.

-Команда `Insert` создает один или несколько блоков (кусков). При вставке в Replicated таблицы ClickHouse для [дедупликации вставок](../../engines/table-engines/mergetree-family/replication.md) записывает в Zookeeper хеш-суммы созданных кусков. Но хранятся хеш-суммы не всех кусков, а только последние `replicated_deduplication_window`. Наиболее старые хеш-суммы удаляются из Zookeeper.
-Большое число `replicated_deduplication_window` замедляет `Insert`-ы. Хеш-сумма рассчитывается от композиции имен и типов полей, а также данных вставленного куска (потока байт).
+Команда `Insert` создает один или несколько блоков (кусков). При вставке в Replicated таблицы ClickHouse для [дедупликации вставок](../../engines/table-engines/mergetree-family/replication.md) записывает в Zookeeper хеш-суммы созданных кусков. Но хранятся только последние `replicated_deduplication_window` хеш-сумм. Самые старые хеш-суммы удаляются из Zookeeper.
+Большое значение `replicated_deduplication_window` замедляет `Insert`, так как приходится сравнивать большее количество хеш-сумм. 
+Хеш-сумма рассчитывается по названиям и типам полей, а также по данным вставленного куска (потока байт).

 ## non_replicated_deduplication_window {#non-replicated-deduplication-window}

@ -135,7 +137,7 @@ Eсли суммарное число активных кусков во все

 ## replicated_deduplication_window_seconds {#replicated-deduplication-window-seconds}

-Число секунд, после которых хеш-суммы вставленных блоков удаляются из Zookeeper.
+Время хранения (в секундах) хеш-сумм вставленных блоков в Zookeeper. По истечении этого времени хеш-суммы удаляются.

 Возможные значения:

@ -143,11 +145,11 @@ Eсли суммарное число активных кусков во все

 Значение по умолчанию: 604800 (1 неделя).

-Аналогично [replicated_deduplication_window](#replicated-deduplication-window), задает, сколько времени хранить хеш-суммы блоков для дедупликции `Insert`-в. Хеш-суммы старше `replicated_deduplication_window_seconds` удаляются из Zookeeper, даже если их меньше чем `replicated_deduplication_window`.
+Аналогично [replicated_deduplication_window](#replicated-deduplication-window), настройка `replicated_deduplication_window_seconds` задает время хранения хеш-сумм блоков для дедупликации `Insert`. Хеш-суммы старше значения `replicated_deduplication_window_seconds` удаляются из Zookeeper, даже если количество оставшихся хеш-сумм станет меньше, чем `replicated_deduplication_window`.

 ## old_parts_lifetime {#old-parts-lifetime}

-Время (в секундах) хранения неактивных кусков, для защиты от потери данных при спонтанной перезагрузке сервера или О.С.
+Время (в секундах) хранения неактивных кусков для защиты от потери данных при спонтанной перезагрузке сервера.

 Возможные значения:

@ -155,12 +157,16 @@ Eсли суммарное число активных кусков во все

 Значение по умолчанию: 480.

-После слияния нескольких кусков в новый кусок, ClickHouse помечает исходные куски как неактивные и удаляет их после `old_parts_lifetime` секунд.
-Неактивные куски удаляются, если они не используются в текущих запросах, т.е. если счетчик ссылок куска – `refcount` равен нулю.
+После объединения нескольких кусков в один новый ClickHouse помечает исходные куски как неактивные и удаляет их по прошествии `old_parts_lifetime` секунд.
+Неактивные куски удаляются, если они не нужны для текущих запросов, т.е. если счетчик ссылок куска `refcount` имеет нулевое значение.

-Неактивные куски удаляются не сразу, потому что при записи нового куска не вызывается `fsync`, т.е. некоторое время новый кусок находится только в оперативной памяти сервера (кеше О.С.). Т.о. при спонтанной перезагрузке сервера новый (смерженный) кусок может быть потерян или испорчен. В этом случае ClickHouse в процессе старта при проверке целостности кусков обнаружит проблему, вернет неактивные куски в список активных и позже заново их смержит. Сломанный кусок в этом случае переименовывается (добавляется префикс broken_) и перемещается в папку detached. Если проверка целостности не обнаруживает проблем в смерженном куске, то исходные неактивные куски переименовываются (добавляется префикс ignored_) и перемещаются в папку detached.
+При записи нового куска `fsync` не вызывается, поэтому неактивные куски удаляются позже. Это значит, что некоторое время новый кусок находится только в оперативной памяти сервера (кеш ОС). Если сервер перезагрузится спонтанно, новый слитый кусок может испортиться или потеряться.

-Стандартное значение Linux dirty_expire_centisecs - 30 секунд (максимальное время, которое записанные данные хранятся только в оперативной памяти), но при больших нагрузках на дисковую систему, данные могут быть записаны намного позже. Экспериментально было найдено время - 480 секунд, за которое гарантированно новый кусок будет записан на диск.
+Во время запуска сервер ClickHouse проверяет целостность кусков. 
+Если новый (слитый) кусок поврежден, ClickHouse возвращает неактивные куски в список активных и позже снова выполняет слияние. В этом случае испорченный кусок получает новое имя (добавляется префикс `broken_`) и попадает в каталог `detached`. 
+Если проверка целостности не выявляет проблем в слитом куске, то исходные неактивные куски переименовываются (добавляется префикс `ignored_`) и перемещаются в каталог `detached`.
+
+Стандартное для Linux значение `dirty_expire_centisecs` — 30 секунд. Это максимальное время, в течение которого записанные данные хранятся только в оперативной памяти. Если нагрузка на дисковую систему большая, то данные записываются намного позже. Значение 480 секунд подобрали экспериментальным путем — это время, за которое новый кусок гарантированно запишется на диск.

 ## replicated_fetches_http_connection_timeout {#replicated_fetches_http_connection_timeout}

@ -197,8 +203,8 @@ Eсли суммарное число активных кусков во все

 ## max_bytes_to_merge_at_max_space_in_pool {#max-bytes-to-merge-at-max-space-in-pool}

-Максимальный суммарный размер кусков (в байтах) в одном слиянии, при наличии свободных ресурсов в фоновом пуле.
-`max_bytes_to_merge_at_max_space_in_pool` -- примерно соответствует максимально возможному размеру куска, созданного автоматическим фоновым слиянием.
+Максимальный суммарный размер кусков (в байтах) в одном слиянии, если есть свободные ресурсы в фоновом пуле.
+`max_bytes_to_merge_at_max_space_in_pool` примерно соответствует максимально возможному размеру куска, созданного автоматическим фоновым слиянием.

 Возможные значения:

@ -206,26 +212,27 @@ Eсли суммарное число активных кусков во все

 Значение по умолчанию: 161061273600 (150ГБ).

-Планировщик мержей периодически анализирует размер и количество кусков в партициях, и при достаточном количестве свободных ресурсов в фоновом пуле начинает фоновое слияние. Слияния происходят до тех пор, пока суммарный размер входных кусков не достигнет `max_bytes_to_merge_at_max_space_in_pool`.
+Планировщик слияний периодически анализирует размер и количество кусков в партициях, и если в пуле хватает ресурсов, то начинает фоновое слияние. Слияния выполняются до тех пор, пока суммарный размер входных кусков не достигнет `max_bytes_to_merge_at_max_space_in_pool`.

-Слияния, инициированные `optimize final`, не учитывают `max_bytes_to_merge_at_max_space_in_pool` и размеры кусков и слияют куски только с учетом наличия ресурсов в фоновом пуле, пока не останется один кусок в партиции.
+Слияния, начатые по [OPTIMIZE FINAL](../../sql-reference/statements/optimize.md), не учитывают `max_bytes_to_merge_at_max_space_in_pool` и объединяют куски, пока есть доступные ресурсы (свободное дисковое пространство), до тех пор, пока в партиции не останется один кусок.

 ## max_bytes_to_merge_at_min_space_in_pool {#max-bytes-to-merge-at-min-space-in-pool}

-Максимальный суммарный размер кусков (в байтах) в одном слиянии, при минимальных свободных ресурсах в фоновом пуле.
+Максимальный суммарный размер кусков (в байтах) в одном слиянии при минимуме свободных ресурсов в фоновом пуле.

 Возможные значения:

 -   Положительное целое число.

-Значение по умолчанию: 1048576
+Значение по умолчанию: 1048576 (1 МБ).

-`max_bytes_to_merge_at_min_space_in_pool` задает максимальный суммарный размер кусков, для которых можно начать слияние, несмотря на недостаток свободных ресурсов в фоновом пуле (дискового пространства). Это необходимо, чтобы уменьшить количество маленьких кусков и вероятность ошибки `Too many parts`.
-Слияния резервируют дисковое пространство, удваивая суммарный размер кусков в слиянии. Таким образом, при малом количестве свободного места на диске может сложится ситуация, что свободное место есть, но оно уже зарезервировано идущими слиянияними, поэтому другие слияния не могут начаться, и количество маленьких кусков в партиции растет с каждым инсертом.
+`max_bytes_to_merge_at_min_space_in_pool` задает максимальный суммарный размер кусков, которые можно объединить несмотря на нехватку свободных ресурсов (дискового пространства) в фоновом пуле. Это нужно, чтобы уменьшить количество маленьких кусков и снизить вероятность ошибки `Too many parts`.
+
+Слияния резервируют дисковое пространство, удваивая суммарный размер кусков в слиянии. Поэтому при малом объеме свободного места на диске может сложиться ситуация, когда свободное место есть, но оно уже зарезервировано текущими слияниями. Из-за этого другие слияния не начинаются, и количество маленьких кусков в партиции растет с каждым запросом `INSERT`.

 ## merge_max_block_size {#merge-max-block-size}

-Количество строк в блоках, которые читаются из слияемых кусков.
+Количество строк в блоках, которые читаются из объединяемых кусков.

 Возможные значения:

@ -233,7 +240,7 @@ Eсли суммарное число активных кусков во все

 Значение по умолчанию: 8192

-Слияние читает строки из кусков блоками по `merge_max_block_size` строк, производит слияние и пишет результат в новый кусок. Читаемый блок помещается в оперативную память, т.е. `merge_max_block_size` влияет на размер оперативной памяти, необходимой для слияния. Таким образом, слияния могут потреблять большое количество оперативной памяти для таблиц, хранящих очень большие строки (если средний размер строки 100кб, то при слиянии 10 кусков будет использовано (100кб * 10 * 8192) =~ 8ГБ ОЗУ). Уменьшив `merge_max_block_size`, можно сократить размер оперативной памяти, необходимой для слияния.
+Слияние читает строки из кусков блоками по `merge_max_block_size` строк, производит слияние и записывает результат в новый кусок. Читаемый блок помещается в оперативную память, т.е. `merge_max_block_size` влияет на размер оперативной памяти, необходимой для слияния. Таким образом, слияния могут потреблять большое количество оперативной памяти для таблиц, хранящих очень большие строки (если средний размер строки 100кб, то при слиянии 10 кусков будет использовано (100кб * 10 * 8192) =~ 8ГБ оперативной памяти). Уменьшив `merge_max_block_size`, можно сократить размер оперативной памяти, необходимой для слияния, но при этом процесс слияния замедлится.

 ## max_part_loading_threads {#max-part-loading-threads}

@ -243,9 +250,9 @@ Eсли суммарное число активных кусков во все

 -   Положительное целое число.

-Значение по умолчанию: auto (количество ядер процессора).
+Значение по умолчанию: определяется автоматически (по количеству ядер процессора).

-При старте ClickHouse читает все куски всех таблиц (читает файлы с метаданными кусков), чтобы построить в ОЗУ список всех кусков. В некоторых системах с большим количеством кусков этот процесс может занимать длительное время, и это время можно сократить, увеличив `max_part_loading_threads` (если при этом процессе есть недозагруженность CPU и диска).
+На старте ClickHouse читает все куски из всех таблиц (читает файлы с метаданными кусков), чтобы построить в оперативной памяти список всех кусков. В некоторых системах с большим количеством кусков этот процесс может занимать длительное время. Это время можно сократить, увеличив `max_part_loading_threads` (если при этом хватает ресурсов процессора и диска).

 ## max_partitions_to_read {#max-partitions-to-read}

--- a/docs/ru/operations/settings/settings.md
+++ b/docs/ru/operations/settings/settings.md
@ -2078,7 +2078,7 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1;

 -   Положительное целое число.

-Значение по умолчанию: 16.
+Значение по умолчанию: 128.

 ## background_fetches_pool_size {#background_fetches_pool_size}

@ -2376,18 +2376,6 @@ SELECT * FROM system.events WHERE event='QueryMemoryLimitExceeded';
 └──────────────────────────┴───────┴───────────────────────────────────────────────────────┘
 ```

-## allow_experimental_bigint_types {#allow_experimental_bigint_types}
-
-Включает или отключает поддержку целочисленных значений, превышающих максимальное значение, допустимое для типа `int`.
-
-Возможные значения:
-
-   1 — большие целочисленные значения поддерживаются.
-   0 — большие целочисленные значения не поддерживаются.
-
-Значение по умолчанию: `0`.
-
-
 ## lock_acquire_timeout {#lock_acquire_timeout}

 Устанавливает, сколько секунд сервер ожидает возможности выполнить блокировку таблицы.
--- a/docs/ru/operations/system-tables/trace_log.md
+++ b/docs/ru/operations/system-tables/trace_log.md
@ -2,7 +2,7 @@

 Содержит экземпляры трассировки стека адресов вызова, собранные с помощью семплирующего профайлера запросов.

-ClickHouse создает эту таблицу когда утсановлена настройка [trace_log](../server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) в конфигурационном файле сервереа. А также настройки [query_profiler_real_time_period_ns](../settings/settings.md#query_profiler_real_time_period_ns) и [query_profiler_cpu_time_period_ns](../settings/settings.md#query_profiler_cpu_time_period_ns).
+ClickHouse создает эту таблицу, когда установлена настройка [trace_log](../server-configuration-parameters/settings.md#server_configuration_parameters-trace_log) в конфигурационном файле сервера. А также настройки [query_profiler_real_time_period_ns](../settings/settings.md#query_profiler_real_time_period_ns) и [query_profiler_cpu_time_period_ns](../settings/settings.md#query_profiler_cpu_time_period_ns).

 Для анализа stack traces, используйте функции интроспекции `addressToLine`, `addressToSymbol` и `demangle`.

--- a/docs/ru/sql-reference/aggregate-functions/parametric-functions.md
+++ b/docs/ru/sql-reference/aggregate-functions/parametric-functions.md
@ -116,7 +116,7 @@ sequenceMatch(pattern)(timestamp, cond1, cond2, ...)

 -   `.*` — соответствует любому количеству событий. Для этого элемента шаблона не надо задавать условия.

-   `(?t operator value)` — устанавливает время в секундах, которое должно разделять два события. Например, шаблон `(?1)(?t>1800)(?2)` соответствует событиям, которые произошли более чем через 1800 секунд друг от друга. Между этими событиями может находиться произвольное количество любых событий. Операторы могут быть `>=`, `>`, `<`, `<=`.
+-   `(?t operator value)` — устанавливает время в секундах, которое должно разделять два события. Например, шаблон `(?1)(?t>1800)(?2)` соответствует событиям, которые произошли более чем через 1800 секунд друг от друга. Между этими событиями может находиться произвольное количество любых событий. Операторы могут быть `>=`, `>`, `<`, `<=`, `==`.

 **Примеры**

@ -496,3 +496,258 @@ FROM
 Решение: пишем в запросе GROUP BY SearchPhrase HAVING uniqUpTo(4)(UserID) >= 5
 ```

+## sequenceNextNode {#sequenceNextNode}
+
+Возвращает значение следующего события, соответствующего цепочке событий.
+
+_Экспериментальная функция, чтобы включить ее, выполните: `SET allow_experimental_funnel_functions = 1`._
+
+**Синтаксис**
+
+``` sql
+sequenceNextNode(direction, base)(timestamp, event_column, base_condition, event1, event2, event3, ...)
+```
+
+**Параметры**
+
+-   `direction` — используется для навигации по направлениям.
+    - forward — двигаться вперед.
+    - backward — двигаться назад.
+
+-   `base` — используется для задания начальной точки.
+    - head — установить начальную точку на первое событие цепочки.
+    - tail — установить начальную точку на последнее событие цепочки.
+    - first_match — установить начальную точку на первое соответствующее событие `event1`.
+    - last_match — установить начальную точку на последнее соответствующее событие `event1`.
+    
+**Аргументы**
+
+-   `timestamp` — название столбца, содержащего `timestamp`. Поддерживаемые типы данных: [Date](../../sql-reference/data-types/date.md), [DateTime](../../sql-reference/data-types/datetime.md#data_type-datetime) и другие беззнаковые целые типы.
+-   `event_column` — название столбца, содержащего значение следующего возвращаемого события. Поддерживаемые типы данных: [String](../../sql-reference/data-types/string.md) и [Nullable(String)](../../sql-reference/data-types/nullable.md).
+-   `base_condition` — условие, которому должна соответствовать исходная точка.
+-   `event1`, `event2`, ... — условия, описывающие цепочку событий. [UInt8](../../sql-reference/data-types/int-uint.md).
+
+**Возвращаемые значения**
+
+-  `event_column[next_index]` — если есть совпадение с шаблоном и существует следующее значение.
+-  `NULL` — если нет совпадений с шаблоном или следующего значения не существует.
+
+Тип: [Nullable(String)](../../sql-reference/data-types/nullable.md).
+
+**Пример**
+
+Функцию можно использовать, если есть цепочка событий A->B->C->D->E, и вы хотите определить событие, следующее за B->C, то есть D.
+
+Запрос ищет событие после A->B:
+
+``` sql
+CREATE TABLE test_flow (
+    dt DateTime, 
+    id int, 
+    page String)
+ENGINE = MergeTree() 
+PARTITION BY toYYYYMMDD(dt) 
+ORDER BY id;
+
+INSERT INTO test_flow VALUES (1, 1, 'A') (2, 1, 'B') (3, 1, 'C') (4, 1, 'D') (5, 1, 'E');
+
+SELECT id, sequenceNextNode('forward', 'head')(dt, page, page = 'A', page = 'A', page = 'B') as next_flow FROM test_flow GROUP BY id;
+```
+
+Результат:
+
+``` text
+┌─id─┬─next_flow─┐
+│  1 │ C         │
+└────┴───────────┘
+```
+
+**Поведение для `forward` и `head`**
+
+``` sql
+ALTER TABLE test_flow DELETE WHERE 1 = 1 settings mutations_sync = 1;
+
+INSERT INTO test_flow VALUES (1, 1, 'Home') (2, 1, 'Gift') (3, 1, 'Exit');
+INSERT INTO test_flow VALUES (1, 2, 'Home') (2, 2, 'Home') (3, 2, 'Gift') (4, 2, 'Basket');
+INSERT INTO test_flow VALUES (1, 3, 'Gift') (2, 3, 'Home') (3, 3, 'Gift') (4, 3, 'Basket');
+```
+
+``` sql
+SELECT id, sequenceNextNode('forward', 'head')(dt, page, page = 'Home', page = 'Home', page = 'Gift') FROM test_flow GROUP BY id;
+ 
+                  dt   id   page
+ 1970-01-01 09:00:01    1   Home // Исходная точка, совпадение с Home
+ 1970-01-01 09:00:02    1   Gift // Совпадение с Gift
+ 1970-01-01 09:00:03    1   Exit // Результат 
+
+ 1970-01-01 09:00:01    2   Home // Исходная точка, совпадение с Home
+ 1970-01-01 09:00:02    2   Home // Несовпадение с Gift
+ 1970-01-01 09:00:03    2   Gift
+ 1970-01-01 09:00:04    2   Basket    
+ 
+ 1970-01-01 09:00:01    3   Gift // Исходная точка, несовпадение с Home
+ 1970-01-01 09:00:02    3   Home      
+ 1970-01-01 09:00:03    3   Gift      
+ 1970-01-01 09:00:04    3   Basket    
+```
+
+**Поведение для `backward` и `tail`**
+
+``` sql
+SELECT id, sequenceNextNode('backward', 'tail')(dt, page, page = 'Basket', page = 'Basket', page = 'Gift') FROM test_flow GROUP BY id;
+
+                 dt   id   page
+1970-01-01 09:00:01    1   Home
+1970-01-01 09:00:02    1   Gift
+1970-01-01 09:00:03    1   Exit // Исходная точка, несовпадение с Basket
+                                     
+1970-01-01 09:00:01    2   Home 
+1970-01-01 09:00:02    2   Home // Результат
+1970-01-01 09:00:03    2   Gift // Совпадение с Gift
+1970-01-01 09:00:04    2   Basket // Исходная точка, совпадение с Basket
+                                     
+1970-01-01 09:00:01    3   Gift
+1970-01-01 09:00:02    3   Home // Результат
+1970-01-01 09:00:03    3   Gift // Совпадение с Gift
+1970-01-01 09:00:04    3   Basket // Исходная точка, совпадение с Basket
+```
+
+
+**Поведение для `forward` и `first_match`**
+
+``` sql
+SELECT id, sequenceNextNode('forward', 'first_match')(dt, page, page = 'Gift', page = 'Gift') FROM test_flow GROUP BY id;
+
+                 dt   id   page
+1970-01-01 09:00:01    1   Home
+1970-01-01 09:00:02    1   Gift // Исходная точка
+1970-01-01 09:00:03    1   Exit // Результат
+                                     
+1970-01-01 09:00:01    2   Home 
+1970-01-01 09:00:02    2   Home 
+1970-01-01 09:00:03    2   Gift // Исходная точка
+1970-01-01 09:00:04    2   Basket // Результат
+                                     
+1970-01-01 09:00:01    3   Gift // Исходная точка
+1970-01-01 09:00:02    3   Home // Результат
+1970-01-01 09:00:03    3   Gift   
+1970-01-01 09:00:04    3   Basket    
+```
+
+``` sql
+SELECT id, sequenceNextNode('forward', 'first_match')(dt, page, page = 'Gift', page = 'Gift', page = 'Home') FROM test_flow GROUP BY id;
+
+                 dt   id   page
+1970-01-01 09:00:01    1   Home
+1970-01-01 09:00:02    1   Gift // Исходная точка
+1970-01-01 09:00:03    1   Exit // Несовпадение с Home
+                                     
+1970-01-01 09:00:01    2   Home 
+1970-01-01 09:00:02    2   Home 
+1970-01-01 09:00:03    2   Gift // Исходная точка
+1970-01-01 09:00:04    2   Basket // Несовпадение с Home
+                                     
+1970-01-01 09:00:01    3   Gift // Исходная точка
+1970-01-01 09:00:02    3   Home // Совпадение с Home
+1970-01-01 09:00:03    3   Gift // Результат
+1970-01-01 09:00:04    3   Basket    
+```
+
+
+**Поведение для `backward` и `last_match`**
+
+``` sql
+SELECT id, sequenceNextNode('backward', 'last_match')(dt, page, page = 'Gift', page = 'Gift') FROM test_flow GROUP BY id;
+
+                 dt   id   page
+1970-01-01 09:00:01    1   Home // Результат
+1970-01-01 09:00:02    1   Gift // Исходная точка
+1970-01-01 09:00:03    1   Exit 
+                                     
+1970-01-01 09:00:01    2   Home 
+1970-01-01 09:00:02    2   Home // Результат
+1970-01-01 09:00:03    2   Gift // Исходная точка
+1970-01-01 09:00:04    2   Basket    
+                                     
+1970-01-01 09:00:01    3   Gift 
+1970-01-01 09:00:02    3   Home // Результат
+1970-01-01 09:00:03    3   Gift // Исходная точка
+1970-01-01 09:00:04    3   Basket    
+```
+
+``` sql
+SELECT id, sequenceNextNode('backward', 'last_match')(dt, page, page = 'Gift', page = 'Gift', page = 'Home') FROM test_flow GROUP BY id;
+
+                 dt   id   page
+1970-01-01 09:00:01    1   Home // Совпадение с Home, результат `Null`
+1970-01-01 09:00:02    1   Gift // Исходная точка
+1970-01-01 09:00:03    1   Exit 
+                                     
+1970-01-01 09:00:01    2   Home // Результат
+1970-01-01 09:00:02    2   Home // Совпадение с Home
+1970-01-01 09:00:03    2   Gift // Исходная точка
+1970-01-01 09:00:04    2   Basket    
+                                     
+1970-01-01 09:00:01    3   Gift // Результат
+1970-01-01 09:00:02    3   Home // Совпадение с Home
+1970-01-01 09:00:03    3   Gift // Исходная точка 
+1970-01-01 09:00:04    3   Basket    
+```
+
+
+**Поведение для `base_condition`**
+
+``` sql
+CREATE TABLE test_flow_basecond
+(
+    `dt` DateTime,
+    `id` int,
+    `page` String,
+    `ref` String
+)
+ENGINE = MergeTree
+PARTITION BY toYYYYMMDD(dt)
+ORDER BY id;
+
+INSERT INTO test_flow_basecond VALUES (1, 1, 'A', 'ref4') (2, 1, 'A', 'ref3') (3, 1, 'B', 'ref2') (4, 1, 'B', 'ref1');
+```
+
+``` sql
+SELECT id, sequenceNextNode('forward', 'head')(dt, page, ref = 'ref1', page = 'A') FROM test_flow_basecond GROUP BY id;
+
+                  dt   id   page   ref 
+ 1970-01-01 09:00:01    1   A      ref4 // Начало не может быть исходной точкой, поскольку столбец ref не соответствует 'ref1'.
+ 1970-01-01 09:00:02    1   A      ref3 
+ 1970-01-01 09:00:03    1   B      ref2 
+ 1970-01-01 09:00:04    1   B      ref1 
+ ```
+
+``` sql
+SELECT id, sequenceNextNode('backward', 'tail')(dt, page, ref = 'ref4', page = 'B') FROM test_flow_basecond GROUP BY id;
+
+                  dt   id   page   ref 
+ 1970-01-01 09:00:01    1   A      ref4
+ 1970-01-01 09:00:02    1   A      ref3 
+ 1970-01-01 09:00:03    1   B      ref2 
+ 1970-01-01 09:00:04    1   B      ref1 // Конец не может быть исходной точкой, поскольку столбец ref не соответствует 'ref4'.
+```
+
+``` sql
+SELECT id, sequenceNextNode('forward', 'first_match')(dt, page, ref = 'ref3', page = 'A') FROM test_flow_basecond GROUP BY id;
+
+                  dt   id   page   ref 
+ 1970-01-01 09:00:01    1   A      ref4 // Эта строка не может быть исходной точкой, поскольку столбец ref не соответствует 'ref3'.
+ 1970-01-01 09:00:02    1   A      ref3 // Исходная точка
+ 1970-01-01 09:00:03    1   B      ref2 // Результат
+ 1970-01-01 09:00:04    1   B      ref1 
+```
+
+``` sql
+SELECT id, sequenceNextNode('backward', 'last_match')(dt, page, ref = 'ref2', page = 'B') FROM test_flow_basecond GROUP BY id;
+
+                  dt   id   page   ref 
+ 1970-01-01 09:00:01    1   A      ref4
+ 1970-01-01 09:00:02    1   A      ref3 // Результат
+ 1970-01-01 09:00:03    1   B      ref2 // Исходная точка
+ 1970-01-01 09:00:04    1   B      ref1 // Эта строка не может быть исходной точкой, поскольку столбец ref не соответствует 'ref2'. 
+```
--- a/docs/ru/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md
+++ b/docs/ru/sql-reference/aggregate-functions/reference/quantiletdigestweighted.md
@ -21,7 +21,7 @@ toc_priority: 208
 quantileTDigestWeighted(level)(expr, weight)
 ```

-Алиас: `medianTDigest`.
+Синоним: `medianTDigestWeighted`.

 **Аргументы**

--- a/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md
+++ b/docs/ru/sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md
@ -61,6 +61,7 @@ SETTINGS(format_csv_allow_single_quotes = 0)

 -   [Локальный файл](#dicts-external_dicts_dict_sources-local_file)
 -   [Исполняемый файл](#dicts-external_dicts_dict_sources-executable)
+-   [Исполняемый пул](#dicts-external_dicts_dict_sources-executable_pool)
 -   [HTTP(s)](#dicts-external_dicts_dict_sources-http)
 -   СУБД:
    -   [ODBC](#dicts-external_dicts_dict_sources-odbc)
@ -69,6 +70,7 @@ SETTINGS(format_csv_allow_single_quotes = 0)
    -   [ClickHouse](#dicts-external_dicts_dict_sources-clickhouse)
    -   [MongoDB](#dicts-external_dicts_dict_sources-mongodb)
    -   [Redis](#dicts-external_dicts_dict_sources-redis)
+    -   [Cassandra](#dicts-external_dicts_dict_sources-cassandra)
    -   [PostgreSQL](#dicts-external_dicts_dict_sources-postgresql)

 ## Локальный файл {#dicts-external_dicts_dict_sources-local_file}
@ -93,7 +95,7 @@ SOURCE(FILE(path './user_files/os.tsv' format 'TabSeparated'))
 Поля настройки:

 -   `path` — абсолютный путь к файлу.
-   `format` — формат файла. Поддерживаются все форматы, описанные в разделе «[Форматы](../../../interfaces/formats.md#formats)».
+-   `format` — формат файла. Поддерживаются все форматы, описанные в разделе [Форматы](../../../interfaces/formats.md#formats).

 Если словарь с источником `FILE` создается с помощью DDL-команды (`CREATE DICTIONARY ...`), источник словаря должен быть расположен в каталоге `user_files`. Иначе пользователи базы данных будут иметь доступ к произвольному файлу на узле ClickHouse.

@ -112,6 +114,7 @@ SOURCE(FILE(path './user_files/os.tsv' format 'TabSeparated'))
    <executable>
        <command>cat /opt/dictionaries/os.tsv</command>
        <format>TabSeparated</format>
+        <implicit_key>false</implicit_key>
    </executable>
 </source>
 ```
@ -119,9 +122,41 @@ SOURCE(FILE(path './user_files/os.tsv' format 'TabSeparated'))
 Поля настройки:

 -   `command` — абсолютный путь к исполняемому файлу или имя файла (если каталог программы прописан в `PATH`).
-   `format` — формат файла. Поддерживаются все форматы, описанные в разделе «[Форматы](../../../interfaces/formats.md#formats)».
+-   `format` — формат файла. Поддерживаются все форматы, описанные в разделе [Форматы](../../../interfaces/formats.md#formats).
+-   `implicit_key` — исходный исполняемый файл может возвращать только значения, а соответствие запрошенным ключам определено неявно — порядком строк в результате. Значение по умолчанию: false. Необязательный параметр.

-Этот источник словаря может быть настроен только с помощью XML-конфигурации. Создание словарей с исполняемым источником с помощью DDL отключено. Иначе пользователь базы данных сможет выполнить произвольный бинарный файл на узле ClickHouse.
+Этот источник словаря может быть настроен только с помощью XML-конфигурации. Создание словарей с исполняемым источником с помощью DDL запрещено. Иначе пользователь сможет выполнить произвольный бинарный файл на сервере ClickHouse.
+
+## Исполняемый пул {#dicts-external_dicts_dict_sources-executable_pool}
+
+Исполняемый пул позволяет загружать данные из пула процессов. Этот источник не работает со словарями, которые требуют загрузки всех данных из источника. Исполняемый пул работает со словарями, которые размещаются [следующими способами](external-dicts-dict-layout.md#ways-to-store-dictionaries-in-memory): `cache`, `complex_key_cache`, `ssd_cache`, `complex_key_ssd_cache`, `direct`, `complex_key_direct`.
+
+Исполняемый пул генерирует пул процессов с помощью указанной команды и оставляет их активными, пока они не завершатся. Программа считывает данные из потока STDIN, пока он доступен, и выводит результат в поток STDOUT, а затем ожидает следующего блока данных из STDIN. ClickHouse не закрывает поток STDIN после обработки блока данных и отправляет в него следующую порцию данных, когда это требуется. Исполняемый скрипт должен быть готов к такому способу обработки данных — он должен заранее опрашивать STDIN и отправлять данные в STDOUT.
+
+Пример настройки:
+
+``` xml
+<source>
+    <executable_pool>
+        <command>while read key; do printf "$key\tData for key $key\n"; done</command>
+        <format>TabSeparated</format>
+        <pool_size>10</pool_size>
+        <max_command_execution_time>10</max_command_execution_time>
+        <implicit_key>false</implicit_key>
+    </executable_pool>
+</source>
+```
+
+Поля настройки:
+
+-   `command` — абсолютный путь к файлу или имя файла (если каталог программы записан в `PATH`).
+-   `format` — формат файла. Поддерживаются все форматы, описанные в разделе [Форматы](../../../interfaces/formats.md#formats).
+-   `pool_size` — размер пула. Если в поле `pool_size` указан 0, то размер пула не ограничен.
+-   `command_termination_timeout` — скрипт исполняемого пула должен включать основной цикл чтения-записи. После уничтожения словаря канал закрывается. При этом исполняемый файл имеет `command_termination_timeout` секунд для завершения работы, прежде чем ClickHouse пошлет сигнал SIGTERM дочернему процессу. Указывается в секундах. Значение по умолчанию: 10. Необязательный параметр.
+-   `max_command_execution_time` — максимальное количество времени для исполняемого скрипта на обработку блока данных. Указывается в секундах. Значение по умолчанию: 10. Необязательный параметр.
+-   `implicit_key` — исходный исполняемый файл может возвращать только значения, а соответствие запрошенным ключам определено неявно — порядком строк в результате. Значение по умолчанию: false. Необязательный параметр.
+
+Этот источник словаря может быть настроен только с помощью XML-конфигурации. Создание словарей с исполняемым источником с помощью DDL запрещено. Иначе пользователь сможет выполнить произвольный бинарный файл на сервере ClickHouse.

 ## HTTP(s) {#dicts-external_dicts_dict_sources-http}

@ -729,4 +764,3 @@ Setting fields:
 -   `where` – Условие выборки. Синтаксис для условий такой же как для `WHERE` выражения в PostgreSQL, для примера, `id > 10 AND id < 20`. Необязательный параметр.
 -   `invalidate_query` – Запрос для проверки условия загрузки словаря. Необязательный параметр. Читайте больше в разделе [Обновление словарей](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-lifetime.md).

-
--- a/docs/ru/sql-reference/operators/index.md
+++ b/docs/ru/sql-reference/operators/index.md
@ -189,6 +189,23 @@ SELECT now() AS current_date_time, current_date_time + INTERVAL '4' day + INTERV
 └─────────────────────┴────────────────────────────────────────────────────────────┘
 ```

+Вы можете изменить дату, не используя синтаксис `INTERVAL`, а просто добавив или отняв секунды, минуты и часы. Например, чтобы передвинуть дату на один день вперед, можно прибавить к ней значение `60*60*24`.
+
+!!! note "Примечание"
+    Синтаксис `INTERVAL` или функция `addDays` предпочтительнее для работы с датами. Сложение с числом (например, синтаксис `now() + ...`) не учитывает региональные настройки времени, например, переход на летнее время. 
+
+Пример:
+
+``` sql
+SELECT toDateTime('2014-10-26 00:00:00', 'Europe/Moscow') AS time, time + 60 * 60 * 24 AS time_plus_24_hours, time + toIntervalDay(1) AS time_plus_1_day;
+```
+
+``` text
+┌────────────────time─┬──time_plus_24_hours─┬─────time_plus_1_day─┐
+│ 2014-10-26 00:00:00 │ 2014-10-26 23:00:00 │ 2014-10-27 00:00:00 │
+└─────────────────────┴─────────────────────┴─────────────────────┘
+```
+
 **Смотрите также**

 -   Тип данных [Interval](../../sql-reference/operators/index.md)
@ -296,4 +313,3 @@ SELECT * FROM t_null WHERE y IS NOT NULL
 │ 2 │ 3 │
 └───┴───┘
 ```
-
--- a/docs/ru/sql-reference/statements/show.md
+++ b/docs/ru/sql-reference/statements/show.md
@ -362,6 +362,79 @@ SHOW [CURRENT] QUOTA
 SHOW ACCESS
 ```

+## SHOW CLUSTER(s) {#show-cluster-statement}
+
+Возвращает список кластеров. Все доступные кластеры перечислены в таблице [system.clusters](../../operations/system-tables/clusters.md).
+
+!!! info "Примечание"
+    По запросу `SHOW CLUSTER name` вы получите содержимое таблицы system.clusters для этого кластера.
+
+### Синтаксис {#show-cluster-syntax}
+
+``` sql
+SHOW CLUSTER '<name>'
+SHOW CLUSTERS [LIKE|NOT LIKE '<pattern>'] [LIMIT <N>]
+```
+### Примеры {#show-cluster-examples}
+
+Запрос:
+
+``` sql
+SHOW CLUSTERS;
+```
+
+Результат:
+
+```text
+┌─cluster──────────────────────────────────────┐
+│ test_cluster_two_shards                      │
+│ test_cluster_two_shards_internal_replication │
+│ test_cluster_two_shards_localhost            │
+│ test_shard_localhost                         │
+│ test_shard_localhost_secure                  │
+│ test_unavailable_shard                       │
+└──────────────────────────────────────────────┘
+```
+
+Запрос:
+
+``` sql
+SHOW CLUSTERS LIKE 'test%' LIMIT 1;
+```
+
+Результат:
+
+```text
+┌─cluster─────────────────┐
+│ test_cluster_two_shards │
+└─────────────────────────┘
+```
+
+Запрос:
+
+``` sql
+SHOW CLUSTER 'test_shard_localhost' FORMAT Vertical;
+```
+
+Результат:
+
+```text
+Row 1:
+──────
+cluster:                 test_shard_localhost
+shard_num:               1
+shard_weight:            1
+replica_num:             1
+host_name:               localhost
+host_address:            127.0.0.1
+port:                    9000
+is_local:                1
+user:                    default
+default_database:
+errors_count:            0
+estimated_recovery_time: 0
+```
+
 ## SHOW SETTINGS {#show-settings}

 Возвращает список системных настроек и их значений. Использует данные из таблицы [system.settings](../../operations/system-tables/settings.md).
@ -426,4 +499,3 @@ SHOW CHANGED SETTINGS ILIKE '%MEMORY%'
 **См. также**

 -   Таблица [system.settings](../../operations/system-tables/settings.md)
-
--- a/docs/zh/engines/table-engines/integrations/mongodb.md
+++ b/docs/zh/engines/table-engines/integrations/mongodb.md
@ -0,0 +1,57 @@
+---
+toc_priority: 5
+toc_title: MongoDB
+---
+
+# MongoDB {#mongodb}
+
+MongoDB 引擎是只读表引擎，允许从远程 MongoDB 集合中读取数据(`SELECT`查询)。引擎只支持非嵌套的数据类型。不支持 `INSERT` 查询。
+
+## 创建一张表 {#creating-a-table}
+
+``` sql
+CREATE TABLE [IF NOT EXISTS] [db.]table_name
+(
+    name1 [type1],
+    name2 [type2],
+    ...
+) ENGINE = MongoDB(host:port, database, collection, user, password);
+```
+
+**引擎参数**
+
+-   `host:port` — MongoDB 服务器地址.
+
+-   `database` — 数据库名称.
+
+-   `collection` —  集合名称.
+
+-   `user` — MongoDB 用户.
+
+-   `password` — 用户密码.
+
+## 用法示例 {#usage-example}
+
+ClickHouse 中的表，从 MongoDB 集合中读取数据:
+
+``` sql
+CREATE TABLE mongo_table
+(
+    key UInt64, 
+    data String
+) ENGINE = MongoDB('mongo1:27017', 'test', 'simple_table', 'testuser', 'clickhouse');
+```
+
+查询:
+
+``` sql
+SELECT COUNT() FROM mongo_table;
+```
+
+``` text
+┌─count()─┐
+│       4 │
+└─────────┘
+```
+
+[原始文章](https://clickhouse.tech/docs/en/engines/table-engines/integrations/mongodb/) <!--hide-->
--- a/docs/zh/engines/table-engines/integrations/odbc.md
+++ b/docs/zh/engines/table-engines/integrations/odbc.md
@ -1,6 +1,4 @@
 ---
-machine_translated: true
-machine_translated_rev: 72537a2d527c63c07aa5d2361a8829f3895cf2bd
 toc_priority: 35
 toc_title: ODBC
 ---
@ -11,7 +9,7 @@ toc_title: ODBC

 为了安全地实现 ODBC 连接，ClickHouse 使用了一个独立程序 `clickhouse-odbc-bridge`. 如果ODBC驱动程序是直接从 `clickhouse-server`中加载的，那么驱动问题可能会导致ClickHouse服务崩溃。 当有需要时，ClickHouse会自动启动 `clickhouse-odbc-bridge`。 ODBC桥梁程序与`clickhouse-server`来自相同的安装包.

-该引擎支持 [可为空](../../../sql-reference/data-types/nullable.md) 的数据类型。
+该引擎支持 [Nullable](../../../sql-reference/data-types/nullable.md) 数据类型。

 ## 创建表 {#creating-a-table}

@ -31,22 +29,23 @@ ENGINE = ODBC(connection_settings, external_database, external_table)

 -   列名应与源表中的列名相同，但您可以按任何顺序使用其中的一些列。
 -   列类型可能与源表中的列类型不同。 ClickHouse尝试将数值[映射](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) 到ClickHouse的数据类型。
+-   设置 `external_table_functions_use_nulls` 来定义如何处理 Nullable 列. 默认值是 true, 当设置为 false 时 - 表函数将不会使用 nullable 列，而是插入默认值来代替 null. 这同样适用于数组数据类型中的 null 值.

 **引擎参数**

-   `connection_settings` — Name of the section with connection settings in the `odbc.ini` 文件
-   `external_database` — Name of a database in an external DBMS.
-   `external_table` — Name of a table in the `external_database`.
+-   `connection_settings` — 在 `odbc.ini` 配置文件中，连接配置的名称.
+-   `external_database` — 在外部 DBMS 中的数据库名.
+-   `external_table` — `external_database`中的表名.

 ## 用法示例 {#usage-example}

 **通过ODBC从本地安装的MySQL中检索数据**

-本示例针对Ubuntu Linux18.04和MySQL服务器5.7进行检查。
+本示例已经在 Ubuntu Linux 18.04 和 MySQL server 5.7 上测试通过。

-请确保安装了unixODBC和MySQL连接器。
+请确保已经安装了 unixODBC 和 MySQL Connector。

-默认情况下（如果从软件包安装），ClickHouse以用户`clickhouse`的身份启动 . 因此，您需要在MySQL服务器中创建和配置此用户。
+默认情况下（如果从软件包安装），ClickHouse以用户`clickhouse`的身份启动. 因此，您需要在MySQL服务器中创建并配置此用户。

 ``` bash
 $ sudo mysql
@ -74,7 +73,7 @@ PASSWORD = clickhouse

 ``` bash
 $ isql -v mysqlconn
-+-------------------------+
+---------------------------------------+
 | Connected!                            |
 |                                       |
 ...
@ -95,11 +94,11 @@ mysql> insert into test (`int_id`, `float`) VALUES (1,2);
 Query OK, 1 row affected (0,00 sec)

 mysql> select * from test;
-+------+----------+-----+----------+
+--------+--------------+-------+----------------+
 | int_id | int_nullable | float | float_nullable |
-+------+----------+-----+----------+
+--------+--------------+-------+----------------+
 |      1 |         NULL |     2 |           NULL |
-+------+----------+-----+----------+
+--------+--------------+-------+----------------+
 1 row in set (0,00 sec)
 ```

--- a/docs/zh/engines/table-engines/integrations/postgresql.md
+++ b/docs/zh/engines/table-engines/integrations/postgresql.md
@ -0,0 +1,145 @@
+---
+toc_priority: 11
+toc_title: PostgreSQL
+---
+
+# PostgreSQL {#postgresql}
+
+PostgreSQL 引擎允许 ClickHouse 对存储在远程 PostgreSQL 服务器上的数据执行 `SELECT` 和 `INSERT` 查询.
+
+## 创建一张表 {#creating-a-table}
+
+``` sql
+CREATE TABLE [IF NOT EXISTS] [db.]table_name [ON CLUSTER cluster]
+(
+    name1 [type1] [DEFAULT|MATERIALIZED|ALIAS expr1] [TTL expr1],
+    name2 [type2] [DEFAULT|MATERIALIZED|ALIAS expr2] [TTL expr2],
+    ...
+) ENGINE = PostgreSQL('host:port', 'database', 'table', 'user', 'password'[, `schema`]);
+```
+
+<!-- 详情请见 [CREATE TABLE](../../../sql-reference/statements/create/table.md#create-table-query) 查询. -->
+
+表结构可以与 PostgreSQL 源表结构不同:
+
+-   列名应与 PostgreSQL 源表中的列名相同，但您可以按任何顺序使用其中的一些列。
+-   列类型可能与源表中的列类型不同。 ClickHouse尝试将数值[映射](../../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) 到ClickHouse的数据类型。
+-   设置 `external_table_functions_use_nulls` 来定义如何处理 Nullable 列. 默认值是 1, 当设置为 0 时 - 表函数将不会使用 nullable 列，而是插入默认值来代替 null. 这同样适用于数组数据类型中的 null 值.
+
+**引擎参数**
+
+-   `host:port` — PostgreSQL 服务器地址.
+-   `database` — 数据库名称.
+-   `table` — 表名称.
+-   `user` — PostgreSQL 用户.
+-   `password` — 用户密码.
+-   `schema` — 非默认的表模式（schema）. 可选.
+
+## 实施细节 {#implementation-details}
+
+在 PostgreSQL 上的 `SELECT` 查询以 `COPY (SELECT ...) TO STDOUT` 的方式在只读 PostgreSQL 事务中运行，每次 `SELECT` 查询后提交。
+
+简单的 `WHERE` 子句，如`=`，`!=`，`>`，`>=`，`<`，`<=`，和`IN`是在PostgreSQL 服务器上执行。
+
+所有的连接、聚合、排序、`IN [ array ]`条件和`LIMIT`采样约束都是在 PostgreSQL 的查询结束后才在ClickHouse中执行的。
+
+在 PostgreSQL 上的 `INSERT` 查询以 `COPY "table_name" (field1, field2, ... fieldN) FROM STDIN` 的方式在 PostgreSQL 事务中运行，每条 `INSERT` 语句后自动提交。
+
+PostgreSQL 的 `Array` 类型会被转换为 ClickHouse 数组。
+
+!!! info "Note"
+    要小心 - 一个在 PostgreSQL 中的数组数据，像`type_name[]`这样创建，可以在同一列的不同表行中包含不同维度的多维数组。但是在 ClickHouse 中，只允许在同一列的所有表行中包含相同维数的多维数组。
+
+支持设置 PostgreSQL 字典源中 Replicas 的优先级。映射中的数字越大，优先级就越低。最高的优先级是 `0`。
+
+在下面的例子中，副本`example01-1`有最高的优先级。
+
+```xml
+<postgresql>
+    <port>5432</port>
+    <user>clickhouse</user>
+    <password>qwerty</password>
+    <replica>
+        <host>example01-1</host>
+        <priority>1</priority>
+    </replica>
+    <replica>
+        <host>example01-2</host>
+        <priority>2</priority>
+    </replica>
+    <db>db_name</db>
+    <table>table_name</table>
+    <where>id=10</where>
+    <invalidate_query>SQL_QUERY</invalidate_query>
+</postgresql>
+</source>
+```
+
+## 用法示例 {#usage-example}
+
+PostgreSQL 中的表:
+
+``` text
+postgres=# CREATE TABLE "public"."test" (
+"int_id" SERIAL,
+"int_nullable" INT NULL DEFAULT NULL,
+"float" FLOAT NOT NULL,
+"str" VARCHAR(100) NOT NULL DEFAULT '',
+"float_nullable" FLOAT NULL DEFAULT NULL,
+PRIMARY KEY (int_id));
+
+CREATE TABLE
+
+postgres=# INSERT INTO test (int_id, str, "float") VALUES (1,'test',2);
+INSERT 0 1
+
+postgres=# SELECT * FROM test;
+  int_id | int_nullable | float | str  | float_nullable
+ --------+--------------+-------+------+----------------
+       1 |              |     2 | test |
+ (1 row)
+```
+
+ClickHouse 中的表, 从上面创建的 PostgreSQL 表中检索数据:
+
+``` sql
+CREATE TABLE default.postgresql_table
+(
+    `float_nullable` Nullable(Float32),
+    `str` String,
+    `int_id` Int32
+)
+ENGINE = PostgreSQL('localhost:5432', 'public', 'test', 'postgres_user', 'postgres_password');
+```
+
+``` sql
+SELECT * FROM postgresql_table WHERE str IN ('test');
+```
+
+``` text
+┌─float_nullable─┬─str──┬─int_id─┐
+│           ᴺᵁᴸᴸ │ test │      1 │
+└────────────────┴──────┴────────┘
+```
+
+使用非默认的模式:
+
+```text
+postgres=# CREATE SCHEMA "nice.schema";
+
+postgres=# CREATE TABLE "nice.schema"."nice.table" (a integer);
+
+postgres=# INSERT INTO "nice.schema"."nice.table" SELECT i FROM generate_series(0, 99) as t(i)
+```
+
+```sql
+CREATE TABLE pg_table_schema_with_dots (a UInt32)
+        ENGINE PostgreSQL('localhost:5432', 'clickhouse', 'nice.table', 'postgresql_user', 'password', 'nice.schema');
+```
+
+**另请参阅**
+
+<!-- -   [`postgresql` 表函数](../../../sql-reference/table-functions/postgresql.md) -->
+-   [使用 PostgreSQL 作为外部字典的来源](../../../sql-reference/dictionaries/external-dictionaries/external-dicts-dict-sources.md#dicts-external_dicts_dict_sources-postgresql)
+
+[原始文章](https://clickhouse.tech/docs/en/engines/table-engines/integrations/postgresql/) <!--hide-->
--- a/docs/zh/interfaces/formats.md
+++ b/docs/zh/interfaces/formats.md
--- a/docs/zh/sql-reference/aggregate-functions/parametric-functions.md
+++ b/docs/zh/sql-reference/aggregate-functions/parametric-functions.md
@ -112,7 +112,7 @@ sequenceMatch(pattern)(timestamp, cond1, cond2, ...)

 -   `.*` — 匹配任何事件的数字。 不需要条件参数来匹配这个模式。

-   `(?t operator value)` — 分开两个事件的时间。 例如： `(?1)(?t>1800)(?2)` 匹配彼此发生超过1800秒的事件。 这些事件之间可以存在任意数量的任何事件。 您可以使用 `>=`, `>`, `<`, `<=` 运算符。
+-   `(?t operator value)` — 分开两个事件的时间。 例如： `(?1)(?t>1800)(?2)` 匹配彼此发生超过1800秒的事件。 这些事件之间可以存在任意数量的任何事件。 您可以使用 `>=`, `>`, `<`, `<=`, `==` 运算符。

 **例**

--- a/programs/CMakeLists.txt
+++ b/programs/CMakeLists.txt
@ -33,6 +33,7 @@ option (ENABLE_CLICKHOUSE_OBFUSCATOR "Table data obfuscator (convert real data t
    ${ENABLE_CLICKHOUSE_ALL})

 # https://clickhouse.tech/docs/en/operations/utilities/odbc-bridge/
+# TODO Also needs NANODBC.
 if (ENABLE_ODBC)
    option (ENABLE_CLICKHOUSE_ODBC_BRIDGE "HTTP-server working like a proxy to ODBC driver"
        ${ENABLE_CLICKHOUSE_ALL})
@ -204,55 +205,6 @@ macro(clickhouse_program_add name)
    clickhouse_program_add_executable(${name})
 endmacro()

-# Embed default config files as a resource into the binary.
-# This is needed for two purposes:
-# 1. Allow to run the binary without download of any other files.
-# 2. Allow to implement "sudo clickhouse install" tool.
-#
-# Arguments: target (server, client, keeper, etc.) and list of files
-#
-# Also dependency on TARGET_FILE is required, look at examples in programs/server and programs/keeper
-macro(clickhouse_embed_binaries)
-    # TODO We actually need this on Mac, FreeBSD.
-    if (OS_LINUX)
-
-        set(arguments_list "${ARGN}")
-        list(GET arguments_list 0 target)
-
-        # for some reason cmake iterates loop including <stop>
-        math(EXPR arguments_count "${ARGC}-1")
-
-        foreach(RESOURCE_POS RANGE 1 "${arguments_count}")
-            list(GET arguments_list "${RESOURCE_POS}" RESOURCE_FILE)
-            set(RESOURCE_OBJ ${RESOURCE_FILE}.o)
-            set(RESOURCE_OBJS ${RESOURCE_OBJS} ${RESOURCE_OBJ})
-
-            # https://stackoverflow.com/questions/14776463/compile-and-add-an-object-file-from-a-binary-with-cmake
-            # PPC64LE fails to do this with objcopy, use ld or lld instead
-            if (ARCH_PPC64LE)
-                add_custom_command(OUTPUT ${RESOURCE_OBJ}
-                    COMMAND cd ${CMAKE_CURRENT_SOURCE_DIR} && ${CMAKE_LINKER} -m elf64lppc -r -b binary -o "${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}" ${RESOURCE_FILE})
-            else()
-                add_custom_command(OUTPUT ${RESOURCE_OBJ}
-                    COMMAND cd ${CMAKE_CURRENT_SOURCE_DIR} && ${OBJCOPY_PATH} -I binary ${OBJCOPY_ARCH_OPTIONS} ${RESOURCE_FILE} "${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}"
-                    COMMAND ${OBJCOPY_PATH} --rename-section .data=.rodata,alloc,load,readonly,data,contents
-                        "${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}" "${CMAKE_CURRENT_BINARY_DIR}/${RESOURCE_OBJ}")
-            endif()
-            set_source_files_properties(${RESOURCE_OBJ} PROPERTIES EXTERNAL_OBJECT true GENERATED true)
-        endforeach()
-
-        add_library(clickhouse_${target}_configs STATIC ${RESOURCE_OBJS})
-        set_target_properties(clickhouse_${target}_configs PROPERTIES LINKER_LANGUAGE C)
-
-        # whole-archive prevents symbols from being discarded for unknown reason
-        # CMake can shuffle each of target_link_libraries arguments with other
-        # libraries in linker command. To avoid this we hardcode whole-archive
-        # library into single string.
-        add_dependencies(clickhouse-${target}-lib clickhouse_${target}_configs)
-    endif ()
-endmacro()
-
-
 add_subdirectory (server)
 add_subdirectory (client)
 add_subdirectory (local)
--- a/programs/bash-completion/completions/clickhouse-bootstrap
+++ b/programs/bash-completion/completions/clickhouse-bootstrap
@ -20,6 +20,7 @@ CLICKHOUSE_QueryProcessingStage=(
    fetch_columns
    with_mergeable_state
    with_mergeable_state_after_aggregation
+    with_mergeable_state_after_aggregation_and_limit
 )

 CLICKHOUSE_Format=(
--- a/programs/benchmark/Benchmark.cpp
+++ b/programs/benchmark/Benchmark.cpp
@ -580,7 +580,7 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
            ("query",      value<std::string>()->default_value(""),             "query to execute")
            ("concurrency,c", value<unsigned>()->default_value(1),              "number of parallel queries")
            ("delay,d",       value<double>()->default_value(1),                "delay between intermediate reports in seconds (set 0 to disable reports)")
-            ("stage",         value<std::string>()->default_value("complete"),  "request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation")
+            ("stage",         value<std::string>()->default_value("complete"),  "request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation,with_mergeable_state_after_aggregation_and_limit")
            ("iterations,i",  value<size_t>()->default_value(0),                "amount of queries to be executed")
            ("timelimit,t",   value<double>()->default_value(0.),               "stop launch of queries after specified time limit")
            ("randomize,r",   value<bool>()->default_value(false),              "randomize order of execution")
--- a/programs/client/Client.cpp
+++ b/programs/client/Client.cpp
@ -21,7 +21,7 @@
 #include <unordered_set>
 #include <algorithm>
 #include <optional>
-#include <ext/scope_guard_safe.h>
+#include <common/scope_guard_safe.h>
 #include <boost/program_options.hpp>
 #include <boost/algorithm/string/replace.hpp>
 #include <Poco/String.h>
@ -29,7 +29,6 @@
 #include <common/find_symbols.h>
 #include <common/LineReader.h>
 #include <Common/ClickHouseRevision.h>
-#include <Common/Stopwatch.h>
 #include <Common/Exception.h>
 #include <Common/ShellCommand.h>
 #include <Common/UnicodeBar.h>
@ -85,7 +84,7 @@
 #include <common/argsToConfig.h>
 #include <Common/TerminalSize.h>
 #include <Common/UTF8Helpers.h>
-#include <Common/ProgressBar.h>
+#include <Common/ProgressIndication.h>
 #include <filesystem>
 #include <Common/filesystemHelpers.h>

@ -113,6 +112,7 @@ namespace ErrorCodes
    extern const int DEADLOCK_AVOIDED;
    extern const int UNRECOGNIZED_ARGUMENTS;
    extern const int SYNTAX_ERROR;
+    extern const int TOO_DEEP_RECURSION;
 }


@ -230,13 +230,13 @@ private:
    String server_version;
    String server_display_name;

-    Stopwatch watch;
+    /// true by default - for interactive mode, might be changed when --progress option is checked for
+    /// non-interactive mode.
+    bool need_render_progress = true;

-    /// The server periodically sends information about how much data was read since last time.
-    Progress progress;
+    bool written_first_block = false;

-    /// Progress bar
-    ProgressBar progress_bar;
+    ProgressIndication progress_indication;

    /// External tables info.
    std::list<ExternalTable> external_tables;
@ -536,7 +536,7 @@ private:

        if (!is_interactive)
        {
-            progress_bar.need_render_progress = config().getBool("progress", false);
+            need_render_progress = config().getBool("progress", false);
            echo_queries = config().getBool("echo", false);
            ignore_error = config().getBool("ignore-error", false);
        }
@ -549,65 +549,6 @@ private:

        /// Initialize DateLUT here to avoid counting time spent here as query execution time.
        const auto local_tz = DateLUT::instance().getTimeZone();
-        if (!context->getSettingsRef().use_client_time_zone)
-        {
-            const auto & time_zone = connection->getServerTimezone(connection_parameters.timeouts);
-            if (!time_zone.empty())
-            {
-                try
-                {
-                    DateLUT::setDefaultTimezone(time_zone);
-                }
-                catch (...)
-                {
-                    std::cerr << "Warning: could not switch to server time zone: " << time_zone
-                              << ", reason: " << getCurrentExceptionMessage(/* with_stacktrace = */ false) << std::endl
-                              << "Proceeding with local time zone." << std::endl
-                              << std::endl;
-                }
-            }
-            else
-            {
-                std::cerr << "Warning: could not determine server time zone. "
-                          << "Proceeding with local time zone." << std::endl
-                          << std::endl;
-            }
-        }
-
-        Strings keys;
-
-        prompt_by_server_display_name = config().getRawString("prompt_by_server_display_name.default", "{display_name} :) ");
-
-        config().keys("prompt_by_server_display_name", keys);
-
-        for (const String & key : keys)
-        {
-            if (key != "default" && server_display_name.find(key) != std::string::npos)
-            {
-                prompt_by_server_display_name = config().getRawString("prompt_by_server_display_name." + key);
-                break;
-            }
-        }
-
-        /// Prompt may contain escape sequences including \e[ or \x1b[ sequences to set terminal color.
-        {
-            String unescaped_prompt_by_server_display_name;
-            ReadBufferFromString in(prompt_by_server_display_name);
-            readEscapedString(unescaped_prompt_by_server_display_name, in);
-            prompt_by_server_display_name = std::move(unescaped_prompt_by_server_display_name);
-        }
-
-        /// Prompt may contain the following substitutions in a form of {name}.
-        std::map<String, String> prompt_substitutions{
-            {"host", connection_parameters.host},
-            {"port", toString(connection_parameters.port)},
-            {"user", connection_parameters.user},
-            {"display_name", server_display_name},
-        };
-
-        /// Quite suboptimal.
-        for (const auto & [key, value] : prompt_substitutions)
-            boost::replace_all(prompt_by_server_display_name, "{" + key + "}", value);

        if (is_interactive)
        {
@ -805,6 +746,66 @@ private:
                          << std::endl;
            }
        }
+
+        if (!context->getSettingsRef().use_client_time_zone)
+        {
+            const auto & time_zone = connection->getServerTimezone(connection_parameters.timeouts);
+            if (!time_zone.empty())
+            {
+                try
+                {
+                    DateLUT::setDefaultTimezone(time_zone);
+                }
+                catch (...)
+                {
+                    std::cerr << "Warning: could not switch to server time zone: " << time_zone
+                              << ", reason: " << getCurrentExceptionMessage(/* with_stacktrace = */ false) << std::endl
+                              << "Proceeding with local time zone." << std::endl
+                              << std::endl;
+                }
+            }
+            else
+            {
+                std::cerr << "Warning: could not determine server time zone. "
+                          << "Proceeding with local time zone." << std::endl
+                          << std::endl;
+            }
+        }
+
+        Strings keys;
+
+        prompt_by_server_display_name = config().getRawString("prompt_by_server_display_name.default", "{display_name} :) ");
+
+        config().keys("prompt_by_server_display_name", keys);
+
+        for (const String & key : keys)
+        {
+            if (key != "default" && server_display_name.find(key) != std::string::npos)
+            {
+                prompt_by_server_display_name = config().getRawString("prompt_by_server_display_name." + key);
+                break;
+            }
+        }
+
+        /// Prompt may contain escape sequences including \e[ or \x1b[ sequences to set terminal color.
+        {
+            String unescaped_prompt_by_server_display_name;
+            ReadBufferFromString in(prompt_by_server_display_name);
+            readEscapedString(unescaped_prompt_by_server_display_name, in);
+            prompt_by_server_display_name = std::move(unescaped_prompt_by_server_display_name);
+        }
+
+        /// Prompt may contain the following substitutions in a form of {name}.
+        std::map<String, String> prompt_substitutions{
+            {"host", connection_parameters.host},
+            {"port", toString(connection_parameters.port)},
+            {"user", connection_parameters.user},
+            {"display_name", server_display_name},
+        };
+
+        /// Quite suboptimal.
+        for (const auto & [key, value] : prompt_substitutions)
+            boost::replace_all(prompt_by_server_display_name, "{" + key + "}", value);
    }


@ -1202,7 +1203,9 @@ private:
                client_exception.reset();
                server_exception.reset();
                have_error = false;
-                connection->forceConnected(connection_parameters.timeouts);
+
+                if (!connection->checkConnected())
+                    connect();
            }

            // Report error.
@ -1265,7 +1268,8 @@ private:
        }
        catch (const Exception & e)
        {
-            if (e.code() != ErrorCodes::SYNTAX_ERROR)
+            if (e.code() != ErrorCodes::SYNTAX_ERROR &&
+                e.code() != ErrorCodes::TOO_DEEP_RECURSION)
                throw;
        }

@ -1447,11 +1451,10 @@ private:
                }
                catch (Exception & e)
                {
-                    if (e.code() != ErrorCodes::SYNTAX_ERROR)
-                    {
+                    if (e.code() != ErrorCodes::SYNTAX_ERROR &&
+                        e.code() != ErrorCodes::TOO_DEEP_RECURSION)
                        throw;
                }
-                }

                if (ast_2)
                {
@ -1575,12 +1578,9 @@ private:
            }
        }

-        watch.restart();
        processed_rows = 0;
-        progress.reset();
-        progress_bar.show_progress_bar = false;
-        progress_bar.written_progress_chars = 0;
-        progress_bar.written_first_block = false;
+        written_first_block = false;
+        progress_indication.resetProgress();

        {
            /// Temporarily apply query settings to context.
@ -1603,7 +1603,8 @@ private:
            if (with_output && with_output->settings_ast)
                apply_query_settings(*with_output->settings_ast);

-            connection->forceConnected(connection_parameters.timeouts);
+            if (!connection->checkConnected())
+                connect();

            ASTPtr input_function;
            if (insert && insert->select)
@ -1647,16 +1648,15 @@ private:

        if (is_interactive)
        {
-            std::cout << std::endl << processed_rows << " rows in set. Elapsed: " << watch.elapsedSeconds() << " sec. ";
-
-            if (progress.read_rows >= 1000)
+            std::cout << std::endl << processed_rows << " rows in set. Elapsed: " << progress_indication.elapsedSeconds() << " sec. ";
+            /// Write final progress if it makes sense to do so.
            writeFinalProgress();

            std::cout << std::endl << std::endl;
        }
        else if (print_time_to_stderr)
        {
-            std::cerr << watch.elapsedSeconds() << "\n";
+            std::cerr << progress_indication.elapsedSeconds() << "\n";
        }
    }

@ -1831,6 +1831,19 @@ private:
            /// Send data read from stdin.
            try
            {
+                if (need_render_progress)
+                {
+                    /// Set total_bytes_to_read for current fd.
+                    FileProgress file_progress(0, std_in.size());
+                    progress_indication.updateProgress(Progress(file_progress));
+
+                    /// Set callback to be called on file progress.
+                    progress_indication.setFileProgressCallback(context, true);
+
+                    /// Add callback to track reading from fd.
+                    std_in.setProgressCallback(context);
+                }
+
                sendDataFrom(std_in, sample, columns_description);
            }
            catch (Exception & e)
@ -1953,7 +1966,7 @@ private:
                        cancelled = true;
                        if (is_interactive)
                        {
-                            progress_bar.clearProgress();
+                            progress_indication.clearProgressOutput();
                            std::cout << "Cancelling query." << std::endl;
                        }

@ -2180,7 +2193,7 @@ private:
                current_format = "Vertical";

            /// It is not clear how to write progress with parallel formatting. It may increase code complexity significantly.
-            if (!progress_bar.need_render_progress)
+            if (!need_render_progress)
                block_out_stream = context->getOutputStreamParallelIfPossible(current_format, *out_buf, block);
            else
                block_out_stream = context->getOutputStream(current_format, *out_buf, block);
@ -2239,25 +2252,25 @@ private:
        if (block.rows() == 0 || (query_fuzzer_runs != 0 && processed_rows >= 100))
            return;

-        if (progress_bar.need_render_progress)
-            progress_bar.clearProgress();
+        if (need_render_progress)
+            progress_indication.clearProgressOutput();

        block_out_stream->write(block);
-        progress_bar.written_first_block = true;
+        written_first_block = true;

        /// Received data block is immediately displayed to the user.
        block_out_stream->flush();

        /// Restore progress bar after data block.
-        if (progress_bar.need_render_progress)
-            progress_bar.writeProgress(progress, watch.elapsed());
+        if (need_render_progress)
+            progress_indication.writeProgress();
    }


    void onLogData(Block & block)
    {
        initLogsOutputStream();
-        progress_bar.clearProgress();
+        progress_indication.clearProgressOutput();
        logs_out_stream->write(block);
        logs_out_stream->flush();
    }
@ -2278,28 +2291,23 @@ private:

    void onProgress(const Progress & value)
    {
-        if (!progress_bar.updateProgress(progress, value))
+        if (!progress_indication.updateProgress(value))
        {
            // Just a keep-alive update.
            return;
        }
+
        if (block_out_stream)
            block_out_stream->onProgress(value);
-        progress_bar.writeProgress(progress, watch.elapsed());
+
+        if (need_render_progress)
+            progress_indication.writeProgress();
    }


    void writeFinalProgress()
    {
-        std::cout << "Processed " << formatReadableQuantity(progress.read_rows) << " rows, "
-                  << formatReadableSizeWithDecimalSuffix(progress.read_bytes);
-
-        size_t elapsed_ns = watch.elapsed();
-        if (elapsed_ns)
-            std::cout << " (" << formatReadableQuantity(progress.read_rows * 1000000000.0 / elapsed_ns) << " rows/s., "
-                      << formatReadableSizeWithDecimalSuffix(progress.read_bytes * 1000000000.0 / elapsed_ns) << "/s.)";
-        else
-            std::cout << ". ";
+        progress_indication.writeFinalProgress();
    }


@ -2320,7 +2328,7 @@ private:

    void onEndOfStream()
    {
-        progress_bar.clearProgress();
+        progress_indication.clearProgressOutput();

        if (block_out_stream)
            block_out_stream->writeSuffix();
@ -2330,9 +2338,9 @@ private:

        resetOutput();

-        if (is_interactive && !progress_bar.written_first_block)
+        if (is_interactive && !written_first_block)
        {
-            progress_bar.clearProgress();
+            progress_indication.clearProgressOutput();
            std::cout << "Ok." << std::endl;
        }
    }
@ -2464,7 +2472,7 @@ public:
            ("password", po::value<std::string>()->implicit_value("\n", ""), "password")
            ("ask-password", "ask-password")
            ("quota_key", po::value<std::string>(), "A string to differentiate quotas when the user have keyed quotas configured on server")
-            ("stage", po::value<std::string>()->default_value("complete"), "Request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation")
+            ("stage", po::value<std::string>()->default_value("complete"), "Request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation,with_mergeable_state_after_aggregation_and_limit")
            ("query_id", po::value<std::string>(), "query_id")
            ("query,q", po::value<std::string>(), "query")
            ("database,d", po::value<std::string>(), "database")
--- a/programs/client/ConnectionParameters.cpp
+++ b/programs/client/ConnectionParameters.cpp
@ -10,7 +10,7 @@
 #include <Common/isLocalAddress.h>
 #include <Common/DNSResolver.h>
 #include <common/setTerminalEcho.h>
-#include <ext/scope_guard.h>
+#include <common/scope_guard.h>

 #if !defined(ARCADIA_BUILD)
 #include <readpassphrase.h> // Y_IGNORE
--- a/programs/client/QueryFuzzer.cpp
+++ b/programs/client/QueryFuzzer.cpp
@ -463,9 +463,7 @@ void QueryFuzzer::fuzz(ASTPtr & ast)
    }
    else if (auto * table_expr = typeid_cast<ASTTableExpression *>(ast.get()))
    {
-        fuzz(table_expr->database_and_table_name);
-        fuzz(table_expr->subquery);
-        fuzz(table_expr->table_function);
+        fuzz(table_expr->children);
    }
    else if (auto * expr_list = typeid_cast<ASTExpressionList *>(ast.get()))
    {
--- a/programs/copier/ClusterCopier.cpp
+++ b/programs/copier/ClusterCopier.cpp
@ -1746,7 +1746,7 @@ void ClusterCopier::createShardInternalTables(const ConnectionTimeouts & timeout
    task_shard.table_read_shard = DatabaseAndTableName(working_database_name, read_shard_prefix + task_table.table_id);
    task_shard.main_table_split_shard = DatabaseAndTableName(working_database_name, split_shard_prefix + task_table.table_id);

-    for (const auto & piece_number : ext::range(0, task_table.number_of_splits))
+    for (const auto & piece_number : collections::range(0, task_table.number_of_splits))
    {
        task_shard.list_of_split_tables_on_shard[piece_number] =
                DatabaseAndTableName(working_database_name, split_shard_prefix + task_table.table_id + "_piece_" + toString(piece_number));
@ -1776,7 +1776,7 @@ void ClusterCopier::createShardInternalTables(const ConnectionTimeouts & timeout
        dropAndCreateLocalTable(create_table_split_piece_ast);

        /// Create auxiliary split tables for each piece
-        for (const auto & piece_number : ext::range(0, task_table.number_of_splits))
+        for (const auto & piece_number : collections::range(0, task_table.number_of_splits))
        {
            const auto & storage_piece_split_ast = task_table.auxiliary_engine_split_asts[piece_number];

--- a/programs/copier/ClusterCopierApp.cpp
+++ b/programs/copier/ClusterCopierApp.cpp
@ -3,7 +3,7 @@
 #include <Common/TerminalSize.h>
 #include <IO/ConnectionTimeoutsContext.h>
 #include <Formats/registerFormats.h>
-#include <ext/scope_guard_safe.h>
+#include <common/scope_guard_safe.h>
 #include <unistd.h>
 #include <filesystem>

--- a/programs/copier/TaskTableAndShard.h
+++ b/programs/copier/TaskTableAndShard.h
@ -6,7 +6,7 @@

 #include <Core/Defines.h>

-#include <ext/map.h>
+#include <common/map.h>
 #include <boost/algorithm/string/join.hpp>


@ -286,7 +286,7 @@ inline TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConf
               + "." + escapeForFileName(table_push.first)
               + "." + escapeForFileName(table_push.second);

-    engine_push_str = config.getString(table_prefix + "engine");
+    engine_push_str = config.getString(table_prefix + "engine", "rand()");

    {
        ParserStorage parser_storage;
@ -305,7 +305,7 @@ inline TaskTable::TaskTable(TaskCluster & parent, const Poco::Util::AbstractConf
        main_engine_split_ast = createASTStorageDistributed(cluster_push_name, table_push.first, table_push.second,
                                                            sharding_key_ast);

-        for (const auto piece_number : ext::range(0, number_of_splits))
+        for (const auto piece_number : collections::range(0, number_of_splits))
        {
            auxiliary_engine_split_asts.emplace_back
                    (
--- a/programs/embed_binary.S.in
+++ b/programs/embed_binary.S.in
@ -0,0 +1,17 @@
+// Embed a binary file into an executable.
+
+// The variable BINARY_FILE_NAME is the actual name of the file to include.
+// The variable SYMBOL_NAME is the "normalized" name of the symbol, with
+// characters like `-`, `.`, and `/` replaced with `_`. This matches how
+// objcopy rewrites symbol names, and matches the expectation in
+// `base/common/getResource.cpp`.
+
+    .data  // emit everything below into the data section
+    .global _binary_@SYMBOL_NAME@_start  // exported label: first byte of the embedded file
+_binary_@SYMBOL_NAME@_start:
+    .incbin "@BINARY_FILE_NAME@"  // raw contents of the file, included verbatim
+    .global _binary_@SYMBOL_NAME@_end  // exported label: one past the last embedded byte
+_binary_@SYMBOL_NAME@_end:
+    .global _binary_@SYMBOL_NAME@_size  // exported label of the 8-byte length value below
+_binary_@SYMBOL_NAME@_size:
+    .quad _binary_@SYMBOL_NAME@_end - _binary_@SYMBOL_NAME@_start  // length of the embedded file in bytes
--- a/programs/install/Install.cpp
+++ b/programs/install/Install.cpp
@ -75,6 +75,9 @@ namespace ErrorCodes
 #define HILITE "\033[1m"
 #define END_HILITE "\033[0m"

+static constexpr auto CLICKHOUSE_BRIDGE_USER = "clickhouse-bridge";
+static constexpr auto CLICKHOUSE_BRIDGE_GROUP = "clickhouse-bridge";
+
 using namespace DB;
 namespace po = boost::program_options;
 namespace fs = std::filesystem;
@ -150,7 +153,6 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
            << argv[0]
            << " install [options]\n";
        std::cout << desc << '\n';
-        return 1;
    }

    try
@ -324,26 +326,34 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
        std::string user = options["user"].as<std::string>();
        std::string group = options["group"].as<std::string>();

-        if (!group.empty())
+        auto create_group = [](const String & group_name)
        {
-            {
-                fmt::print("Creating clickhouse group if it does not exist.\n");
-                std::string command = fmt::format("groupadd -r {}", group);
+            std::string command = fmt::format("groupadd -r {}", group_name);
            fmt::print(" {}\n", command);
            executeScript(command);
-            }
+        };
+
+        if (!group.empty())
+        {
+            fmt::print("Creating clickhouse group if it does not exist.\n");
+            create_group(group);
        }
        else
            fmt::print("Will not create clickhouse group");

+        auto create_user = [](const String & user_name, const String & group_name)
+        {
+            std::string command = group_name.empty()
+                ? fmt::format("useradd -r --shell /bin/false --home-dir /nonexistent --user-group {}", user_name)
+                : fmt::format("useradd -r --shell /bin/false --home-dir /nonexistent -g {} {}", group_name, user_name);
+            fmt::print(" {}\n", command);
+            executeScript(command);
+        };
+
        if (!user.empty())
        {
            fmt::print("Creating clickhouse user if it does not exist.\n");
-            std::string command = group.empty()
-                ? fmt::format("useradd -r --shell /bin/false --home-dir /nonexistent --user-group {}", user)
-                : fmt::format("useradd -r --shell /bin/false --home-dir /nonexistent -g {} {}", group, user);
-            fmt::print(" {}\n", command);
-            executeScript(command);
+            create_user(user, group);

            if (group.empty())
                group = user;
@ -475,12 +485,15 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
            }
        }

-        /// Chmod and chown configs
+        auto change_ownership = [](const String & file_name, const String & user_name, const String & group_name)
        {
-            std::string command = fmt::format("chown --recursive {}:{} '{}'", user, group, config_dir.string());
+            std::string command = fmt::format("chown --recursive {}:{} '{}'", user_name, group_name, file_name);
            fmt::print(" {}\n", command);
            executeScript(command);
-        }
+        };
+
+        /// Chmod and chown configs
+        change_ownership(config_dir.string(), user, group);

        /// Symlink "preprocessed_configs" is created by the server, so "write" is needed.
        fs::permissions(config_dir, fs::perms::owner_all, fs::perm_options::replace);
@ -558,7 +571,19 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
        /// Data directory is not accessible to anyone except clickhouse.
        fs::permissions(data_path, fs::perms::owner_all, fs::perm_options::replace);

-        /// Set up password for default user.
+        fs::path odbc_bridge_path = bin_dir / "clickhouse-odbc-bridge";
+        fs::path library_bridge_path = bin_dir / "clickhouse-library-bridge";
+
+        if (fs::exists(odbc_bridge_path) || fs::exists(library_bridge_path))
+        {
+            create_group(CLICKHOUSE_BRIDGE_GROUP);
+            create_user(CLICKHOUSE_BRIDGE_USER, CLICKHOUSE_BRIDGE_GROUP);
+
+            if (fs::exists(odbc_bridge_path))
+                change_ownership(odbc_bridge_path, CLICKHOUSE_BRIDGE_USER, CLICKHOUSE_BRIDGE_GROUP);
+            if (fs::exists(library_bridge_path))
+                change_ownership(library_bridge_path, CLICKHOUSE_BRIDGE_USER, CLICKHOUSE_BRIDGE_GROUP);
+        }

        bool stdin_is_a_tty = isatty(STDIN_FILENO);
        bool stdout_is_a_tty = isatty(STDOUT_FILENO);
@ -573,6 +598,7 @@ int mainEntryClickHouseInstall(int argc, char ** argv)
        /// We can ask password even if stdin is closed/redirected but /dev/tty is available.
        bool can_ask_password = !noninteractive && stdout_is_a_tty;

+        /// Set up password for default user.
        if (has_password_for_default_user)
        {
            fmt::print(HILITE "Password for default user is already specified. To remind or reset, see {} and {}." END_HILITE "\n",
@ -818,6 +844,8 @@ namespace
        UInt64 pid = 0;

        if (fs::exists(pid_file))
+        {
+            try
            {
                ReadBufferFromFile in(pid_file.string());
                if (tryReadIntText(pid, in))
@ -830,6 +858,14 @@ namespace
                    fs::remove(pid_file);
                }
            }
+            catch (const Exception & e)
+            {
+                if (e.code() != ErrorCodes::FILE_DOESNT_EXIST)
+                    throw;
+
+                /// If file does not exist (TOCTOU) - it's ok.
+            }
+        }

        if (!pid)
        {
--- a/programs/keeper/CMakeLists.txt
+++ b/programs/keeper/CMakeLists.txt
@ -1,3 +1,5 @@
+include(${ClickHouse_SOURCE_DIR}/cmake/embed_binary.cmake)
+
 set(CLICKHOUSE_KEEPER_SOURCES
    Keeper.cpp
 )
@ -21,4 +23,8 @@ clickhouse_program_add(keeper)

 install (FILES keeper_config.xml DESTINATION "${CLICKHOUSE_ETC_DIR}/clickhouse-keeper" COMPONENT clickhouse-keeper)

-clickhouse_embed_binaries(keeper keeper_config.xml keeper_embedded.xml)
+clickhouse_embed_binaries(
+    TARGET clickhouse_keeper_configs
+    RESOURCES keeper_config.xml keeper_embedded.xml
+)
+add_dependencies(clickhouse-keeper-lib clickhouse_keeper_configs)
--- a/programs/keeper/Keeper.cpp
+++ b/programs/keeper/Keeper.cpp
@ -12,7 +12,7 @@
 #include <common/defines.h>
 #include <common/logger_useful.h>
 #include <common/ErrorHandlers.h>
-#include <ext/scope_guard.h>
+#include <common/scope_guard.h>
 #include <Poco/Util/HelpFormatter.h>
 #include <Poco/Version.h>
 #include <Poco/Environment.h>
--- a/Show More
+++ b/Show More