Merge branch 'master' into kssenii-rabbitmq-improvements

Commit 4ce975c512 by alesapin, 2020-09-07 11:36:08 +03:00
255 changed files with 6679 additions and 1174 deletions


@ -17,5 +17,4 @@ ClickHouse is an open-source column-oriented database management system that allows generating analytical data reports in real time.
## Upcoming Events
* [ClickHouse at ByteDance (in Chinese)](https://mp.weixin.qq.com/s/Em-HjPylO8D7WPui4RREAQ) on August 28, 2020.
* [ClickHouse Data Integration Virtual Meetup](https://www.eventbrite.com/e/clickhouse-september-virtual-meetup-data-integration-tickets-117421895049) on September 10, 2020.


@ -38,18 +38,18 @@ namespace common
}
template <>
inline bool addOverflow(bInt256 x, bInt256 y, bInt256 & res)
inline bool addOverflow(wInt256 x, wInt256 y, wInt256 & res)
{
res = x + y;
return (y > 0 && x > std::numeric_limits<bInt256>::max() - y) ||
(y < 0 && x < std::numeric_limits<bInt256>::min() - y);
return (y > 0 && x > std::numeric_limits<wInt256>::max() - y) ||
(y < 0 && x < std::numeric_limits<wInt256>::min() - y);
}
template <>
inline bool addOverflow(bUInt256 x, bUInt256 y, bUInt256 & res)
inline bool addOverflow(wUInt256 x, wUInt256 y, wUInt256 & res)
{
res = x + y;
return x > std::numeric_limits<bUInt256>::max() - y;
return x > std::numeric_limits<wUInt256>::max() - y;
}
template <typename T>
@ -86,15 +86,15 @@ namespace common
}
template <>
inline bool subOverflow(bInt256 x, bInt256 y, bInt256 & res)
inline bool subOverflow(wInt256 x, wInt256 y, wInt256 & res)
{
res = x - y;
return (y < 0 && x > std::numeric_limits<bInt256>::max() + y) ||
(y > 0 && x < std::numeric_limits<bInt256>::min() + y);
return (y < 0 && x > std::numeric_limits<wInt256>::max() + y) ||
(y > 0 && x < std::numeric_limits<wInt256>::min() + y);
}
template <>
inline bool subOverflow(bUInt256 x, bUInt256 y, bUInt256 & res)
inline bool subOverflow(wUInt256 x, wUInt256 y, wUInt256 & res)
{
res = x - y;
return x < y;
@ -137,19 +137,19 @@ namespace common
}
template <>
inline bool mulOverflow(bInt256 x, bInt256 y, bInt256 & res)
inline bool mulOverflow(wInt256 x, wInt256 y, wInt256 & res)
{
res = x * y;
if (!x || !y)
return false;
bInt256 a = (x > 0) ? x : -x;
bInt256 b = (y > 0) ? y : -y;
wInt256 a = (x > 0) ? x : -x;
wInt256 b = (y > 0) ? y : -y;
return (a * b) / b != a;
}
template <>
inline bool mulOverflow(bUInt256 x, bUInt256 y, bUInt256 & res)
inline bool mulOverflow(wUInt256 x, wUInt256 y, wUInt256 & res)
{
res = x * y;
if (!x || !y)

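The checks above follow the classic limits-based overflow pattern: compute the wrapped result first, then detect overflow from the operands alone. A minimal standalone sketch of the same signed-addition test, using `int64_t` in place of `wInt256` so it compiles without the wide-integer header (the unsigned wrap is my addition to keep the demo free of signed-overflow UB; `wInt256` defines its own wrapping arithmetic):

```cpp
#include <cassert>
#include <cstdint>
#include <limits>

// Same shape as the addOverflow specialization above, instantiated for int64_t.
inline bool addOverflow(int64_t x, int64_t y, int64_t & res)
{
    // Wrapping add via unsigned arithmetic, then an operand-only overflow test.
    res = static_cast<int64_t>(static_cast<uint64_t>(x) + static_cast<uint64_t>(y));
    return (y > 0 && x > std::numeric_limits<int64_t>::max() - y)
        || (y < 0 && x < std::numeric_limits<int64_t>::min() - y);
}

int main()
{
    int64_t res;
    assert(!addOverflow(1, 2, res) && res == 3);
    assert(addOverflow(std::numeric_limits<int64_t>::max(), 1, res));
}
```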

@ -6,7 +6,7 @@
#include <string>
#include <type_traits>
#include <boost/multiprecision/cpp_int.hpp>
#include <common/wide_integer.h>
using Int8 = int8_t;
using Int16 = int16_t;
@ -25,12 +25,11 @@ using UInt64 = uint64_t;
using Int128 = __int128;
/// We have to use 127- and 255-bit integers to save a bit for sign serialization
//using bInt256 = boost::multiprecision::int256_t;
using bInt256 = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<
255, 255, boost::multiprecision::signed_magnitude, boost::multiprecision::unchecked, void> >;
using bUInt256 = boost::multiprecision::uint256_t;
using wInt256 = std::wide_integer<256, signed>;
using wUInt256 = std::wide_integer<256, unsigned>;
static_assert(sizeof(wInt256) == 32);
static_assert(sizeof(wUInt256) == 32);
using String = std::string;
@ -44,7 +43,7 @@ struct is_signed
};
template <> struct is_signed<Int128> { static constexpr bool value = true; };
template <> struct is_signed<bInt256> { static constexpr bool value = true; };
template <> struct is_signed<wInt256> { static constexpr bool value = true; };
template <typename T>
inline constexpr bool is_signed_v = is_signed<T>::value;
@ -55,7 +54,7 @@ struct is_unsigned
static constexpr bool value = std::is_unsigned_v<T>;
};
template <> struct is_unsigned<bUInt256> { static constexpr bool value = true; };
template <> struct is_unsigned<wUInt256> { static constexpr bool value = true; };
template <typename T>
inline constexpr bool is_unsigned_v = is_unsigned<T>::value;
@ -69,8 +68,8 @@ struct is_integer
};
template <> struct is_integer<Int128> { static constexpr bool value = true; };
template <> struct is_integer<bInt256> { static constexpr bool value = true; };
template <> struct is_integer<bUInt256> { static constexpr bool value = true; };
template <> struct is_integer<wInt256> { static constexpr bool value = true; };
template <> struct is_integer<wUInt256> { static constexpr bool value = true; };
template <typename T>
inline constexpr bool is_integer_v = is_integer<T>::value;
@ -93,9 +92,9 @@ struct make_unsigned
typedef std::make_unsigned_t<T> type;
};
template <> struct make_unsigned<__int128> { using type = unsigned __int128; };
template <> struct make_unsigned<bInt256> { using type = bUInt256; };
template <> struct make_unsigned<bUInt256> { using type = bUInt256; };
template <> struct make_unsigned<Int128> { using type = unsigned __int128; };
template <> struct make_unsigned<wInt256> { using type = wUInt256; };
template <> struct make_unsigned<wUInt256> { using type = wUInt256; };
template <typename T> using make_unsigned_t = typename make_unsigned<T>::type;
@ -105,8 +104,8 @@ struct make_signed
typedef std::make_signed_t<T> type;
};
template <> struct make_signed<bInt256> { typedef bInt256 type; };
template <> struct make_signed<bUInt256> { typedef bInt256 type; };
template <> struct make_signed<wInt256> { using type = wInt256; };
template <> struct make_signed<wUInt256> { using type = wInt256; };
template <typename T> using make_signed_t = typename make_signed<T>::type;
@ -116,8 +115,8 @@ struct is_big_int
static constexpr bool value = false;
};
template <> struct is_big_int<bUInt256> { static constexpr bool value = true; };
template <> struct is_big_int<bInt256> { static constexpr bool value = true; };
template <> struct is_big_int<wInt256> { static constexpr bool value = true; };
template <> struct is_big_int<wUInt256> { static constexpr bool value = true; };
template <typename T>
inline constexpr bool is_big_int_v = is_big_int<T>::value;
@ -125,14 +124,11 @@ inline constexpr bool is_big_int_v = is_big_int<T>::value;
template <typename T>
inline std::string bigintToString(const T & x)
{
return x.str();
return to_string(x);
}
template <typename To, typename From>
inline To bigint_cast(const From & x [[maybe_unused]])
{
if constexpr ((is_big_int_v<From> && std::is_same_v<To, UInt8>) || (is_big_int_v<To> && std::is_same_v<From, UInt8>))
return static_cast<uint8_t>(x);
else
return static_cast<To>(x);
return static_cast<To>(x);
}

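Taken together, these trait specializations are what let generic code treat `wInt256`/`wUInt256` like built-in integers. A small sketch of the intended use, assuming `common/types.h` is on the include path; the concrete values are illustrative only:

```cpp
#include <common/types.h>

static_assert(is_signed_v<wInt256> && is_unsigned_v<wUInt256>);
static_assert(is_big_int_v<wInt256> && is_big_int_v<wUInt256>);
static_assert(std::is_same_v<make_unsigned_t<wInt256>, wUInt256>);
static_assert(std::is_same_v<make_signed_t<wUInt256>, wInt256>);

int main()
{
    wInt256 x = -42;
    // bigint_cast is now a plain static_cast; the old UInt8 special case is gone.
    auto squared = bigint_cast<UInt64>(x * x);   // 1764
    // bigintToString now routes to to_string(wide_integer).
    return squared == 1764 && bigintToString(x) == "-42" ? 0 : 1;
}
```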
base/common/wide_integer.h (new file, 249 lines)

@ -0,0 +1,249 @@
#pragma once
///////////////////////////////////////////////////////////////
// Distributed under the Boost Software License, Version 1.0.
// (See at http://www.boost.org/LICENSE_1_0.txt)
///////////////////////////////////////////////////////////////
/* Divide and multiply
*
*
* Copyright (c) 2008
* Evan Teran
*
* Permission to use, copy, modify, and distribute this software and its
* documentation for any purpose and without fee is hereby granted, provided
* that the above copyright notice appears in all copies and that both the
* copyright notice and this permission notice appear in supporting
* documentation, and that the same name not be used in advertising or
* publicity pertaining to distribution of the software without specific,
* written prior permission. We make no representations about the
* suitability of this software for any purpose. It is provided "as is"
* without express or implied warranty.
*/
#include <climits> // CHAR_BIT
#include <cmath>
#include <cstdint>
#include <limits>
#include <type_traits>
namespace std
{
template <size_t Bits, typename Signed>
class wide_integer;
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
struct common_type<wide_integer<Bits, Signed>, wide_integer<Bits2, Signed2>>;
template <size_t Bits, typename Signed, typename Arithmetic>
struct common_type<wide_integer<Bits, Signed>, Arithmetic>;
template <typename Arithmetic, size_t Bits, typename Signed>
struct common_type<Arithmetic, wide_integer<Bits, Signed>>;
template <size_t Bits, typename Signed>
class wide_integer
{
public:
using base_type = uint8_t;
using signed_base_type = int8_t;
// ctors
wide_integer() = default;
template <typename T>
constexpr wide_integer(T rhs) noexcept;
template <typename T>
constexpr wide_integer(std::initializer_list<T> il) noexcept;
// assignment
template <size_t Bits2, typename Signed2>
constexpr wide_integer<Bits, Signed> & operator=(const wide_integer<Bits2, Signed2> & rhs) noexcept;
template <typename Arithmetic>
constexpr wide_integer<Bits, Signed> & operator=(Arithmetic rhs) noexcept;
template <typename Arithmetic>
constexpr wide_integer<Bits, Signed> & operator*=(const Arithmetic & rhs);
template <typename Arithmetic>
constexpr wide_integer<Bits, Signed> & operator/=(const Arithmetic & rhs);
template <typename Arithmetic>
constexpr wide_integer<Bits, Signed> & operator+=(const Arithmetic & rhs) noexcept(is_same<Signed, unsigned>::value);
template <typename Arithmetic>
constexpr wide_integer<Bits, Signed> & operator-=(const Arithmetic & rhs) noexcept(is_same<Signed, unsigned>::value);
template <typename Integral>
constexpr wide_integer<Bits, Signed> & operator%=(const Integral & rhs);
template <typename Integral>
constexpr wide_integer<Bits, Signed> & operator&=(const Integral & rhs) noexcept;
template <typename Integral>
constexpr wide_integer<Bits, Signed> & operator|=(const Integral & rhs) noexcept;
template <typename Integral>
constexpr wide_integer<Bits, Signed> & operator^=(const Integral & rhs) noexcept;
constexpr wide_integer<Bits, Signed> & operator<<=(int n);
constexpr wide_integer<Bits, Signed> & operator>>=(int n) noexcept;
constexpr wide_integer<Bits, Signed> & operator++() noexcept(is_same<Signed, unsigned>::value);
constexpr wide_integer<Bits, Signed> operator++(int) noexcept(is_same<Signed, unsigned>::value);
constexpr wide_integer<Bits, Signed> & operator--() noexcept(is_same<Signed, unsigned>::value);
constexpr wide_integer<Bits, Signed> operator--(int) noexcept(is_same<Signed, unsigned>::value);
// observers
constexpr explicit operator bool() const noexcept;
template <class T>
using __integral_not_wide_integer_class = typename std::enable_if<std::is_arithmetic<T>::value, T>::type;
template <class T, class = __integral_not_wide_integer_class<T>>
constexpr operator T() const noexcept;
constexpr operator long double() const noexcept;
constexpr operator double() const noexcept;
constexpr operator float() const noexcept;
struct _impl;
private:
template <size_t Bits2, typename Signed2>
friend class wide_integer;
friend class numeric_limits<wide_integer<Bits, signed>>;
friend class numeric_limits<wide_integer<Bits, unsigned>>;
base_type m_arr[_impl::arr_size];
};
template <typename T>
static constexpr bool ArithmeticConcept() noexcept;
template <class T1, class T2>
using __only_arithmetic = typename std::enable_if<ArithmeticConcept<T1>() && ArithmeticConcept<T2>()>::type;
template <typename T>
static constexpr bool IntegralConcept() noexcept;
template <class T, class T2>
using __only_integer = typename std::enable_if<IntegralConcept<T>() && IntegralConcept<T2>()>::type;
// Unary operators
template <size_t Bits, typename Signed>
constexpr wide_integer<Bits, Signed> operator~(const wide_integer<Bits, Signed> & lhs) noexcept;
template <size_t Bits, typename Signed>
constexpr wide_integer<Bits, Signed> operator-(const wide_integer<Bits, Signed> & lhs) noexcept(is_same<Signed, unsigned>::value);
template <size_t Bits, typename Signed>
constexpr wide_integer<Bits, Signed> operator+(const wide_integer<Bits, Signed> & lhs) noexcept(is_same<Signed, unsigned>::value);
// Binary operators
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
std::common_type_t<wide_integer<Bits, Signed>, wide_integer<Bits2, Signed2>> constexpr
operator*(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Arithmetic, typename Arithmetic2, class = __only_arithmetic<Arithmetic, Arithmetic2>>
std::common_type_t<Arithmetic, Arithmetic2> constexpr operator*(const Arithmetic & rhs, const Arithmetic2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
std::common_type_t<wide_integer<Bits, Signed>, wide_integer<Bits2, Signed2>> constexpr
operator/(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Arithmetic, typename Arithmetic2, class = __only_arithmetic<Arithmetic, Arithmetic2>>
std::common_type_t<Arithmetic, Arithmetic2> constexpr operator/(const Arithmetic & rhs, const Arithmetic2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
std::common_type_t<wide_integer<Bits, Signed>, wide_integer<Bits2, Signed2>> constexpr
operator+(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Arithmetic, typename Arithmetic2, class = __only_arithmetic<Arithmetic, Arithmetic2>>
std::common_type_t<Arithmetic, Arithmetic2> constexpr operator+(const Arithmetic & rhs, const Arithmetic2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
std::common_type_t<wide_integer<Bits, Signed>, wide_integer<Bits2, Signed2>> constexpr
operator-(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Arithmetic, typename Arithmetic2, class = __only_arithmetic<Arithmetic, Arithmetic2>>
std::common_type_t<Arithmetic, Arithmetic2> constexpr operator-(const Arithmetic & rhs, const Arithmetic2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
std::common_type_t<wide_integer<Bits, Signed>, wide_integer<Bits2, Signed2>> constexpr
operator%(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Integral, typename Integral2, class = __only_integer<Integral, Integral2>>
std::common_type_t<Integral, Integral2> constexpr operator%(const Integral & rhs, const Integral2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
std::common_type_t<wide_integer<Bits, Signed>, wide_integer<Bits2, Signed2>> constexpr
operator&(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Integral, typename Integral2, class = __only_integer<Integral, Integral2>>
std::common_type_t<Integral, Integral2> constexpr operator&(const Integral & rhs, const Integral2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
std::common_type_t<wide_integer<Bits, Signed>, wide_integer<Bits2, Signed2>> constexpr
operator|(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Integral, typename Integral2, class = __only_integer<Integral, Integral2>>
std::common_type_t<Integral, Integral2> constexpr operator|(const Integral & rhs, const Integral2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
std::common_type_t<wide_integer<Bits, Signed>, wide_integer<Bits2, Signed2>> constexpr
operator^(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Integral, typename Integral2, class = __only_integer<Integral, Integral2>>
std::common_type_t<Integral, Integral2> constexpr operator^(const Integral & rhs, const Integral2 & lhs);
// TODO: Integral
template <size_t Bits, typename Signed>
constexpr wide_integer<Bits, Signed> operator<<(const wide_integer<Bits, Signed> & lhs, int n) noexcept;
template <size_t Bits, typename Signed>
constexpr wide_integer<Bits, Signed> operator>>(const wide_integer<Bits, Signed> & lhs, int n) noexcept;
template <size_t Bits, typename Signed, typename Int, typename = std::enable_if_t<!std::is_same_v<Int, int>>>
constexpr wide_integer<Bits, Signed> operator<<(const wide_integer<Bits, Signed> & lhs, Int n) noexcept
{
return lhs << int(n);
}
template <size_t Bits, typename Signed, typename Int, typename = std::enable_if_t<!std::is_same_v<Int, int>>>
constexpr wide_integer<Bits, Signed> operator>>(const wide_integer<Bits, Signed> & lhs, Int n) noexcept
{
return lhs >> int(n);
}
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
constexpr bool operator<(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Arithmetic, typename Arithmetic2, class = __only_arithmetic<Arithmetic, Arithmetic2>>
constexpr bool operator<(const Arithmetic & rhs, const Arithmetic2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
constexpr bool operator>(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Arithmetic, typename Arithmetic2, class = __only_arithmetic<Arithmetic, Arithmetic2>>
constexpr bool operator>(const Arithmetic & rhs, const Arithmetic2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
constexpr bool operator<=(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Arithmetic, typename Arithmetic2, class = __only_arithmetic<Arithmetic, Arithmetic2>>
constexpr bool operator<=(const Arithmetic & rhs, const Arithmetic2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
constexpr bool operator>=(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Arithmetic, typename Arithmetic2, class = __only_arithmetic<Arithmetic, Arithmetic2>>
constexpr bool operator>=(const Arithmetic & rhs, const Arithmetic2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
constexpr bool operator==(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Arithmetic, typename Arithmetic2, class = __only_arithmetic<Arithmetic, Arithmetic2>>
constexpr bool operator==(const Arithmetic & rhs, const Arithmetic2 & lhs);
template <size_t Bits, typename Signed, size_t Bits2, typename Signed2>
constexpr bool operator!=(const wide_integer<Bits, Signed> & lhs, const wide_integer<Bits2, Signed2> & rhs);
template <typename Arithmetic, typename Arithmetic2, class = __only_arithmetic<Arithmetic, Arithmetic2>>
constexpr bool operator!=(const Arithmetic & rhs, const Arithmetic2 & lhs);
template <size_t Bits, typename Signed>
std::string to_string(const wide_integer<Bits, Signed> & n);
template <size_t Bits, typename Signed>
struct hash<wide_integer<Bits, Signed>>;
}
#include "wide_integer_impl.h"

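Because the class lives in namespace `std` and mirrors the built-in integer interface, it drops into existing generic code with no adaptation. A hedged usage sketch of the declarations above (arithmetic is assumed to be two's-complement on `Bits` bits):

```cpp
#include <common/wide_integer.h>
#include <iostream>

int main()
{
    using Int256 = std::wide_integer<256, signed>;

    Int256 a = 1;
    a <<= 200;                    // 2^200, far beyond __int128
    Int256 b = a / 3;             // mixed wide/builtin operands are supported

    std::cout << std::to_string(a) << '\n';
    std::cout << std::to_string(b + (-b)) << '\n';   // prints 0
}
```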
(File diff suppressed because it is too large.)


@ -74,10 +74,9 @@ target_link_libraries(capnpc PUBLIC capnp)
# The library has substandard code
if (COMPILER_GCC)
set (SUPPRESS_WARNINGS -Wno-non-virtual-dtor -Wno-sign-compare -Wno-strict-aliasing -Wno-maybe-uninitialized
-Wno-deprecated-declarations -Wno-class-memaccess)
set (SUPPRESS_WARNINGS -w)
elseif (COMPILER_CLANG)
set (SUPPRESS_WARNINGS -Wno-non-virtual-dtor -Wno-sign-compare -Wno-strict-aliasing -Wno-deprecated-declarations)
set (SUPPRESS_WARNINGS -w)
set (CAPNP_PRIVATE_CXX_FLAGS -fno-char8_t)
endif ()


@ -67,13 +67,6 @@ if uname -mpi | grep -q 'x86_64'; then
fi
SUPPORTED_COMMANDS="{start|stop|status|restart|forcestop|forcerestart|reload|condstart|condstop|condrestart|condreload|initdb}"
is_supported_command()
{
echo "$SUPPORTED_COMMANDS" | grep -E "(\{|\|)$1(\||})" &> /dev/null
}
is_running()
{
pgrep --pidfile "$CLICKHOUSE_PIDFILE" $(echo "${PROGRAM}" | cut -c1-15) 1> /dev/null 2> /dev/null
@ -283,13 +276,12 @@ use_cron()
fi
return 0
}
# returns false if cron disabled (with systemd)
enable_cron()
{
use_cron && sed -i 's/^#*//' "$CLICKHOUSE_CRONFILE"
}
# returns false if cron disabled (with systemd)
disable_cron()
{
use_cron && sed -i 's/^#*/#/' "$CLICKHOUSE_CRONFILE"
@ -312,15 +304,14 @@ main()
EXIT_STATUS=0
case "$1" in
start)
start && enable_cron
service_or_func start && enable_cron
;;
stop)
# disable_cron returns false if cron disabled (with systemd) - not checking return status
disable_cron
stop
service_or_func stop
;;
restart)
restart && enable_cron
service_or_func restart && enable_cron
;;
forcestop)
disable_cron
@ -330,7 +321,7 @@ main()
forcerestart && enable_cron
;;
reload)
restart
service_or_func restart
;;
condstart)
is_running || service_or_func start
@ -354,7 +345,7 @@ main()
disable_cron
;;
*)
echo "Usage: $0 $SUPPORTED_COMMANDS"
echo "Usage: $0 {start|stop|status|restart|forcestop|forcerestart|reload|condstart|condstop|condrestart|condreload|initdb}"
exit 2
;;
esac


@ -7,3 +7,4 @@ services:
MYSQL_ROOT_PASSWORD: clickhouse
ports:
- 3308:3306
command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency


@ -1,10 +0,0 @@
version: '2.3'
services:
mysql5_7:
image: mysql:5.7
restart: always
environment:
MYSQL_ROOT_PASSWORD: clickhouse
ports:
- 33307:3306
command: --server_id=100 --log-bin='mysql-bin-1.log' --default-time-zone='+3:00' --gtid-mode="ON" --enforce-gtid-consistency


@ -565,40 +565,54 @@ create table unstable_queries_report engine File(TSV, 'report/unstable-queries.t
toDecimal64(stat_threshold, 3), unstable_fail, test, query_index, query_display_name
from queries where unstable_show order by stat_threshold desc;
create table test_time_changes engine File(TSV, 'report/test-time-changes.tsv') as
select test, queries, average_time_change from (
select test, count(*) queries,
sum(left) as left, sum(right) as right,
(right - left) / right average_time_change
from queries
group by test
order by abs(average_time_change) desc
)
;
create table unstable_tests engine File(TSV, 'report/unstable-tests.tsv') as
select test, sum(unstable_show) total_unstable, sum(changed_show) total_changed
create view test_speedup as
select
test,
exp2(avg(log2(left / right))) times_speedup,
count(*) queries,
unstable + changed bad,
sum(changed_show) changed,
sum(unstable_show) unstable
from queries
group by test
order by total_unstable + total_changed desc
order by times_speedup desc
;
create view total_speedup as
select
'Total' test,
exp2(avg(log2(times_speedup))) times_speedup,
sum(queries) queries,
unstable + changed bad,
sum(changed) changed,
sum(unstable) unstable
from test_speedup
;
create table test_perf_changes_report engine File(TSV, 'report/test-perf-changes.tsv') as
select test,
queries,
coalesce(total_unstable, 0) total_unstable,
coalesce(total_changed, 0) total_changed,
total_unstable + total_changed total_bad,
coalesce(toString(toDecimal64(average_time_change, 3)), '??') average_time_change_str
from test_time_changes
full join unstable_tests
using test
where (abs(average_time_change) > 0.05 and queries > 5)
or (total_bad > 0)
order by total_bad desc, average_time_change desc
settings join_use_nulls = 1
with
(times_speedup >= 1
? '-' || toString(toDecimal64(times_speedup, 3)) || 'x'
: '+' || toString(toDecimal64(1 / times_speedup, 3)) || 'x')
as times_speedup_str
select test, times_speedup_str, queries, bad, changed, unstable
-- Not sure what's the precedence of UNION ALL vs WHERE & ORDER BY, hence all
-- the braces.
from (
(
select * from total_speedup
) union all (
select * from test_speedup
where
(times_speedup >= 1 ? times_speedup : (1 / times_speedup)) >= 1.005
or bad
)
)
order by test = 'Total' desc, times_speedup desc
;
create view total_client_time_per_query as select *
from file('analyze/client-times.tsv', TSV,
'test text, query_index int, client float, server float');

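For the record, `exp2(avg(log2(left / right)))` is the geometric mean of the per-query time ratios, which is the standard way to average speedups (an arithmetic mean of ratios lets a single outlier dominate). Assuming `left`/`right` are the old/new server timings, over queries i = 1..n:

```latex
\text{times\_speedup}
    = 2^{\frac{1}{n}\sum_{i=1}^{n}\log_2\!\left(\text{left}_i/\text{right}_i\right)}
    = \left(\prod_{i=1}^{n}\frac{\text{left}_i}{\text{right}_i}\right)^{1/n}
```

A value of at least 1 means the new build is faster on (geometric) average; those values get the '-' (speedup) prefix in `times_speedup_str`, the rest get '+'.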

@ -262,6 +262,13 @@ for query_index, q in enumerate(test_queries):
print(f'query\t{query_index}\t{run_id}\t{conn_index}\t{c.last_query.elapsed}')
server_seconds += c.last_query.elapsed
if c.last_query.elapsed > 10:
# Stop processing pathologically slow queries, to avoid timing out
# the entire test task. This shouldn't really happen, so we don't
# need much handling for this case and can just exit.
print(f'The query no. {query_index} is taking too long to run ({c.last_query.elapsed} s)', file=sys.stderr)
exit(2)
client_seconds = time.perf_counter() - start_seconds
print(f'client-time\t{query_index}\t{client_seconds}\t{server_seconds}')


@ -370,7 +370,7 @@ if args.report == 'main':
columns = [
'Old,&nbsp;s', # 0
'New,&nbsp;s', # 1
'Times speedup / slowdown', # 2
'Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)', # 2
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', # 3
'p&nbsp;<&nbsp;0.001 threshold', # 4
# Failed # 5
@ -447,7 +447,7 @@ if args.report == 'main':
addSimpleTable('Skipped tests', ['Test', 'Reason'], skipped_tests_rows)
addSimpleTable('Test performance changes',
['Test', 'Queries', 'Unstable', 'Changed perf', 'Total not OK', 'Avg relative time diff'],
['Test', 'Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)', 'Queries', 'Total not OK', 'Changed perf', 'Unstable'],
tsvRows('report/test-perf-changes.tsv'))
def add_test_times():
@ -647,7 +647,7 @@ elif args.report == 'all-queries':
# Unstable #1
'Old,&nbsp;s', #2
'New,&nbsp;s', #3
'Times speedup / slowdown', #4
'Ratio of speedup&nbsp;(-) or slowdown&nbsp;(+)', #4
'Relative difference (new&nbsp;&minus;&nbsp;old) / old', #5
'p&nbsp;&lt;&nbsp;0.001 threshold', #6
'Test', #7


@ -29,17 +29,26 @@ if [[ -n "$USE_DATABASE_ATOMIC" ]] && [[ "$USE_DATABASE_ATOMIC" -eq 1 ]]; then
ln -s /usr/share/clickhouse-test/config/database_atomic_usersd.xml /etc/clickhouse-server/users.d/
fi
echo "TSAN_OPTIONS='verbosity=1000 halt_on_error=1 history_size=7'" >> /etc/environment
echo "TSAN_SYMBOLIZER_PATH=/usr/lib/llvm-10/bin/llvm-symbolizer" >> /etc/environment
echo "UBSAN_OPTIONS='print_stacktrace=1'" >> /etc/environment
echo "ASAN_SYMBOLIZER_PATH=/usr/lib/llvm-10/bin/llvm-symbolizer" >> /etc/environment
echo "UBSAN_SYMBOLIZER_PATH=/usr/lib/llvm-10/bin/llvm-symbolizer" >> /etc/environment
echo "LLVM_SYMBOLIZER_PATH=/usr/lib/llvm-10/bin/llvm-symbolizer" >> /etc/environment
function start()
{
counter=0
until clickhouse-client --query "SELECT 1"
do
if [ "$counter" -gt 120 ]
then
echo "Cannot start clickhouse-server"
cat /var/log/clickhouse-server/stdout.log
tail -n1000 /var/log/clickhouse-server/stderr.log
tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
break
fi
timeout 120 service clickhouse-server start
sleep 0.5
counter=$(($counter + 1))
done
}
service zookeeper start
sleep 5
service clickhouse-server start
sleep 5
start
/s3downloader --dataset-names $DATASETS
chmod 777 -R /var/lib/clickhouse
clickhouse-client --query "SHOW DATABASES"


@ -71,14 +71,26 @@ ln -s /usr/share/clickhouse-test/config/macros.xml /etc/clickhouse-server/config
ln -s --backup=simple --suffix=_original.xml \
/usr/share/clickhouse-test/config/query_masking_rules.xml /etc/clickhouse-server/config.d/
function start()
{
counter=0
until clickhouse-client --query "SELECT 1"
do
if [ "$counter" -gt 120 ]
then
echo "Cannot start clickhouse-server"
cat /var/log/clickhouse-server/stdout.log
tail -n1000 /var/log/clickhouse-server/stderr.log
tail -n1000 /var/log/clickhouse-server/clickhouse-server.log
break
fi
timeout 120 service clickhouse-server start
sleep 0.5
counter=$(($counter + 1))
done
}
service zookeeper start
sleep 5
start_clickhouse
sleep 5
start
if ! /s3downloader --dataset-names $DATASETS; then
echo "Cannot download datatsets"


@ -1290,6 +1290,47 @@ Possible values:
Default value: 0.
## distributed\_group\_by\_no\_merge {#distributed-group-by-no-merge}
Do not merge aggregation states from different servers for distributed query processing. Use this when you are certain that different shards hold different keys.
Possible values:
- 0 — Disabled (final query processing is done on the initiator node).
- 1 — Do not merge aggregation states from different servers for distributed query processing (the query is processed completely on the shard; the initiator only proxies the data).
- 2 — Same as 1, but apply `ORDER BY` and `LIMIT` on the initiator (for queries with `ORDER BY` and/or `LIMIT`).
**Example**
```sql
SELECT *
FROM remote('127.0.0.{2,3}', system.one)
GROUP BY dummy
LIMIT 1
SETTINGS distributed_group_by_no_merge = 1
FORMAT PrettyCompactMonoBlock
┌─dummy─┐
│ 0 │
│ 0 │
└───────┘
```
```sql
SELECT *
FROM remote('127.0.0.{2,3}', system.one)
GROUP BY dummy
LIMIT 1
SETTINGS distributed_group_by_no_merge = 2
FORMAT PrettyCompactMonoBlock
┌─dummy─┐
│ 0 │
└───────┘
```
Default value: 0
## optimize\_skip\_unused\_shards {#optimize-skip-unused-shards}
Enables or disables skipping of unused shards for [SELECT](../../sql-reference/statements/select/index.md) queries that have a sharding key condition in `WHERE/PREWHERE` (assuming that the data is distributed by the sharding key; otherwise the setting does nothing).
@ -1337,6 +1378,40 @@ Possible values:
Default value: 0
## optimize\_distributed\_group\_by\_sharding\_key {#optimize-distributed-group-by-sharding-key}
Optimize `GROUP BY sharding_key` queries by avoiding costly aggregation on the initiator server (this reduces memory usage for the query on the initiator server).
The following types of queries are supported (and all combinations of them):
- `SELECT DISTINCT [..., ]sharding_key[, ...] FROM dist`
- `SELECT ... FROM dist GROUP BY sharding_key[, ...]`
- `SELECT ... FROM dist GROUP BY sharding_key[, ...] ORDER BY x`
- `SELECT ... FROM dist GROUP BY sharding_key[, ...] LIMIT 1`
- `SELECT ... FROM dist GROUP BY sharding_key[, ...] LIMIT 1 BY x`
The following types of queries are not supported (support for some of them may be added later):
- `SELECT ... GROUP BY sharding_key[, ...] WITH TOTALS`
- `SELECT ... GROUP BY sharding_key[, ...] WITH ROLLUP`
- `SELECT ... GROUP BY sharding_key[, ...] WITH CUBE`
- `SELECT ... GROUP BY sharding_key[, ...] SETTINGS extremes=1`
Possible values:
- 0 — Disabled.
- 1 — Enabled.
Default value: 0
See also:
- [distributed\_group\_by\_no\_merge](#distributed-group-by-no-merge)
- [optimize\_skip\_unused\_shards](#optimize-skip-unused-shards)
!!! note "Note"
    Right now it requires `optimize_skip_unused_shards` (the reason is that this setting may one day be enabled by default, and it works correctly only if the data was inserted via a Distributed table, i.e. the data is distributed according to the sharding_key).
## optimize\_throw\_if\_noop {#setting-optimize_throw_if_noop}
Enables or disables throwing an exception if an [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) query didn't perform a merge.
@ -1894,9 +1969,9 @@ Locking timeout is used to protect from deadlocks while executing read/write ope
Possible values:
- Positive integer.
- Positive integer (in seconds).
- 0 — No locking timeout.
Default value: `120`.
Default value: `120` seconds.
[Original article](https://clickhouse.tech/docs/en/operations/settings/settings/) <!-- hide -->


@ -1756,4 +1756,17 @@ SELECT idx, i FROM null_in WHERE i IN (1, NULL) SETTINGS transform_null_in = 1;
- [CREATE TABLE query clauses and settings](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-query-clauses) (the `merge_with_ttl_timeout` setting)
- [Table TTL](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl)
## lock_acquire_timeout {#lock_acquire_timeout}
Sets the number of seconds the server waits to acquire a table lock.
The lock timeout protects against deadlocks while executing read/write operations on tables. If the timeout expires and the lock could not be acquired, the server returns an exception with the code `DEADLOCK_AVOIDED` and the message "Locking attempt timed out! Possible deadlock avoided. Client should retry."
Possible values:
- A positive integer (in seconds).
- 0 — No lock timeout.
Default value: `120` seconds.
[Original article](https://clickhouse.tech/docs/ru/operations/settings/settings/) <!--hide-->


@ -180,12 +180,13 @@ def build(args):
if not args.skip_website:
website.build_website(args)
test.test_templates(args.website_dir)
if not args.skip_test_templates:
test.test_templates(args.website_dir)
build_docs(args)
from github import build_releases
build_releases(args, build_docs)
if not args.skip_docs:
build_docs(args)
from github import build_releases
build_releases(args, build_docs)
if not args.skip_blog:
blog.build_blog(args)
@ -220,6 +221,8 @@ if __name__ == '__main__':
arg_parser.add_argument('--skip-website', action='store_true')
arg_parser.add_argument('--skip-blog', action='store_true')
arg_parser.add_argument('--skip-git-log', action='store_true')
arg_parser.add_argument('--skip-docs', action='store_true')
arg_parser.add_argument('--skip-test-templates', action='store_true')
arg_parser.add_argument('--test-only', action='store_true')
arg_parser.add_argument('--minify', action='store_true')
arg_parser.add_argument('--htmlproofer', action='store_true')


@ -104,6 +104,8 @@ public:
query_processing_stage = QueryProcessingStage::FetchColumns;
else if (stage == "with_mergeable_state")
query_processing_stage = QueryProcessingStage::WithMergeableState;
else if (stage == "with_mergeable_state_after_aggregation")
query_processing_stage = QueryProcessingStage::WithMergeableStateAfterAggregation;
else
throw Exception("Unknown query processing stage: " + stage, ErrorCodes::BAD_ARGUMENTS);
@ -564,8 +566,8 @@ int mainEntryClickHouseBenchmark(int argc, char ** argv)
desc.add_options()
("help", "produce help message")
("concurrency,c", value<unsigned>()->default_value(1), "number of parallel queries")
("delay,d", value<double>()->default_value(1), "delay between intermediate reports in seconds (set 0 to disable reports)")
("stage", value<std::string>()->default_value("complete"), "request query processing up to specified stage: complete,fetch_columns,with_mergeable_state")
("delay,d", value<double>()->default_value(1), "delay between intermediate reports in seconds (set 0 to disable reports)")
("stage", value<std::string>()->default_value("complete"), "request query processing up to specified stage: complete,fetch_columns,with_mergeable_state,with_mergeable_state_after_aggregation")
("iterations,i", value<size_t>()->default_value(0), "amount of queries to be executed")
("timelimit,t", value<double>()->default_value(0.), "stop launch of queries after specified time limit")
("randomize,r", value<bool>()->default_value(false), "randomize order of execution")


@ -13,6 +13,7 @@
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
#include <DataTypes/DataTypeFactory.h>
#include <DataTypes/DataTypeUUID.h>
#include <Interpreters/Context.h>
#include <DataStreams/IBlockOutputStream.h>
#include <DataStreams/LimitBlockInputStream.h>
@ -363,6 +364,17 @@ static void transformFixedString(const UInt8 * src, UInt8 * dst, size_t size, UI
}
}
static void transformUUID(const UInt128 & src, UInt128 & dst, UInt64 seed)
{
SipHash hash;
hash.update(seed);
hash.update(reinterpret_cast<const char *>(&src), sizeof(UInt128));
/// Preserve the version and variant bits of the original UUID
hash.get128(reinterpret_cast<char *>(&dst));
dst.high = (dst.high & 0x1fffffffffffffffull) | (src.high & 0xe000000000000000ull);
dst.low = (dst.low & 0xffffffffffff0fffull) | (src.low & 0x000000000000f000ull);
}
class FixedStringModel : public IModel
{
@ -400,6 +412,38 @@ public:
}
};
class UUIDModel : public IModel
{
private:
UInt64 seed;
public:
explicit UUIDModel(UInt64 seed_) : seed(seed_) {}
void train(const IColumn &) override {}
void finalize() override {}
ColumnPtr generate(const IColumn & column) override
{
const ColumnUInt128 & src_column = assert_cast<const ColumnUInt128 &>(column);
const auto & src_data = src_column.getData();
auto res_column = ColumnUInt128::create();
auto & res_data = res_column->getData();
res_data.resize(src_data.size());
for (size_t i = 0; i < src_column.size(); ++i)
transformUUID(src_data[i], res_data[i], seed);
return res_column;
}
void updateSeed() override
{
seed = hash(seed);
}
};
/// Leave date part as is and apply pseudorandom permutation to time difference with previous value within the same log2 class.
class DateTimeModel : public IModel
@ -935,6 +979,9 @@ public:
if (typeid_cast<const DataTypeFixedString *>(&data_type))
return std::make_unique<FixedStringModel>(seed);
if (typeid_cast<const DataTypeUUID *>(&data_type))
return std::make_unique<UUIDModel>(seed);
if (const auto * type = typeid_cast<const DataTypeArray *>(&data_type))
return std::make_unique<ArrayModel>(get(*type->getNestedType(), seed, markov_model_params));

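`transformUUID` above hashes the full 128-bit value and then splices the original version and variant bits back in, so obfuscated UUIDs remain structurally valid. The underlying mask-preserve idiom as a small sketch; the masks are copied from the diff, and which half holds version vs. variant depends on ClickHouse's `UInt128` UUID layout:

```cpp
#include <cassert>
#include <cstdint>

// Keep the bits selected by `mask` from `src`; take everything else from `hashed`.
inline uint64_t splice(uint64_t hashed, uint64_t src, uint64_t mask)
{
    return (hashed & ~mask) | (src & mask);
}

int main()
{
    const uint64_t high_mask = 0xe000000000000000ull;  // top 3 bits preserved
    const uint64_t low_mask  = 0x000000000000f000ull;  // bits 12..15 preserved

    // The preserved bits survive no matter what the hash produced.
    assert(splice(/*hashed*/ 0, /*src*/ 0x4000, low_mask) == 0x4000);
    assert(splice(~0ull, 0, high_mask) == (~0ull & ~high_mask));
}
```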

@ -85,12 +85,12 @@ AggregateFunctionPtr createAggregateFunctionTopK(const std::string & name, const
load_factor = applyVisitor(FieldVisitorConvertToNumber<UInt64>(), params[1]);
if (load_factor < 1)
throw Exception("Too small parameter for aggregate function " + name + ". Minimum: 1",
throw Exception("Too small parameter 'load_factor' for aggregate function " + name + ". Minimum: 1",
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
}
if (k > TOP_K_MAX_SIZE)
throw Exception("Too large parameter for aggregate function " + name + ". Maximum: " + toString(TOP_K_MAX_SIZE),
if (k > TOP_K_MAX_SIZE || load_factor > TOP_K_MAX_SIZE || k * load_factor > TOP_K_MAX_SIZE)
throw Exception("Too large parameter(s) for aggregate function " + name + ". Maximum: " + toString(TOP_K_MAX_SIZE),
ErrorCodes::ARGUMENT_OUT_OF_BOUND);
if (k == 0)


@ -126,7 +126,7 @@ public:
bool isNumeric() const override { return false; }
bool canBeInsideNullable() const override { return true; }
bool isFixedAndContiguous() const override { return is_POD; }
bool isFixedAndContiguous() const override { return true; }
size_t sizeOfValueIfFixed() const override { return sizeof(T); }
size_t size() const override { return data.size(); }


@ -12,11 +12,6 @@
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
/** Stuff for comparing numbers.
* Integer values are compared as usual.
* Floating-point numbers are compared this way that NaNs always end up at the end
@ -298,23 +293,17 @@ public:
void gather(ColumnGathererStream & gatherer_stream) override;
bool canBeInsideNullable() const override { return true; }
bool isFixedAndContiguous() const override { return is_POD; }
bool isFixedAndContiguous() const override { return true; }
size_t sizeOfValueIfFixed() const override { return sizeof(T); }
StringRef getRawData() const override
{
if constexpr (is_POD)
return StringRef(reinterpret_cast<const char*>(data.data()), byteSize());
else
throw Exception("getRawData() is not implemented for big integers", ErrorCodes::NOT_IMPLEMENTED);
return StringRef(reinterpret_cast<const char*>(data.data()), byteSize());
}
StringRef getDataAt(size_t n) const override
{
if constexpr (is_POD)
return StringRef(reinterpret_cast<const char *>(&data[n]), sizeof(data[n]));
else
throw Exception("getDataAt() is not implemented for big integers", ErrorCodes::NOT_IMPLEMENTED);
return StringRef(reinterpret_cast<const char *>(&data[n]), sizeof(data[n]));
}
bool structureEquals(const IColumn & rhs) const override

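Both columns can now report `isFixedAndContiguous()` unconditionally because the wide-integer elements are plain 32-byte POD values, which is what makes `getRawData()`/`getDataAt()` sound for them. A compile-time sanity check of that assumption:

```cpp
#include <type_traits>

#include <common/types.h>   // wInt256, wUInt256

// Raw-memory access to a column's backing array is only sound if the element
// type is trivially copyable with a known fixed size.
static_assert(std::is_trivially_copyable_v<wInt256>);
static_assert(std::is_trivially_copyable_v<wUInt256>);
static_assert(sizeof(wInt256) == 32 && sizeof(wUInt256) == 32);

int main() {}
```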

@ -1,6 +1,7 @@
#pragma once
#include <Core/Types.h>
#include <Core/BigInt.h>
#include <Common/UInt128.h>
#include <common/unaligned.h>
@ -89,8 +90,7 @@ template <typename T>
inline typename std::enable_if<is_big_int_v<T>, DB::UInt64>::type
intHashCRC32(const T & x, DB::UInt64 updated_value)
{
std::vector<UInt64> parts;
export_bits(x, std::back_inserter(parts), sizeof(UInt64), false);
std::vector<UInt64> parts = DB::BigInt<T>::toIntArray(x);
for (const auto & part : parts)
updated_value = intHashCRC32(part, updated_value);
@ -199,7 +199,7 @@ inline size_t DefaultHash64(std::enable_if_t<(sizeof(T) > sizeof(UInt64)), T> ke
{
return intHash64(key.low ^ key.high);
}
else if constexpr (std::is_same_v<T, bInt256> || std::is_same_v<T, bUInt256>)
else if constexpr (is_big_int_v<T> && sizeof(T) == 32)
{
return intHash64(static_cast<UInt64>(key) ^
static_cast<UInt64>(key >> 64) ^
@ -256,7 +256,7 @@ inline size_t hashCRC32(std::enable_if_t<(sizeof(T) > sizeof(UInt64)), T> key)
{
return intHashCRC32(key.low ^ key.high);
}
else if constexpr (std::is_same_v<T, bInt256> || std::is_same_v<T, bUInt256>)
else if constexpr (is_big_int_v<T> && sizeof(T) == 32)
{
return intHashCRC32(static_cast<UInt64>(key) ^
static_cast<UInt64>(key >> 64) ^
@ -358,7 +358,7 @@ struct IntHash32
{
return intHash32<salt>(key.low ^ key.high);
}
else if constexpr (std::is_same_v<T, bInt256> || std::is_same_v<T, bUInt256>)
else if constexpr (is_big_int_v<T> && sizeof(T) == 32)
{
return intHash32<salt>(static_cast<UInt64>(key) ^
static_cast<UInt64>(key >> 64) ^

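All three branches use the same fold: XOR the four 64-bit limbs of the 256-bit key down to a single `UInt64`, then apply the ordinary 64-bit hash. A self-contained sketch with the key given as raw limbs (the `intHash64` stand-in is an assumption, a generic 64-bit finalizer rather than ClickHouse's exact function):

```cpp
#include <cstdint>

// Stand-in 64-bit mix (MurmurHash3 finalizer); the real code calls DB::intHash64.
inline uint64_t intHash64(uint64_t x)
{
    x ^= x >> 33; x *= 0xff51afd7ed558ccdULL;
    x ^= x >> 33; x *= 0xc4ceb9fe1a85ec53ULL;
    x ^= x >> 33;
    return x;
}

// Same shape as the `is_big_int_v<T> && sizeof(T) == 32` branches above:
// fold 4 x 64-bit limbs with XOR, then hash once.
inline uint64_t hash256(const uint64_t limbs[4])
{
    return intHash64(limbs[0] ^ limbs[1] ^ limbs[2] ^ limbs[3]);
}

int main()
{
    const uint64_t limbs[4] = {1, 2, 3, 4};
    return hash256(limbs) == hash256(limbs) ? 0 : 1;   // deterministic
}
```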

@ -148,7 +148,7 @@ public:
}
template <typename T>
std::enable_if_t<is_big_int_v<T>, void> update(const T & x)
std::enable_if_t<is_big_int_v<T> && !std::has_unique_object_representations_v<T>, void> update(const T & x)
{
update(DB::BigInt<T>::serialize(x));
}
@ -213,7 +213,7 @@ std::enable_if_t<std::has_unique_object_representations_v<T>, UInt64> sipHash64(
}
template <typename T>
std::enable_if_t<(std::is_floating_point_v<T> || is_big_int_v<T>), UInt64> sipHash64(const T & x)
std::enable_if_t<(std::is_floating_point_v<T> || (is_big_int_v<T> && !std::has_unique_object_representations_v<T>)), UInt64> sipHash64(const T & x)
{
SipHash hash;
hash.update(x);


@ -147,16 +147,17 @@ public:
{
// Increase weight of a key that already exists
auto hash = counter_map.hash(key);
auto counter = findCounter(key, hash);
if (counter)
if (auto counter = findCounter(key, hash); counter)
{
counter->count += increment;
counter->error += error;
percolate(counter);
return;
}
// Key doesn't exist, but can fit in the top K
else if (unlikely(size() < capacity()))
if (unlikely(size() < capacity()))
{
auto c = new Counter(arena.emplace(key), increment, error, hash);
push(c);


@ -254,7 +254,7 @@ public:
const auto offset = __builtin_ctz(mask);
haystack += offset;
if (haystack < haystack_end && haystack + n <= haystack_end && pageSafe(haystack))
if (haystack + n <= haystack_end && pageSafe(haystack))
{
const auto v_haystack_offset = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
const auto v_against_l_offset = _mm_cmpeq_epi8(v_haystack_offset, cachel);
@ -463,7 +463,7 @@ public:
const auto offset = __builtin_ctz(mask);
haystack += offset;
if (haystack < haystack_end && haystack + n <= haystack_end && pageSafe(haystack))
if (haystack + n <= haystack_end && pageSafe(haystack))
{
const auto v_haystack_offset = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));
const auto v_against_l_offset = _mm_cmpeq_epi8(v_haystack_offset, cachel);
@ -652,7 +652,7 @@ public:
const auto offset = __builtin_ctz(mask);
haystack += offset;
if (haystack < haystack_end && haystack + n <= haystack_end && pageSafe(haystack))
if (haystack + n <= haystack_end && pageSafe(haystack))
{
/// check for first 16 octets
const auto v_haystack_offset = _mm_loadu_si128(reinterpret_cast<const __m128i *>(haystack));


@ -67,6 +67,11 @@ struct UInt128
bool inline operator <= (const Int128 rhs) const { return *this <= UInt128(rhs, rhs >> 64) && rhs >= 0; }
bool inline operator < (const Int128 rhs) const { return *this < UInt128(rhs, rhs >> 64) && rhs >= 0; }
bool inline operator > (const Int256 rhs) const { return (rhs < 0) || ((Int256(high) << 64) + low) > rhs; }
bool inline operator > (const UInt256 rhs) const { return ((UInt256(high) << 64) + low) > rhs; }
bool inline operator < (const Int256 rhs) const { return (rhs >= 0) && ((Int256(high) << 64) + low) < rhs; }
bool inline operator < (const UInt256 rhs) const { return ((UInt256(high) << 64) + low) < rhs; }
template <typename T> bool inline operator== (const T rhs) const { return *this == UInt128(rhs); }
template <typename T> bool inline operator!= (const T rhs) const { return *this != UInt128(rhs); }
template <typename T> bool inline operator>= (const T rhs) const { return *this >= UInt128(rhs); }


@ -424,7 +424,7 @@ void ZooKeeperRequest::write(WriteBuffer & out) const
struct ZooKeeperResponse : virtual Response
{
virtual ~ZooKeeperResponse() = default;
virtual ~ZooKeeperResponse() override = default;
virtual void readImpl(ReadBuffer &) = 0;
};


@ -260,7 +260,7 @@ struct ZooKeeperRequest : virtual Request
ZooKeeperRequest() = default;
ZooKeeperRequest(const ZooKeeperRequest &) = default;
virtual ~ZooKeeperRequest() = default;
virtual ~ZooKeeperRequest() override = default;
virtual ZooKeeper::OpNum getOpNum() const = 0;


@ -138,9 +138,9 @@ constexpr inline Int128 exp10_i128(int x)
}
inline bInt256 exp10_i256(int x)
inline wInt256 exp10_i256(int x)
{
using Int256 = bInt256;
using Int256 = wInt256;
static constexpr Int256 i10e18{1000000000000000000ll};
static const Int256 values[] = {
static_cast<Int256>(1ll),


@ -7,46 +7,15 @@ namespace DB
{
template <typename T>
struct BigIntPayload
struct BigInt
{
static_assert(!is_big_int_v<T>);
static constexpr size_t size = 0;
};
template <> struct BigIntPayload<bUInt256> { static constexpr size_t size = 32; };
template <> struct BigIntPayload<bInt256>
{
using UnsingedType = bUInt256;
static_assert(sizeof(T) == 32);
static constexpr size_t size = 32;
};
template <typename T>
struct BigInt : BigIntPayload<T>
{
using BigIntPayload<T>::size;
static constexpr size_t lastBit()
{
return size * 8 - 1;
}
static StringRef serialize(const T & x, char * pos)
{
if constexpr (is_signed_v<T>)
{
using UnsignedT = typename BigIntPayload<T>::UnsingedType;
if (x < 0)
{
UnsignedT unsigned_x = UnsignedT{0} - static_cast<UnsignedT>(-x);
export_bits(unsigned_x, pos, 8, false);
}
else
export_bits(x, pos, 8, false);
}
else
export_bits(x, pos, 8, false);
//unalignedStore<T>(pos, x);
memcpy(pos, &x, size);
return StringRef(pos, size);
}
@ -59,24 +28,20 @@ struct BigInt : BigIntPayload<T>
static T deserialize(const char * pos)
{
if constexpr (is_signed_v<T>)
{
using UnsignedT = typename BigIntPayload<T>::UnsingedType;
//return unalignedLoad<T>(pos);
T res;
memcpy(&res, pos, size);
return res;
}
UnsignedT unsigned_x;
import_bits(unsigned_x, pos, pos + size, false);
bool is_negative = bit_test(unsigned_x, lastBit());
if (is_negative)
unsigned_x = UnsignedT{0} - unsigned_x;
return static_cast<T>(unsigned_x);
}
else
{
T x;
import_bits(x, pos, pos + size, false);
return x;
}
static std::vector<UInt64> toIntArray(const T & x)
{
std::vector<UInt64> parts(4, 0);
parts[0] = UInt64(x);
parts[1] = UInt64(x >> 64);
parts[2] = UInt64(x >> 128);
parts[3] = UInt64(x >> 192);
return parts;
}
};

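With `wide_integer` the payload is plain bytes, so the boost `export_bits`/`import_bits` round-trip collapses to `memcpy`, as the simplified `serialize`/`deserialize` above show. A round-trip sketch under the same assumptions (`sizeof(T) == 32`, enforced by the `static_assert` in common/types.h):

```cpp
#include <cassert>
#include <cstring>

#include <common/types.h>   // wInt256

int main()
{
    wInt256 x = 123;
    x <<= 100;               // a value that does not fit in 64 bits
    x = -x;                  // exercise the sign bytes too

    char buf[32];
    std::memcpy(buf, &x, sizeof(x));   // serialize: raw object bytes

    wInt256 y;
    std::memcpy(&y, buf, sizeof(y));   // deserialize
    assert(y == x);
}
```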

@ -226,25 +226,25 @@ private:
static NO_INLINE UInt8 apply(A a, B b, CompareInt scale [[maybe_unused]])
{
CompareInt x;
if constexpr (is_big_int_v<CompareInt> && IsDecimalNumber<A>)
if constexpr (IsDecimalNumber<A>)
x = a.value;
else
x = bigint_cast<CompareInt>(a);
x = a;
CompareInt y;
if constexpr (is_big_int_v<CompareInt> && IsDecimalNumber<B>)
if constexpr (IsDecimalNumber<B>)
y = b.value;
else
y = bigint_cast<CompareInt>(b);
y = b;
if constexpr (_check_overflow)
{
bool overflow = false;
if constexpr (sizeof(A) > sizeof(CompareInt))
overflow |= (A(x) != a);
overflow |= (bigint_cast<A>(x) != a);
if constexpr (sizeof(B) > sizeof(CompareInt))
overflow |= (B(y) != b);
overflow |= (bigint_cast<B>(y) != b);
if constexpr (is_unsigned_v<A>)
overflow |= (x < 0);
if constexpr (is_unsigned_v<B>)


@ -742,7 +742,9 @@ namespace MySQLReplication
void GTIDEvent::dump(std::ostream & out) const
{
auto gtid_next = gtid.uuid.toUnderType().toHexString() + ":" + std::to_string(gtid.seq_no);
WriteBufferFromOwnString ws;
writeUUIDText(gtid.uuid, ws);
auto gtid_next = ws.str() + ":" + std::to_string(gtid.seq_no);
header.dump(out);
out << "GTID Next: " << gtid_next << std::endl;


@ -499,7 +499,7 @@ namespace MySQLReplication
virtual BinlogEventPtr readOneEvent() = 0;
virtual void setReplicateDatabase(String db) = 0;
virtual void setGTIDSets(GTIDSets sets) = 0;
virtual ~IFlavor() = default;
virtual ~IFlavor() override = default;
};
class MySQLFlavor : public IFlavor


@ -10,17 +10,36 @@ namespace DB
namespace QueryProcessingStage
{
/// Numbers matter - the later stage has a larger number.
///
/// It is a part of the protocol ABI; add new values only at the end.
/// Also keep in mind that the code may depend on the order of fields, so be doubly careful when adding new values.
enum Enum
{
FetchColumns = 0, /// Only read/have been read the columns specified in the query.
WithMergeableState = 1, /// Until the stage where the results of processing on different servers can be combined.
Complete = 2, /// Completely.
/// Only read/have been read the columns specified in the query.
FetchColumns = 0,
/// Until the stage where the results of processing on different servers can be combined.
WithMergeableState = 1,
/// Completely.
Complete = 2,
/// Until the stage where the aggregate functions were calculated and finalized.
///
/// It is used for auto distributed_group_by_no_merge optimization for distributed engine.
/// (See comments in StorageDistributed).
WithMergeableStateAfterAggregation = 3,
MAX = 4,
};
inline const char * toString(UInt64 stage)
{
static const char * data[] = { "FetchColumns", "WithMergeableState", "Complete" };
return stage < 3
static const char * data[] =
{
"FetchColumns",
"WithMergeableState",
"Complete",
"WithMergeableStateAfterAggregation",
};
return stage < MAX
? data[stage]
: "Unknown stage";
}


@ -107,8 +107,8 @@ class IColumn;
\
M(Bool, skip_unavailable_shards, false, "If 1, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. Shard is marked as unavailable when none of the replicas can be reached.", 0) \
\
M(Bool, distributed_group_by_no_merge, false, "Do not merge aggregation states from different servers for distributed query processing - in case it is for certain that there are different keys on different shards.", 0) \
M(UInt64, parallel_distributed_insert_select, 0, "Process distributed INSERT SELECT query in the same cluster on local tables on every shard, if 1 SELECT is executed on each shard, if 2 SELECT and INSERT is executed on each shard", 0) \
M(UInt64, distributed_group_by_no_merge, 0, "If 1, do not merge aggregation states from different servers for distributed query processing - use when it is certain that there are different keys on different shards. If 2, same as 1 but also apply ORDER BY and LIMIT stages", 0) \
M(Bool, optimize_distributed_group_by_sharding_key, false, "Optimize GROUP BY sharding_key queries (by avoiding costly aggregation on the initiator server).", 0) \
M(Bool, optimize_skip_unused_shards, false, "Assumes that data is distributed by sharding_key. Optimization to skip unused shards if SELECT query filters by sharding_key.", 0) \
M(UInt64, force_optimize_skip_unused_shards, 0, "Throw an exception if unused shards cannot be skipped (1 - throw only if the table has the sharding key, 2 - always throw.", 0) \
@ -380,6 +380,7 @@ class IColumn;
M(Bool, cast_keep_nullable, false, "CAST operator keep Nullable for result data type", 0) \
M(Bool, alter_partition_verbose_result, false, "Output information about affected parts. Currently works only for FREEZE and ATTACH commands.", 0) \
M(Bool, allow_experimental_database_materialize_mysql, false, "Allow to create database with Engine=MaterializeMySQL(...).", 0) \
M(Bool, system_events_show_zero_values, false, "Include all metrics, even with zero values", 0) \
\
/** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \
\


@ -58,14 +58,14 @@ using UInt8 = ::UInt8;
using UInt16 = ::UInt16;
using UInt32 = ::UInt32;
using UInt64 = ::UInt64;
using UInt256 = ::bUInt256;
using UInt256 = ::wUInt256;
using Int8 = ::Int8;
using Int16 = ::Int16;
using Int32 = ::Int32;
using Int64 = ::Int64;
using Int128 = ::Int128;
using Int256 = ::bInt256;
using Int256 = ::wInt256;
using Float32 = float;
using Float64 = double;


@ -28,21 +28,13 @@ constexpr size_t min(size_t x, size_t y)
return x < y ? x : y;
}
/// @note There is no automatic widening to a larger big integer, only for built-in integral types.
/// This is because of (U)Int64 backward compatibility and the very large performance penalties it would incur.
constexpr size_t nextSize(size_t size)
{
return min(size * 2, 8);
}
template <bool is_signed>
constexpr size_t nextSize2(size_t size)
{
// old way for built-in integers
if (size <= 8) return nextSize(size);
if constexpr (is_signed)
return size <= 32 ? 32 : 48;
else
return size <= 32 ? 16 : 48;
if (size < 8)
return size * 2;
return size;
}
template <bool is_signed, bool is_floating, size_t size>
@ -55,9 +47,8 @@ template <> struct Construct<false, false, 1> { using Type = UInt8; };
template <> struct Construct<false, false, 2> { using Type = UInt16; };
template <> struct Construct<false, false, 4> { using Type = UInt32; };
template <> struct Construct<false, false, 8> { using Type = UInt64; };
template <> struct Construct<false, false, 16> { using Type = UInt256; };
template <> struct Construct<false, false, 16> { using Type = UInt256; }; /// TODO: we cannot use our UInt128 here
template <> struct Construct<false, false, 32> { using Type = UInt256; };
template <> struct Construct<false, false, 48> { using Type = UInt256; };
template <> struct Construct<false, true, 1> { using Type = Float32; };
template <> struct Construct<false, true, 2> { using Type = Float32; };
template <> struct Construct<false, true, 4> { using Type = Float32; };
@ -67,8 +58,7 @@ template <> struct Construct<true, false, 2> { using Type = Int16; };
template <> struct Construct<true, false, 4> { using Type = Int32; };
template <> struct Construct<true, false, 8> { using Type = Int64; };
template <> struct Construct<true, false, 16> { using Type = Int128; };
template <> struct Construct<true, false, 32> { using Type = Int128; };
template <> struct Construct<true, false, 48> { using Type = Int256; };
template <> struct Construct<true, false, 32> { using Type = Int256; };
template <> struct Construct<true, true, 1> { using Type = Float32; };
template <> struct Construct<true, true, 2> { using Type = Float32; };
template <> struct Construct<true, true, 4> { using Type = Float32; };
@ -86,7 +76,7 @@ template <typename A, typename B> struct ResultOfAdditionMultiplication
using Type = typename Construct<
is_signed_v<A> || is_signed_v<B>,
std::is_floating_point_v<A> || std::is_floating_point_v<B>,
nextSize2< is_signed_v<A> || is_signed_v<B> >(max(sizeof(A), sizeof(B)))>::Type;
nextSize(max(sizeof(A), sizeof(B)))>::Type;
};
template <typename A, typename B> struct ResultOfSubtraction
@ -94,7 +84,7 @@ template <typename A, typename B> struct ResultOfSubtraction
using Type = typename Construct<
true,
std::is_floating_point_v<A> || std::is_floating_point_v<B>,
nextSize2< is_signed_v<A> || is_signed_v<B> >(max(sizeof(A), sizeof(B)))>::Type;
nextSize(max(sizeof(A), sizeof(B)))>::Type;
};
/** When dividing, you always get a floating-point number.
@ -127,7 +117,7 @@ template <typename A> struct ResultOfNegate
using Type = typename Construct<
true,
std::is_floating_point_v<A>,
is_signed_v<A> ? sizeof(A) : nextSize2<true>(sizeof(A))>::Type;
is_signed_v<A> ? sizeof(A) : nextSize(sizeof(A))>::Type;
};
template <typename A> struct ResultOfAbs

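The net effect: built-in sizes still double up to 8 bytes, and the 16/32-byte cases now map straight onto the 128/256-bit types with no intermediate 48-byte step. A few spot checks of `nextSize` as redefined above:

```cpp
#include <cstddef>

// Same definition as in the diff above.
constexpr std::size_t nextSize(std::size_t size)
{
    if (size < 8)
        return size * 2;
    return size;
}

static_assert(nextSize(1) == 2);    // Int8  + Int8  -> Int16
static_assert(nextSize(4) == 8);    // Int32 + Int32 -> Int64
static_assert(nextSize(8) == 8);    // Int64 stays Int64 (backward compatibility)
static_assert(nextSize(16) == 16);  // 128-bit operands stay at 16 bytes
static_assert(nextSize(32) == 32);  // wInt256 stays 256-bit

int main() {}
```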

@ -81,8 +81,10 @@ struct DivideIntegralImpl
/// NOTE: overflow is still possible when dividing a large signed number by a large unsigned number, or vice versa. But it's less harmful.
if constexpr (is_integer_v<A> && is_integer_v<B> && (is_signed_v<A> || is_signed_v<B>))
{
return checkedDivision(make_signed_t<CastA>(a),
sizeof(A) > sizeof(B) ? make_signed_t<A>(CastB(b)) : make_signed_t<CastB>(b));
using SignedCastA = make_signed_t<CastA>;
using SignedCastB = std::conditional_t<sizeof(A) <= sizeof(B), make_signed_t<CastB>, SignedCastA>;
return bigint_cast<Result>(checkedDivision(bigint_cast<SignedCastA>(a), bigint_cast<SignedCastB>(b)));
}
else
return bigint_cast<Result>(checkedDivision(CastA(a), CastB(b)));
@ -108,7 +110,7 @@ struct ModuloImpl
if constexpr (std::is_floating_point_v<ResultType>)
{
/// This computation is similar to `fmod` but the latter is not inlined and has 40 times worse performance.
return ResultType(a) - trunc(ResultType(a) / ResultType(b)) * ResultType(b);
return bigint_cast<ResultType>(a) - trunc(bigint_cast<ResultType>(a) / bigint_cast<ResultType>(b)) * bigint_cast<ResultType>(b);
}
else
{
@ -125,7 +127,7 @@ struct ModuloImpl
if constexpr (is_big_int_v<IntegerBType> && sizeof(IntegerAType) <= sizeof(IntegerBType))
return bigint_cast<Result>(bigint_cast<CastB>(int_a) % int_b);
else
return bigint_cast<Result>(int_a % int_b);
return bigint_cast<Result>(int_a % bigint_cast<CastA>(int_b));
}
else
return IntegerAType(a) % IntegerBType(b);


@ -361,12 +361,8 @@ private:
return apply(a.value, b);
else if constexpr (IsDecimalNumber<U>)
return apply(a, b.value);
else if constexpr (std::is_same_v<T, UInt8>)
return apply(UInt16(a), b);
else if constexpr (std::is_same_v<U, UInt8>)
return apply(a, UInt16(b));
else
return applyNative(static_cast<NativeResultType>(a), static_cast<NativeResultType>(b));
return applyNative(bigint_cast<NativeResultType>(a), bigint_cast<NativeResultType>(b));
}
else
return applyNative(a, b);
@ -381,12 +377,8 @@ private:
return applyScaled<scale_left>(a.value, b, scale);
else if constexpr (IsDecimalNumber<U>)
return applyScaled<scale_left>(a, b.value, scale);
else if constexpr (std::is_same_v<T, UInt8>)
return applyScaled<scale_left>(UInt16(a), b, scale);
else if constexpr (std::is_same_v<U, UInt8>)
return applyScaled<scale_left>(a, UInt16(b), scale);
else
return applyNativeScaled<scale_left>(static_cast<NativeResultType>(a), static_cast<NativeResultType>(b), scale);
return applyNativeScaled<scale_left>(bigint_cast<NativeResultType>(a), bigint_cast<NativeResultType>(b), scale);
}
else
return applyNativeScaled<scale_left>(a, b, scale);
@ -401,12 +393,8 @@ private:
return applyScaledDiv(a.value, b, scale);
else if constexpr (IsDecimalNumber<U>)
return applyScaledDiv(a, b.value, scale);
else if constexpr (std::is_same_v<T, UInt8>)
return applyScaledDiv(UInt16(a), b, scale);
else if constexpr (std::is_same_v<U, UInt8>)
return applyScaledDiv(a, UInt16(b), scale);
else
return applyNativeScaledDiv(static_cast<NativeResultType>(a), static_cast<NativeResultType>(b), scale);
return applyNativeScaledDiv(bigint_cast<NativeResultType>(a), bigint_cast<NativeResultType>(b), scale);
}
else
return applyNativeScaledDiv(a, b, scale);

View File

@ -67,8 +67,16 @@ public:
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
/// For DateTime, if time zone is specified, attach it to type.
/// If the time zone is specified but empty, throw an exception.
if constexpr (std::is_same_v<ToDataType, DataTypeDateTime>)
return std::make_shared<ToDataType>(extractTimeZoneNameFromFunctionArguments(arguments, 1, 0));
{
std::string time_zone = extractTimeZoneNameFromFunctionArguments(arguments, 1, 0);
if (time_zone.empty())
throw Exception(
"Function " + getName() + " supports a 2nd argument (optional) that must be non-empty and be a valid time zone",
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return std::make_shared<ToDataType>(time_zone);
}
if constexpr (std::is_same_v<ToDataType, DataTypeDateTime64>)
{
Int64 scale = DataTypeDateTime64::default_scale;

View File

@ -558,7 +558,7 @@ bool sliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
{
/// TODO: Decimal scale
if constexpr (IsDecimalNumber<T> && IsDecimalNumber<U>)
return accurate::equalsOp(typename T::NativeType(first.data[first_ind]), typename U::NativeType(second.data[second_ind]));
return accurate::equalsOp(first.data[first_ind].value, second.data[second_ind].value);
else if constexpr (IsDecimalNumber<T> || IsDecimalNumber<U>)
return false;
else
@ -588,7 +588,7 @@ bool insliceEqualElements(const NumericArraySlice<T> & first [[maybe_unused]],
size_t second_ind [[maybe_unused]])
{
if constexpr (IsDecimalNumber<T>)
return accurate::equalsOp(typename T::NativeType(first.data[first_ind]), typename T::NativeType(first.data[second_ind]));
return accurate::equalsOp(first.data[first_ind].value, first.data[second_ind].value);
else
return accurate::equalsOp(first.data[first_ind], first.data[second_ind]);
}

View File

@ -122,9 +122,15 @@ struct NumericArraySource : public ArraySourceImpl<NumericArraySource<T>>
}
};
/// The methods can be virtual or not depending on the template parameter. See IStringSource.
#if !__clang__
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsuggest-override"
#elif __clang_major__ >= 11
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wsuggest-override"
#pragma GCC diagnostic ignored "-Wsuggest-destructor-override"
#endif
template <typename Base>
@ -205,7 +211,7 @@ struct ConstSource : public Base
}
};
#if !__clang__
#if !__clang__ || __clang_major__ >= 11
#pragma GCC diagnostic pop
#endif

View File

@ -16,11 +16,10 @@ struct AbsImpl
{
if constexpr (IsDecimalNumber<A>)
return a < A(0) ? A(-a) : a;
else if constexpr (is_big_int_v<A>)
// from boost/multiprecision/number.hpp
return static_cast<ResultType>(abs(a));
else if constexpr (is_big_int_v<A> && is_signed_v<A>)
return (a < 0) ? -a : a;
else if constexpr (is_integer_v<A> && is_signed_v<A>)
return a < 0 ? static_cast<ResultType>(~a) + 1 : a;
return a < 0 ? static_cast<ResultType>(~a) + 1 : static_cast<ResultType>(a);
else if constexpr (is_integer_v<A> && is_unsigned_v<A>)
return static_cast<ResultType>(a);
else if constexpr (std::is_floating_point_v<A>)
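
A note on the signed-integer branch above, as a standalone sketch (the helper name is hypothetical): for negative a, ~a + 1 equals -a in two's complement, and doing the increment in the wider ResultType keeps even abs(INT32_MIN) well-defined:

#include <cassert>
#include <cstdint>

static int64_t absViaComplement(int32_t a)
{
    return a < 0 ? static_cast<int64_t>(~a) + 1 : static_cast<int64_t>(a);
}

int main()
{
    assert(absViaComplement(-5) == 5);
    assert(absViaComplement(INT32_MIN) == 2147483648LL);  /// -INT32_MIN itself would overflow int32_t
}
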

View File

@ -18,7 +18,7 @@ struct BitRotateLeftImpl
template <typename Result = ResultType>
static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]])
{
if constexpr (is_big_int_v<ResultType>)
if constexpr (is_big_int_v<A> || is_big_int_v<B>)
throw Exception("Bit rotate is not implemented for big integers", ErrorCodes::NOT_IMPLEMENTED);
else
return (static_cast<Result>(a) << static_cast<Result>(b))

View File

@ -18,7 +18,7 @@ struct BitRotateRightImpl
template <typename Result = ResultType>
static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]])
{
if constexpr (is_big_int_v<ResultType>)
if constexpr (is_big_int_v<A> || is_big_int_v<B>)
throw Exception("Bit rotate is not implemented for big integers", ErrorCodes::NOT_IMPLEMENTED);
else
return (static_cast<Result>(a) >> static_cast<Result>(b))

View File

@ -19,9 +19,9 @@ struct BitShiftLeftImpl
static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]])
{
if constexpr (is_big_int_v<B>)
throw Exception("BitShiftLeftImpl is not implemented for big integers as second argument", ErrorCodes::NOT_IMPLEMENTED);
throw Exception("BitShiftLeft is not implemented for big integers as second argument", ErrorCodes::NOT_IMPLEMENTED);
else if constexpr (is_big_int_v<A>)
return static_cast<Result>(a) << bigint_cast<UInt32>(b);
return bigint_cast<Result>(a) << bigint_cast<UInt32>(b);
else
return static_cast<Result>(a) << static_cast<Result>(b);
}

View File

@ -19,9 +19,9 @@ struct BitShiftRightImpl
static inline NO_SANITIZE_UNDEFINED Result apply(A a [[maybe_unused]], B b [[maybe_unused]])
{
if constexpr (is_big_int_v<B>)
throw Exception("BitRotate is not implemented for big integers as second argument", ErrorCodes::NOT_IMPLEMENTED);
throw Exception("BitShiftRight is not implemented for big integers as second argument", ErrorCodes::NOT_IMPLEMENTED);
else if constexpr (is_big_int_v<A>)
return static_cast<Result>(a) >> bigint_cast<UInt32>(b);
return bigint_cast<Result>(a) >> bigint_cast<UInt32>(b);
else
return static_cast<Result>(a) >> static_cast<Result>(b);
}

View File

@ -19,10 +19,8 @@ struct BitTestImpl
template <typename Result = ResultType>
NO_SANITIZE_UNDEFINED static inline Result apply(A a [[maybe_unused]], B b [[maybe_unused]])
{
if constexpr (is_big_int_v<B>)
if constexpr (is_big_int_v<A> || is_big_int_v<B>)
throw Exception("bitTest is not implemented for big integers as second argument", ErrorCodes::NOT_IMPLEMENTED);
else if constexpr (is_big_int_v<A>)
return bit_test(a, static_cast<UInt32>(b));
else
return (typename NumberTraits::ToInteger<A>::Type(a) >> typename NumberTraits::ToInteger<B>::Type(b)) & 1;
}

View File

@ -13,6 +13,7 @@ namespace DB
namespace ErrorCodes
{
extern const int ILLEGAL_COLUMN;
extern const int ILLEGAL_TYPE_OF_ARGUMENT;
}
@ -54,7 +55,12 @@ std::string extractTimeZoneNameFromFunctionArguments(const ColumnsWithTypeAndNam
const DateLUTImpl & extractTimeZoneFromFunctionArguments(Block & block, const ColumnNumbers & arguments, size_t time_zone_arg_num, size_t datetime_arg_num)
{
if (arguments.size() == time_zone_arg_num + 1)
return DateLUT::instance(extractTimeZoneNameFromColumn(*block.getByPosition(arguments[time_zone_arg_num]).column));
{
std::string time_zone = extractTimeZoneNameFromColumn(*block.getByPosition(arguments[time_zone_arg_num]).column);
if (time_zone.empty())
throw Exception("Provided time zone must be non-empty and be a valid time zone", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
return DateLUT::instance(time_zone);
}
else
{
if (arguments.empty())

View File

@ -20,7 +20,7 @@ struct GCDImpl
template <typename Result = ResultType>
static inline Result apply([[maybe_unused]] A a, [[maybe_unused]] B b)
{
if constexpr (is_big_int_v<A> || is_big_int_v<B>)
if constexpr (is_big_int_v<A> || is_big_int_v<B> || is_big_int_v<Result>)
throw Exception("GCD is not implemented for big integers", ErrorCodes::NOT_IMPLEMENTED);
else
{

View File

@ -40,14 +40,14 @@ struct LCMImpl
static const constexpr bool allow_fixed_string = false;
template <typename Result = ResultType>
static inline std::enable_if_t<is_big_int_v<A> || is_big_int_v<B>, Result>
static inline std::enable_if_t<is_big_int_v<A> || is_big_int_v<B> || is_big_int_v<Result>, Result>
apply([[maybe_unused]] A a, [[maybe_unused]] B b)
{
throw Exception("LCM is not implemented for big integers", ErrorCodes::NOT_IMPLEMENTED);
}
template <typename Result = ResultType>
static inline std::enable_if_t<!is_big_int_v<A> && !is_big_int_v<B>, Result>
static inline std::enable_if_t<!is_big_int_v<A> && !is_big_int_v<B> && !is_big_int_v<Result>, Result>
apply([[maybe_unused]] A a, [[maybe_unused]] B b)
{
throwIfDivisionLeadsToFPE(typename NumberTraits::ToInteger<A>::Type(a), typename NumberTraits::ToInteger<B>::Type(b));

View File

@ -6,6 +6,11 @@
namespace DB
{
namespace ErrorCodes
{
extern const int NOT_IMPLEMENTED;
}
template <typename T>
inline std::enable_if_t<std::is_integral_v<T> && (sizeof(T) <= sizeof(UInt32)), T>
roundDownToPowerOfTwo(T x)
@ -48,10 +53,9 @@ roundDownToPowerOfTwo(T x)
template <typename T>
inline std::enable_if_t<is_big_int_v<T>, T>
roundDownToPowerOfTwo(T x)
roundDownToPowerOfTwo(T)
{
// extension from boost/multiprecision/number.hpp
return T(1) << msb(x);
throw Exception("roundToExp2() for big integers is not implemented", ErrorCodes::NOT_IMPLEMENTED);
}
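
For contrast, a hedged sketch of the small-integer path that remains (assuming the classic bit-smearing formulation): smear the highest set bit to the right, then keep only that bit.

#include <cassert>
#include <cstdint>

static uint32_t roundDownToPowerOfTwo(uint32_t x)
{
    x |= x >> 1;
    x |= x >> 2;
    x |= x >> 4;
    x |= x >> 8;
    x |= x >> 16;
    return x - (x >> 1);
}

int main()
{
    assert(roundDownToPowerOfTwo(5) == 4);
    assert(roundDownToPowerOfTwo(64) == 64);
    assert(roundDownToPowerOfTwo(100) == 64);
}
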
/** For integer data types:

View File

@ -831,6 +831,7 @@ template <> inline void writeText<bool>(const bool & x, WriteBuffer & buf) { wri
inline void writeText(const char * x, WriteBuffer & buf) { writeEscapedString(x, strlen(x), buf); }
inline void writeText(const char * x, size_t size, WriteBuffer & buf) { writeEscapedString(x, size, buf); }
inline void writeText(const DayNum & x, WriteBuffer & buf) { writeDateText(LocalDate(x), buf); }
inline void writeText(const LocalDate & x, WriteBuffer & buf) { writeDateText(x, buf); }
inline void writeText(const LocalDateTime & x, WriteBuffer & buf) { writeDateTimeText(x, buf); }
inline void writeText(const UUID & x, WriteBuffer & buf) { writeUUIDText(x, buf); }

View File

@ -160,7 +160,7 @@ inline void readDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_
" Expected to read decimal with scale {} and precision {}";
if constexpr (is_big_int_v<typename T::NativeType>)
throw Exception(fmt::format(pattern, digits, x.value.str(), exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
throw Exception(fmt::format(pattern, digits, bigintToString(x.value), exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
else
throw Exception(fmt::format(pattern, digits, x, exponent, scale, precision), ErrorCodes::ARGUMENT_OUT_OF_BOUND);
}
@ -180,7 +180,7 @@ inline void readDecimalText(ReadBuffer & buf, T & x, uint32_t precision, uint32_
{
/// Too many digits after point. Just cut off excessive digits.
auto divisor = intExp10OfSize<typename T::NativeType>(divisor_exp);
assert(divisor > T(0)); /// This is for Clang Static Analyzer. It is not smart enough to infer it automatically.
assert(divisor > 0); /// This is for Clang Static Analyzer. It is not smart enough to infer it automatically.
x.value /= divisor;
scale = 0;
return;

View File

@ -362,7 +362,9 @@ AggregatedDataVariants::Type Aggregator::chooseAggregationMethod()
return AggregatedDataVariants::Type::key64;
if (size_of_field == 16)
return AggregatedDataVariants::Type::keys128;
throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16.", ErrorCodes::LOGICAL_ERROR);
if (size_of_field == 32)
return AggregatedDataVariants::Type::keys256;
throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32.", ErrorCodes::LOGICAL_ERROR);
}
/// If all keys fit in N bits, we will use a hash table with all keys packed (placed contiguously) into a single N-bit key.
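
A hedged sketch of that packing idea (PackedKey128 and packKeys are illustrative stand-ins, not the real UInt128 machinery): fixed-size key columns whose total size fits in 16 bytes are laid out contiguously in one 128-bit key, so the hash table hashes and compares a single value.

#include <cassert>
#include <cstdint>
#include <cstring>

struct PackedKey128 { uint64_t lo = 0, hi = 0; };

static PackedKey128 packKeys(uint32_t k1, uint64_t k2)
{
    PackedKey128 key;
    std::memcpy(reinterpret_cast<char *>(&key), &k1, sizeof(k1));
    std::memcpy(reinterpret_cast<char *>(&key) + sizeof(k1), &k2, sizeof(k2));
    return key;  /// 12 of 16 bytes used, the tail stays zeroed
}

int main()
{
    PackedKey128 a = packKeys(1, 2);
    PackedKey128 b = packKeys(1, 2);
    assert(std::memcmp(&a, &b, sizeof(a)) == 0);  /// equal key tuples pack identically
}
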

View File

@ -221,7 +221,9 @@ HashJoin::Type HashJoin::chooseMethod(const ColumnRawPtrs & key_columns, Sizes &
return Type::key64;
if (size_of_field == 16)
return Type::keys128;
throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16.", ErrorCodes::LOGICAL_ERROR);
if (size_of_field == 32)
return Type::keys256;
throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32.", ErrorCodes::LOGICAL_ERROR);
}
/// If the keys fit in N bits, we will use a hash table for N-bit-packed keys

View File

@ -553,6 +553,11 @@ Block InterpreterSelectQuery::getSampleBlockImpl()
return res;
}
if (options.to_stage == QueryProcessingStage::Enum::WithMergeableStateAfterAggregation)
{
return analysis_result.before_order_and_select->getSampleBlock();
}
return analysis_result.final_projection->getSampleBlock();
}
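
For orientation, the stage ladder that WithMergeableStateAfterAggregation extends; the exact numeric values are an assumption based on this diff (note that StorageMerge below also handles the new stage and MAX):

namespace QueryProcessingStage
{
    enum Enum
    {
        FetchColumns = 0,        /// only read and compute the required columns
        WithMergeableState = 1,  /// partially aggregated state, to be merged on the initiator
        Complete = 2,            /// fully processed result
        WithMergeableStateAfterAggregation = 3,  /// aggregated on shards; ORDER BY / LIMIT / projection left to the initiator
        MAX = 4,
    };
}
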
@ -740,6 +745,8 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
auto & expressions = analysis_result;
const auto & subqueries_for_sets = query_analyzer->getSubqueriesForSets();
bool intermediate_stage = false;
bool to_aggregation_stage = false;
bool from_aggregation_stage = false;
if (options.only_analyze)
{
@ -788,6 +795,14 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
options.to_stage == QueryProcessingStage::WithMergeableState)
intermediate_stage = true;
/// Support optimize_distributed_group_by_sharding_key
/// Is running on the initiating server during distributed processing?
if (from_stage == QueryProcessingStage::WithMergeableStateAfterAggregation)
from_aggregation_stage = true;
/// Is running on remote servers during distributed processing?
if (options.to_stage == QueryProcessingStage::WithMergeableStateAfterAggregation)
to_aggregation_stage = true;
if (storage && expressions.filter_info && expressions.prewhere_info)
throw Exception("PREWHERE is not supported if the table is filtered by row-level security expression", ErrorCodes::ILLEGAL_PREWHERE);
@ -848,6 +863,12 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
if (expressions.need_aggregate)
executeMergeAggregated(query_plan, aggregate_overflow_row, aggregate_final);
}
if (from_aggregation_stage)
{
if (intermediate_stage || expressions.first_stage || expressions.second_stage)
throw Exception("Query with after aggregation stage cannot have any other stages", ErrorCodes::LOGICAL_ERROR);
}
if (expressions.first_stage)
{
@ -939,9 +960,13 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
executeSubqueriesInSetsAndJoins(query_plan, subqueries_for_sets);
}
if (expressions.second_stage)
if (expressions.second_stage || from_aggregation_stage)
{
if (expressions.need_aggregate)
if (from_aggregation_stage)
{
/// No need to aggregate anything, since this was done on remote shards.
}
else if (expressions.need_aggregate)
{
/// If you need to combine aggregated results from multiple servers
if (!expressions.first_stage)
@ -994,7 +1019,8 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
* limiting the number of rows in each up to `offset + limit`.
*/
bool has_prelimit = false;
if (query.limitLength() && !query.limit_with_ties && !hasWithTotalsInAnySubqueryInFromClause(query) &&
if (!to_aggregation_stage &&
query.limitLength() && !query.limit_with_ties && !hasWithTotalsInAnySubqueryInFromClause(query) &&
!query.arrayJoinExpressionList() && !query.distinct && !expressions.hasLimitBy() && !settings.extremes)
{
executePreLimit(query_plan, false);
@ -1023,18 +1049,23 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
has_prelimit = true;
}
/** We must do projection after DISTINCT because projection may remove some columns.
*/
executeProjection(query_plan, expressions.final_projection);
/// Projection should not be done on the shards, since then the initiator will not find the columns in the blocks.
/// (significant only for WithMergeableStateAfterAggregation).
if (!to_aggregation_stage)
{
/// We must do projection after DISTINCT because projection may remove some columns.
executeProjection(query_plan, expressions.final_projection);
}
/** Extremes are calculated before LIMIT, but after LIMIT BY. This is Ok.
*/
/// Extremes are calculated before LIMIT, but after LIMIT BY. This is Ok.
executeExtremes(query_plan);
if (!has_prelimit) /// Limit is no longer needed if there is prelimit.
/// Limit is no longer needed if there is prelimit.
if (!to_aggregation_stage && !has_prelimit)
executeLimit(query_plan);
executeOffset(query_plan);
if (!to_aggregation_stage)
executeOffset(query_plan);
}
}

View File

@ -110,9 +110,8 @@ typename SetVariantsTemplate<Variant>::Type SetVariantsTemplate<Variant>::choose
size_t size_of_field = nested_key_columns[0]->sizeOfValueIfFixed();
if ((size_of_field == 1) || (size_of_field == 2) || (size_of_field == 4) || (size_of_field == 8))
return Type::nullable_keys128;
else
throw Exception{"Logical error: numeric column has sizeOfField not in 1, 2, 4, 8.",
ErrorCodes::LOGICAL_ERROR};
/// Pass to more generic method
}
if (all_fixed)
@ -145,7 +144,9 @@ typename SetVariantsTemplate<Variant>::Type SetVariantsTemplate<Variant>::choose
return Type::key64;
if (size_of_field == 16)
return Type::keys128;
throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16.", ErrorCodes::LOGICAL_ERROR);
if (size_of_field == 32)
return Type::keys256;
throw Exception("Logical error: numeric column has sizeOfField not in 1, 2, 4, 8, 16, 32.", ErrorCodes::LOGICAL_ERROR);
}
/// If the keys fit in N bits, we will use a hash table for N-bit-packed keys

View File

@ -402,6 +402,11 @@ void PipelineExecutor::execute(size_t num_threads)
for (auto & node : graph->nodes)
if (node->exception)
std::rethrow_exception(node->exception);
/// An exception which happened in an executing thread, but not at a processor.
for (auto & executor_context : executor_contexts)
if (executor_context->exception)
std::rethrow_exception(executor_context->exception);
}
catch (...)
{
@ -469,16 +474,7 @@ void PipelineExecutor::wakeUpExecutor(size_t thread_num)
void PipelineExecutor::executeSingleThread(size_t thread_num, size_t num_threads)
{
try
{
executeStepImpl(thread_num, num_threads);
}
catch (...)
{
/// In case of exception from executor itself, stop other threads.
finish();
throw;
}
executeStepImpl(thread_num, num_threads);
#ifndef NDEBUG
auto & context = executor_contexts[thread_num];
@ -735,7 +731,16 @@ void PipelineExecutor::executeImpl(size_t num_threads)
CurrentThread::detachQueryIfNotDetached();
);
executeSingleThread(thread_num, num_threads);
try
{
executeSingleThread(thread_num, num_threads);
}
catch (...)
{
/// In case of exception from executor itself, stop other threads.
finish();
executor_contexts[thread_num]->exception = std::current_exception();
}
});
}
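
A minimal, hedged sketch of the pattern introduced here: each worker stores its exception into a per-thread slot instead of letting it escape, and the coordinating thread rethrows the first captured exception after joining.

#include <cstddef>
#include <cstdio>
#include <exception>
#include <stdexcept>
#include <thread>
#include <vector>

int main()
{
    std::vector<std::exception_ptr> exceptions(2);
    std::vector<std::thread> threads;

    for (size_t i = 0; i < exceptions.size(); ++i)
        threads.emplace_back([&exceptions, i]
        {
            try
            {
                if (i == 1)
                    throw std::runtime_error("executor thread failed");
            }
            catch (...)
            {
                exceptions[i] = std::current_exception();  /// the real executor also calls finish()
            }
        });

    for (auto & thread : threads)
        thread.join();

    try
    {
        for (auto & exception : exceptions)
            if (exception)
                std::rethrow_exception(exception);
    }
    catch (const std::exception & e)
    {
        std::printf("rethrown in the main thread: %s\n", e.what());
    }
}
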

View File

@ -97,6 +97,9 @@ private:
/// Currently processing node.
ExecutingGraph::Node * node = nullptr;
/// Exception from executing thread itself.
std::exception_ptr exception;
#ifndef NDEBUG
/// Time for different processing stages.
UInt64 total_time_ns = 0;

View File

@ -1,5 +1,6 @@
#pragma once
#include <atomic>
#include <memory>
#include <vector>
#include <variant>

View File

@ -394,6 +394,7 @@ static bool isFederatedServerSetupSetCommand(const String & query)
"|(^(SET FOREIGN_KEY_CHECKS(.*)))"
"|(^(SET AUTOCOMMIT(.*)))"
"|(^(SET sql_mode(.*)))"
"|(^(SET @@(.*)))"
"|(^(SET SESSION TRANSACTION ISOLATION LEVEL(.*)))"
, std::regex::icase};
return 1 == std::regex_match(query, expr);
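
A trimmed-down, hedged sketch of this check: the MySQL compatibility layer recognizes session-setup SET statements (now including "SET @@...") and acknowledges them without forwarding them to the query parser.

#include <cassert>
#include <regex>
#include <string>

static bool isFederatedSetCommand(const std::string & query)
{
    static const std::regex expr(
        "(^(SET sql_mode(.*)))"
        "|(^(SET @@(.*)))"
        "|(^(SET SESSION TRANSACTION ISOLATION LEVEL(.*)))",
        std::regex::icase);
    return std::regex_match(query, expr);
}

int main()
{
    assert(isFederatedSetCommand("SET @@session.auto_increment_increment = 1"));
    assert(!isFederatedSetCommand("SELECT 1"));
}
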

View File

@ -6,14 +6,14 @@
namespace DB
{
AllMergeSelector::PartsInPartition AllMergeSelector::select(
const Partitions & partitions,
AllMergeSelector::PartsRange AllMergeSelector::select(
const PartsRanges & parts_ranges,
const size_t /*max_total_size_to_merge*/)
{
size_t min_partition_size = 0;
Partitions::const_iterator best_partition;
PartsRanges::const_iterator best_partition;
for (auto it = partitions.begin(); it != partitions.end(); ++it)
for (auto it = parts_ranges.begin(); it != parts_ranges.end(); ++it)
{
if (it->size() <= 1)
continue;

View File

@ -11,8 +11,8 @@ class AllMergeSelector : public IMergeSelector
{
public:
/// Parameter max_total_size_to_merge is ignored.
PartsInPartition select(
const Partitions & partitions,
PartsRange select(
const PartsRanges & parts_ranges,
const size_t max_total_size_to_merge) override;
};

View File

@ -1042,6 +1042,37 @@ void IMergeTreeDataPart::accumulateColumnSizes(ColumnToSize & column_to_size) co
column_to_size[column_name] = size.data_compressed;
}
bool IMergeTreeDataPart::checkAllTTLCalculated(const StorageMetadataPtr & metadata_snapshot) const
{
if (!metadata_snapshot->hasAnyTTL())
return false;
if (metadata_snapshot->hasRowsTTL())
{
if (isEmpty()) /// All rows were finally deleted and we don't store TTL
return true;
else if (ttl_infos.table_ttl.min == 0)
return false;
}
for (const auto & [column, desc] : metadata_snapshot->getColumnTTLs())
{
/// Part has this column, but we haven't calculated TTL for it
if (!ttl_infos.columns_ttl.count(column) && getColumns().contains(column))
return false;
}
for (const auto & move_desc : metadata_snapshot->getMoveTTLs())
{
/// Move TTL is not calculated
if (!ttl_infos.moves_ttl.count(move_desc.result_column))
return false;
}
return true;
}
bool isCompactPart(const MergeTreeDataPartPtr & data_part)
{
return (data_part && data_part->getType() == MergeTreeDataPartType::COMPACT);

View File

@ -344,6 +344,11 @@ public:
static inline constexpr auto DELETE_ON_DESTROY_MARKER_FILE_NAME = "delete-on-destroy.txt";
/// Checks that all TTLs (table min/max, column TTLs, and so on) for the part
/// are calculated. A part without calculated TTL may exist if TTL was added after
/// part creation (using an ALTER query with the materialize_ttl setting).
bool checkAllTTLCalculated(const StorageMetadataPtr & metadata_snapshot) const;
protected:
/// Total size of all columns, calculated once in calculateColumnSizesOnDisk

View File

@ -14,7 +14,7 @@ namespace
*/
struct Estimator
{
using Iterator = LevelMergeSelector::PartsInPartition::const_iterator;
using Iterator = LevelMergeSelector::PartsRange::const_iterator;
void consider(Iterator begin, Iterator end, size_t sum_size)
{
@ -28,9 +28,9 @@ struct Estimator
}
}
LevelMergeSelector::PartsInPartition getBest() const
LevelMergeSelector::PartsRange getBest() const
{
return LevelMergeSelector::PartsInPartition(best_begin, best_end);
return LevelMergeSelector::PartsRange(best_begin, best_end);
}
double min_score = 0;
@ -40,7 +40,7 @@ struct Estimator
void selectWithinPartition(
const LevelMergeSelector::PartsInPartition & parts,
const LevelMergeSelector::PartsRange & parts,
const size_t max_total_size_to_merge,
Estimator & estimator,
const LevelMergeSelector::Settings & settings)
@ -103,14 +103,14 @@ void selectWithinPartition(
}
LevelMergeSelector::PartsInPartition LevelMergeSelector::select(
const Partitions & partitions,
LevelMergeSelector::PartsRange LevelMergeSelector::select(
const PartsRanges & parts_ranges,
const size_t max_total_size_to_merge)
{
Estimator estimator;
for (const auto & partition : partitions)
selectWithinPartition(partition, max_total_size_to_merge, estimator, settings);
for (const auto & parts_range: parts_ranges)
selectWithinPartition(parts_range, max_total_size_to_merge, estimator, settings);
return estimator.getBest();
}

View File

@ -19,8 +19,8 @@ public:
explicit LevelMergeSelector(const Settings & settings_) : settings(settings_) {}
PartsInPartition select(
const Partitions & partitions,
PartsRange select(
const PartsRanges & parts_ranges,
const size_t max_total_size_to_merge) override;
private:

View File

@ -48,16 +48,16 @@ public:
};
/// Parts belong to partitions. Only parts within the same partition can be merged.
using PartsInPartition = std::vector<Part>;
using PartsRange = std::vector<Part>;
/// Parts are in some specific order. Parts could be merged only in contiguous ranges.
using Partitions = std::vector<PartsInPartition>;
using PartsRanges = std::vector<PartsRange>;
/** The function can be called at any frequency, and it must decide whether to do any merge at all.
* If it is better not to do any merge, it returns an empty result.
*/
virtual PartsInPartition select(
const Partitions & partitions,
virtual PartsRange select(
const PartsRanges & parts_ranges,
const size_t max_total_size_to_merge) = 0;
virtual ~IMergeSelector() = default;
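
A hedged toy implementation against this interface (PartStub and selectAdjacentPair are illustrative, not the real selectors): parts arrive as contiguous ranges, and the selector returns one contiguous sub-range to merge, or nothing.

#include <cassert>
#include <cstddef>
#include <vector>

struct PartStub { size_t size = 0; };  /// age, level and TTL fields omitted
using Range = std::vector<PartStub>;
using Ranges = std::vector<Range>;

/// Picks the first adjacent pair that fits the size budget; the real selectors
/// (Simple/Level/TTL) score candidate sub-ranges instead.
static Range selectAdjacentPair(const Ranges & ranges, size_t max_total_size)
{
    for (const auto & range : ranges)
        for (size_t i = 0; i + 1 < range.size(); ++i)
            if (range[i].size + range[i + 1].size <= max_total_size)
                return Range(range.begin() + i, range.begin() + i + 2);
    return {};  /// better not to merge at all
}

int main()
{
    Ranges ranges{{{100}, {10}, {20}, {500}}};
    Range picked = selectAdjacentPair(ranges, 64);
    assert(picked.size() == 2 && picked[0].size == 10 && picked[1].size == 20);
}
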

View File

@ -227,13 +227,25 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
time_t current_time = std::time(nullptr);
IMergeSelector::Partitions partitions;
IMergeSelector::PartsRanges parts_ranges;
const String * prev_partition_id = nullptr;
/// Previous part, tracked only within the boundaries of the current partition frame
const MergeTreeData::DataPartPtr * prev_part = nullptr;
for (const MergeTreeData::DataPartPtr & part : data_parts)
{
const String & partition_id = part->info.partition_id;
if (!prev_partition_id || partition_id != *prev_partition_id)
{
if (parts_ranges.empty() || !parts_ranges.back().empty())
parts_ranges.emplace_back();
/// New partition frame.
prev_partition_id = &partition_id;
prev_part = nullptr;
}
/// Check predicate only for first part in each partition.
if (!prev_part)
{
@ -244,15 +256,19 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
if (!can_merge_callback(nullptr, part, nullptr))
continue;
}
const String & partition_id = part->info.partition_id;
if (!prev_partition_id || partition_id != *prev_partition_id || (prev_part && !can_merge_callback(*prev_part, part, nullptr)))
else
{
if (partitions.empty() || !partitions.back().empty())
partitions.emplace_back();
/// New partition frame.
prev_partition_id = &partition_id;
prev_part = nullptr;
/// If we cannot merge with the previous part, we have to start a new parts
/// interval (in the same partition)
if (!can_merge_callback(*prev_part, part, nullptr))
{
/// Starting new interval in the same partition
if (!parts_ranges.back().empty())
parts_ranges.emplace_back();
/// Now we have no previous part, but it affects only logging
prev_part = nullptr;
}
}
IMergeSelector::Part part_info;
@ -263,7 +279,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
part_info.min_ttl = part->ttl_infos.part_min_ttl;
part_info.max_ttl = part->ttl_infos.part_max_ttl;
partitions.back().emplace_back(part_info);
parts_ranges.back().emplace_back(part_info);
/// Check for consistency of data parts. If assertion is failed, it requires immediate investigation.
if (prev_part && part->info.partition_id == (*prev_part)->info.partition_id
@ -275,7 +291,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
prev_part = &part;
}
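
The loop above, restated as a hedged standalone sketch (PartInfo is an illustrative stand-in): walking parts in order, a new range opens on a partition change or when the merge predicate rejects the (previous, current) pair, so every range stays contiguous and mergeable.

#include <cassert>
#include <string>
#include <vector>

struct PartInfo { std::string partition_id; };
using RangeOfParts = std::vector<PartInfo>;

template <typename CanMerge>
std::vector<RangeOfParts> splitIntoRanges(const std::vector<PartInfo> & parts, CanMerge can_merge)
{
    std::vector<RangeOfParts> ranges;
    const PartInfo * prev = nullptr;
    for (const auto & part : parts)
    {
        if (!prev || prev->partition_id != part.partition_id || !can_merge(*prev, part))
            ranges.emplace_back();  /// start a new contiguous range
        ranges.back().push_back(part);
        prev = &part;
    }
    return ranges;
}

int main()
{
    std::vector<PartInfo> parts{{"2020"}, {"2020"}, {"2021"}};
    auto ranges = splitIntoRanges(parts, [](const PartInfo &, const PartInfo &) { return true; });
    assert(ranges.size() == 2 && ranges[0].size() == 2 && ranges[1].size() == 1);
}
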
IMergeSelector::PartsInPartition parts_to_merge;
IMergeSelector::PartsRange parts_to_merge;
if (!ttl_merges_blocker.isCancelled())
{
@ -284,7 +300,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
current_time,
data_settings->merge_with_ttl_timeout,
data_settings->ttl_only_drop_parts);
parts_to_merge = merge_selector.select(partitions, max_total_size_to_merge);
parts_to_merge = merge_selector.select(parts_ranges, max_total_size_to_merge);
}
if (parts_to_merge.empty())
@ -294,7 +310,7 @@ bool MergeTreeDataMergerMutator::selectPartsToMerge(
merge_settings.base = 1;
parts_to_merge = SimpleMergeSelector(merge_settings)
.select(partitions, max_total_size_to_merge);
.select(parts_ranges, max_total_size_to_merge);
/// Do not allow "merging" a part with itself for regular merges; only a TTL merge may do so, since it is OK for it to remove some values with expired TTL
if (parts_to_merge.size() == 1)
@ -635,8 +651,17 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
new_data_part->is_temp = true;
bool need_remove_expired_values = false;
bool force_ttl = false;
for (const auto & part : parts)
{
new_data_part->ttl_infos.update(part->ttl_infos);
if (metadata_snapshot->hasAnyTTL() && !part->checkAllTTLCalculated(metadata_snapshot))
{
LOG_INFO(log, "Some TTL values were not calculated for part {}. Will calculate them forcefully during merge.", part->name);
need_remove_expired_values = true;
force_ttl = true;
}
}
const auto & part_min_ttl = new_data_part->ttl_infos.part_min_ttl;
if (part_min_ttl && part_min_ttl <= time_of_merge)
@ -809,7 +834,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
merged_stream = std::make_shared<DistinctSortedBlockInputStream>(merged_stream, sort_description, SizeLimits(), 0 /*limit_hint*/, Names());
if (need_remove_expired_values)
merged_stream = std::make_shared<TTLBlockInputStream>(merged_stream, data, metadata_snapshot, new_data_part, time_of_merge, false);
merged_stream = std::make_shared<TTLBlockInputStream>(merged_stream, data, metadata_snapshot, new_data_part, time_of_merge, force_ttl);
if (metadata_snapshot->hasSecondaryIndices())

View File

@ -38,7 +38,7 @@ struct MergeTreeDataPartTTLInfos
MergeTreeDataPartTTLInfo table_ttl;
/// `part_min_ttl` and `part_max_ttl` are TTLs which are used for selecting parts
/// to merge in order to remove expired rows.
time_t part_min_ttl = 0;
time_t part_max_ttl = 0;
@ -58,7 +58,7 @@ struct MergeTreeDataPartTTLInfos
part_max_ttl = time_max;
}
bool empty()
bool empty() const
{
return !part_min_ttl && moves_ttl.empty();
}

View File

@ -15,7 +15,7 @@ namespace
*/
struct Estimator
{
using Iterator = SimpleMergeSelector::PartsInPartition::const_iterator;
using Iterator = SimpleMergeSelector::PartsRange::const_iterator;
void consider(Iterator begin, Iterator end, size_t sum_size, size_t size_prev_at_left, const SimpleMergeSelector::Settings & settings)
{
@ -42,9 +42,9 @@ struct Estimator
}
}
SimpleMergeSelector::PartsInPartition getBest() const
SimpleMergeSelector::PartsRange getBest() const
{
return SimpleMergeSelector::PartsInPartition(best_begin, best_end);
return SimpleMergeSelector::PartsRange(best_begin, best_end);
}
static double score(double count, double sum_size, double sum_size_fixed_cost)
@ -137,7 +137,7 @@ bool allow(
void selectWithinPartition(
const SimpleMergeSelector::PartsInPartition & parts,
const SimpleMergeSelector::PartsRange & parts,
const size_t max_total_size_to_merge,
Estimator & estimator,
const SimpleMergeSelector::Settings & settings)
@ -185,14 +185,14 @@ void selectWithinPartition(
}
SimpleMergeSelector::PartsInPartition SimpleMergeSelector::select(
const Partitions & partitions,
SimpleMergeSelector::PartsRange SimpleMergeSelector::select(
const PartsRanges & parts_ranges,
const size_t max_total_size_to_merge)
{
Estimator estimator;
for (const auto & partition : partitions)
selectWithinPartition(partition, max_total_size_to_merge, estimator, settings);
for (const auto & part_range : parts_ranges)
selectWithinPartition(part_range, max_total_size_to_merge, estimator, settings);
return estimator.getBest();
}

View File

@ -73,8 +73,8 @@ public:
explicit SimpleMergeSelector(const Settings & settings_) : settings(settings_) {}
PartsInPartition select(
const Partitions & partitions,
PartsRange select(
const PartsRanges & parts_ranges,
const size_t max_total_size_to_merge) override;
private:

View File

@ -15,18 +15,18 @@ const String & getPartitionIdForPart(const TTLMergeSelector::Part & part_info)
}
IMergeSelector::PartsInPartition TTLMergeSelector::select(
const Partitions & partitions,
IMergeSelector::PartsRange TTLMergeSelector::select(
const PartsRanges & parts_ranges,
const size_t max_total_size_to_merge)
{
using Iterator = IMergeSelector::PartsInPartition::const_iterator;
using Iterator = IMergeSelector::PartsRange::const_iterator;
Iterator best_begin;
ssize_t partition_to_merge_index = -1;
time_t partition_to_merge_min_ttl = 0;
for (size_t i = 0; i < partitions.size(); ++i)
for (size_t i = 0; i < parts_ranges.size(); ++i)
{
const auto & mergeable_parts_in_partition = partitions[i];
const auto & mergeable_parts_in_partition = parts_ranges[i];
if (mergeable_parts_in_partition.empty())
continue;
@ -51,7 +51,7 @@ IMergeSelector::PartsInPartition TTLMergeSelector::select(
if (partition_to_merge_index == -1 || partition_to_merge_min_ttl > current_time)
return {};
const auto & best_partition = partitions[partition_to_merge_index];
const auto & best_partition = parts_ranges[partition_to_merge_index];
Iterator best_end = best_begin + 1;
size_t total_size = 0;
@ -88,7 +88,7 @@ IMergeSelector::PartsInPartition TTLMergeSelector::select(
const auto & best_partition_id = getPartitionIdForPart(best_partition.front());
merge_due_times[best_partition_id] = current_time + merge_cooldown_time;
return PartsInPartition(best_begin, best_end);
return PartsRange(best_begin, best_end);
}
}

View File

@ -10,7 +10,7 @@ namespace DB
{
/** Merge selector, which is used to remove values with expired TTL.
* It selects parts to merge by a greedy algorithm:
* 1. Finds the part with the earliest expired TTL and includes it in the result.
* 2. Tries to find the longest range of parts with expired TTL that includes the part from step 1.
* Finally, the merge selector updates the TTL merge timer for the selected partition
@ -26,8 +26,8 @@ public:
merge_cooldown_time(merge_cooldown_time_),
only_drop_parts(only_drop_parts_) {}
PartsInPartition select(
const Partitions & partitions,
PartsRange select(
const PartsRanges & parts_ranges,
const size_t max_total_size_to_merge) override;
private:

View File

@ -56,12 +56,15 @@
#include <memory>
#include <filesystem>
#include <optional>
namespace
{
const UInt64 FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_HAS_SHARDING_KEY = 1;
const UInt64 FORCE_OPTIMIZE_SKIP_UNUSED_SHARDS_ALWAYS = 2;
const UInt64 DISTRIBUTED_GROUP_BY_NO_MERGE_AFTER_AGGREGATION = 2;
}
namespace DB
@ -242,22 +245,82 @@ void replaceConstantExpressions(
visitor.visit(node);
}
QueryProcessingStage::Enum getQueryProcessingStageImpl(const Context & context, QueryProcessingStage::Enum to_stage, const ClusterPtr & cluster)
/// Returns one of the following:
/// - QueryProcessingStage::Complete
/// - QueryProcessingStage::WithMergeableStateAfterAggregation
/// - none (in this case regular WithMergeableState should be used)
std::optional<QueryProcessingStage::Enum> getOptimizedQueryProcessingStage(const ASTPtr & query_ptr, bool extremes, const Block & sharding_key_block)
{
const Settings & settings = context.getSettingsRef();
const auto & select = query_ptr->as<ASTSelectQuery &>();
auto sharding_block_has = [&](const auto & exprs, size_t limit = SIZE_MAX) -> bool
{
size_t i = 0;
for (auto & expr : exprs)
{
++i;
if (i > limit)
break;
auto id = expr->template as<ASTIdentifier>();
if (!id)
return false;
/// TODO: if GROUP BY contains multiIf()/if() it should contain only columns from sharding_key
if (!sharding_key_block.has(id->name))
return false;
}
return true;
};
// GROUP BY qualifiers
// - TODO: WITH TOTALS can be implemented
// - TODO: WITH ROLLUP can be implemented (I guess)
if (select.group_by_with_totals || select.group_by_with_rollup || select.group_by_with_cube)
return {};
// TODO: extremes support can be implemented
if (extremes)
return {};
// DISTINCT
if (select.distinct)
{
if (!sharding_block_has(select.select()->children))
return {};
}
// GROUP BY
const ASTPtr group_by = select.groupBy();
if (!group_by)
{
if (!select.distinct)
return {};
}
else
{
if (!sharding_block_has(group_by->children, 1))
return {};
}
// ORDER BY
const ASTPtr order_by = select.orderBy();
if (order_by)
return QueryProcessingStage::WithMergeableStateAfterAggregation;
// LIMIT BY
// LIMIT
if (select.limitBy() || select.limitLength())
return QueryProcessingStage::WithMergeableStateAfterAggregation;
// Only simple SELECT FROM GROUP BY sharding_key can use Complete state.
return QueryProcessingStage::Complete;
}
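
A hedged sketch of the sharding_block_has lambda above (the column names are hypothetical): every inspected GROUP BY / DISTINCT expression must be a plain identifier present in the sharding key's sample block, otherwise the optimization is abandoned.

#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

static bool shardingBlockHas(
    const std::vector<std::string> & sharding_key_columns,
    const std::vector<std::string> & group_by_identifiers)
{
    return std::all_of(group_by_identifiers.begin(), group_by_identifiers.end(),
        [&](const std::string & name)
        {
            return std::find(sharding_key_columns.begin(), sharding_key_columns.end(), name)
                != sharding_key_columns.end();
        });
}

int main()
{
    assert(shardingBlockHas({"user_id"}, {"user_id"}));     /// GROUP BY the sharding key: shards may aggregate fully
    assert(!shardingBlockHas({"user_id"}, {"event_date"})); /// grouped by something else: keep merging on the initiator
}
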
size_t getClusterQueriedNodes(const Settings & settings, const ClusterPtr & cluster)
{
size_t num_local_shards = cluster->getLocalShardCount();
size_t num_remote_shards = cluster->getRemoteShardCount();
size_t result_size = (num_remote_shards * settings.max_parallel_replicas) + num_local_shards;
if (settings.distributed_group_by_no_merge)
return QueryProcessingStage::Complete;
/// A nested distributed query cannot return the Complete stage,
/// since the parent query needs to aggregate the results afterwards.
if (to_stage == QueryProcessingStage::WithMergeableState)
return QueryProcessingStage::WithMergeableState;
return result_size == 1 ? QueryProcessingStage::Complete
: QueryProcessingStage::WithMergeableState;
return (num_remote_shards * settings.max_parallel_replicas) + num_local_shards;
}
}
@ -374,87 +437,23 @@ StoragePtr StorageDistributed::createWithOwnCluster(
return res;
}
bool StorageDistributed::canForceGroupByNoMerge(const Context &context, QueryProcessingStage::Enum to_stage, const ASTPtr & query_ptr) const
{
const auto & settings = context.getSettingsRef();
std::string reason;
if (settings.distributed_group_by_no_merge)
return true;
if (!settings.optimize_distributed_group_by_sharding_key)
return false;
/// Distributed-over-Distributed (see getQueryProcessingStageImpl())
if (to_stage == QueryProcessingStage::WithMergeableState)
return false;
if (!settings.optimize_skip_unused_shards)
return false;
if (!has_sharding_key)
return false;
const auto & select = query_ptr->as<ASTSelectQuery &>();
if (select.group_by_with_totals || select.group_by_with_rollup || select.group_by_with_cube)
return false;
// TODO: The following can be optimized too (but with some caveats, will be addressed later):
// - ORDER BY
// - LIMIT BY
// - LIMIT
if (select.orderBy())
return false;
if (select.limitBy() || select.limitLength())
return false;
if (select.distinct)
{
for (auto & expr : select.select()->children)
{
const auto * id = expr->as<ASTIdentifier>();
if (!id)
return false;
if (!sharding_key_expr->getSampleBlock().has(id->name))
return false;
}
reason = "DISTINCT " + backQuote(serializeAST(*select.select(), true));
}
const ASTPtr group_by = select.groupBy();
if (!group_by)
{
if (!select.distinct)
return false;
}
else
{
// injective functions are optimized out in optimizeGroupBy()
// hence all we need to check is that column in GROUP BY matches sharding expression
auto & group_exprs = group_by->children;
if (group_exprs.empty())
throw Exception("No ASTExpressionList in GROUP BY", ErrorCodes::LOGICAL_ERROR);
const auto * id = group_exprs[0]->as<ASTIdentifier>();
if (!id)
return false;
if (!sharding_key_expr->getSampleBlock().has(id->name))
return false;
reason = "GROUP BY " + backQuote(serializeAST(*group_by, true));
}
LOG_DEBUG(log, "Force distributed_group_by_no_merge for {} (injective)", reason);
return true;
}
QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Context &context, QueryProcessingStage::Enum to_stage, const ASTPtr & query_ptr) const
{
const auto & settings = context.getSettingsRef();
auto metadata_snapshot = getInMemoryMetadataPtr();
if (canForceGroupByNoMerge(context, to_stage, query_ptr))
return QueryProcessingStage::Complete;
if (settings.distributed_group_by_no_merge)
{
if (settings.distributed_group_by_no_merge == DISTRIBUTED_GROUP_BY_NO_MERGE_AFTER_AGGREGATION)
return QueryProcessingStage::WithMergeableStateAfterAggregation;
else
return QueryProcessingStage::Complete;
}
/// A nested distributed query cannot return the Complete stage,
/// since the parent query needs to aggregate the results afterwards.
if (to_stage == QueryProcessingStage::WithMergeableState)
return QueryProcessingStage::WithMergeableState;
ClusterPtr cluster = getCluster();
if (settings.optimize_skip_unused_shards)
@ -464,7 +463,26 @@ QueryProcessingStage::Enum StorageDistributed::getQueryProcessingStage(const Con
cluster = optimized_cluster;
}
return getQueryProcessingStageImpl(context, to_stage, cluster);
/// If there is only one node, the query can be fully processed by the
/// shard; the initiator will only work as a proxy.
if (getClusterQueriedNodes(settings, cluster) == 1)
return QueryProcessingStage::Complete;
if (settings.optimize_skip_unused_shards &&
settings.optimize_distributed_group_by_sharding_key &&
has_sharding_key &&
sharding_key_is_deterministic)
{
Block sharding_key_block = sharding_key_expr->getSampleBlock();
auto stage = getOptimizedQueryProcessingStage(query_ptr, settings.extremes, sharding_key_block);
if (stage)
{
LOG_DEBUG(log, "Force processing stage to {}", QueryProcessingStage::toString(*stage));
return *stage;
}
}
return QueryProcessingStage::WithMergeableState;
}
Pipe StorageDistributed::read(

View File

@ -66,8 +66,6 @@ public:
bool isRemote() const override { return true; }
/// Return true if distributed_group_by_no_merge may be applied.
bool canForceGroupByNoMerge(const Context &, QueryProcessingStage::Enum to_stage, const ASTPtr &) const;
QueryProcessingStage::Enum getQueryProcessingStage(const Context &, QueryProcessingStage::Enum to_stage, const ASTPtr &) const override;
Pipe read(

View File

@ -452,6 +452,8 @@ Block StorageMerge::getQueryHeader(
}
case QueryProcessingStage::WithMergeableState:
case QueryProcessingStage::Complete:
case QueryProcessingStage::WithMergeableStateAfterAggregation:
case QueryProcessingStage::MAX:
{
auto query = query_info.query->clone();
removeJoin(*query->as<ASTSelectQuery>());

View File

@ -1,4 +1,5 @@
#include <Common/ProfileEvents.h>
#include <Interpreters/Context.h>
#include <DataTypes/DataTypeString.h>
#include <DataTypes/DataTypesNumber.h>
#include <Storages/System/StorageSystemEvents.h>
@ -15,13 +16,13 @@ NamesAndTypesList StorageSystemEvents::getNamesAndTypes()
};
}
void StorageSystemEvents::fillData(MutableColumns & res_columns, const Context &, const SelectQueryInfo &) const
void StorageSystemEvents::fillData(MutableColumns & res_columns, const Context & context, const SelectQueryInfo &) const
{
for (size_t i = 0, end = ProfileEvents::end(); i < end; ++i)
{
UInt64 value = ProfileEvents::global_counters[i];
if (0 != value)
if (0 != value || context.getSettingsRef().system_events_show_zero_values)
{
res_columns[0]->insert(ProfileEvents::getName(ProfileEvents::Event(i)));
res_columns[1]->insert(value);

View File

@ -14,8 +14,8 @@ int main(int, char **)
{
using namespace DB;
IMergeSelector::Partitions partitions(1);
IMergeSelector::PartsInPartition & parts = partitions.back();
IMergeSelector::PartsRanges partitions(1);
IMergeSelector::PartsRange & parts = partitions.back();
SimpleMergeSelector::Settings settings;
// settings.base = 2;
@ -53,7 +53,7 @@ int main(int, char **)
while (parts.size() > 1)
{
IMergeSelector::PartsInPartition selected_parts = selector.select(partitions, 0);
IMergeSelector::PartsRange selected_parts = selector.select(partitions, 0);
if (selected_parts.empty())
{

View File

@ -19,8 +19,8 @@ int main(int, char **)
{
using namespace DB;
IMergeSelector::Partitions partitions(1);
IMergeSelector::PartsInPartition & parts = partitions.back();
IMergeSelector::PartsRanges partitions(1);
IMergeSelector::PartsRange & parts = partitions.back();
/* SimpleMergeSelector::Settings settings;
SimpleMergeSelector selector(settings);*/
@ -52,7 +52,7 @@ int main(int, char **)
while (parts.size() > 1)
{
IMergeSelector::PartsInPartition selected_parts = selector.select(partitions, 100ULL * 1024 * 1024 * 1024);
IMergeSelector::PartsRange selected_parts = selector.select(partitions, 100ULL * 1024 * 1024 * 1024);
if (selected_parts.empty())
{

View File

@ -18,7 +18,7 @@ if(MAKE_STATIC_LIBRARIES AND DOCKER_CMD)
if(NOT INTEGRATION_USE_RUNNER AND DOCKER_COMPOSE_CMD AND PYTEST_CMD)
# To run one test with debug:
# cmake . -DPYTEST_OPT="-ss;test_cluster_copier"
add_test(NAME integration-pytest WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND env ${TEST_USE_BINARIES} "CLICKHOUSE_TESTS_BASE_CONFIG_DIR=${ClickHouse_SOURCE_DIR}/programs/server/" ${PYTEST_STARTER} ${PYTEST_CMD} ${PYTEST_OPT})
add_test(NAME integration-pytest WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} COMMAND env ${TEST_USE_BINARIES} "CLICKHOUSE_TESTS_BASE_CONFIG_DIR=${ClickHouse_SOURCE_DIR}/programs/server/" "CLICKHOUSE_TESTS_CONFIG_DIR=${ClickHouse_SOURCE_DIR}/tests/config/" ${PYTEST_STARTER} ${PYTEST_CMD} ${PYTEST_OPT})
message(STATUS "Using tests in docker DOCKER=${DOCKER_CMD}; DOCKER_COMPOSE=${DOCKER_COMPOSE_CMD}; PYTEST=${PYTEST_STARTER} ${PYTEST_CMD} ${PYTEST_OPT}")
endif()
endif()

View File

@ -0,0 +1,4 @@
<?xml version="1.0"?>
<yandex>
<dictionaries_config>/etc/clickhouse-server/dictionaries/*.xml</dictionaries_config>
</yandex>

View File

@ -71,7 +71,7 @@ class CommandRequest:
self.stderr_file = tempfile.TemporaryFile()
self.ignore_error = ignore_error
#print " ".join(command)
# print " ".join(command)
# we suppress stderr on the client because sometimes thread sanitizer
# can print some debug information there

View File

@ -1,25 +1,25 @@
import base64
import cassandra.cluster
import distutils.dir_util
import docker
import errno
import httplib
import logging
import os
import os.path as p
import pprint
import psycopg2
import pwd
import pymongo
import pymysql
import re
import requests
import shutil
import socket
import subprocess
import time
import urllib
import xml.dom.minidom
from dicttoxml import dicttoxml
from kazoo.client import KazooClient
from kazoo.exceptions import KazooException
@ -88,12 +88,14 @@ class ClickHouseCluster:
these directories will contain logs, database files, docker-compose config, ClickHouse configs etc.
"""
def __init__(self, base_path, name=None, base_configs_dir=None, server_bin_path=None, client_bin_path=None,
def __init__(self, base_path, name=None, base_config_dir=None, server_bin_path=None, client_bin_path=None,
odbc_bridge_bin_path=None, zookeeper_config_path=None, custom_dockerd_host=None):
for param in os.environ.keys():
print "ENV %40s %s" % (param,os.environ[param])
self.base_dir = p.dirname(base_path)
self.name = name if name is not None else ''
self.base_configs_dir = base_configs_dir or os.environ.get('CLICKHOUSE_TESTS_BASE_CONFIG_DIR',
self.base_config_dir = base_config_dir or os.environ.get('CLICKHOUSE_TESTS_BASE_CONFIG_DIR',
'/etc/clickhouse-server/')
self.server_bin_path = p.realpath(
server_bin_path or os.environ.get('CLICKHOUSE_TESTS_SERVER_BIN_PATH', '/usr/bin/clickhouse'))
@ -155,6 +157,7 @@ class ClickHouseCluster:
self.docker_client = None
self.is_up = False
print "CLUSTER INIT base_config_dir:{}".format(self.base_config_dir)
def get_client_cmd(self):
cmd = self.client_bin_path
@ -162,7 +165,7 @@ class ClickHouseCluster:
cmd += " client"
return cmd
def add_instance(self, name, config_dir=None, main_configs=None, user_configs=None, macros=None,
def add_instance(self, name, base_config_dir=None, main_configs=None, user_configs=None, dictionaries=None, macros=None,
with_zookeeper=False, with_mysql=False, with_kafka=False, with_rabbitmq=False, clickhouse_path_dir=None,
with_odbc_drivers=False, with_postgres=False, with_hdfs=False, with_mongo=False,
with_redis=False, with_minio=False, with_cassandra=False,
@ -172,7 +175,7 @@ class ClickHouseCluster:
"""Add an instance to the cluster.
name - the name of the instance directory and the value of the 'instance' macro in ClickHouse.
config_dir - a directory with config files whose content will be copied to /etc/clickhouse-server/ directory
base_config_dir - a directory with config.xml and users.xml files which will be copied to /etc/clickhouse-server/ directory
main_configs - a list of config files that will be added to config.d/ directory
user_configs - a list of config files that will be added to users.d/ directory
with_zookeeper - if True, add ZooKeeper configuration to configs and ZooKeeper instances to the cluster.
@ -188,14 +191,36 @@ class ClickHouseCluster:
tag = self.docker_base_tag
instance = ClickHouseInstance(
self, self.base_dir, name, config_dir, main_configs or [], user_configs or [], macros or {},
with_zookeeper,
self.zookeeper_config_path, with_mysql, with_kafka, with_rabbitmq, with_mongo, with_redis, with_minio, with_cassandra,
self.base_configs_dir, self.server_bin_path,
self.odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=hostname,
env_variables=env_variables or {}, image=image, tag=tag, stay_alive=stay_alive, ipv4_address=ipv4_address,
cluster=self,
base_path=self.base_dir,
name=name,
base_config_dir=base_config_dir if base_config_dir else self.base_config_dir,
custom_main_configs=main_configs or [],
custom_user_configs=user_configs or [],
custom_dictionaries=dictionaries or [],
macros=macros or {},
with_zookeeper=with_zookeeper,
zookeeper_config_path=self.zookeeper_config_path,
with_mysql=with_mysql,
with_kafka=with_kafka,
with_rabbitmq=with_rabbitmq,
with_mongo=with_mongo,
with_redis=with_redis,
with_minio=with_minio,
with_cassandra=with_cassandra,
server_bin_path=self.server_bin_path,
odbc_bridge_bin_path=self.odbc_bridge_bin_path,
clickhouse_path_dir=clickhouse_path_dir,
with_odbc_drivers=with_odbc_drivers,
hostname=hostname,
env_variables=env_variables or {},
image=image,
tag=tag,
stay_alive=stay_alive,
ipv4_address=ipv4_address,
ipv6_address=ipv6_address,
with_installed_binary=with_installed_binary, tmpfs=tmpfs or [])
with_installed_binary=with_installed_binary,
tmpfs=tmpfs or [])
docker_compose_yml_dir = get_docker_compose_path()
@ -462,19 +487,19 @@ class ClickHouseCluster:
try:
minio_client.list_buckets()
logging.info("Connected to Minio.")
print("Connected to Minio.")
if minio_client.bucket_exists(self.minio_bucket):
minio_client.remove_bucket(self.minio_bucket)
minio_client.make_bucket(self.minio_bucket)
logging.info("S3 bucket '%s' created", self.minio_bucket)
print("S3 bucket '%s' created", self.minio_bucket)
self.minio_client = minio_client
return
except Exception as ex:
logging.warning("Can't connect to Minio: %s", str(ex))
print("Can't connect to Minio: %s", str(ex))
time.sleep(1)
raise Exception("Can't wait Minio to start")
@ -486,10 +511,10 @@ class ClickHouseCluster:
try:
sr_client._send_request(sr_client.url)
self.schema_registry_client = sr_client
logging.info("Connected to SchemaRegistry")
print("Connected to SchemaRegistry")
return
except Exception as ex:
logging.warning("Can't connect to SchemaRegistry: %s", str(ex))
print("Can't connect to SchemaRegistry: %s", str(ex))
time.sleep(1)
def wait_cassandra_to_start(self, timeout=30):
@ -505,25 +530,27 @@ class ClickHouseCluster:
time.sleep(1)
def start(self, destroy_dirs=True):
print "Cluster start called. is_up={}, destroy_dirs={}".format(self.is_up, destroy_dirs)
if self.is_up:
return
# Just in case kill unstopped containers from previous launch
try:
logging.info("Trying to kill unstopped containers...")
print("Trying to kill unstopped containers...")
if not subprocess_call(['docker-compose', 'kill']):
subprocess_call(['docker-compose', 'down', '--volumes'])
logging.info("Unstopped containers killed")
print("Unstopped containers killed")
except:
pass
try:
if destroy_dirs and p.exists(self.instances_dir):
logging.info("Removing instances dir %s", self.instances_dir)
print("Removing instances dir %s", self.instances_dir)
shutil.rmtree(self.instances_dir)
for instance in self.instances.values():
print('Setup directory for instance: {} destroy_dirs: {}'.format(instance.name, destroy_dirs))
instance.create_dir(destroy_dir=destroy_dirs)
self.docker_client = docker.from_env(version=self.docker_api_version)
@ -531,6 +558,7 @@ class ClickHouseCluster:
common_opts = ['up', '-d', '--force-recreate']
if self.with_zookeeper and self.base_zookeeper_cmd:
print('Setup ZooKeeper')
env = os.environ.copy()
if not self.zookeeper_use_tmpfs:
env['ZK_FS'] = 'bind'
@ -549,14 +577,17 @@ class ClickHouseCluster:
self.wait_zookeeper_to_start(120)
if self.with_mysql and self.base_mysql_cmd:
print('Setup MySQL')
subprocess_check_call(self.base_mysql_cmd + common_opts)
self.wait_mysql_to_start(120)
if self.with_postgres and self.base_postgres_cmd:
print('Setup Postgres')
subprocess_check_call(self.base_postgres_cmd + common_opts)
self.wait_postgres_to_start(120)
if self.with_kafka and self.base_kafka_cmd:
print('Setup Kafka')
subprocess_check_call(self.base_kafka_cmd + common_opts + ['--renew-anon-volumes'])
self.kafka_docker_id = self.get_instance_docker_id('kafka1')
self.wait_schema_registry_to_start(120)
@ -566,14 +597,17 @@ class ClickHouseCluster:
self.rabbitmq_docker_id = self.get_instance_docker_id('rabbitmq1')
if self.with_hdfs and self.base_hdfs_cmd:
print('Setup HDFS')
subprocess_check_call(self.base_hdfs_cmd + common_opts)
self.wait_hdfs_to_start(120)
if self.with_mongo and self.base_mongo_cmd:
print('Setup Mongo')
subprocess_check_call(self.base_mongo_cmd + common_opts)
self.wait_mongo_to_start(30)
if self.with_redis and self.base_redis_cmd:
print('Setup Redis')
subprocess_check_call(self.base_redis_cmd + ['up', '-d', '--force-recreate'])
time.sleep(10)
@ -612,18 +646,19 @@ class ClickHouseCluster:
self.wait_cassandra_to_start()
clickhouse_start_cmd = self.base_cmd + ['up', '-d', '--no-recreate']
logging.info("Trying to create ClickHouse instance by command %s", ' '.join(map(str, clickhouse_start_cmd)))
print("Trying to create ClickHouse instance by command %s", ' '.join(map(str, clickhouse_start_cmd)))
subprocess_check_call(clickhouse_start_cmd)
logging.info("ClickHouse instance created")
print("ClickHouse instance created")
start_deadline = time.time() + 20.0 # seconds
for instance in self.instances.itervalues():
instance.docker_client = self.docker_client
instance.ip_address = self.get_instance_ip(instance.name)
logging.info("Waiting for ClickHouse start...")
print("Waiting for ClickHouse start...")
instance.wait_for_start(start_deadline)
logging.info("ClickHouse started")
print("ClickHouse started")
instance.client = Client(instance.ip_address, command=self.client_bin_path)
@ -637,7 +672,10 @@ class ClickHouseCluster:
def shutdown(self, kill=True):
sanitizer_assert_instance = None
with open(self.docker_logs_path, "w+") as f:
subprocess.check_call(self.base_cmd + ['logs'], stdout=f)
try:
subprocess.check_call(self.base_cmd + ['logs'], stdout=f)
except Exception as e:
print "Unable to get logs from docker."
f.seek(0)
for line in f:
if SANITIZER_SIGN in line:
@ -645,8 +683,15 @@ class ClickHouseCluster:
break
if kill:
subprocess_check_call(self.base_cmd + ['kill'])
subprocess_check_call(self.base_cmd + ['down', '--volumes', '--remove-orphans'])
try:
subprocess_check_call(self.base_cmd + ['kill'])
except Exception as e:
print "Kill command failed durung shutdown. {}".format(repr(e))
try:
subprocess_check_call(self.base_cmd + ['down', '--volumes', '--remove-orphans'])
except Exception as e:
print "Down + remove orphans failed durung shutdown. {}".format(repr(e))
self.is_up = False
@ -711,7 +756,7 @@ services:
image: {image}:{tag}
hostname: {hostname}
volumes:
- {configs_dir}:/etc/clickhouse-server/
- {instance_config_dir}:/etc/clickhouse-server/
- {db_dir}:/var/lib/clickhouse/
- {logs_dir}:/var/log/clickhouse-server/
{binary_volume}
@ -744,10 +789,9 @@ services:
class ClickHouseInstance:
def __init__(
self, cluster, base_path, name, custom_config_dir, custom_main_configs, custom_user_configs, macros,
with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, with_rabbitmq, with_mongo,
with_redis, with_minio, with_cassandra, base_configs_dir, server_bin_path, odbc_bridge_bin_path,
clickhouse_path_dir, with_odbc_drivers, hostname=None, env_variables=None,
self, cluster, base_path, name, base_config_dir, custom_main_configs, custom_user_configs, custom_dictionaries,
macros, with_zookeeper, zookeeper_config_path, with_mysql, with_kafka, with_rabbitmq, with_mongo, with_redis, with_minio,
with_cassandra, server_bin_path, odbc_bridge_bin_path, clickhouse_path_dir, with_odbc_drivers, hostname=None, env_variables=None,
image="yandex/clickhouse-integration-test", tag="latest",
stay_alive=False, ipv4_address=None, ipv6_address=None, with_installed_binary=False, tmpfs=None):
@ -758,15 +802,15 @@ class ClickHouseInstance:
self.hostname = hostname if hostname is not None else self.name
self.tmpfs = tmpfs or []
self.custom_config_dir = p.abspath(p.join(base_path, custom_config_dir)) if custom_config_dir else None
self.base_config_dir = p.abspath(p.join(base_path, base_config_dir)) if base_config_dir else None
self.custom_main_config_paths = [p.abspath(p.join(base_path, c)) for c in custom_main_configs]
self.custom_user_config_paths = [p.abspath(p.join(base_path, c)) for c in custom_user_configs]
self.custom_dictionaries_paths = [p.abspath(p.join(base_path, c)) for c in custom_dictionaries]
self.clickhouse_path_dir = p.abspath(p.join(base_path, clickhouse_path_dir)) if clickhouse_path_dir else None
self.macros = macros if macros is not None else {}
self.with_zookeeper = with_zookeeper
self.zookeeper_config_path = zookeeper_config_path
self.base_configs_dir = base_configs_dir
self.server_bin_path = server_bin_path
self.odbc_bridge_bin_path = odbc_bridge_bin_path
@ -782,7 +826,7 @@ class ClickHouseInstance:
self.docker_compose_path = p.join(self.path, 'docker_compose.yml')
self.env_variables = env_variables or {}
if with_odbc_drivers:
self.odbc_ini_path = os.path.dirname(self.docker_compose_path) + "/odbc.ini:/etc/odbc.ini"
self.odbc_ini_path = self.path + "/odbc.ini:/etc/odbc.ini"
self.with_mysql = True
else:
self.odbc_ini_path = ""
@ -985,7 +1029,7 @@ class ClickHouseInstance:
time_left = deadline - current_time
if deadline is not None and current_time >= deadline:
raise Exception("Timed out while waiting for instance `{}' with ip address {} to start. "
"Container status: {}".format(self.name, self.ip_address, status))
"Container status: {}, logs: {}".format(self.name, self.ip_address, status, handle.logs()))
# Repeatedly poll the instance address until there is something that listens there.
# Usually it means that ClickHouse is ready to accept queries.
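The polling described in this comment boils down to retrying a TCP connect until a deadline; a rough sketch of the idea (not the exact helper), assuming port 9000 is the native protocol port:
import socket
import time
def wait_for_port(ip, port=9000, timeout=20.0):
    deadline = time.time() + timeout
    while True:
        try:
            # if the connect succeeds, something is listening there
            sock = socket.create_connection((ip, port), timeout=1.0)
            sock.close()
            return
        except socket.error:
            if time.time() >= deadline:
                raise Exception("nothing listens on {}:{}".format(ip, port))
            time.sleep(0.5)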
@ -1067,40 +1111,46 @@ class ClickHouseInstance:
os.makedirs(self.path)
configs_dir = p.abspath(p.join(self.path, 'configs'))
os.mkdir(configs_dir)
instance_config_dir = p.abspath(p.join(self.path, 'configs'))
os.makedirs(instance_config_dir)
shutil.copy(p.join(self.base_configs_dir, 'config.xml'), configs_dir)
shutil.copy(p.join(self.base_configs_dir, 'users.xml'), configs_dir)
print "Copy common default production configuration from {}".format(self.base_config_dir)
shutil.copyfile(p.join(self.base_config_dir, 'config.xml'), p.join(instance_config_dir, 'config.xml'))
shutil.copyfile(p.join(self.base_config_dir, 'users.xml'), p.join(instance_config_dir, 'users.xml'))
print "Create directory for configuration generated in this helper"
# used by all utils with any config
conf_d_dir = p.abspath(p.join(configs_dir, 'conf.d'))
# used by server with main config.xml
self.config_d_dir = p.abspath(p.join(configs_dir, 'config.d'))
users_d_dir = p.abspath(p.join(configs_dir, 'users.d'))
conf_d_dir = p.abspath(p.join(instance_config_dir, 'conf.d'))
os.mkdir(conf_d_dir)
os.mkdir(self.config_d_dir)
os.mkdir(users_d_dir)
print "Create directory for common tests configuration"
# used by server with main config.xml
self.config_d_dir = p.abspath(p.join(instance_config_dir, 'config.d'))
os.mkdir(self.config_d_dir)
users_d_dir = p.abspath(p.join(instance_config_dir, 'users.d'))
os.mkdir(users_d_dir)
dictionaries_dir = p.abspath(p.join(instance_config_dir, 'dictionaries'))
os.mkdir(dictionaries_dir)
print "Copy common configuration from helpers"
# The file is named with 0_ prefix to be processed before other configuration overloads.
shutil.copy(p.join(HELPERS_DIR, '0_common_instance_config.xml'), self.config_d_dir)
shutil.copy(p.join(HELPERS_DIR, '0_common_instance_users.xml'), users_d_dir)
if len(self.custom_dictionaries_paths):
shutil.copy(p.join(HELPERS_DIR, '0_common_enable_dictionaries.xml'), self.config_d_dir)
# Generate and write macros file
print "Generate and write macros file"
macros = self.macros.copy()
macros['instance'] = self.name
with open(p.join(self.config_d_dir, 'macros.xml'), 'w') as macros_config:
with open(p.join(conf_d_dir, 'macros.xml'), 'w') as macros_config:
macros_config.write(self.dict_to_xml({"macros": macros}))
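The macros file is produced by dict_to_xml from a plain dict; a toy equivalent of that serialization, only to show what conf.d/macros.xml ends up containing (the real helper may wrap or indent the output differently):
def toy_dict_to_xml(d):
    # recursively render {'tag': value-or-dict} as nested XML elements
    out = ''
    for k, v in d.items():
        inner = toy_dict_to_xml(v) if isinstance(v, dict) else str(v)
        out += '<{0}>{1}</{0}>'.format(k, inner)
    return out
print toy_dict_to_xml({"macros": {"instance": "node1", "shard": "0"}})
# <macros><instance>node1</instance><shard>0</shard></macros>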
# Put ZooKeeper config
if self.with_zookeeper:
shutil.copy(self.zookeeper_config_path, conf_d_dir)
# Copy config dir
if self.custom_config_dir:
distutils.dir_util.copy_tree(self.custom_config_dir, configs_dir)
# Copy config.d configs
print "Copy custom test config files {} to {}".format(self.custom_main_config_paths, self.config_d_dir)
for path in self.custom_main_config_paths:
shutil.copy(path, self.config_d_dir)
@ -1108,12 +1158,19 @@ class ClickHouseInstance:
for path in self.custom_user_config_paths:
shutil.copy(path, users_d_dir)
# Copy dictionaries configs to configs/dictionaries
for path in self.custom_dictionaries_paths:
shutil.copy(path, dictionaries_dir)
db_dir = p.abspath(p.join(self.path, 'database'))
print "Setup database dir {}".format(db_dir)
os.mkdir(db_dir)
if self.clickhouse_path_dir is not None:
print "Database files taken from {}".format(self.clickhouse_path_dir)
distutils.dir_util.copy_tree(self.clickhouse_path_dir, db_dir)
logs_dir = p.abspath(p.join(self.path, 'logs'))
print "Setup logs dir {}".format(logs_dir)
os.mkdir(logs_dir)
depends_on = []
@ -1138,6 +1195,8 @@ class ClickHouseInstance:
env_file = _create_env_file(os.path.dirname(self.docker_compose_path), self.env_variables)
print "Env {} stored in {}".format(self.env_variables, env_file)
odbc_ini_path = ""
if self.odbc_ini_path:
self._create_odbc_config_file()
@ -1148,6 +1207,8 @@ class ClickHouseInstance:
if self.stay_alive:
entrypoint_cmd = CLICKHOUSE_STAY_ALIVE_COMMAND
print "Entrypoint cmd: {}".format(entrypoint_cmd)
networks = app_net = ipv4_address = ipv6_address = net_aliases = net_alias1 = ""
if self.ipv4_address is not None or self.ipv6_address is not None or self.hostname != self.name:
networks = "networks:"
@ -1167,6 +1228,7 @@ class ClickHouseInstance:
binary_volume = "- " + self.server_bin_path + ":/usr/share/clickhouse_fresh"
odbc_bridge_volume = "- " + self.odbc_bridge_bin_path + ":/usr/share/clickhouse-odbc-bridge_fresh"
with open(self.docker_compose_path, 'w') as docker_compose:
docker_compose.write(DOCKER_COMPOSE_TEMPLATE.format(
image=self.image,
@ -1175,7 +1237,7 @@ class ClickHouseInstance:
hostname=self.hostname,
binary_volume=binary_volume,
odbc_bridge_volume=odbc_bridge_volume,
configs_dir=configs_dir,
instance_config_dir=instance_config_dir,
config_d_dir=self.config_d_dir,
db_dir=db_dir,
tmpfs=str(self.tmpfs),

View File

@ -0,0 +1,197 @@
<dictionaries>
<dictionary>
<name>flat_decimals</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>decimals</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<flat/>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>d32</name>
<type>Decimal32(4)</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>d64</name>
<type>Decimal64(6)</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>d128</name>
<type>Decimal128(1)</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>hashed_decimals</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>decimals</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<hashed/>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>d32</name>
<type>Decimal32(4)</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>d64</name>
<type>Decimal64(6)</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>d128</name>
<type>Decimal128(1)</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>cache_decimals</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>decimals</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<cache><size_in_cells>1000</size_in_cells></cache>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>d32</name>
<type>Decimal32(4)</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>d64</name>
<type>Decimal64(6)</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>d128</name>
<type>Decimal128(1)</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>complex_hashed_decimals</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>decimals</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<complex_key_hashed/>
</layout>
<structure>
<key>
<attribute>
<name>key</name>
<type>UInt64</type>
</attribute>
</key>
<attribute>
<name>d32</name>
<type>Decimal32(4)</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>d64</name>
<type>Decimal64(6)</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>d128</name>
<type>Decimal128(1)</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>complex_cache_decimals</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>decimals</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<complex_key_cache><size_in_cells>1000</size_in_cells></complex_key_cache>
</layout>
<structure>
<key>
<attribute>
<name>key</name>
<type>UInt64</type>
</attribute>
</key>
<attribute>
<name>d32</name>
<type>Decimal32(4)</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>d64</name>
<type>Decimal64(6)</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>d128</name>
<type>Decimal128(1)</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
</dictionaries>
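Once loaded, these dictionaries are queried with dictGet; a sketch of how a test might exercise the three simple layouts, assuming a started instance `node` with this file loaded and a populated source table (the real tests create and fill it first):
for layout in ['flat', 'hashed', 'cache']:
    query = "SELECT dictGet('{}_decimals', 'd32', toUInt64(1))".format(layout)
    print node.query(query)
# complex_key_* layouts take the key as a tuple instead:
#   SELECT dictGet('complex_hashed_decimals', 'd64', tuple(toUInt64(1)))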

View File

@ -0,0 +1,514 @@
<dictionaries>
<dictionary>
<name>flat_ints</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>ints</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<flat/>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>i8</name>
<type>Int8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i16</name>
<type>Int16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i32</name>
<type>Int32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i64</name>
<type>Int64</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u8</name>
<type>UInt8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u16</name>
<type>UInt16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u32</name>
<type>UInt32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u64</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>hashed_ints</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>ints</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<hashed/>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>i8</name>
<type>Int8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i16</name>
<type>Int16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i32</name>
<type>Int32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i64</name>
<type>Int64</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u8</name>
<type>UInt8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u16</name>
<type>UInt16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u32</name>
<type>UInt32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u64</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>hashed_sparse_ints</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>ints</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<sparse_hashed/>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>i8</name>
<type>Int8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i16</name>
<type>Int16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i32</name>
<type>Int32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i64</name>
<type>Int64</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u8</name>
<type>UInt8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u16</name>
<type>UInt16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u32</name>
<type>UInt32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u64</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>cache_ints</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>ints</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<cache><size_in_cells>1000</size_in_cells></cache>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>i8</name>
<type>Int8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i16</name>
<type>Int16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i32</name>
<type>Int32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i64</name>
<type>Int64</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u8</name>
<type>UInt8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u16</name>
<type>UInt16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u32</name>
<type>UInt32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u64</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>complex_hashed_ints</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>ints</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<complex_key_hashed/>
</layout>
<structure>
<key>
<attribute>
<name>key</name>
<type>UInt64</type>
</attribute>
</key>
<attribute>
<name>i8</name>
<type>Int8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i16</name>
<type>Int16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i32</name>
<type>Int32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i64</name>
<type>Int64</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u8</name>
<type>UInt8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u16</name>
<type>UInt16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u32</name>
<type>UInt32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u64</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>complex_cache_ints</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>ints</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<complex_key_cache><size_in_cells>1000</size_in_cells></complex_key_cache>
</layout>
<structure>
<key>
<attribute>
<name>key</name>
<type>UInt64</type>
</attribute>
</key>
<attribute>
<name>i8</name>
<type>Int8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i16</name>
<type>Int16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i32</name>
<type>Int32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i64</name>
<type>Int64</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u8</name>
<type>UInt8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u16</name>
<type>UInt16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u32</name>
<type>UInt32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u64</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>one_cell_cache_ints</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>test_01054</db>
<table>ints</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<cache><size_in_cells>1</size_in_cells></cache>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>i8</name>
<type>Int8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i16</name>
<type>Int16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i32</name>
<type>Int32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i64</name>
<type>Int64</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u8</name>
<type>UInt8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u16</name>
<type>UInt16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u32</name>
<type>UInt32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u64</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>one_cell_cache_ints_overflow</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>test_01054_overflow</db>
<table>ints</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<cache><size_in_cells>1</size_in_cells></cache>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>i8</name>
<type>Int8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i16</name>
<type>Int16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i32</name>
<type>Int32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>i64</name>
<type>Int64</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u8</name>
<type>UInt8</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u16</name>
<type>UInt16</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u32</name>
<type>UInt32</type>
<null_value>0</null_value>
</attribute>
<attribute>
<name>u64</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
</dictionaries>

View File

@ -0,0 +1,209 @@
<dictionaries>
<dictionary>
<name>flat_strings</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>strings</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<flat/>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>str</name>
<type>String</type>
<null_value></null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>hashed_strings</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>strings</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<hashed/>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>str</name>
<type>String</type>
<null_value></null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>cache_strings</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>strings</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<cache><size_in_cells>1000</size_in_cells></cache>
</layout>
<structure>
<id>
<name>key</name>
</id>
<attribute>
<name>str</name>
<type>String</type>
<null_value></null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>complex_hashed_strings</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>strings</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<complex_key_hashed/>
</layout>
<structure>
<key>
<attribute>
<name>key</name>
<type>UInt64</type>
</attribute>
</key>
<attribute>
<name>str</name>
<type>String</type>
<null_value></null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>complex_cache_strings</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>strings</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<complex_key_cache><size_in_cells>1000</size_in_cells></complex_key_cache>
</layout>
<structure>
<key>
<attribute>
<name>key</name>
<type>UInt64</type>
</attribute>
</key>
<attribute>
<name>str</name>
<type>String</type>
<null_value></null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>complex_hashed_strings_key</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>strings</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<complex_key_hashed/>
</layout>
<structure>
<key>
<attribute>
<name>str</name>
<type>String</type>
</attribute>
</key>
<attribute>
<name>key</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
<dictionary>
<name>complex_cache_strings_key</name>
<source>
<clickhouse>
<host>localhost</host>
<port>9000</port>
<user>default</user>
<password></password>
<db>system</db>
<table>strings</table>
</clickhouse>
</source>
<lifetime>0</lifetime>
<layout>
<complex_key_cache><size_in_cells>1000</size_in_cells></complex_key_cache>
</layout>
<structure>
<key>
<attribute>
<name>str</name>
<type>String</type>
</attribute>
</key>
<attribute>
<name>key</name>
<type>UInt64</type>
<null_value>0</null_value>
</attribute>
</structure>
</dictionary>
</dictionaries>
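The two *_strings_key dictionaries above invert the usual direction: the string itself is the complex key and the UInt64 'key' column is the attribute. A hedged lookup sketch, assuming a started instance `node` with this file loaded:
print node.query("SELECT dictGet('complex_hashed_strings_key', 'key', tuple('some string'))")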

View File

@ -11,6 +11,9 @@ class TSV:
raw_lines = contents.splitlines(True)
elif isinstance(contents, list):
raw_lines = ['\t'.join(map(str, l)) if isinstance(l, list) else str(l) for l in contents]
elif isinstance(contents, TSV):
self.lines = contents.lines
return
else:
raise TypeError("contents must be either file or string or list, actual type: " + type(contents).__name__)
self.lines = [l.strip() for l in raw_lines if l.strip()]
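The new branch makes TSV(TSV(...)) a cheap copy, so helpers can wrap values unconditionally; a usage sketch, assuming TSV lives in helpers.test_tools (the module imported by the tests in this diff) and compares by its normalized lines:
from helpers.test_tools import TSV
assert TSV("1\t2\n3\t4\n") == TSV([[1, 2], [3, 4]])
assert TSV(TSV("a\tb")) == TSV("a\tb")  # exercises the new TSV-from-TSV branch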

View File

@ -53,7 +53,7 @@ def check_args_and_update_paths(args):
logging.info("base_configs_dir: {}, binary: {}, cases_dir: {} ".format(args.base_configs_dir, args.binary, args.cases_dir))
for path in [args.binary, args.base_configs_dir, args.cases_dir, CLICKHOUSE_ROOT]:
for path in [args.binary, args.bridge_binary, args.base_configs_dir, args.cases_dir, CLICKHOUSE_ROOT]:
if not os.path.exists(path):
raise Exception("Path {} doesn't exist".format(path))

View File

@ -4,9 +4,9 @@ from helpers.cluster import ClickHouseCluster
from helpers.client import QueryRuntimeException
cluster = ClickHouseCluster(__file__)
ch1 = cluster.add_instance('ch1', config_dir="configs", with_zookeeper=True)
ch2 = cluster.add_instance('ch2', config_dir="configs", with_zookeeper=True)
ch3 = cluster.add_instance('ch3', config_dir="configs", with_zookeeper=True)
ch1 = cluster.add_instance('ch1', main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True)
ch2 = cluster.add_instance('ch2', main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True)
ch3 = cluster.add_instance('ch3', main_configs=["configs/config.d/clusters.xml"], with_zookeeper=True)
@pytest.fixture(scope="module", autouse=True)
def started_cluster():

View File

@ -9,23 +9,23 @@ from helpers.test_tools import assert_eq_with_retry
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True)
node2 = cluster.add_instance('node2', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True)
node1 = cluster.add_instance('node1', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True)
node2 = cluster.add_instance('node2', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True)
node3 = cluster.add_instance('node3', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.6.3.18', with_installed_binary=True)
node4 = cluster.add_instance('node4', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True)
node3 = cluster.add_instance('node3', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.6.3.18', with_installed_binary=True)
node4 = cluster.add_instance('node4', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True)
node5 = cluster.add_instance('node5', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', with_installed_binary=True)
node6 = cluster.add_instance('node6', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True)
node5 = cluster.add_instance('node5', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', with_installed_binary=True)
node6 = cluster.add_instance('node6', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True)
node7 = cluster.add_instance('node7', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.6.3.18', stay_alive=True, with_installed_binary=True)
node8 = cluster.add_instance('node8', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True, with_installed_binary=True)
node7 = cluster.add_instance('node7', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.6.3.18', stay_alive=True, with_installed_binary=True)
node8 = cluster.add_instance('node8', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True, with_installed_binary=True)
node9 = cluster.add_instance('node9', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml', 'configs/merge_tree_settings.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True, with_installed_binary=True)
node10 = cluster.add_instance('node10', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml', 'configs/merge_tree_settings.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.6.3.18', stay_alive=True, with_installed_binary=True)
node9 = cluster.add_instance('node9', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml', 'configs/merge_tree_settings.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True, with_installed_binary=True)
node10 = cluster.add_instance('node10', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml', 'configs/merge_tree_settings.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.6.3.18', stay_alive=True, with_installed_binary=True)
node11 = cluster.add_instance('node11', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True, with_installed_binary=True)
node12 = cluster.add_instance('node12', config_dir="configs", main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True, with_installed_binary=True)
node11 = cluster.add_instance('node11', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True, with_installed_binary=True)
node12 = cluster.add_instance('node12', main_configs=['configs/remote_servers.xml', 'configs/log_conf.xml'], with_zookeeper=True, image='yandex/clickhouse-server', tag='19.1.15', stay_alive=True, with_installed_binary=True)
def prepare_single_pair_with_setting(first_node, second_node, group):

View File

@ -4,7 +4,7 @@ from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
server = cluster.add_instance('server', config_dir="configs")
server = cluster.add_instance('server', user_configs=["configs/users.d/network.xml"])
clientA1 = cluster.add_instance('clientA1', hostname = 'clientA1.com')
clientA2 = cluster.add_instance('clientA2', hostname = 'clientA2.com')
@ -20,7 +20,12 @@ clientD2 = cluster.add_instance('clientD2', hostname = 'xxx.clientD0002.ru')
clientD3 = cluster.add_instance('clientD3', hostname = 'clientD0003.ru')
def check_clickhouse_is_ok(client_node, server_node):
assert client_node.exec_in_container(["bash", "-c", "/usr/bin/curl -s {}:8123 ".format(server_node.hostname)]) == "Ok.\n"
def query_from_one_node_to_another(client_node, server_node, query):
check_clickhouse_is_ok(client_node, server_node)
return client_node.exec_in_container(["bash", "-c", "/usr/bin/clickhouse client --host {} --query {!r}".format(server_node.hostname, query)])
@ -56,5 +61,6 @@ def test_allowed_host():
for client_node in expected_to_fail:
with pytest.raises(Exception) as e:
query_from_one_node_to_another(client_node, server, "SELECT * FROM test_table")
result = query_from_one_node_to_another(client_node, server, "SELECT * FROM test_table")
print("Client node: {} Server node: {} Result: {}".format(client_node, server_node, result))
assert "default: Authentication failed" in str(e)

View File

@ -40,7 +40,7 @@ def test_config_with_only_regexp_hosts(start_cluster):
assert node3.query("CREATE TABLE table_test_3_1 (word String) Engine=URL('https://host:80', HDFS)") == ""
assert node3.query("CREATE TABLE table_test_3_2 (word String) Engine=URL('https://yandex.ru', CSV)") == ""
assert "not allowed" in node3.query_and_get_error("CREATE TABLE table_test_3_3 (word String) Engine=URL('https://host', CSV)")
assert "not allowed" in node3.query_and_get_error("CREATE TABLE table_test_3_4 (word String) Engine=URL('https://yandex2.ru', S3)")
assert "not allowed" in node3.query_and_get_error("CREATE TABLE table_test_3_4 (word String) Engine=URL('https://yandex2.ru', S3)")
def test_config_without_allowed_hosts(start_cluster):
assert node4.query("CREATE TABLE table_test_4_1 (word String) Engine=URL('https://host:80', CSV)") == ""
@ -49,18 +49,18 @@ def test_config_without_allowed_hosts(start_cluster):
assert node4.query("CREATE TABLE table_test_4_4 (word String) Engine=URL('ftp://something.com', S3)") == ""
def test_table_function_remote(start_cluster):
assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-01-{1|2}', system, events)", settings={"connections_with_failover_max_tries":1, "connect_timeout_with_failover_ms": 1000, "connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout":1})
assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-01-1,example01-02-1', system, events)", settings={"connections_with_failover_max_tries":1, "connect_timeout_with_failover_ms": 1000, "connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout":1})
assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remote('example01-0{1,2}-1', system, events", settings={"connections_with_failover_max_tries":1, "connect_timeout_with_failover_ms": 1000, "connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout":1})
assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remote('example01-0{1,2}-{1|2}', system, events)", settings={"connections_with_failover_max_tries":1, "connect_timeout_with_failover_ms": 1000, "connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout":1})
assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-{01..02}-{1|2}', system, events)", settings={"connections_with_failover_max_tries":1, "connect_timeout_with_failover_ms": 1000, "connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout":1})
assert "not allowed" in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-01-1,example01-03-1', system, events)", settings={"connections_with_failover_max_tries":1, "connect_timeout_with_failover_ms": 1000, "connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout":1})
assert "not allowed" in node6.query_and_get_error("SELECT * FROM remote('example01-01-{1|3}', system, events)", settings={"connections_with_failover_max_tries":1, "connect_timeout_with_failover_ms": 1000, "connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout":1})
assert "not allowed" in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-0{1,3}-1', system, metrics)", settings={"connections_with_failover_max_tries":1, "connect_timeout_with_failover_ms": 1000, "connect_timeout_with_failover_secure_ms": 1000, "connect_timeout": 1, "send_timeout":1})
assert node6.query("SELECT * FROM remote('localhost', system, events)") != ""
assert node6.query("SELECT * FROM remoteSecure('localhost', system, metrics)") != ""
assert "URL \"localhost:800\" is not allowed in config.xml" in node6.query_and_get_error("SELECT * FROM remoteSecure('localhost:800', system, events)")
assert "URL \"localhost:800\" is not allowed in config.xml" in node6.query_and_get_error("SELECT * FROM remote('localhost:800', system, metrics)")
assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-01-1,example01-02-1', system, events)")
assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remote('example01-0{1,2}-1', system, events")
assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-01-{1|2}', system, events)")
assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remote('example01-0{1,2}-{1|2}', system, events)")
assert "not allowed in config.xml" not in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-{01..02}-{1|2}', system, events)")
assert "not allowed" in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-01-1,example01-03-1', system, events)")
assert "not allowed" in node6.query_and_get_error("SELECT * FROM remote('example01-01-{1|3}', system, events)")
assert "not allowed" in node6.query_and_get_error("SELECT * FROM remoteSecure('example01-0{1,3}-1', system, metrics)")
def test_redirect(start_cluster):
hdfs_api = HDFSApi("root")

View File

@ -6,11 +6,9 @@ from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1',
config_dir='configs',
main_configs=['configs/logs_config.xml'])
node2 = cluster.add_instance('node2',
config_dir='configs',
main_configs=['configs/logs_config.xml'])

View File

@ -6,7 +6,7 @@ from helpers.cluster import ClickHouseCluster
cluster = ClickHouseCluster(__file__)
node1 = cluster.add_instance('node1', config_dir="configs", with_zookeeper=True)
node1 = cluster.add_instance('node1', main_configs=["configs/config.d/zookeeper_session_timeout.xml", "configs/remote_servers.xml"], with_zookeeper=True)
@pytest.fixture(scope="module")

View File

@ -1,80 +1,74 @@
<?xml version="1.0"?>
<yandex>
<remote_servers>
<cluster0>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s0_0_0</host>
<port>9000</port>
</replica>
<replica>
<host>s0_0_1</host>
<port>9000</port>
</replica>
</shard>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s0_1_0</host>
<port>9000</port>
</replica>
</shard>
</cluster0>
<cluster1>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s1_0_0</host>
<port>9000</port>
</replica>
<replica>
<host>s1_0_1</host>
<port>9000</port>
</replica>
</shard>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s1_1_0</host>
<port>9000</port>
</replica>
</shard>
</cluster1>
<shard_0_0>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s0_0_0</host>
<port>9000</port>
</replica>
<replica>
<host>s0_0_1</host>
<port>9000</port>
</replica>
</shard>
</shard_0_0>
<source_trivial_cluster>
<shard>
<replica>
<host>s0_0_0</host>
<port>9000</port>
</replica>
</shard>
</source_trivial_cluster>
<destination_trivial_cluster>
<shard>
<replica>
<host>s1_0_0</host>
<port>9000</port>
</replica>
</shard>
</destination_trivial_cluster>
</remote_servers>
<remote_servers>
<cluster0>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s0_0_0</host>
<port>9000</port>
</replica>
<replica>
<host>s0_0_1</host>
<port>9000</port>
</replica>
</shard>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s0_1_0</host>
<port>9000</port>
</replica>
</shard>
</cluster0>
<cluster1>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s1_0_0</host>
<port>9000</port>
</replica>
<replica>
<host>s1_0_1</host>
<port>9000</port>
</replica>
</shard>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s1_1_0</host>
<port>9000</port>
</replica>
</shard>
</cluster1>
<shard_0_0>
<shard>
<internal_replication>true</internal_replication>
<replica>
<host>s0_0_0</host>
<port>9000</port>
</replica>
<replica>
<host>s0_0_1</host>
<port>9000</port>
</replica>
</shard>
</shard_0_0>
<source_trivial_cluster>
<shard>
<replica>
<host>s0_0_0</host>
<port>9000</port>
</replica>
</shard>
</source_trivial_cluster>
<destination_trivial_cluster>
<shard>
<replica>
<host>s1_0_0</host>
<port>9000</port>
</replica>
</shard>
</destination_trivial_cluster>
</remote_servers>
</yandex>

View File

@ -54,7 +54,8 @@ def started_cluster():
for replica_name in replicas:
name = "s{}_{}_{}".format(cluster_name, shard_name, replica_name)
cluster.add_instance(name,
config_dir="configs",
main_configs=["configs/conf.d/query_log.xml", "configs/conf.d/ddl.xml", "configs/conf.d/clusters.xml"],
user_configs=["configs/users.xml"],
macros={"cluster": cluster_name, "shard": shard_name, "replica": replica_name},
with_zookeeper=True)
@ -226,6 +227,7 @@ def execute_task(task, cmd_options):
zk.ensure_path(zk_task_path)
zk.create(zk_task_path + "/description", task.copier_task_config)
# Run cluster-copier processes on each node
docker_api = docker.from_env().api
copiers_exec_ids = []
@ -241,9 +243,11 @@ def execute_task(task, cmd_options):
for instance_name in copiers:
instance = cluster.instances[instance_name]
container = instance.get_docker_handle()
instance.copy_file_to_container(os.path.join(CURRENT_TEST_DIR, "configs/config-copier.xml"), "/etc/clickhouse-server/config-copier.xml")
print "Copied copier config to {}".format(instance.name)
exec_id = docker_api.exec_create(container.id, cmd, stderr=True)
docker_api.exec_start(exec_id, detach=True)
output = docker_api.exec_start(exec_id).decode('utf8')
print(output)
copiers_exec_ids.append(exec_id)
print "Copier for {} ({}) has started".format(instance.name, instance.ip_address)

View File

@ -34,7 +34,7 @@ def started_cluster():
for replica_name in replicas:
name = "s{}_{}_{}".format(cluster_name, shard_name, replica_name)
cluster.add_instance(name,
config_dir="configs",
main_configs=[], user_configs=[],
macros={"cluster": cluster_name, "shard": shard_name, "replica": replica_name},
with_zookeeper=True)

Some files were not shown because too many files have changed in this diff.