Merge branch 'master' into simplier_coverage

This commit is contained in:
alesapin 2020-12-22 18:37:15 +03:00
commit f0b2bf9247
167 changed files with 3149 additions and 1332 deletions

View File

@ -1,8 +1,8 @@
# This workflow checks out code, performs an Anchore container image
# vulnerability and compliance scan, and integrates the results with
# GitHub Advanced Security code scanning feature. For more information on
# the Anchore scan action usage and parameters, see
# https://github.com/anchore/scan-action. For more information on
# Anchore container image scanning in general, see
# https://docs.anchore.com.
@ -28,18 +28,12 @@ jobs:
perl -pi -e 's|=\$version||g' Dockerfile
docker build . --file Dockerfile --tag localbuild/testimage:latest
- name: Run the local Anchore scan action itself with GitHub Advanced Security code scanning integration enabled
uses: anchore/scan-action@master
uses: anchore/scan-action@v2
id: scan
with:
image-reference: "localbuild/testimage:latest"
dockerfile-path: "docker/server/Dockerfile"
image: "localbuild/testimage:latest"
acs-report-enable: true
fail-build: true
- name: Upload artifact
uses: actions/upload-artifact@v1.0.0
with:
name: AnchoreReports
path: ./anchore-reports/
- name: Upload Anchore Scan Report
uses: github/codeql-action/upload-sarif@v1
with:
sarif_file: results.sarif
sarif_file: ${{ steps.scan.outputs.sarif }}

3
.gitmodules vendored
View File

@ -53,7 +53,8 @@
url = https://github.com/ClickHouse-Extras/Turbo-Base64.git
[submodule "contrib/arrow"]
path = contrib/arrow
url = https://github.com/apache/arrow
url = https://github.com/ClickHouse-Extras/arrow
branch = clickhouse-arrow-2.0.0
[submodule "contrib/thrift"]
path = contrib/thrift
url = https://github.com/apache/thrift.git

View File

@ -6,6 +6,12 @@
#include <unistd.h>
#include <functional>
#include <sys/file.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <csignal>
#include <dlfcn.h>
#include <fcntl.h>
#include <fstream>
namespace
{
@ -83,6 +89,8 @@ ReplxxLineReader::ReplxxLineReader(
/// it is also bound to M-p/M-n).
rx.bind_key(Replxx::KEY::meta('N'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::COMPLETE_NEXT, code); });
rx.bind_key(Replxx::KEY::meta('P'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::COMPLETE_PREVIOUS, code); });
rx.bind_key(Replxx::KEY::meta('E'), [this](char32_t) { openEditor(); return Replxx::ACTION_RESULT::CONTINUE; });
}
ReplxxLineReader::~ReplxxLineReader()
@ -127,7 +135,114 @@ void ReplxxLineReader::addToHistory(const String & line)
rx.print("Unlock of history file failed: %s\n", errnoToString(errno).c_str());
}
int ReplxxLineReader::execute(const std::string & command)
{
/// execv() needs mutable, NUL-terminated argument buffers, so copy the literals (including the trailing '\0').
std::vector<char> argv0("sh", &("sh"[3]));
std::vector<char> argv1("-c", &("-c"[3]));
std::vector<char> argv2(command.data(), command.data() + command.size() + 1);
const char * filename = "/bin/sh";
char * const argv[] = {argv0.data(), argv1.data(), argv2.data(), nullptr};
static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork");
if (!real_vfork)
{
rx.print("Cannot find symbol vfork in myself: %s\n", errnoToString(errno).c_str());
return -1;
}
pid_t pid = reinterpret_cast<pid_t (*)()>(real_vfork)();
if (-1 == pid)
{
rx.print("Cannot vfork: %s\n", errnoToString(errno).c_str());
return -1;
}
if (0 == pid)
{
sigset_t mask;
sigemptyset(&mask);
sigprocmask(0, nullptr, &mask);
sigprocmask(SIG_UNBLOCK, &mask, nullptr);
execv(filename, argv);
_exit(-1);
}
int status = 0;
if (-1 == waitpid(pid, &status, 0))
{
rx.print("Cannot waitpid: %s\n", errnoToString(errno).c_str());
return -1;
}
return status;
}
void ReplxxLineReader::openEditor()
{
char filename[] = "clickhouse_replxx_XXXXXX.sql";
int fd = ::mkstemps(filename, 4);
if (-1 == fd)
{
rx.print("Cannot create temporary file to edit query: %s\n", errnoToString(errno).c_str());
return;
}
/// getenv() returns nullptr when the variable is not set; fall back to vim in that case.
const char * editor_env = std::getenv("EDITOR");
String editor = (editor_env && *editor_env) ? editor_env : "vim";
replxx::Replxx::State state(rx.get_state());
size_t bytes_written = 0;
const char * begin = state.text();
size_t offset = strlen(state.text());
while (bytes_written != offset)
{
ssize_t res = ::write(fd, begin + bytes_written, offset - bytes_written);
if ((-1 == res || 0 == res) && errno != EINTR)
{
rx.print("Cannot write to temporary query file %s: %s\n", filename, errnoToString(errno).c_str());
return;
}
bytes_written += res;
}
if (0 != ::close(fd))
{
rx.print("Cannot close temporary query file %s: %s\n", filename, errnoToString(errno).c_str());
return;
}
if (0 == execute(editor + " " + filename))
{
try
{
std::ifstream t(filename);
std::string str;
t.seekg(0, std::ios::end);
str.reserve(t.tellg());
t.seekg(0, std::ios::beg);
str.assign((std::istreambuf_iterator<char>(t)), std::istreambuf_iterator<char>());
rx.set_state(replxx::Replxx::State(str.c_str(), str.size()));
}
catch (...)
{
rx.print("Cannot read from temporary query file %s: %s\n", filename, errnoToString(errno).c_str());
return;
}
}
if (bracketed_paste_enabled)
enableBracketedPaste();
if (0 != ::unlink(filename))
rx.print("Cannot remove temporary query file %s: %s\n", filename, errnoToString(errno).c_str());
}
void ReplxxLineReader::enableBracketedPaste()
{
bracketed_paste_enabled = true;
rx.enable_bracketed_paste();
};

View File

@ -22,10 +22,13 @@ public:
private:
InputStatus readOneLine(const String & prompt) override;
void addToHistory(const String & line) override;
int execute(const std::string & command);
void openEditor();
replxx::Replxx rx;
replxx::Replxx::highlighter_callback_t highlighter;
// used to call flock() to synchronize multiple clients using same history file
int history_file_fd = -1;
bool bracketed_paste_enabled = false;
};

View File

@ -5,9 +5,11 @@
/// (See at http://www.boost.org/LICENSE_1_0.txt)
#include "throwError.h"
#include <cmath>
#include <cfloat>
#include <limits>
#include <cassert>
#include <limits>
namespace wide
{
@ -239,6 +241,14 @@ struct integer<Bits, Signed>::_impl
template <class T>
constexpr static void set_multiplier(integer<Bits, Signed> & self, T t) noexcept {
constexpr uint64_t max_int = std::numeric_limits<uint64_t>::max();
/// Implementation-specific behaviour on overflow (if we don't check here, a stack overflow will be triggered in bigint_cast).
if (!std::isfinite(t))
{
self = 0;
return;
}
const T alpha = t / max_int;
if (alpha <= max_int)

View File

@ -0,0 +1,93 @@
/* origin: OpenBSD /usr/src/lib/libm/src/polevll.c */
/*
* Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/*
* Evaluate polynomial
*
*
* SYNOPSIS:
*
* int N;
* long double x, y, coef[N+1], polevl[];
*
* y = polevll( x, coef, N );
*
*
* DESCRIPTION:
*
* Evaluates polynomial of degree N:
*
* y = C_0 + C_1 x + C_2 x^2 + ... + C_N x^N
*
* Coefficients are stored in reverse order:
*
* coef[0] = C_N, ..., coef[N] = C_0.
*
* The function p1evll() assumes that coef[N] = 1.0 and is
* omitted from the array. Its calling arguments are
* otherwise the same as polevll().
*
*
* SPEED:
*
* In the interest of speed, there are no checks for out
* of bounds arithmetic. This routine is used by most of
* the functions in the library. Depending on available
* equipment features, the user may wish to rewrite the
* program in microcode or assembly language.
*
*/
#include "libm.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
#else
/*
* Polynomial evaluator:
* P[0] x^n + P[1] x^(n-1) + ... + P[n]
*/
long double __polevll(long double x, const long double *P, int n)
{
long double y;
y = *P++;
do {
y = y * x + *P++;
} while (--n);
return y;
}
/*
* Polynomial evaluator:
* x^n + P[0] x^(n-1) + P[1] x^(n-2) + ... + P[n]
*/
long double __p1evll(long double x, const long double *P, int n)
{
long double y;
n -= 1;
y = x + *P++;
do {
y = y * x + *P++;
} while (--n);
return y;
}
#endif

View File

@ -0,0 +1,44 @@
#include <errno.h>
#include <fcntl.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
/* This assumes that a check for the
template size has already been made */
static char * __randname(char * template)
{
int i;
struct timespec ts;
unsigned long r;
clock_gettime(CLOCK_REALTIME, &ts);
r = (ts.tv_nsec * 65537) ^ ((((intptr_t)(&ts)) / 16) + ((intptr_t)template));
for (i = 0; i < 6; i++, r >>= 5)
template[i] = 'A' + (r & 15) + (r & 16) * 2;
return template;
}
int mkstemps(char * template, int len)
{
size_t l = strlen(template);
if (l < 6 || len > l - 6 || memcmp(template + l - len - 6, "XXXXXX", 6))
{
errno = EINVAL;
return -1;
}
int fd, retries = 100;
do
{
__randname(template + l - len - 6);
if ((fd = open(template, O_RDWR | O_CREAT | O_EXCL, 0600)) >= 0)
return fd;
} while (--retries && errno == EEXIST);
memcpy(template + l - len - 6, "XXXXXX", 6);
return -1;
}

View File

@ -0,0 +1,185 @@
/*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include <math.h>
#include <stdint.h>
#include "libm.h"
#include "exp2f_data.h"
#include "powf_data.h"
/*
POWF_LOG2_POLY_ORDER = 5
EXP2F_TABLE_BITS = 5
ULP error: 0.82 (~ 0.5 + relerr*2^24)
relerr: 1.27 * 2^-26 (Relative error ~= 128*Ln2*relerr_log2 + relerr_exp2)
relerr_log2: 1.83 * 2^-33 (Relative error of logx.)
relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).)
*/
#define N (1 << POWF_LOG2_TABLE_BITS)
#define T __powf_log2_data.tab
#define A __powf_log2_data.poly
#define OFF 0x3f330000
/* Subnormal input is normalized so ix has negative biased exponent.
Output is multiplied by N (POWF_SCALE) if TOINT_INTRINICS is set. */
static inline double_t log2_inline(uint32_t ix)
{
double_t z, r, r2, r4, p, q, y, y0, invc, logc;
uint32_t iz, top, tmp;
int k, i;
/* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (23 - POWF_LOG2_TABLE_BITS)) % N;
top = tmp & 0xff800000;
iz = ix - top;
k = (int32_t)top >> (23 - POWF_SCALE_BITS); /* arithmetic shift */
invc = T[i].invc;
logc = T[i].logc;
z = (double_t)asfloat(iz);
/* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
r = z * invc - 1;
y0 = logc + (double_t)k;
/* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */
r2 = r * r;
y = A[0] * r + A[1];
p = A[2] * r + A[3];
r4 = r2 * r2;
q = A[4] * r + y0;
q = p * r2 + q;
y = y * r4 + q;
return y;
}
#undef N
#undef T
#define N (1 << EXP2F_TABLE_BITS)
#define T __exp2f_data.tab
#define SIGN_BIAS (1 << (EXP2F_TABLE_BITS + 11))
/* The output of log2 and thus the input of exp2 is either scaled by N
(in case of fast toint intrinsics) or not. The unscaled xd must be
in [-1021,1023], sign_bias sets the sign of the result. */
static inline float exp2_inline(double_t xd, uint32_t sign_bias)
{
uint64_t ki, ski, t;
double_t kd, z, r, r2, y, s;
#if TOINT_INTRINSICS
#define C __exp2f_data.poly_scaled
/* N*x = k + r with r in [-1/2, 1/2] */
kd = roundtoint(xd); /* k */
ki = converttoint(xd);
#else
#define C __exp2f_data.poly
#define SHIFT __exp2f_data.shift_scaled
/* x = k/N + r with r in [-1/(2N), 1/(2N)] */
kd = eval_as_double(xd + SHIFT);
ki = asuint64(kd);
kd -= SHIFT; /* k/N */
#endif
r = xd - kd;
/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
t = T[ki % N];
ski = ki + sign_bias;
t += ski << (52 - EXP2F_TABLE_BITS);
s = asdouble(t);
z = C[0] * r + C[1];
r2 = r * r;
y = C[2] * r + 1;
y = z * r2 + y;
y = y * s;
return eval_as_float(y);
}
/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is
the bit representation of a non-zero finite floating-point value. */
static inline int checkint(uint32_t iy)
{
int e = iy >> 23 & 0xff;
if (e < 0x7f)
return 0;
if (e > 0x7f + 23)
return 2;
if (iy & ((1 << (0x7f + 23 - e)) - 1))
return 0;
if (iy & (1 << (0x7f + 23 - e)))
return 1;
return 2;
}
static inline int zeroinfnan(uint32_t ix)
{
return 2 * ix - 1 >= 2u * 0x7f800000 - 1;
}
float powf(float x, float y)
{
uint32_t sign_bias = 0;
uint32_t ix, iy;
ix = asuint(x);
iy = asuint(y);
if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000 ||
zeroinfnan(iy))) {
/* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
if (predict_false(zeroinfnan(iy))) {
if (2 * iy == 0)
return issignalingf_inline(x) ? x + y : 1.0f;
if (ix == 0x3f800000)
return issignalingf_inline(y) ? x + y : 1.0f;
if (2 * ix > 2u * 0x7f800000 ||
2 * iy > 2u * 0x7f800000)
return x + y;
if (2 * ix == 2 * 0x3f800000)
return 1.0f;
if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
return y * y;
}
if (predict_false(zeroinfnan(ix))) {
float_t x2 = x * x;
if (ix & 0x80000000 && checkint(iy) == 1)
x2 = -x2;
/* Without the barrier some versions of clang hoist the 1/x2 and
thus division by zero exception can be signaled spuriously. */
return iy & 0x80000000 ? fp_barrierf(1 / x2) : x2;
}
/* x and y are non-zero finite. */
if (ix & 0x80000000) {
/* Finite x < 0. */
int yint = checkint(iy);
if (yint == 0)
return __math_invalidf(x);
if (yint == 1)
sign_bias = SIGN_BIAS;
ix &= 0x7fffffff;
}
if (ix < 0x00800000) {
/* Normalize subnormal x so exponent becomes negative. */
ix = asuint(x * 0x1p23f);
ix &= 0x7fffffff;
ix -= 23 << 23;
}
}
double_t logx = log2_inline(ix);
double_t ylogx = y * logx; /* cannot overflow, y is single prec. */
if (predict_false((asuint64(ylogx) >> 47 & 0xffff) >=
asuint64(126.0 * POWF_SCALE) >> 47)) {
/* |y*log(x)| >= 126. */
if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE)
return __math_oflowf(sign_bias);
if (ylogx <= -150.0 * POWF_SCALE)
return __math_uflowf(sign_bias);
}
return exp2_inline(ylogx, sign_bias);
}

View File

@ -0,0 +1,34 @@
/*
* Data definition for powf.
*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#include "powf_data.h"
const struct powf_log2_data __powf_log2_data = {
.tab = {
{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * POWF_SCALE },
{ 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * POWF_SCALE },
{ 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * POWF_SCALE },
{ 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * POWF_SCALE },
{ 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * POWF_SCALE },
{ 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * POWF_SCALE },
{ 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * POWF_SCALE },
{ 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * POWF_SCALE },
{ 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * POWF_SCALE },
{ 0x1p+0, 0x0p+0 * POWF_SCALE },
{ 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * POWF_SCALE },
{ 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * POWF_SCALE },
{ 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * POWF_SCALE },
{ 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * POWF_SCALE },
{ 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * POWF_SCALE },
{ 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * POWF_SCALE },
},
.poly = {
0x1.27616c9496e0bp-2 * POWF_SCALE, -0x1.71969a075c67ap-2 * POWF_SCALE,
0x1.ec70a6ca7baddp-2 * POWF_SCALE, -0x1.7154748bef6c8p-1 * POWF_SCALE,
0x1.71547652ab82bp0 * POWF_SCALE,
}
};

View File

@ -0,0 +1,26 @@
/*
* Copyright (c) 2017-2018, Arm Limited.
* SPDX-License-Identifier: MIT
*/
#ifndef _POWF_DATA_H
#define _POWF_DATA_H
#include "libm.h"
#include "exp2f_data.h"
#define POWF_LOG2_TABLE_BITS 4
#define POWF_LOG2_POLY_ORDER 5
#if TOINT_INTRINSICS
#define POWF_SCALE_BITS EXP2F_TABLE_BITS
#else
#define POWF_SCALE_BITS 0
#endif
#define POWF_SCALE ((double)(1 << POWF_SCALE_BITS))
extern hidden const struct powf_log2_data {
struct {
double invc, logc;
} tab[1 << POWF_LOG2_TABLE_BITS];
double poly[POWF_LOG2_POLY_ORDER];
} __powf_log2_data;
#endif

View File

@ -0,0 +1,525 @@
/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_powl.c */
/*
* Copyright (c) 2008 Stephen L. Moshier <steve@moshier.net>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/* powl.c
*
* Power function, long double precision
*
*
* SYNOPSIS:
*
* long double x, y, z, powl();
*
* z = powl( x, y );
*
*
* DESCRIPTION:
*
* Computes x raised to the yth power. Analytically,
*
* x**y = exp( y log(x) ).
*
* Following Cody and Waite, this program uses a lookup table
* of 2**-i/32 and pseudo extended precision arithmetic to
* obtain several extra bits of accuracy in both the logarithm
* and the exponential.
*
*
* ACCURACY:
*
* The relative error of pow(x,y) can be estimated
* by y dl ln(2), where dl is the absolute error of
* the internally computed base 2 logarithm. At the ends
* of the approximation interval the logarithm equals 1/32
* and its relative error is about 1 lsb = 1.1e-19. Hence
* the predicted relative error in the result is 2.3e-21 y .
*
* Relative error:
* arithmetic domain # trials peak rms
*
* IEEE +-1000 40000 2.8e-18 3.7e-19
* .001 < x < 1000, with log(x) uniformly distributed.
* -1000 < y < 1000, y uniformly distributed.
*
* IEEE 0,8700 60000 6.5e-18 1.0e-18
* 0.99 < x < 1.01, 0 < y < 8700, uniformly distributed.
*
*
* ERROR MESSAGES:
*
* message condition value returned
* pow overflow x**y > MAXNUM INFINITY
* pow underflow x**y < 1/MAXNUM 0.0
* pow domain x<0 and y noninteger 0.0
*
*/
#include "libm.h"
#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024
long double powl(long double x, long double y)
{
return pow(x, y);
}
#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384
/* Table size */
#define NXT 32
/* log(1+x) = x - .5x^2 + x^3 * P(z)/Q(z)
* on the domain 2^(-1/32) - 1 <= x <= 2^(1/32) - 1
*/
static const long double P[] = {
8.3319510773868690346226E-4L,
4.9000050881978028599627E-1L,
1.7500123722550302671919E0L,
1.4000100839971580279335E0L,
};
static const long double Q[] = {
/* 1.0000000000000000000000E0L,*/
5.2500282295834889175431E0L,
8.4000598057587009834666E0L,
4.2000302519914740834728E0L,
};
/* A[i] = 2^(-i/32), rounded to IEEE long double precision.
* If i is even, A[i] + B[i/2] gives additional accuracy.
*/
static const long double A[33] = {
1.0000000000000000000000E0L,
9.7857206208770013448287E-1L,
9.5760328069857364691013E-1L,
9.3708381705514995065011E-1L,
9.1700404320467123175367E-1L,
8.9735453750155359320742E-1L,
8.7812608018664974155474E-1L,
8.5930964906123895780165E-1L,
8.4089641525371454301892E-1L,
8.2287773907698242225554E-1L,
8.0524516597462715409607E-1L,
7.8799042255394324325455E-1L,
7.7110541270397041179298E-1L,
7.5458221379671136985669E-1L,
7.3841307296974965571198E-1L,
7.2259040348852331001267E-1L,
7.0710678118654752438189E-1L,
6.9195494098191597746178E-1L,
6.7712777346844636413344E-1L,
6.6261832157987064729696E-1L,
6.4841977732550483296079E-1L,
6.3452547859586661129850E-1L,
6.2092890603674202431705E-1L,
6.0762367999023443907803E-1L,
5.9460355750136053334378E-1L,
5.8186242938878875689693E-1L,
5.6939431737834582684856E-1L,
5.5719337129794626814472E-1L,
5.4525386633262882960438E-1L,
5.3357020033841180906486E-1L,
5.2213689121370692017331E-1L,
5.1094857432705833910408E-1L,
5.0000000000000000000000E-1L,
};
static const long double B[17] = {
0.0000000000000000000000E0L,
2.6176170809902549338711E-20L,
-1.0126791927256478897086E-20L,
1.3438228172316276937655E-21L,
1.2207982955417546912101E-20L,
-6.3084814358060867200133E-21L,
1.3164426894366316434230E-20L,
-1.8527916071632873716786E-20L,
1.8950325588932570796551E-20L,
1.5564775779538780478155E-20L,
6.0859793637556860974380E-21L,
-2.0208749253662532228949E-20L,
1.4966292219224761844552E-20L,
3.3540909728056476875639E-21L,
-8.6987564101742849540743E-22L,
-1.2327176863327626135542E-20L,
0.0000000000000000000000E0L,
};
/* 2^x = 1 + x P(x),
* on the interval -1/32 <= x <= 0
*/
static const long double R[] = {
1.5089970579127659901157E-5L,
1.5402715328927013076125E-4L,
1.3333556028915671091390E-3L,
9.6181291046036762031786E-3L,
5.5504108664798463044015E-2L,
2.4022650695910062854352E-1L,
6.9314718055994530931447E-1L,
};
#define MEXP (NXT*16384.0L)
/* The following is used if denormal numbers are supported, else -MEXP: */
#define MNEXP (-NXT*(16384.0L+64.0L))
/* log2(e) - 1 */
#define LOG2EA 0.44269504088896340735992L
#define F W
#define Fa Wa
#define Fb Wb
#define G W
#define Ga Wa
#define Gb u
#define H W
#define Ha Wb
#define Hb Wb
static const long double MAXLOGL = 1.1356523406294143949492E4L;
static const long double MINLOGL = -1.13994985314888605586758E4L;
static const long double LOGE2L = 6.9314718055994530941723E-1L;
static const long double huge = 0x1p10000L;
/* XXX Prevent gcc from erroneously constant folding this. */
static const volatile long double twom10000 = 0x1p-10000L;
static long double reducl(long double);
static long double powil(long double, int);
long double __polevll(long double x, const long double *P, int n);
long double __p1evll(long double x, const long double *P, int n);
long double powl(long double x, long double y)
{
/* double F, Fa, Fb, G, Ga, Gb, H, Ha, Hb */
int i, nflg, iyflg, yoddint;
long e;
volatile long double z=0;
long double w=0, W=0, Wa=0, Wb=0, ya=0, yb=0, u=0;
/* make sure no invalid exception is raised by NaN comparison */
if (isnan(x)) {
if (!isnan(y) && y == 0.0)
return 1.0;
return x;
}
if (isnan(y)) {
if (x == 1.0)
return 1.0;
return y;
}
if (x == 1.0)
return 1.0; /* 1**y = 1, even if y is nan */
if (x == -1.0 && !isfinite(y))
return 1.0; /* -1**inf = 1 */
if (y == 0.0)
return 1.0; /* x**0 = 1, even if x is nan */
if (y == 1.0)
return x;
if (y >= LDBL_MAX) {
if (x > 1.0 || x < -1.0)
return INFINITY;
if (x != 0.0)
return 0.0;
}
if (y <= -LDBL_MAX) {
if (x > 1.0 || x < -1.0)
return 0.0;
if (x != 0.0 || y == -INFINITY)
return INFINITY;
}
if (x >= LDBL_MAX) {
if (y > 0.0)
return INFINITY;
return 0.0;
}
w = floorl(y);
/* Set iyflg to 1 if y is an integer. */
iyflg = 0;
if (w == y)
iyflg = 1;
/* Test for odd integer y. */
yoddint = 0;
if (iyflg) {
ya = fabsl(y);
ya = floorl(0.5 * ya);
yb = 0.5 * fabsl(w);
if( ya != yb )
yoddint = 1;
}
if (x <= -LDBL_MAX) {
if (y > 0.0) {
if (yoddint)
return -INFINITY;
return INFINITY;
}
if (y < 0.0) {
if (yoddint)
return -0.0;
return 0.0;
}
}
nflg = 0; /* (x<0)**(odd int) */
if (x <= 0.0) {
if (x == 0.0) {
if (y < 0.0) {
if (signbit(x) && yoddint)
/* (-0.0)**(-odd int) = -inf, divbyzero */
return -1.0/0.0;
/* (+-0.0)**(negative) = inf, divbyzero */
return 1.0/0.0;
}
if (signbit(x) && yoddint)
return -0.0;
return 0.0;
}
if (iyflg == 0)
return (x - x) / (x - x); /* (x<0)**(non-int) is NaN */
/* (x<0)**(integer) */
if (yoddint)
nflg = 1; /* negate result */
x = -x;
}
/* (+integer)**(integer) */
if (iyflg && floorl(x) == x && fabsl(y) < 32768.0) {
w = powil(x, (int)y);
return nflg ? -w : w;
}
/* separate significand from exponent */
x = frexpl(x, &i);
e = i;
/* find significand in antilog table A[] */
i = 1;
if (x <= A[17])
i = 17;
if (x <= A[i+8])
i += 8;
if (x <= A[i+4])
i += 4;
if (x <= A[i+2])
i += 2;
if (x >= A[1])
i = -1;
i += 1;
/* Find (x - A[i])/A[i]
* in order to compute log(x/A[i]):
*
* log(x) = log( a x/a ) = log(a) + log(x/a)
*
* log(x/a) = log(1+v), v = x/a - 1 = (x-a)/a
*/
x -= A[i];
x -= B[i/2];
x /= A[i];
/* rational approximation for log(1+v):
*
* log(1+v) = v - v**2/2 + v**3 P(v) / Q(v)
*/
z = x*x;
w = x * (z * __polevll(x, P, 3) / __p1evll(x, Q, 3));
w = w - 0.5*z;
/* Convert to base 2 logarithm:
* multiply by log2(e) = 1 + LOG2EA
*/
z = LOG2EA * w;
z += w;
z += LOG2EA * x;
z += x;
/* Compute exponent term of the base 2 logarithm. */
w = -i;
w /= NXT;
w += e;
/* Now base 2 log of x is w + z. */
/* Multiply base 2 log by y, in extended precision. */
/* separate y into large part ya
* and small part yb less than 1/NXT
*/
ya = reducl(y);
yb = y - ya;
/* (w+z)(ya+yb)
* = w*ya + w*yb + z*y
*/
F = z * y + w * yb;
Fa = reducl(F);
Fb = F - Fa;
G = Fa + w * ya;
Ga = reducl(G);
Gb = G - Ga;
H = Fb + Gb;
Ha = reducl(H);
w = (Ga + Ha) * NXT;
/* Test the power of 2 for overflow */
if (w > MEXP)
return huge * huge; /* overflow */
if (w < MNEXP)
return twom10000 * twom10000; /* underflow */
e = w;
Hb = H - Ha;
if (Hb > 0.0) {
e += 1;
Hb -= 1.0/NXT; /*0.0625L;*/
}
/* Now the product y * log2(x) = Hb + e/NXT.
*
* Compute base 2 exponential of Hb,
* where -0.0625 <= Hb <= 0.
*/
z = Hb * __polevll(Hb, R, 6); /* z = 2**Hb - 1 */
/* Express e/NXT as an integer plus a negative number of (1/NXT)ths.
* Find lookup table entry for the fractional power of 2.
*/
if (e < 0)
i = 0;
else
i = 1;
i = e/NXT + i;
e = NXT*i - e;
w = A[e];
z = w * z; /* 2**-e * ( 1 + (2**Hb-1) ) */
z = z + w;
z = scalbnl(z, i); /* multiply by integer power of 2 */
if (nflg)
z = -z;
return z;
}
/* Find a multiple of 1/NXT that is within 1/NXT of x. */
static long double reducl(long double x)
{
long double t;
t = x * NXT;
t = floorl(t);
t = t / NXT;
return t;
}
/*
* Positive real raised to integer power, long double precision
*
*
* SYNOPSIS:
*
* long double x, y, powil();
* int n;
*
* y = powil( x, n );
*
*
* DESCRIPTION:
*
* Returns argument x>0 raised to the nth power.
* The routine efficiently decomposes n as a sum of powers of
* two. The desired power is a product of two-to-the-kth
* powers of x. Thus to compute the 32767 power of x requires
* 28 multiplications instead of 32767 multiplications.
*
*
* ACCURACY:
*
* Relative error:
* arithmetic x domain n domain # trials peak rms
* IEEE .001,1000 -1022,1023 50000 4.3e-17 7.8e-18
* IEEE 1,2 -1022,1023 20000 3.9e-17 7.6e-18
* IEEE .99,1.01 0,8700 10000 3.6e-16 7.2e-17
*
* Returns MAXNUM on overflow, zero on underflow.
*/
static long double powil(long double x, int nn)
{
long double ww, y;
long double s;
int n, e, sign, lx;
if (nn == 0)
return 1.0;
if (nn < 0) {
sign = -1;
n = -nn;
} else {
sign = 1;
n = nn;
}
/* Overflow detection */
/* Calculate approximate logarithm of answer */
s = x;
s = frexpl( s, &lx);
e = (lx - 1)*n;
if ((e == 0) || (e > 64) || (e < -64)) {
s = (s - 7.0710678118654752e-1L) / (s + 7.0710678118654752e-1L);
s = (2.9142135623730950L * s - 0.5 + lx) * nn * LOGE2L;
} else {
s = LOGE2L * e;
}
if (s > MAXLOGL)
return huge * huge; /* overflow */
if (s < MINLOGL)
return twom10000 * twom10000; /* underflow */
/* Handle tiny denormal answer, but with less accuracy
* since roundoff error in 1.0/x will be amplified.
* The precise demarcation should be the gradual underflow threshold.
*/
if (s < -MAXLOGL+2.0) {
x = 1.0/x;
sign = -sign;
}
/* First bit of the power */
if (n & 1)
y = x;
else
y = 1.0;
ww = x;
n >>= 1;
while (n) {
ww = ww * ww; /* arg to the 2-to-the-kth power */
if (n & 1) /* if that bit is set, then include in product */
y *= ww;
n >>= 1;
}
if (sign < 0)
y = 1.0/y;
return y;
}
#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384
// TODO: broken implementation to make things compile
long double powl(long double x, long double y)
{
return pow(x, y);
}
#endif

View File

@ -141,11 +141,6 @@ if(NOT EXTERNAL_PARQUET_FOUND AND NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT O
else()
set(USE_INTERNAL_PARQUET_LIBRARY 1)
if(USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
set(ARROW_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src")
set(PARQUET_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src" ${ClickHouse_BINARY_DIR}/contrib/arrow/cpp/src)
endif()
if(MAKE_STATIC_LIBRARIES)
set(FLATBUFFERS_LIBRARY flatbuffers)
set(ARROW_LIBRARY arrow_static)
@ -155,9 +150,6 @@ if(NOT EXTERNAL_PARQUET_FOUND AND NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT O
set(FLATBUFFERS_LIBRARY flatbuffers_shared)
set(ARROW_LIBRARY arrow_shared)
set(PARQUET_LIBRARY parquet_shared)
if(USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
list(APPEND PARQUET_LIBRARY boost::regex)
endif()
set(THRIFT_LIBRARY thrift)
endif()

View File

@ -163,51 +163,21 @@ if(USE_INTERNAL_SNAPPY_LIBRARY)
endif()
if (USE_INTERNAL_PARQUET_LIBRARY)
if (USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
# We don't use arrow's CMake files because they use too many dependencies and download some libraries at compile time
# But this mode can be used for updating auto-generated parquet files:
# cmake -DUSE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE=1 -DUSE_STATIC_LIBRARIES=0
# copy {BUILD_DIR}/contrib/arrow/cpp/src/parquet/*.cpp,*.h -> /contrib/arrow-cmake/cpp/src/parquet/
# But you can update auto-generated parquet files manually:
# cd {BUILD_DIR}/contrib/arrow/cpp/src/parquet && mkdir -p build && cd build
# cmake .. -DARROW_COMPUTE=ON -DARROW_PARQUET=ON -DARROW_SIMD_LEVEL=NONE -DARROW_VERBOSE_THIRDPARTY_BUILD=ON
# -DARROW_BUILD_SHARED=1 -DARROW_BUILD_UTILITIES=OFF -DARROW_BUILD_INTEGRATION=OFF
# -DBoost_FOUND=1 -DARROW_TEST_LINKAGE="shared"
# make -j8
# copy {BUILD_DIR}/contrib/arrow/cpp/src/parquet/*.cpp,*.h -> {BUILD_DIR}/contrib/arrow-cmake/cpp/src/parquet/
# Also useful parquet reader:
# cd contrib/arrow/cpp/build && mkdir -p build && cmake .. -DPARQUET_BUILD_EXECUTABLES=1 && make -j8
# contrib/arrow/cpp/build/debug/parquet-reader some_file.parquet
# cd {BUILD_DIR}/contrib/arrow/cpp && mkdir -p build && cd build
# cmake .. -DARROW_PARQUET=1 -DARROW_WITH_SNAPPY=1 -DPARQUET_BUILD_EXECUTABLES=1
# make -j8
# {BUILD_DIR}/contrib/arrow/cpp/build/release/parquet-reader some_file.parquet
set (ARROW_COMPUTE ON CACHE INTERNAL "")
set (ARROW_PARQUET ON CACHE INTERNAL "")
set (ARROW_VERBOSE_THIRDPARTY_BUILD ON CACHE INTERNAL "")
set (ARROW_BUILD_SHARED 1 CACHE INTERNAL "")
set (ARROW_BUILD_UTILITIES OFF CACHE INTERNAL "")
set (ARROW_BUILD_INTEGRATION OFF CACHE INTERNAL "")
set (ARROW_BOOST_HEADER_ONLY ON CACHE INTERNAL "")
set (Boost_FOUND 1 CACHE INTERNAL "")
if (MAKE_STATIC_LIBRARIES)
set (PARQUET_ARROW_LINKAGE "static" CACHE INTERNAL "")
set (ARROW_TEST_LINKAGE "static" CACHE INTERNAL "")
set (ARROW_BUILD_STATIC ${MAKE_STATIC_LIBRARIES} CACHE INTERNAL "")
else ()
set (PARQUET_ARROW_LINKAGE "shared" CACHE INTERNAL "")
set (ARROW_TEST_LINKAGE "shared" CACHE INTERNAL "")
endif ()
if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO")
set (_save_build_type ${CMAKE_BUILD_TYPE})
set (CMAKE_BUILD_TYPE Release)
string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC)
endif ()
# Because Arrow uses CMAKE_SOURCE_DIR as a project path
# Hopefully will be fixed in https://github.com/apache/arrow/pull/2676
set (CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/cmake_modules")
add_subdirectory (arrow/cpp)
if (_save_build_type)
set (CMAKE_BUILD_TYPE ${_save_build_type})
unset (_save_build_type)
string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC)
endif ()
else()
add_subdirectory(arrow-cmake)
# The library is large - avoid bloat.
@ -215,7 +185,6 @@ else()
target_compile_options (${THRIFT_LIBRARY} PRIVATE -g0)
target_compile_options (${PARQUET_LIBRARY} PRIVATE -g0)
endif()
endif()
if (USE_INTERNAL_AVRO_LIBRARY)
add_subdirectory(avro-cmake)

2
contrib/arrow vendored

@ -1 +1 @@
Subproject commit 3cbcb7b62c2f2d02851bff837758637eb592a64b
Subproject commit 744bdfe188f018e5e05f5deebd4e9ee0a7706cf4

View File

@ -144,15 +144,16 @@ set(ORC_SRCS
set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/arrow)
configure_file("${LIBRARY_DIR}/util/config.h.cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cpp/src/arrow/util/config.h")
configure_file("${LIBRARY_DIR}/util/config.h.cmake" "${CMAKE_CURRENT_BINARY_DIR}/cpp/src/arrow/util/config.h")
# arrow/cpp/src/arrow/CMakeLists.txt
set(ARROW_SRCS
${LIBRARY_DIR}/array.cc
${LIBRARY_DIR}/buffer.cc
${LIBRARY_DIR}/device.cc
${LIBRARY_DIR}/builder.cc
${LIBRARY_DIR}/chunked_array.cc
${LIBRARY_DIR}/compare.cc
${LIBRARY_DIR}/datum.cc
${LIBRARY_DIR}/device.cc
${LIBRARY_DIR}/extension_type.cc
${LIBRARY_DIR}/memory_pool.cc
${LIBRARY_DIR}/pretty_print.cc
@ -167,11 +168,12 @@ set(ARROW_SRCS
${LIBRARY_DIR}/type.cc
${LIBRARY_DIR}/visitor.cc
${LIBRARY_DIR}/tensor/coo_converter.cc
${LIBRARY_DIR}/tensor/csc_converter.cc
${LIBRARY_DIR}/tensor/csf_converter.cc
${LIBRARY_DIR}/tensor/csr_converter.cc
${LIBRARY_DIR}/array/array_base.cc
${LIBRARY_DIR}/array/array_binary.cc
${LIBRARY_DIR}/array/array_decimal.cc
${LIBRARY_DIR}/array/array_dict.cc
${LIBRARY_DIR}/array/array_nested.cc
${LIBRARY_DIR}/array/array_primitive.cc
${LIBRARY_DIR}/array/builder_adaptive.cc
${LIBRARY_DIR}/array/builder_base.cc
${LIBRARY_DIR}/array/builder_binary.cc
@ -181,17 +183,50 @@ set(ARROW_SRCS
${LIBRARY_DIR}/array/builder_primitive.cc
${LIBRARY_DIR}/array/builder_union.cc
${LIBRARY_DIR}/array/concatenate.cc
${LIBRARY_DIR}/array/dict_internal.cc
${LIBRARY_DIR}/array/data.cc
${LIBRARY_DIR}/array/diff.cc
${LIBRARY_DIR}/array/util.cc
${LIBRARY_DIR}/array/validate.cc
${LIBRARY_DIR}/csv/converter.cc
${LIBRARY_DIR}/compute/api_scalar.cc
${LIBRARY_DIR}/compute/api_vector.cc
${LIBRARY_DIR}/compute/cast.cc
${LIBRARY_DIR}/compute/exec.cc
${LIBRARY_DIR}/compute/function.cc
${LIBRARY_DIR}/compute/kernel.cc
${LIBRARY_DIR}/compute/registry.cc
${LIBRARY_DIR}/compute/kernels/aggregate_basic.cc
${LIBRARY_DIR}/compute/kernels/aggregate_mode.cc
${LIBRARY_DIR}/compute/kernels/aggregate_var_std.cc
${LIBRARY_DIR}/compute/kernels/codegen_internal.cc
${LIBRARY_DIR}/compute/kernels/scalar_arithmetic.cc
${LIBRARY_DIR}/compute/kernels/scalar_boolean.cc
${LIBRARY_DIR}/compute/kernels/scalar_cast_boolean.cc
${LIBRARY_DIR}/compute/kernels/scalar_cast_internal.cc
${LIBRARY_DIR}/compute/kernels/scalar_cast_nested.cc
${LIBRARY_DIR}/compute/kernels/scalar_cast_numeric.cc
${LIBRARY_DIR}/compute/kernels/scalar_cast_string.cc
${LIBRARY_DIR}/compute/kernels/scalar_cast_temporal.cc
${LIBRARY_DIR}/compute/kernels/scalar_compare.cc
${LIBRARY_DIR}/compute/kernels/scalar_fill_null.cc
${LIBRARY_DIR}/compute/kernels/scalar_nested.cc
${LIBRARY_DIR}/compute/kernels/scalar_set_lookup.cc
${LIBRARY_DIR}/compute/kernels/scalar_string.cc
${LIBRARY_DIR}/compute/kernels/scalar_validity.cc
${LIBRARY_DIR}/compute/kernels/vector_hash.cc
${LIBRARY_DIR}/compute/kernels/vector_nested.cc
${LIBRARY_DIR}/compute/kernels/vector_selection.cc
${LIBRARY_DIR}/compute/kernels/vector_sort.cc
${LIBRARY_DIR}/compute/kernels/util_internal.cc
${LIBRARY_DIR}/csv/chunker.cc
${LIBRARY_DIR}/csv/column_builder.cc
${LIBRARY_DIR}/csv/column_decoder.cc
${LIBRARY_DIR}/csv/converter.cc
${LIBRARY_DIR}/csv/options.cc
${LIBRARY_DIR}/csv/parser.cc
${LIBRARY_DIR}/csv/reader.cc
${LIBRARY_DIR}/csv/column_decoder.cc
${LIBRARY_DIR}/ipc/dictionary.cc
${LIBRARY_DIR}/ipc/feather.cc
@ -202,14 +237,25 @@ set(ARROW_SRCS
${LIBRARY_DIR}/ipc/writer.cc
${LIBRARY_DIR}/io/buffered.cc
${LIBRARY_DIR}/io/caching.cc
${LIBRARY_DIR}/io/compressed.cc
${LIBRARY_DIR}/io/file.cc
${LIBRARY_DIR}/io/interfaces.cc
${LIBRARY_DIR}/io/memory.cc
${LIBRARY_DIR}/io/slow.cc
${LIBRARY_DIR}/tensor/coo_converter.cc
${LIBRARY_DIR}/tensor/csf_converter.cc
${LIBRARY_DIR}/tensor/csx_converter.cc
${LIBRARY_DIR}/util/basic_decimal.cc
${LIBRARY_DIR}/util/bit_block_counter.cc
${LIBRARY_DIR}/util/bit_run_reader.cc
${LIBRARY_DIR}/util/bit_util.cc
${LIBRARY_DIR}/util/bitmap.cc
${LIBRARY_DIR}/util/bitmap_builders.cc
${LIBRARY_DIR}/util/bitmap_ops.cc
${LIBRARY_DIR}/util/bpacking.cc
${LIBRARY_DIR}/util/compression.cc
${LIBRARY_DIR}/util/compression_lz4.cc
${LIBRARY_DIR}/util/compression_snappy.cc
@ -217,8 +263,12 @@ set(ARROW_SRCS
${LIBRARY_DIR}/util/compression_zstd.cc
${LIBRARY_DIR}/util/cpu_info.cc
${LIBRARY_DIR}/util/decimal.cc
${LIBRARY_DIR}/util/delimiting.cc
${LIBRARY_DIR}/util/formatting.cc
${LIBRARY_DIR}/util/future.cc
${LIBRARY_DIR}/util/int_util.cc
${LIBRARY_DIR}/util/io_util.cc
${LIBRARY_DIR}/util/iterator.cc
${LIBRARY_DIR}/util/key_value_metadata.cc
${LIBRARY_DIR}/util/logging.cc
${LIBRARY_DIR}/util/memory.cc
@ -226,27 +276,15 @@ set(ARROW_SRCS
${LIBRARY_DIR}/util/string.cc
${LIBRARY_DIR}/util/task_group.cc
${LIBRARY_DIR}/util/thread_pool.cc
${LIBRARY_DIR}/util/time.cc
${LIBRARY_DIR}/util/trie.cc
${LIBRARY_DIR}/util/utf8.cc
${LIBRARY_DIR}/util/future.cc
${LIBRARY_DIR}/util/formatting.cc
${LIBRARY_DIR}/util/parsing.cc
${LIBRARY_DIR}/util/time.cc
${LIBRARY_DIR}/util/delimiting.cc
${LIBRARY_DIR}/util/iterator.cc
${LIBRARY_DIR}/util/value_parsing.cc
${LIBRARY_DIR}/vendored/base64.cpp
${ORC_SRCS}
)
set(ARROW_SRCS ${ARROW_SRCS}
${LIBRARY_DIR}/compute/context.cc
${LIBRARY_DIR}/compute/kernels/boolean.cc
${LIBRARY_DIR}/compute/kernels/cast.cc
${LIBRARY_DIR}/compute/kernels/hash.cc
${LIBRARY_DIR}/compute/kernels/util_internal.cc
)
if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARY)
set(ARROW_WITH_SNAPPY 1)
endif ()
@ -289,7 +327,8 @@ if (USE_INTERNAL_PROTOBUF_LIBRARY)
add_dependencies(${ARROW_LIBRARY} protoc)
endif ()
target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src)
target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src)
target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/cpp/src)
target_link_libraries(${ARROW_LIBRARY} PRIVATE ${DOUBLE_CONVERSION_LIBRARIES} ${Protobuf_LIBRARY})
target_link_libraries(${ARROW_LIBRARY} PRIVATE lz4)
if (ARROW_WITH_SNAPPY)
@ -319,19 +358,26 @@ set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/parquet)
set(GEN_LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/generated)
# arrow/cpp/src/parquet/CMakeLists.txt
set(PARQUET_SRCS
${LIBRARY_DIR}/arrow/path_internal.cc
${LIBRARY_DIR}/arrow/reader.cc
${LIBRARY_DIR}/arrow/reader_internal.cc
${LIBRARY_DIR}/arrow/schema.cc
${LIBRARY_DIR}/arrow/schema_internal.cc
${LIBRARY_DIR}/arrow/writer.cc
${LIBRARY_DIR}/arrow/path_internal.cc
${LIBRARY_DIR}/bloom_filter.cc
${LIBRARY_DIR}/column_reader.cc
${LIBRARY_DIR}/column_scanner.cc
${LIBRARY_DIR}/column_writer.cc
${LIBRARY_DIR}/deprecated_io.cc
${LIBRARY_DIR}/encoding.cc
${LIBRARY_DIR}/encryption.cc
${LIBRARY_DIR}/encryption_internal.cc
${LIBRARY_DIR}/file_reader.cc
${LIBRARY_DIR}/file_writer.cc
${LIBRARY_DIR}/internal_file_decryptor.cc
${LIBRARY_DIR}/internal_file_encryptor.cc
${LIBRARY_DIR}/level_conversion.cc
${LIBRARY_DIR}/level_comparison.cc
${LIBRARY_DIR}/metadata.cc
${LIBRARY_DIR}/murmur3.cc
${LIBRARY_DIR}/platform.cc
@ -340,10 +386,6 @@ set(PARQUET_SRCS
${LIBRARY_DIR}/schema.cc
${LIBRARY_DIR}/statistics.cc
${LIBRARY_DIR}/types.cc
${LIBRARY_DIR}/encryption.cc
${LIBRARY_DIR}/encryption_internal.cc
${LIBRARY_DIR}/internal_file_decryptor.cc
${LIBRARY_DIR}/internal_file_encryptor.cc
${GEN_LIBRARY_DIR}/parquet_constants.cpp
${GEN_LIBRARY_DIR}/parquet_types.cpp

View File

@ -1,26 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#define ARROW_VERSION_MAJOR
#define ARROW_VERSION_MINOR
#define ARROW_VERSION_PATCH
#define ARROW_VERSION ((ARROW_VERSION_MAJOR * 1000) + ARROW_VERSION_MINOR) * 1000 + ARROW_VERSION_PATCH
#define ARROW_SO_VERSION ""
#define ARROW_FULL_SO_VERSION ""
/* #undef GRPCPP_PP_INCLUDE */

View File

@ -22,8 +22,8 @@
#define PARQUET_VERSION_MINOR 5
#define PARQUET_VERSION_PATCH 1
#define PARQUET_SO_VERSION 0
#define PARQUET_FULL_SO_VERSION 0.17
#define PARQUET_SO_VERSION "200"
#define PARQUET_FULL_SO_VERSION "200.0.0"
// define the parquet created by version
#define CREATED_BY_VERSION "parquet-cpp version 1.5.1-SNAPSHOT"

2
contrib/jemalloc vendored

@ -1 +1 @@
Subproject commit 93e27e435cac846028da20cd9b0841fbc9110bd2
Subproject commit e6891d9746143bf2cf617493d880ba5a0b9a3efd

View File

@ -1,5 +1,2 @@
usr/bin/clickhouse-test
usr/bin/clickhouse-test-server
usr/share/clickhouse-test/*
etc/clickhouse-client/client-test.xml
etc/clickhouse-server/server-test.xml

2
debian/rules vendored
View File

@ -93,7 +93,7 @@ override_dh_auto_build:
override_dh_auto_test:
ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS)))
cd $(BUILDDIR) && ctest $(THREADS_COUNT) -V -E with_server
cd $(BUILDDIR) && ctest $(THREADS_COUNT) -V
endif
override_dh_clean:

View File

@ -47,13 +47,13 @@ cp "${DOCKER_BUILD_FOLDER}/entrypoint.alpine.sh" "${CONTAINER_ROOT_FOLDER}/
## get glibc components from ubuntu 20.04 and put them to expected place
docker pull ubuntu:20.04
ubuntu20image=$(docker create --rm ubuntu:20.04)
docker cp -L ${ubuntu20image}:/lib/x86_64-linux-gnu/libc.so.6 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L ${ubuntu20image}:/lib/x86_64-linux-gnu/libdl.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L ${ubuntu20image}:/lib/x86_64-linux-gnu/libm.so.6 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L ${ubuntu20image}:/lib/x86_64-linux-gnu/libpthread.so.0 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L ${ubuntu20image}:/lib/x86_64-linux-gnu/librt.so.1 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L ${ubuntu20image}:/lib/x86_64-linux-gnu/libnss_dns.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L ${ubuntu20image}:/lib/x86_64-linux-gnu/libresolv.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L ${ubuntu20image}:/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libc.so.6 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libdl.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libm.so.6 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libpthread.so.0 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/librt.so.1 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_dns.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libresolv.so.2 "${CONTAINER_ROOT_FOLDER}/lib"
docker cp -L "${ubuntu20image}":/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64"
docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "yandex/clickhouse-server:${VERSION}-alpine" --pull
docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "yandex/clickhouse-server:${VERSION}-alpine" --pull

View File

@ -26,17 +26,17 @@ fi
CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}"
# port is needed to check if clickhouse-server is ready for connections
HTTP_PORT="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=http_port)"
HTTP_PORT="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=http_port)"
# get CH directories locations
DATA_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=path || true)"
TMP_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=tmp_path || true)"
USER_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=user_files_path || true)"
LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.log || true)"
LOG_DIR="$(dirname $LOG_PATH || true)"
ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.errorlog || true)"
ERROR_LOG_DIR="$(dirname $ERROR_LOG_PATH || true)"
FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=format_schema_path || true)"
DATA_DIR="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=path || true)"
TMP_DIR="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=tmp_path || true)"
USER_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=user_files_path || true)"
LOG_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=logger.log || true)"
LOG_DIR="$(dirname "${LOG_PATH}" || true)"
ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=logger.errorlog || true)"
ERROR_LOG_DIR="$(dirname "${ERROR_LOG_PATH}" || true)"
FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=format_schema_path || true)"
CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}"
CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}"
@ -92,7 +92,7 @@ fi
if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
# Listen only on localhost until the initialization is done
$gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG -- --listen_host=127.0.0.1 &
$gosu /usr/bin/clickhouse-server --config-file="${CLICKHOUSE_CONFIG}" -- --listen_host=127.0.0.1 &
pid="$!"
# check if clickhouse is ready to accept connections
@ -107,7 +107,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
sleep 1
done
if [ ! -z "$CLICKHOUSE_PASSWORD" ]; then
if [ -n "$CLICKHOUSE_PASSWORD" ]; then
printf -v WITH_PASSWORD '%s %q' "--password" "$CLICKHOUSE_PASSWORD"
fi
@ -130,7 +130,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then
. "$f"
fi
;;
*.sql) echo "$0: running $f"; cat "$f" | "$clickhouseclient" ; echo ;;
*.sql) echo "$0: running $f"; "$clickhouseclient" < "$f" ; echo ;;
*.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "$clickhouseclient"; echo ;;
*) echo "$0: ignoring $f" ;;
esac
@ -145,7 +145,7 @@ fi
# if no args are passed to `docker run` or the first argument starts with `--`, then the user is passing clickhouse-server arguments
if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then
exec $gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG "$@"
exec $gosu /usr/bin/clickhouse-server --config-file="${CLICKHOUSE_CONFIG}" "$@"
fi
# Otherwise, we assume the user want to run his own process, for example a `bash` shell to explore this image

View File

@ -0,0 +1,33 @@
## server_setting_name {#server_setting_name}
Description.
Describe what is configured in this section of settings.
Possible value: ...
Default value: ...
Settings: (Optional)
If the section contains several settings, list them here. Specify possible values and default values:
- setting_1 — Description.
- setting_2 — Description.
**Example:**
```xml
<server_setting_name>
<setting_1> ... </setting_1>
<setting_2> ... </setting_2>
</server_setting_name>
```
**Additional Info** (Optional)
The name of an additional section can be anything, for example, **Usage**.
**See Also** (Optional)
- [link](#)

View File

@ -12,6 +12,7 @@ The list of documented datasets:
- [GitHub Events](../../getting-started/example-datasets/github-events.md)
- [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md)
- [Recipes](../../getting-started/example-datasets/recipes.md)
- [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md)
- [WikiStat](../../getting-started/example-datasets/wikistat.md)
- [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md)

View File

@ -324,7 +324,7 @@ Consider the table:
CREATE TABLE table_with_enum_column_for_tsv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory();
```
When the `input_format_tsv_enum_as_number` setting is enabled:
```sql
SET input_format_tsv_enum_as_number = 1;
@ -1248,7 +1248,7 @@ Consider the table:
CREATE TABLE table_with_enum_column_for_csv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory();
```
When the `input_format_csv_enum_as_number` setting is enabled:
```sql
SET input_format_csv_enum_as_number = 1;
@ -1841,7 +1841,7 @@ Default value: 0.
Enables or disables synchronous data insertion into a [Distributed](../../engines/table-engines/special/distributed.md#distributed) table.
By default, when inserting data into a `Distributed` table, the ClickHouse server sends data to cluster nodes in asynchronous mode. When `insert_distributed_sync=1`, the data is processed synchronously, and the `INSERT` operation succeeds only after all the data is saved on all shards (at least one replica for each shard if `internal_replication` is true).
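For illustration, a minimal sketch of switching a session to synchronous inserts; the table name `dist_hits` and its two-column layout are assumptions made for this example:

```sql
-- dist_hits is a hypothetical Distributed table; adjust the name and columns to your schema.
SET insert_distributed_sync = 1;
-- With the setting enabled, this INSERT returns only after the data is stored on all shards.
INSERT INTO dist_hits VALUES (1, 'first row');
```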
Possible values:
@ -2109,8 +2109,8 @@ Enables [ORDER BY](../../sql-reference/statements/select/order-by.md#optimize_re
Possible values:
- 0 — `ORDER BY` optimization is disabled.
- 1 — `ORDER BY` optimization is enabled.
Default value: `1`.
@ -2124,8 +2124,8 @@ Allows to execute `ALTER TABLE ... UPDATE|DELETE` queries ([mutations](../../sql
Possible values:
- 0 - Mutations execute asynchronously.
- 1 - The query waits for all mutations to complete on the current server.
- 2 - The query waits for all mutations to complete on all replicas (if they exist).
Default value: `0`.
@ -2137,11 +2137,11 @@ Default value: `0`.
## ttl_only_drop_parts {#ttl_only_drop_parts}
Enables or disables complete dropping of data parts where all rows are expired in [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables.
When `ttl_only_drop_parts` is disabled (by default), the ClickHouse server only deletes expired rows according to their TTL.
When `ttl_only_drop_parts` is enabled, the ClickHouse server drops a whole part when all rows in it are expired.
Dropping whole parts instead of partial cleaning TTL-d rows allows having shorter `merge_with_ttl_timeout` times and lower impact on system performance.
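As a sketch, the setting can be enabled per table at creation time; the schema below is illustrative and not taken from this commit:

```sql
-- Illustrative table; ttl_only_drop_parts = 1 makes TTL cleanup drop whole expired parts.
CREATE TABLE ttl_example
(
    event_time DateTime,
    value UInt32
)
ENGINE = MergeTree
ORDER BY event_time
TTL event_time + INTERVAL 1 MONTH
SETTINGS ttl_only_drop_parts = 1;
```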
@ -2152,14 +2152,14 @@ Possible values:
Default value: `0`.
**See Also**
- [CREATE TABLE query clauses and settings](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-query-clauses) (`merge_with_ttl_timeout` setting)
- [Table TTL](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl)
## lock_acquire_timeout {#lock_acquire_timeout}
Defines how many seconds a locking request waits before failing.
Locking timeout is used to protect from deadlocks while executing read/write operations with tables. When the timeout expires and the locking request fails, the ClickHouse server throws an exception "Locking attempt timed out! Possible deadlock avoided. Client should retry." with error code `DEADLOCK_AVOIDED`.
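A minimal example of overriding the timeout for a session; the value 20 is an arbitrary illustration:

```sql
-- Fail lock acquisition after 20 seconds instead of waiting for the default timeout.
SET lock_acquire_timeout = 20;
```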
@ -2179,11 +2179,11 @@ When the setting is enabled and the argument of `CAST` function is `Nullable`, t
Possible values:
- 0 — The `CAST` result has exactly the destination type specified.
- 1 — If the argument type is `Nullable`, the `CAST` result is transformed to `Nullable(DestinationDataType)`.
Default value: `0`.
**Examples**
The following query results in the destination data type exactly:
@ -2215,17 +2215,17 @@ Result:
└───┴───────────────────────────────────────────────────┘
```
**See Also**
- [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function
## output_format_pretty_max_value_width {#output_format_pretty_max_value_width}
Limits the width of a value displayed in [Pretty](../../interfaces/formats.md#pretty) formats. If the value width exceeds the limit, the value is cut.
Possible values:
- Positive integer.
- 0 — The value is cut completely.
Default value: `10000` symbols.
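A short sketch of the truncation effect; the query below is illustrative:

```sql
SET output_format_pretty_max_value_width = 10;
-- The 50-character string is cut to 10 characters in Pretty* output.
SELECT repeat('a', 50) FORMAT PrettyCompact;
```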
@ -2352,7 +2352,7 @@ Default value: `0`.
## persistent {#persistent}
Disables persistency for the [Set](../../engines/table-engines/special/set.md#set) and [Join](../../engines/table-engines/special/join.md#join) table engines.
Reduces the I/O overhead. Suitable for scenarios that pursue performance and do not require persistence.
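A hedged sketch of creating a non-persistent `Set` table; the table name is hypothetical:

```sql
-- With persistency disabled, the set is kept only in memory and is lost on server restart.
SET persistent = 0;
CREATE TABLE lookup_ids (id UInt64) ENGINE = Set;
```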
@ -2426,7 +2426,7 @@ Result:
[
{"number":"0"},
{"number":"1"},
{"number":"2"}
{"number":"2"}
]
```
@ -2447,7 +2447,6 @@ Result:
{"number":"2"}
```
=======
## allow_nullable_key {#allow-nullable-key}
Allows using [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable)-typed values in the sorting key and the primary key of [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree) tables.
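A minimal sketch, assuming the setting is applied in the table-level `SETTINGS` clause; the names are illustrative:

```sql
-- The sorting key column may contain NULL values when allow_nullable_key is enabled.
CREATE TABLE nullable_key_example
(
    k Nullable(Int32),
    v String
)
ENGINE = MergeTree
ORDER BY k
SETTINGS allow_nullable_key = 1;
```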

View File

@ -29,12 +29,12 @@ These actions are described in detail below.
## ADD COLUMN {#alter_add-column}
``` sql
ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after]
ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after | FIRST]
```
Adds a new column to the table with the specified `name`, `type`, [`codec`](../../../sql-reference/statements/create/table.md#codecs) and `default_expr` (see the section [Default expressions](../../../sql-reference/statements/create/table.md#create-default-values)).
If the `IF NOT EXISTS` clause is included, the query won't return an error if the column already exists. If you specify `AFTER name_after` (the name of another column), the column is added after the specified one in the list of table columns. Otherwise, the column is added to the end of the table. Note that there is no way to add a column to the beginning of a table. For a chain of actions, `name_after` can be the name of a column that is added in one of the previous actions.
If the `IF NOT EXISTS` clause is included, the query won't return an error if the column already exists. If you specify `AFTER name_after` (the name of another column), the column is added after the specified one in the list of table columns. If you want to add a column to the beginning of the table, use the `FIRST` clause. Otherwise, the column is added to the end of the table. For a chain of actions, `name_after` can be the name of a column that is added in one of the previous actions.
Adding a column just changes the table structure, without performing any actions with data. The data doesn't appear on the disk after `ALTER`. If the data is missing for a column when reading from the table, it is filled in with default values (by performing the default expression if there is one, or using zeros or empty strings). The column appears on the disk after merging data parts (see [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)).
@ -43,9 +43,24 @@ This approach allows us to complete the `ALTER` query instantly, without increas
Example:
``` sql
ALTER TABLE visits ADD COLUMN browser String AFTER user_id
ALTER TABLE alter_test ADD COLUMN Added1 UInt32 FIRST;
ALTER TABLE alter_test ADD COLUMN Added2 UInt32 AFTER NestedColumn;
ALTER TABLE alter_test ADD COLUMN Added3 UInt32 AFTER ToDrop;
DESC alter_test FORMAT TSV;
```
``` text
Added1 UInt32
CounterID UInt32
StartDate Date
UserID UInt32
VisitID UInt32
NestedColumn.A Array(UInt8)
NestedColumn.S Array(String)
Added2 UInt32
ToDrop UInt32
Added3 UInt32
```
## DROP COLUMN {#alter_drop-column}
``` sql
@ -99,7 +114,7 @@ ALTER TABLE visits COMMENT COLUMN browser 'The table shows the browser used for
## MODIFY COLUMN {#alter_modify-column}
``` sql
MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL]
MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL] [AFTER name_after | FIRST]
```
This query changes the `name` column properties:
@ -114,6 +129,8 @@ This query changes the `name` column properties:
If the `IF EXISTS` clause is specified, the query won't return an error if the column doesn't exist.
The query can also change the order of the columns using the `FIRST | AFTER` clause, see the [ADD COLUMN](#alter_add-column) description.
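For example, reusing the `alter_test` table from the `ADD COLUMN` example above (a sketch of the syntax, not verified output):

``` sql
ALTER TABLE alter_test MODIFY COLUMN Added2 UInt32 FIRST;
ALTER TABLE alter_test MODIFY COLUMN Added3 UInt32 AFTER CounterID;
```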
When changing the type, values are converted as if the [toType](../../../sql-reference/functions/type-conversion-functions.md) functions were applied to them. If only the default expression is changed, the query doesn't do anything complex, and is completed almost instantly.
Example:
@ -124,15 +141,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String)
Changing the column type is the only complex action: it changes the contents of files with data. For large tables, this may take a long time.
There are several processing stages:
- Preparing temporary (new) files with modified data.
- Renaming old files.
- Renaming the temporary (new) files to the old names.
- Deleting the old files.
Only the first stage takes time. If there is a failure at this stage, the data is not changed.
If there is a failure during one of the successive stages, data can be restored manually. The exception is if the old files were deleted from the file system but the data for the new files did not get written to the disk and was lost.
The `ALTER` query is atomic. For MergeTree tables it is also lock-free.
The `ALTER` query for changing columns is replicated. The instructions are saved in ZooKeeper, then each replica applies them. All `ALTER` queries are run in the same order. The query waits for the appropriate actions to be completed on the other replicas. However, a query to change columns in a replicated table can be interrupted, and all actions will be performed asynchronously.

View File

@ -9,7 +9,6 @@ ClickHouse может принимать (`INSERT`) и отдавать (`SELECT
The supported formats and whether they can be used in `INSERT` and `SELECT` queries are listed in the table below.
| Format | INSERT | SELECT |
|-----------------------------------------------------------------------------------------|--------|--------|
| [TabSeparated](#tabseparated) | ✔ | ✔ |
@ -1252,7 +1251,7 @@ SELECT * FROM line_as_string;
## RawBLOB {#rawblob}
In this format, all input data is read into a single value. Only a table with a single field of type [String](../sql-reference/data-types/string.md) or a similar type can be parsed.
In this format, all input data is read into a single value. Only a table with a single field of type [String](../sql-reference/data-types/string.md) or a similar type can be parsed.
The result is output in binary form without delimiters or escaping. If more than one value is output, the format becomes ambiguous and it will be impossible to read the data back.
Below is a comparison of the `RawBLOB` and [TabSeparatedRaw](#tabseparatedraw) formats.
@ -1272,7 +1271,7 @@ SELECT * FROM line_as_string;
- strings are represented as a length in varint format (unsigned [LEB128](https://en.wikipedia.org/wiki/LEB128)) followed by the bytes of the string.
If empty data is passed to the `RawBLOB` input, ClickHouse throws an exception:
``` text
Code: 108. DB::Exception: No data to insert
```
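A minimal sketch of reading such a value back, assuming a hypothetical `blobs` table with a single `String` column:

``` sql
SELECT data FROM blobs FORMAT RawBLOB
```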

View File

@ -18,12 +18,12 @@ toc_title: "\u041c\u0430\u043d\u0438\u043f\u0443\u043b\u044f\u0446\u0438\u0438\u
## ADD COLUMN {#alter_add-column}
``` sql
ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after]
ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after | FIRST]
```
Adds a new column to the table with the name `name`, type `type`, [codec](../create/table.md#codecs) `codec`, and default expression `default_expr` (see the section [Default values](../create/index.md#create-default-values)).
If `IF NOT EXISTS` is specified, the query does not return an error if the column already exists. If `AFTER name_after` (the name of another column) is specified, the column is added after the specified one in the list of table columns. Otherwise, the column is added to the end of the table. Note that ClickHouse does not allow adding columns to the beginning of the table. For a chain of actions, `name_after` can be the name of a column that is added in one of the previous actions.
If `IF NOT EXISTS` is specified, the query does not return an error if the column already exists. If `AFTER name_after` (the name of another column) is specified, the column is added after the specified one in the list of table columns. If you want to add a column to the beginning of the table, use `FIRST`. Otherwise, the column is added to the end of the table. For a chain of actions, `name_after` can be the name of a column that is added in one of the previous actions.
Adding a column only changes the table structure and performs no actions with data: the corresponding data does not appear on disk after the ALTER. When reading from the table, if data is missing for a column, it is filled with default values (by evaluating the default expression if there is one, or with zeros or empty strings). The column also appears on disk when data parts are merged (see [MergeTree](../../../sql-reference/statements/alter/index.md)).
@ -32,7 +32,23 @@ ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after]
Example:
``` sql
ALTER TABLE visits ADD COLUMN browser String AFTER user_id
ALTER TABLE alter_test ADD COLUMN Added1 UInt32 FIRST;
ALTER TABLE alter_test ADD COLUMN Added2 UInt32 AFTER NestedColumn;
ALTER TABLE alter_test ADD COLUMN Added3 UInt32 AFTER ToDrop;
DESC alter_test FORMAT TSV;
```
``` text
Added1 UInt32
CounterID UInt32
StartDate Date
UserID UInt32
VisitID UInt32
NestedColumn.A Array(UInt8)
NestedColumn.S Array(String)
Added2 UInt32
ToDrop UInt32
Added3 UInt32
```
## DROP COLUMN {#alter_drop-column}
@ -88,7 +104,7 @@ ALTER TABLE visits COMMENT COLUMN browser 'Столбец показывает,
## MODIFY COLUMN {#alter_modify-column}
``` sql
MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL]
MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL] [AFTER name_after | FIRST]
```
The query changes the following properties of the `name` column:
@ -103,6 +119,8 @@ MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL]
If `IF EXISTS` is specified, the query does not return an error if the column does not exist.
The query can also change the order of the columns using the `FIRST | AFTER` clause, see the [ADD COLUMN](#alter_add-column) description.
When changing the type, values are converted as if the [toType](../../../sql-reference/statements/alter/index.md) function were applied to them. If only the default expression is changed, the query does nothing complex and completes almost instantly.
Example query:
@ -113,15 +131,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String)
Changing the column type is the only action that does complex work: it changes the contents of the files with data. For large tables this may take a long time.
Execution proceeds in several stages:
- preparing temporary (new) files with the modified data;
- renaming the old files;
- renaming the temporary (new) files to the old names;
- deleting the old files.
Only the first stage takes a long time. If a failure occurs at this stage, the data is not changed.
If a failure occurs during one of the subsequent stages, the data can be restored manually, except when the old files have already been deleted from the file system but the data for the new files never made it to disk and was lost.
The ALTER query is executed atomically.
The `ALTER` query for changing columns is replicated. The corresponding instructions are saved in ZooKeeper, and then each replica applies them. All `ALTER` queries are executed in the same order. The query waits for the corresponding actions to complete on all replicas. However, a query that changes columns in a replicated table can be interrupted, and all actions will then be performed asynchronously.
@ -137,4 +147,4 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String)
For tables that do not store data themselves (such as [Merge](../../../sql-reference/statements/alter/index.md) and [Distributed](../../../sql-reference/statements/alter/index.md)), `ALTER` only changes the table structure and does not change the structure of the underlying tables. For example, when running `ALTER` on a `Distributed` table, you will also need to run `ALTER` on the tables on all remote servers.
[Original article](https://clickhouse.tech/docs/ru/query_language/alter/column/) <!--hide-->
[Original article](https://clickhouse.tech/docs/ru/query_language/alter/column/) <!--hide-->

View File

@ -28,8 +28,8 @@ Follow the instructions on its official website: <https://wkhtmltopdf.org/downl
#### 2. Install CLI tools from npm
1. `apt-get install npm` for Debian/Ubuntu or `brew install npm` on Mac OS X.
2. `npm install -g purifycss amphtml-validator`.
1. `sudo apt-get install npm` for Debian/Ubuntu or `brew install npm` on Mac OS X.
2. `sudo npm install -g purify-css amphtml-validator`.
#### 3. Set up virtualenv

View File

@ -48,11 +48,6 @@ def build_for_lang(lang, args):
logging.info(f'Building {lang} docs')
os.environ['SINGLE_PAGE'] = '0'
config_path = os.path.join(args.docs_dir, f'toc_{lang}.yml')
if args.is_stable_release and not os.path.exists(config_path):
logging.warning(f'Skipping {lang} docs, because {config} does not exist')
return
try:
theme_cfg = {
'name': None,
@ -73,9 +68,7 @@ def build_for_lang(lang, args):
'es': 'Español',
'fr': 'Français',
'ru': 'Русский',
'ja': '日本語',
'tr': 'Türkçe',
'fa': 'فارسی'
'ja': '日本語'
}
site_names = {
@ -84,31 +77,26 @@ def build_for_lang(lang, args):
'es': 'Documentación de ClickHouse %s',
'fr': 'Documentation ClickHouse %s',
'ru': 'Документация ClickHouse %s',
'ja': 'ClickHouseドキュメント %s',
'tr': 'ClickHouse Belgeleri %s',
'fa': 'مستندات %sClickHouse'
'ja': 'ClickHouseドキュメント %s'
}
assert len(site_names) == len(languages)
if args.version_prefix:
site_dir = os.path.join(args.docs_output_dir, args.version_prefix, lang)
else:
site_dir = os.path.join(args.docs_output_dir, lang)
site_dir = os.path.join(args.docs_output_dir, lang)
plugins = ['macros']
if args.htmlproofer:
plugins.append('htmlproofer')
website_url = 'https://clickhouse.tech'
site_name = site_names.get(lang, site_names['en']) % args.version_prefix
site_name = site_names.get(lang, site_names['en']) % ''
site_name = site_name.replace(' ', ' ')
raw_config = dict(
site_name=site_name,
site_url=f'{website_url}/docs/{lang}/',
docs_dir=os.path.join(args.docs_dir, lang),
site_dir=site_dir,
strict=not args.version_prefix,
strict=True,
theme=theme_cfg,
copyright='©2016–2020 Yandex LLC',
use_directory_urls=True,
@ -119,8 +107,6 @@ def build_for_lang(lang, args):
plugins=plugins,
extra=dict(
now=datetime.datetime.now().isoformat(),
stable_releases=args.stable_releases,
version_prefix=args.version_prefix,
single_page=False,
rev=args.rev,
rev_short=args.rev_short,
@ -134,23 +120,14 @@ def build_for_lang(lang, args):
)
)
if os.path.exists(config_path):
raw_config['config_file'] = config_path
else:
raw_config['nav'] = nav.build_docs_nav(lang, args)
raw_config['nav'] = nav.build_docs_nav(lang, args)
cfg = config.load_config(**raw_config)
if not args.skip_multi_page:
try:
mkdocs.commands.build.build(cfg)
except jinja2.exceptions.TemplateError:
if not args.version_prefix:
raise
mdx_clickhouse.PatchedMacrosPlugin.disabled = True
mkdocs.commands.build.build(cfg)
mkdocs.commands.build.build(cfg)
if not (args.skip_amp or args.version_prefix):
if not args.skip_amp:
amp.build_amp(lang, args, cfg)
if not args.skip_single_page:
@ -170,8 +147,7 @@ def build_docs(args):
if lang:
tasks.append((lang, args,))
util.run_function_in_parallel(build_for_lang, tasks, threads=False)
if not args.version_prefix:
redirects.build_docs_redirects(args)
redirects.build_docs_redirects(args)
def build(args):
@ -188,8 +164,6 @@ def build(args):
generate_cmake_flags_files()
build_docs(args)
from github import build_releases
build_releases(args, build_docs)
if not args.skip_blog:
blog.build_blog(args)
@ -209,7 +183,7 @@ if __name__ == '__main__':
website_dir = os.path.join(src_dir, 'website')
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument('--lang', default='en,es,fr,ru,zh,ja,tr,fa')
arg_parser.add_argument('--lang', default='en,es,fr,ru,zh,ja')
arg_parser.add_argument('--blog-lang', default='en,ru')
arg_parser.add_argument('--docs-dir', default='.')
arg_parser.add_argument('--theme-dir', default=website_dir)
@ -217,12 +191,7 @@ if __name__ == '__main__':
arg_parser.add_argument('--src-dir', default=src_dir)
arg_parser.add_argument('--blog-dir', default=os.path.join(website_dir, 'blog'))
arg_parser.add_argument('--output-dir', default='build')
arg_parser.add_argument('--enable-stable-releases', action='store_true')
arg_parser.add_argument('--stable-releases-limit', type=int, default='3')
arg_parser.add_argument('--lts-releases-limit', type=int, default='2')
arg_parser.add_argument('--nav-limit', type=int, default='0')
arg_parser.add_argument('--version-prefix', type=str, default='')
arg_parser.add_argument('--is-stable-release', action='store_true')
arg_parser.add_argument('--skip-multi-page', action='store_true')
arg_parser.add_argument('--skip-single-page', action='store_true')
arg_parser.add_argument('--skip-amp', action='store_true')
@ -252,8 +221,7 @@ if __name__ == '__main__':
args.docs_output_dir = os.path.join(os.path.abspath(args.output_dir), 'docs')
args.blog_output_dir = os.path.join(os.path.abspath(args.output_dir), 'blog')
from github import choose_latest_releases, get_events
args.stable_releases = choose_latest_releases(args) if args.enable_stable_releases else []
from github import get_events
args.rev = subprocess.check_output('git rev-parse HEAD', shell=True).decode('utf-8').strip()
args.rev_short = subprocess.check_output('git rev-parse --short HEAD', shell=True).decode('utf-8').strip()
args.rev_url = f'https://github.com/ClickHouse/ClickHouse/commit/{args.rev}'

View File

@ -13,88 +13,6 @@ import requests
import util
def yield_candidates():
for page in range(1, 100):
url = f'https://api.github.com/repos/ClickHouse/ClickHouse/tags?per_page=100&page={page}'
github_token = os.getenv('GITHUB_TOKEN')
if github_token:
headers = {'authorization': f'OAuth {github_token}'}
else:
headers = {}
for candidate in requests.get(url, headers=headers).json():
yield candidate
time.sleep(random.random() * 3)
def choose_latest_releases(args):
logging.info('Collecting release candidates')
seen_stable = collections.OrderedDict()
seen_lts = collections.OrderedDict()
candidates = []
stable_count = 0
lts_count = 0
for tag in yield_candidates():
if isinstance(tag, dict):
name = tag.get('name', '')
is_stable = 'stable' in name
is_lts = 'lts' in name
is_unstable = not (is_stable or is_lts)
is_in_blacklist = ('v18' in name) or ('prestable' in name) or ('v1.1' in name)
if is_unstable or is_in_blacklist:
continue
major_version = '.'.join((name.split('.', 2))[:2])
if major_version not in seen_lts:
if (stable_count >= args.stable_releases_limit) and (lts_count >= args.lts_releases_limit):
break
payload = (name, tag.get('tarball_url'), is_lts,)
logging.debug(payload)
if is_lts:
if lts_count < args.lts_releases_limit:
seen_lts[major_version] = payload
try:
del seen_stable[major_version]
except KeyError:
pass
lts_count += 1
else:
if stable_count < args.stable_releases_limit:
if major_version not in seen_stable:
seen_stable[major_version] = payload
stable_count += 1
logging.debug(
f'Stables: {stable_count}/{args.stable_releases_limit} LTS: {lts_count}/{args.lts_releases_limit}'
)
else:
logging.fatal('Unexpected GitHub response: %s', str(candidates))
sys.exit(1)
logging.info('Found LTS releases: %s', ', '.join(list(seen_lts.keys())))
logging.info('Found stable releases: %s', ', '.join(list(seen_stable.keys())))
return sorted(list(seen_lts.items()) + list(seen_stable.items()))
def process_release(args, callback, release):
name, (full_name, tarball_url, is_lts,) = release
logging.info(f'Building docs for {full_name}')
buf = io.BytesIO(requests.get(tarball_url).content)
tar = tarfile.open(mode='r:gz', fileobj=buf)
with util.temp_dir() as base_dir:
tar.extractall(base_dir)
args = copy.copy(args)
args.version_prefix = name
args.is_stable_release = True
args.docs_dir = os.path.join(base_dir, os.listdir(base_dir)[0], 'docs')
callback(args)
def build_releases(args, callback):
for release in args.stable_releases:
process_release(args, callback, release)
def get_events(args):
events = []
skip = True
@ -118,12 +36,7 @@ def get_events(args):
if __name__ == '__main__':
class DummyArgs(object):
lts_releases_limit = 1
stable_releases_limit = 3
logging.basicConfig(
level=logging.DEBUG,
stream=sys.stderr
)
for item in choose_latest_releases(DummyArgs()):
print(item)

View File

@ -145,24 +145,9 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin):
if self.skip_git_log:
return markdown
src_path = page.file.abs_src_path
try:
git_log = subprocess.check_output(f'git log --follow --date=iso8601 "{src_path}"', shell=True)
except subprocess.CalledProcessError:
return markdown
max_date = None
min_date = None
for line in git_log.decode('utf-8').split('\n'):
if line.startswith('Date:'):
line = line.replace('Date:', '').strip().replace(' ', 'T', 1).replace(' ', '')
current_date = datetime.datetime.fromisoformat(line[:-2] + ':' + line[-2:])
if (not max_date) or current_date > max_date:
max_date = current_date
if (not min_date) or current_date < min_date:
min_date = current_date
if min_date:
page.meta['published_date'] = min_date
if max_date:
page.meta['modified_date'] = max_date
# There was code that determined the minimum and maximum modification dates for a page.
# It was removed due to being obnoxiously slow.
return markdown
def render_impl(self, markdown):

View File

@ -30,9 +30,8 @@ def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path)
output_dir, lang,
from_path.replace('/index.md', '/index.html').replace('.md', '/index.html')
)
version_prefix = f'/{args.version_prefix}/' if args.version_prefix else '/'
target_path = to_path.replace('/index.md', '/').replace('.md', '/')
to_url = f'/{base_prefix}{version_prefix}{lang}/{target_path}'
to_url = f'/{base_prefix}/{lang}/{target_path}'
to_url = to_url.strip()
write_redirect_html(out_path, to_url)

View File

@ -7,19 +7,22 @@ PUBLISH_DIR="${BASE_DIR}/../publish"
BASE_DOMAIN="${BASE_DOMAIN:-content.clickhouse.tech}"
GIT_TEST_URI="${GIT_TEST_URI:-git@github.com:ClickHouse/clickhouse-website-content.git}"
GIT_PROD_URI="git@github.com:ClickHouse/clickhouse-website-content.git"
EXTRA_BUILD_ARGS="${EXTRA_BUILD_ARGS:---enable-stable-releases --minify --verbose}"
HISTORY_SIZE="${HISTORY_SIZE:-5}"
EXTRA_BUILD_ARGS="${EXTRA_BUILD_ARGS:---minify --verbose}"
if [[ -z "$1" ]]
then
source "${BASE_DIR}/venv/bin/activate"
python3 "${BASE_DIR}/build.py" ${EXTRA_BUILD_ARGS}
rm -rf "${PUBLISH_DIR}" || true
git clone "${GIT_TEST_URI}" "${PUBLISH_DIR}"
cd "${PUBLISH_DIR}"
rm -rf "${PUBLISH_DIR}"
mkdir "${PUBLISH_DIR}" && cd "${PUBLISH_DIR}"
# Will make a repository with website content as the only commit.
git init
git remote add origin "${GIT_TEST_URI}"
git config user.email "robot-clickhouse@yandex-team.ru"
git config user.name "robot-clickhouse"
git rm -rf *
# Add files.
cp -R "${BUILD_DIR}"/* .
echo -n "${BASE_DOMAIN}" > CNAME
echo -n "" > README.md
@ -27,13 +30,11 @@ then
cp "${BASE_DIR}/../../LICENSE" .
git add *
git add ".nojekyll"
git commit -a -m "add new release at $(date)"
NEW_ROOT_COMMIT=$(git rev-parse "HEAD~${HISTORY_SIZE}")
git checkout --orphan temp "${NEW_ROOT_COMMIT}"
git commit -m "root commit"
git rebase --onto temp "${NEW_ROOT_COMMIT}" master
git branch -D temp
git push -f origin master
# Push to GitHub rewriting the existing contents.
git commit -a -m "Add new release at $(date)"
git push --force origin master
if [[ ! -z "${CLOUDFLARE_TOKEN}" ]]
then
sleep 1m

View File

@ -111,10 +111,7 @@ def build_single_page_version(lang, args, nav, cfg):
if not args.test_only:
mkdocs.commands.build.build(cfg)
if args.version_prefix:
single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, args.version_prefix, lang, 'single')
else:
single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, lang, 'single')
single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, lang, 'single')
if os.path.exists(single_page_output_path):
shutil.rmtree(single_page_output_path)
@ -157,10 +154,9 @@ def build_single_page_version(lang, args, nav, cfg):
if args.save_raw_single_page:
shutil.copytree(test_dir, args.save_raw_single_page)
if not args.version_prefix: # maybe enable in future
logging.info(f'Running tests for {lang}')
test.test_single_page(
os.path.join(test_dir, 'single', 'index.html'), lang)
logging.info(f'Running tests for {lang}')
test.test_single_page(
os.path.join(test_dir, 'single', 'index.html'), lang)
if not args.skip_pdf:
single_page_index_html = os.path.join(test_dir, 'single', 'index.html')

View File

@ -11,8 +11,6 @@ import googletrans
import requests
import yaml
import typograph_ru
translator = googletrans.Translator()
default_target_language = os.environ.get('TARGET_LANGUAGE', 'ru')
@ -25,8 +23,6 @@ def translate_impl(text, target_language=None):
target_language = target_language or default_target_language
if target_language == 'en':
return text
elif target_language == 'typograph_ru':
return typograph_ru.typograph(text)
elif is_yandex:
text = text.replace('', '\'')
text = text.replace('', '\'')
@ -59,25 +55,10 @@ def translate(text, target_language=None):
)
def translate_toc(root, lang):
global is_yandex
is_yandex = True
if isinstance(root, dict):
result = []
for key, value in root.items():
key = translate(key, lang) if key != 'hidden' and not key.isupper() else key
result.append((key, translate_toc(value, lang),))
return dict(result)
elif isinstance(root, list):
return [translate_toc(item, lang) for item in root]
elif isinstance(root, str):
return root
def translate_po():
import babel.messages.pofile
base_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'website', 'locale')
for lang in ['en', 'zh', 'es', 'fr', 'ru', 'ja', 'tr', 'fa']:
for lang in ['en', 'zh', 'es', 'fr', 'ru', 'ja']:
po_path = os.path.join(base_dir, lang, 'LC_MESSAGES', 'messages.po')
with open(po_path, 'r') as f:
po_file = babel.messages.pofile.read_po(f, locale=lang, domain='messages')

View File

@ -232,6 +232,7 @@ def minify_website(args):
f"'{args.output_dir}/docs/en/**/*.html' '{args.website_dir}/js/**/*.js' > {css_out}"
else:
command = f'cat {css_in} > {css_out}'
logging.info(command)
output = subprocess.check_output(command, shell=True)
logging.debug(output)

View File

@ -308,53 +308,11 @@ void checkRequiredInstructions()
}
}
#ifdef __linux__
/// clickhouse uses jemalloc as a production allocator
/// and jemalloc relies on working MADV_DONTNEED,
/// which doesn't work under qemu
///
/// but do this only under for linux, since only it return zeroed pages after MADV_DONTNEED
/// (and jemalloc assumes this too, see contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in)
void checkRequiredMadviseFlags()
{
size_t size = 1 << 16;
void * addr = mmap(nullptr, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if (addr == MAP_FAILED)
{
writeError("Can not mmap pages for MADV_DONTNEED check\n");
_Exit(1);
}
memset(addr, 'A', size);
if (!madvise(addr, size, MADV_DONTNEED))
{
/// Suboptimal, but should be simple.
for (size_t i = 0; i < size; ++i)
{
if (reinterpret_cast<unsigned char *>(addr)[i] != 0)
{
writeError("MADV_DONTNEED does not zeroed page. jemalloc will be broken\n");
_Exit(1);
}
}
}
if (munmap(addr, size))
{
writeError("Can not munmap pages for MADV_DONTNEED check\n");
_Exit(1);
}
}
#endif
struct Checker
{
Checker()
{
checkRequiredInstructions();
#ifdef __linux__
checkRequiredMadviseFlags();
#endif
}
} checker;

View File

@ -89,7 +89,7 @@ void ODBCBridge::defineOptions(Poco::Util::OptionSet & options)
{
options.addOption(Poco::Util::Option("http-port", "", "port to listen").argument("http-port", true).binding("http-port"));
options.addOption(
Poco::Util::Option("listen-host", "", "hostname to listen, default localhost").argument("listen-host").binding("listen-host"));
Poco::Util::Option("listen-host", "", "hostname or address to listen, default 127.0.0.1").argument("listen-host").binding("listen-host"));
options.addOption(
Poco::Util::Option("http-timeout", "", "http timeout for socket, default 1800").argument("http-timeout").binding("http-timeout"));
@ -161,7 +161,7 @@ void ODBCBridge::initialize(Application & self)
BaseDaemon::logRevision();
log = &logger();
hostname = config().getString("listen-host", "localhost");
hostname = config().getString("listen-host", "127.0.0.1");
port = config().getUInt("http-port");
if (port > 0xFFFF)
throw Exception("Out of range 'http-port': " + std::to_string(port), ErrorCodes::ARGUMENT_OUT_OF_BOUND);

View File

@ -376,7 +376,7 @@ bool ContextAccess::checkAccessImpl2(const AccessFlags & flags, const Args &...
return true;
};
auto access_denied = [&](const String & error_msg, int error_code)
auto access_denied = [&](const String & error_msg, int error_code [[maybe_unused]])
{
if (trace_log)
LOG_TRACE(trace_log, "Access denied: {}{}", (AccessRightsElement{flags, args...}.toString()),
@ -558,7 +558,7 @@ bool ContextAccess::checkAdminOptionImpl2(const Container & role_ids, const GetN
if (!std::size(role_ids) || is_full_access)
return true;
auto show_error = [this](const String & msg, int error_code)
auto show_error = [this](const String & msg, int error_code [[maybe_unused]])
{
UNUSED(this);
if constexpr (throw_if_denied)

View File

@ -365,7 +365,7 @@ endif ()
if (USE_PARQUET)
dbms_target_link_libraries(PRIVATE ${PARQUET_LIBRARY})
if (NOT USE_INTERNAL_PARQUET_LIBRARY OR USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE)
if (NOT USE_INTERNAL_PARQUET_LIBRARY)
dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${PARQUET_INCLUDE_DIR} ${ARROW_INCLUDE_DIR})
if (USE_STATIC_LIBRARIES)
dbms_target_link_libraries(PRIVATE ${ARROW_LIBRARY})

View File

@ -8,9 +8,7 @@
*/
struct SimpleIncrement
{
std::atomic<UInt64> value;
SimpleIncrement(UInt64 start = 0) : value(start) {}
std::atomic<UInt64> value{0};
void set(UInt64 new_value)
{

View File

@ -76,7 +76,7 @@ public:
const Context & context;
const Configuration & config;
static constexpr inline auto DEFAULT_HOST = "localhost";
static constexpr inline auto DEFAULT_HOST = "127.0.0.1";
static constexpr inline auto DEFAULT_PORT = BridgeHelperMixin::DEFAULT_PORT;
static constexpr inline auto PING_HANDLER = "/ping";
static constexpr inline auto MAIN_HANDLER = "/";

View File

@ -13,12 +13,12 @@ struct MultiEnum
MultiEnum() = default;
template <typename ... EnumValues, typename = std::enable_if_t<std::conjunction_v<std::is_same<EnumTypeT, EnumValues>...>>>
explicit MultiEnum(EnumValues ... v)
constexpr explicit MultiEnum(EnumValues ... v)
: MultiEnum((toBitFlag(v) | ... | 0u))
{}
template <typename ValueType, typename = std::enable_if_t<std::is_convertible_v<ValueType, StorageType>>>
explicit MultiEnum(ValueType v)
constexpr explicit MultiEnum(ValueType v)
: bitset(v)
{
static_assert(std::is_unsigned_v<ValueType>);
@ -95,5 +95,5 @@ struct MultiEnum
private:
StorageType bitset = 0;
static StorageType toBitFlag(EnumType v) { return StorageType{1} << static_cast<StorageType>(v); }
static constexpr StorageType toBitFlag(EnumType v) { return StorageType{1} << static_cast<StorageType>(v); }
};

View File

@ -414,7 +414,9 @@ class IColumn;
M(UInt64, multiple_joins_rewriter_version, 0, "Obsolete setting, does nothing. Will be removed after 2021-03-31", 0) \
M(Bool, enable_debug_queries, false, "Enabled debug queries, but now is obsolete", 0) \
M(Bool, allow_experimental_database_atomic, true, "Obsolete setting, does nothing. Will be removed after 2021-02-12", 0) \
M(UnionMode, union_default_mode, UnionMode::DISTINCT, "Set default Union Mode in SelectWithUnion query. Possible values: empty string, 'ALL', 'DISTINCT'. If empty, query without Union Mode will throw exception.", 0)
M(UnionMode, union_default_mode, UnionMode::DISTINCT, "Set default Union Mode in SelectWithUnion query. Possible values: empty string, 'ALL', 'DISTINCT'. If empty, query without Union Mode will throw exception.", 0) \
M(Bool, optimize_aggregators_of_group_by_keys, true, "Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section", 0) \
M(Bool, optimize_group_by_function_keys, true, "Eliminates functions of other keys in GROUP BY section", 0) \
// End of COMMON_SETTINGS
// Please add settings related to formats into the FORMAT_FACTORY_SETTINGS below.
@ -426,10 +428,10 @@ class IColumn;
M(Bool, output_format_csv_crlf_end_of_line, false, "If it is set true, end of line in CSV format will be \\r\\n instead of \\n.", 0) \
M(Bool, input_format_csv_unquoted_null_literal_as_null, false, "Consider unquoted NULL literal as \\N", 0) \
M(Bool, input_format_csv_enum_as_number, false, "Treat inserted enum values in CSV formats as enum indices \\N", 0) \
M(Bool, input_format_csv_arrays_as_nested_csv, false, R"(When reading Array from CSV, expect that its elements were serialized in nested CSV and then put into string. Example: "[""Hello"", ""world"", ""42"""" TV""]". Braces around array can be omitted.)", 0) \
M(Bool, input_format_skip_unknown_fields, false, "Skip columns with unknown names from input data (it works for JSONEachRow, CSVWithNames, TSVWithNames and TSKV formats).", 0) \
M(Bool, input_format_with_names_use_header, true, "For TSVWithNames and CSVWithNames input formats this controls whether format parser is to assume that column data appear in the input exactly as they are specified in the header.", 0) \
M(Bool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).", 0) \
M(Bool, optimize_aggregators_of_group_by_keys, true, "Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section", 0) \
M(Bool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it works for JSONEachRow, CSV and TSV formats).", IMPORTANT) \
M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \
M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices \\N", 0) \
@ -438,7 +440,6 @@ class IColumn;
M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic' and 'best_effort'.", 0) \
M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \
\
M(Bool, optimize_group_by_function_keys, true, "Eliminates functions of other keys in GROUP BY section", 0) \
M(Bool, input_format_values_interpret_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.", 0) \
M(Bool, input_format_values_deduce_templates_of_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows.", 0) \
M(Bool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \

View File

@ -300,7 +300,7 @@ static void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffe
template <typename Reader>
static void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && read_nested)
static void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && read_nested, bool allow_unenclosed)
{
ColumnArray & column_array = assert_cast<ColumnArray &>(column);
ColumnArray::Offsets & offsets = column_array.getOffsets();
@ -308,7 +308,12 @@ static void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && r
IColumn & nested_column = column_array.getData();
size_t size = 0;
assertChar('[', istr);
bool has_braces = false;
if (checkChar('[', istr))
has_braces = true;
else if (!allow_unenclosed)
throw Exception(ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT, "Array does not start with '[' character");
try
{
@ -320,7 +325,9 @@ static void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && r
if (*istr.position() == ',')
++istr.position();
else
throw Exception("Cannot read array from text", ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT);
throw Exception(ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT,
"Cannot read array from text, expected comma or end of array, found '{}'",
*istr.position());
}
first = false;
@ -335,7 +342,11 @@ static void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && r
skipWhitespaceIfAny(istr);
}
assertChar(']', istr);
if (has_braces)
assertChar(']', istr);
else /// If array is not enclosed in braces, we read until EOF.
assertEOF(istr);
}
catch (...)
{
@ -364,7 +375,7 @@ void DataTypeArray::deserializeText(IColumn & column, ReadBuffer & istr, const F
[&](IColumn & nested_column)
{
nested->deserializeAsTextQuoted(nested_column, istr, settings);
});
}, false);
}
void DataTypeArray::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const
@ -390,7 +401,11 @@ void DataTypeArray::serializeTextJSON(const IColumn & column, size_t row_num, Wr
void DataTypeArray::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const
{
deserializeTextImpl(column, istr, [&](IColumn & nested_column) { nested->deserializeAsTextJSON(nested_column, istr, settings); });
deserializeTextImpl(column, istr,
[&](IColumn & nested_column)
{
nested->deserializeAsTextJSON(nested_column, istr, settings);
}, false);
}
@ -429,7 +444,23 @@ void DataTypeArray::deserializeTextCSV(IColumn & column, ReadBuffer & istr, cons
String s;
readCSV(s, istr, settings.csv);
ReadBufferFromString rb(s);
deserializeText(column, rb, settings);
if (settings.csv.input_format_arrays_as_nested_csv)
{
deserializeTextImpl(column, rb,
[&](IColumn & nested_column)
{
nested->deserializeAsTextCSV(nested_column, rb, settings);
}, true);
}
else
{
deserializeTextImpl(column, rb,
[&](IColumn & nested_column)
{
nested->deserializeAsTextQuoted(nested_column, rb, settings);
}, true);
}
}

View File

@ -2,6 +2,7 @@
#include <string>
#include <Columns/IColumn.h>
#include <Dictionaries/DictionaryStructure.h>
#include <Formats/FormatSettings.h>
#include <Parsers/IdentifierQuotingStyle.h>
@ -16,11 +17,11 @@ class WriteBuffer;
*/
struct ExternalQueryBuilder
{
const DictionaryStructure & dict_struct;
std::string db;
std::string schema;
std::string table;
const std::string & where;
const DictionaryStructure dict_struct;
const std::string db;
const std::string schema;
const std::string table;
const std::string where;
IdentifierQuotingStyle quoting_style;

View File

@ -64,6 +64,7 @@ FormatSettings getFormatSettings(const Context & context,
format_settings.csv.empty_as_default = settings.input_format_defaults_for_omitted_fields;
format_settings.csv.input_format_enum_as_number = settings.input_format_csv_enum_as_number;
format_settings.csv.unquoted_null_literal_as_null = settings.input_format_csv_unquoted_null_literal_as_null;
format_settings.csv.input_format_arrays_as_nested_csv = settings.input_format_csv_arrays_as_nested_csv;
format_settings.custom.escaping_rule = settings.format_custom_escaping_rule;
format_settings.custom.field_delimiter = settings.format_custom_field_delimiter;
format_settings.custom.result_after_delimiter = settings.format_custom_result_after_delimiter;
@ -258,7 +259,6 @@ InputFormatPtr FormatFactory::getInputFormat(
auto format = input_getter(buf, sample, params, format_settings);
/// It's a kludge. Because I cannot remove context from values format.
if (auto * values = typeid_cast<ValuesBlockInputFormat *>(format.get()))
values->setContext(context);

View File

@ -71,6 +71,7 @@ struct FormatSettings
bool empty_as_default = false;
bool crlf_end_of_line = false;
bool input_format_enum_as_number = false;
bool input_format_arrays_as_nested_csv = false;
} csv;
struct Custom

View File

@ -1216,7 +1216,10 @@ public:
{
return res;
}
else if (isColumnedAsDecimal(left_type) || isColumnedAsDecimal(right_type))
else if ((isColumnedAsDecimal(left_type) || isColumnedAsDecimal(right_type))
// Comparing Date and DateTime64 requires implicit conversion,
// otherwise Date is treated as number.
&& !(date_and_datetime && (isDate(left_type) || isDate(right_type))))
{
// compare
if (!allowDecimalComparison(left_type, right_type) && !date_and_datetime)

View File

@ -105,11 +105,11 @@ namespace detail
RemoteHostFilter remote_host_filter;
std::function<void(size_t)> next_callback;
std::istream * call(const Poco::URI uri_, Poco::Net::HTTPResponse & response)
std::istream * call(Poco::URI uri_, Poco::Net::HTTPResponse & response)
{
// With an empty path Poco will send "POST HTTP/1.1" - it's a bug.
if (uri.getPath().empty())
uri.setPath("/");
if (uri_.getPath().empty())
uri_.setPath("/");
Poco::Net::HTTPRequest request(method, uri_.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1);
request.setHost(uri_.getHost()); // use original, not resolved host name in header
@ -125,7 +125,7 @@ namespace detail
if (!credentials.getUsername().empty())
credentials.authenticate(request);
LOG_TRACE((&Poco::Logger::get("ReadWriteBufferFromHTTP")), "Sending request to {}", uri.toString());
LOG_TRACE((&Poco::Logger::get("ReadWriteBufferFromHTTP")), "Sending request to {}", uri_.toString());
auto sess = session->getSession();

View File

@ -34,6 +34,7 @@
#include <Interpreters/QueryLog.h>
#include <Interpreters/TranslateQualifiedNamesVisitor.h>
#include <Interpreters/getTableExpressions.h>
#include <Interpreters/processColumnTransformers.h>
namespace
{
@ -52,7 +53,6 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
InterpreterInsertQuery::InterpreterInsertQuery(
const ASTPtr & query_ptr_, const Context & context_, bool allow_materialized_, bool no_squash_, bool no_destination_)
: query_ptr(query_ptr_)
@ -95,27 +95,7 @@ Block InterpreterInsertQuery::getSampleBlock(
Block table_sample = metadata_snapshot->getSampleBlock();
/// Process column transformers (e.g. * EXCEPT(a)), asterisks and qualified columns.
const auto & columns = metadata_snapshot->getColumns();
auto names_and_types = columns.getOrdinary();
removeDuplicateColumns(names_and_types);
auto table_expr = std::make_shared<ASTTableExpression>();
table_expr->database_and_table_name = createTableIdentifier(table->getStorageID());
table_expr->children.push_back(table_expr->database_and_table_name);
TablesWithColumns tables_with_columns;
tables_with_columns.emplace_back(DatabaseAndTableWithAlias(*table_expr, context.getCurrentDatabase()), names_and_types);
tables_with_columns[0].addHiddenColumns(columns.getMaterialized());
tables_with_columns[0].addHiddenColumns(columns.getAliases());
tables_with_columns[0].addHiddenColumns(table->getVirtuals());
NameSet source_columns_set;
for (const auto & identifier : query.columns->children)
source_columns_set.insert(identifier->getColumnName());
TranslateQualifiedNamesVisitor::Data visitor_data(source_columns_set, tables_with_columns);
TranslateQualifiedNamesVisitor visitor(visitor_data);
auto columns_ast = query.columns->clone();
visitor.visit(columns_ast);
const auto columns_ast = processColumnTransformers(context.getCurrentDatabase(), table, metadata_snapshot, query.columns);
/// Form the block based on the column names from the query
Block res;

View File

@ -5,13 +5,18 @@
#include <Interpreters/InterpreterOptimizeQuery.h>
#include <Access/AccessRightsElement.h>
#include <Common/typeid_cast.h>
#include <Parsers/ASTExpressionList.h>
#include <Interpreters/processColumnTransformers.h>
#include <memory>
namespace DB
{
namespace ErrorCodes
{
extern const int THERE_IS_NO_COLUMN;
}
@ -27,7 +32,44 @@ BlockIO InterpreterOptimizeQuery::execute()
auto table_id = context.resolveStorageID(ast, Context::ResolveOrdinary);
StoragePtr table = DatabaseCatalog::instance().getTable(table_id, context);
auto metadata_snapshot = table->getInMemoryMetadataPtr();
table->optimize(query_ptr, metadata_snapshot, ast.partition, ast.final, ast.deduplicate, context);
// Empty list of names means we deduplicate by all columns, but the user can explicitly state which columns to use.
Names column_names;
if (ast.deduplicate_by_columns)
{
// User requested custom set of columns for deduplication, possibly with Column Transformer expression.
{
// Expand asterisk, column transformers, etc into list of column names.
const auto cols = processColumnTransformers(context.getCurrentDatabase(), table, metadata_snapshot, ast.deduplicate_by_columns);
for (const auto & col : cols->children)
column_names.emplace_back(col->getColumnName());
}
metadata_snapshot->check(column_names, NamesAndTypesList{}, table_id);
Names required_columns;
{
required_columns = metadata_snapshot->getColumnsRequiredForSortingKey();
const auto partitioning_cols = metadata_snapshot->getColumnsRequiredForPartitionKey();
required_columns.reserve(required_columns.size() + partitioning_cols.size());
required_columns.insert(required_columns.end(), partitioning_cols.begin(), partitioning_cols.end());
}
for (const auto & required_col : required_columns)
{
// Deduplication is performed only for adjacent rows in a block,
// and all rows in block are in the sorting key order within a single partition,
// hence deduplication always implicitly takes sorting keys and partition keys into account.
// So we just explicitly state that limitation in order to avoid confusion.
if (std::find(column_names.begin(), column_names.end(), required_col) == column_names.end())
throw Exception(ErrorCodes::THERE_IS_NO_COLUMN,
"DEDUPLICATE BY expression must include all columns used in table's"
" ORDER BY, PRIMARY KEY, or PARTITION BY but '{}' is missing."
" Expanded DEDUPLICATE BY columns expression: ['{}']",
required_col, fmt::join(column_names, "', '"));
}
}
table->optimize(query_ptr, metadata_snapshot, ast.partition, ast.final, ast.deduplicate, column_names, context);
return {};
}

View File

@ -202,10 +202,10 @@ static inline std::tuple<NamesAndTypesList, NamesAndTypesList, NamesAndTypesList
keys->arguments->children.insert(keys->arguments->children.end(),
index_columns->children.begin(), index_columns->children.end());
else if (startsWith(declare_index->index_type, "UNIQUE_"))
unique_keys->arguments->children.insert(keys->arguments->children.end(),
unique_keys->arguments->children.insert(unique_keys->arguments->children.end(),
index_columns->children.begin(), index_columns->children.end());
if (startsWith(declare_index->index_type, "PRIMARY_KEY_"))
primary_keys->arguments->children.insert(keys->arguments->children.end(),
primary_keys->arguments->children.insert(primary_keys->arguments->children.end(),
index_columns->children.begin(), index_columns->children.end());
}
}

View File

@ -195,3 +195,15 @@ TEST(MySQLCreateRewritten, RewrittenQueryWithPrefixKey)
"ReplacingMergeTree(_version) PARTITION BY intDiv(key, 4294967) ORDER BY (key, prefix_key)");
}
TEST(MySQLCreateRewritten, UniqueKeysConvert)
{
tryRegisterFunctions();
const auto & context_holder = getContext();
EXPECT_EQ(queryToString(tryRewrittenCreateQuery(
"CREATE TABLE `test_database`.`test_table_1` (code varchar(255) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,name varchar(255) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,"
" id bigint NOT NULL AUTO_INCREMENT, tenant_id bigint NOT NULL, PRIMARY KEY (id), UNIQUE KEY code_id (code, tenant_id), UNIQUE KEY name_id (name, tenant_id))"
" ENGINE=InnoDB AUTO_INCREMENT=100 DEFAULT CHARSET=utf8 COLLATE=utf8_bin;", context_holder.context)),
"CREATE TABLE test_database.test_table_1 (`code` String, `name` String, `id` Int64, `tenant_id` Int64, `_sign` Int8() MATERIALIZED 1, `_version` UInt64() MATERIALIZED 1)"
" ENGINE = ReplacingMergeTree(_version) PARTITION BY intDiv(id, 18446744073709551) ORDER BY (code, name, tenant_id, id)");
}

View File

@ -0,0 +1,49 @@
#include <Interpreters/processColumnTransformers.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Interpreters/TranslateQualifiedNamesVisitor.h>
#include <Interpreters/getTableExpressions.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/IAST.h>
#include <Storages/IStorage.h>
#include <Storages/StorageInMemoryMetadata.h>
namespace DB
{
ASTPtr processColumnTransformers(
const String & current_database,
const StoragePtr & table,
const StorageMetadataPtr & metadata_snapshot,
ASTPtr query_columns)
{
const auto & columns = metadata_snapshot->getColumns();
auto names_and_types = columns.getOrdinary();
removeDuplicateColumns(names_and_types);
TablesWithColumns tables_with_columns;
{
auto table_expr = std::make_shared<ASTTableExpression>();
table_expr->database_and_table_name = createTableIdentifier(table->getStorageID());
table_expr->children.push_back(table_expr->database_and_table_name);
tables_with_columns.emplace_back(DatabaseAndTableWithAlias(*table_expr, current_database), names_and_types);
}
tables_with_columns[0].addHiddenColumns(columns.getMaterialized());
tables_with_columns[0].addHiddenColumns(columns.getAliases());
tables_with_columns[0].addHiddenColumns(table->getVirtuals());
NameSet source_columns_set;
for (const auto & identifier : query_columns->children)
source_columns_set.insert(identifier->getColumnName());
TranslateQualifiedNamesVisitor::Data visitor_data(source_columns_set, tables_with_columns);
TranslateQualifiedNamesVisitor visitor(visitor_data);
auto columns_ast = query_columns->clone();
visitor.visit(columns_ast);
return columns_ast;
}
}

View File

@ -0,0 +1,19 @@
#pragma once
#include <Parsers/IAST_fwd.h>
#include <Storages/IStorage_fwd.h>
namespace DB
{
struct StorageInMemoryMetadata;
using StorageMetadataPtr = std::shared_ptr<const StorageInMemoryMetadata>;
/// Process column transformers (e.g. * EXCEPT(a)), asterisks and qualified columns.
ASTPtr processColumnTransformers(
const String & current_database,
const StoragePtr & table,
const StorageMetadataPtr & metadata_snapshot,
ASTPtr query_columns);
}

View File

@ -157,6 +157,7 @@ SRCS(
interpretSubquery.cpp
join_common.cpp
loadMetadata.cpp
processColumnTransformers.cpp
sortBlock.cpp
)

View File

@ -23,6 +23,12 @@ void ASTOptimizeQuery::formatQueryImpl(const FormatSettings & settings, FormatSt
if (deduplicate)
settings.ostr << (settings.hilite ? hilite_keyword : "") << " DEDUPLICATE" << (settings.hilite ? hilite_none : "");
if (deduplicate_by_columns)
{
settings.ostr << (settings.hilite ? hilite_keyword : "") << " BY " << (settings.hilite ? hilite_none : "");
deduplicate_by_columns->formatImpl(settings, state, frame);
}
}
}

View File

@ -16,9 +16,11 @@ public:
/// The partition to optimize can be specified.
ASTPtr partition;
/// A flag can be specified - perform optimization "to the end" instead of one step.
bool final;
bool final = false;
/// Do deduplicate (default: false)
bool deduplicate;
bool deduplicate = false;
/// Deduplicate by columns.
ASTPtr deduplicate_by_columns;
/** Get the text that identifies this element. */
String getID(char delim) const override
@ -37,6 +39,12 @@ public:
res->children.push_back(res->partition);
}
if (deduplicate_by_columns)
{
res->deduplicate_by_columns = deduplicate_by_columns->clone();
res->children.push_back(res->deduplicate_by_columns);
}
return res;
}

View File

@ -1259,7 +1259,7 @@ bool ParserColumnsMatcher::parseImpl(Pos & pos, ASTPtr & node, Expected & expect
res->children.push_back(regex_node);
}
ParserColumnsTransformers transformers_p;
ParserColumnsTransformers transformers_p(allowed_transformers);
ASTPtr transformer;
while (transformers_p.parse(pos, transformer, expected))
{
@ -1278,7 +1278,7 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e
ParserKeyword as("AS");
ParserKeyword strict("STRICT");
if (apply.ignore(pos, expected))
if (allowed_transformers.isSet(ColumnTransformer::APPLY) && apply.ignore(pos, expected))
{
bool with_open_round_bracket = false;
@ -1331,7 +1331,7 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e
node = std::move(res);
return true;
}
else if (except.ignore(pos, expected))
else if (allowed_transformers.isSet(ColumnTransformer::EXCEPT) && except.ignore(pos, expected))
{
if (strict.ignore(pos, expected))
is_strict = true;
@ -1371,7 +1371,7 @@ bool ParserColumnsTransformers::parseImpl(Pos & pos, ASTPtr & node, Expected & e
node = std::move(res);
return true;
}
else if (replace.ignore(pos, expected))
else if (allowed_transformers.isSet(ColumnTransformer::REPLACE) && replace.ignore(pos, expected))
{
if (strict.ignore(pos, expected))
is_strict = true;
@ -1434,7 +1434,7 @@ bool ParserAsterisk::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
++pos;
auto asterisk = std::make_shared<ASTAsterisk>();
ParserColumnsTransformers transformers_p;
ParserColumnsTransformers transformers_p(allowed_transformers);
ASTPtr transformer;
while (transformers_p.parse(pos, transformer, expected))
{

View File

@ -1,6 +1,7 @@
#pragma once
#include <Core/Field.h>
#include <Core/MultiEnum.h>
#include <Parsers/IParserBase.h>
@ -70,12 +71,47 @@ protected:
bool allow_query_parameter;
};
/** *, t.*, db.table.*, COLUMNS('<regular expression>') APPLY(...) or EXCEPT(...) or REPLACE(...)
*/
class ParserColumnsTransformers : public IParserBase
{
public:
enum class ColumnTransformer : UInt8
{
APPLY,
EXCEPT,
REPLACE,
};
using ColumnTransformers = MultiEnum<ColumnTransformer, UInt8>;
static constexpr auto AllTransformers = ColumnTransformers{ColumnTransformer::APPLY, ColumnTransformer::EXCEPT, ColumnTransformer::REPLACE};
ParserColumnsTransformers(ColumnTransformers allowed_transformers_ = AllTransformers, bool is_strict_ = false)
: allowed_transformers(allowed_transformers_)
, is_strict(is_strict_)
{}
protected:
const char * getName() const override { return "COLUMNS transformers"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
ColumnTransformers allowed_transformers;
bool is_strict;
};
/// Just *
class ParserAsterisk : public IParserBase
{
public:
using ColumnTransformers = ParserColumnsTransformers::ColumnTransformers;
ParserAsterisk(ColumnTransformers allowed_transformers_ = ParserColumnsTransformers::AllTransformers)
: allowed_transformers(allowed_transformers_)
{}
protected:
const char * getName() const override { return "asterisk"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
ColumnTransformers allowed_transformers;
};
/** Something like t.* or db.table.*
@ -91,21 +127,17 @@ protected:
*/
class ParserColumnsMatcher : public IParserBase
{
public:
using ColumnTransformers = ParserColumnsTransformers::ColumnTransformers;
ParserColumnsMatcher(ColumnTransformers allowed_transformers_ = ParserColumnsTransformers::AllTransformers)
: allowed_transformers(allowed_transformers_)
{}
protected:
const char * getName() const override { return "COLUMNS matcher"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
/** *, t.*, db.table.*, COLUMNS('<regular expression>') APPLY(...) or EXCEPT(...) or REPLACE(...)
*/
class ParserColumnsTransformers : public IParserBase
{
public:
ParserColumnsTransformers(bool is_strict_ = false): is_strict(is_strict_) {}
protected:
const char * getName() const override { return "COLUMNS transformers"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
bool is_strict;
ColumnTransformers allowed_transformers;
};
/** A function, for example, f(x, y + 1, g(z)).

View File

@ -4,11 +4,24 @@
#include <Parsers/ASTOptimizeQuery.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ExpressionListParsers.h>
namespace DB
{
bool ParserOptimizeQueryColumnsSpecification::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
// Do not allow APPLY and REPLACE transformers.
// Since we use Columns Transformers only to get the list of columns,
// and cannot actually modify the content of the columns for deduplication.
const auto allowed_transformers = ParserColumnsTransformers::ColumnTransformers{ParserColumnsTransformers::ColumnTransformer::EXCEPT};
return ParserColumnsMatcher(allowed_transformers).parse(pos, node, expected)
|| ParserAsterisk(allowed_transformers).parse(pos, node, expected)
|| ParserIdentifier(false).parse(pos, node, expected);
}
bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected)
{
@ -16,6 +29,7 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte
ParserKeyword s_partition("PARTITION");
ParserKeyword s_final("FINAL");
ParserKeyword s_deduplicate("DEDUPLICATE");
ParserKeyword s_by("BY");
ParserToken s_dot(TokenType::Dot);
ParserIdentifier name_p;
ParserPartition partition_p;
@ -55,6 +69,14 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte
if (s_deduplicate.ignore(pos, expected))
deduplicate = true;
ASTPtr deduplicate_by_columns;
if (deduplicate && s_by.ignore(pos, expected))
{
if (!ParserList(std::make_unique<ParserOptimizeQueryColumnsSpecification>(), std::make_unique<ParserToken>(TokenType::Comma), false)
.parse(pos, deduplicate_by_columns, expected))
return false;
}
auto query = std::make_shared<ASTOptimizeQuery>();
node = query;
@ -66,6 +88,7 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte
query->children.push_back(partition);
query->final = final;
query->deduplicate = deduplicate;
query->deduplicate_by_columns = deduplicate_by_columns;
return true;
}

View File

@ -7,6 +7,13 @@
namespace DB
{
class ParserOptimizeQueryColumnsSpecification : public IParserBase
{
protected:
const char * getName() const override { return "column specification for OPTIMIZE ... DEDUPLICATE BY"; }
bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override;
};
/** Query OPTIMIZE TABLE [db.]name [PARTITION partition] [FINAL] [DEDUPLICATE]
*/
class ParserOptimizeQuery : public IParserBase

View File

@ -0,0 +1,134 @@
#include <Parsers/ParserOptimizeQuery.h>
#include <Parsers/ParserQueryWithOutput.h>
#include <Parsers/parseQuery.h>
#include <Parsers/formatAST.h>
#include <IO/WriteBufferFromOStream.h>
#include <string_view>
#include <gtest/gtest.h>
namespace
{
using namespace DB;
using namespace std::literals;
}
struct ParserTestCase
{
std::shared_ptr<IParser> parser;
const std::string_view input_text;
const char * expected_ast = nullptr;
};
std::ostream & operator<<(std::ostream & ostr, const ParserTestCase & test_case)
{
return ostr << "parser: " << test_case.parser->getName() << ", input: " << test_case.input_text;
}
class ParserTest : public ::testing::TestWithParam<ParserTestCase>
{};
TEST_P(ParserTest, parseQuery)
{
const auto & [parser, input_text, expected_ast] = GetParam();
ASSERT_NE(nullptr, parser);
if (expected_ast)
{
ASTPtr ast;
ASSERT_NO_THROW(ast = parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0));
EXPECT_EQ(expected_ast, serializeAST(*ast->clone(), false));
}
else
{
ASSERT_THROW(parseQuery(*parser, input_text.begin(), input_text.end(), 0, 0), DB::Exception);
}
}
INSTANTIATE_TEST_SUITE_P(ParserOptimizeQuery, ParserTest, ::testing::Values(
ParserTestCase
{
std::make_shared<ParserOptimizeQuery>(),
"OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('a, b')",
"OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('a, b')"
},
ParserTestCase
{
std::make_shared<ParserOptimizeQuery>(),
"OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]')",
"OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]')"
},
ParserTestCase
{
std::make_shared<ParserOptimizeQuery>(),
"OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]') EXCEPT b",
"OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]') EXCEPT b"
},
ParserTestCase
{
std::make_shared<ParserOptimizeQuery>(),
"OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]') EXCEPT (a, b)",
"OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]') EXCEPT (a, b)"
},
ParserTestCase
{
std::make_shared<ParserOptimizeQuery>(),
"OPTIMIZE TABLE table_name DEDUPLICATE BY a, b, c",
"OPTIMIZE TABLE table_name DEDUPLICATE BY a, b, c"
},
ParserTestCase
{
std::make_shared<ParserOptimizeQuery>(),
"OPTIMIZE TABLE table_name DEDUPLICATE BY *",
"OPTIMIZE TABLE table_name DEDUPLICATE BY *"
},
ParserTestCase
{
std::make_shared<ParserOptimizeQuery>(),
"OPTIMIZE TABLE table_name DEDUPLICATE BY * EXCEPT a",
"OPTIMIZE TABLE table_name DEDUPLICATE BY * EXCEPT a"
},
ParserTestCase
{
std::make_shared<ParserOptimizeQuery>(),
"OPTIMIZE TABLE table_name DEDUPLICATE BY * EXCEPT (a, b)",
"OPTIMIZE TABLE table_name DEDUPLICATE BY * EXCEPT (a, b)"
}
));
INSTANTIATE_TEST_SUITE_P(ParserOptimizeQuery_FAIL, ParserTest, ::testing::Values(
ParserTestCase
{
std::make_shared<ParserOptimizeQuery>(),
"OPTIMIZE TABLE table_name DEDUPLICATE BY",
},
ParserTestCase
{
std::make_shared<ParserOptimizeQuery>(),
"OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]') APPLY(x)",
},
ParserTestCase
{
std::make_shared<ParserOptimizeQuery>(),
"OPTIMIZE TABLE table_name DEDUPLICATE BY COLUMNS('[a]') REPLACE(y)",
},
ParserTestCase
{
std::make_shared<ParserOptimizeQuery>(),
"OPTIMIZE TABLE table_name DEDUPLICATE BY * APPLY(x)",
},
ParserTestCase
{
std::make_shared<ParserOptimizeQuery>(),
"OPTIMIZE TABLE table_name DEDUPLICATE BY * REPLACE(y)",
},
ParserTestCase
{
std::make_shared<ParserOptimizeQuery>(),
"OPTIMIZE TABLE table_name DEDUPLICATE BY db.a, db.b, db.c",
}
));

View File

@ -53,18 +53,20 @@ Chunk IRowInputFormat::generate()
///auto chunk_missing_values = std::make_unique<ChunkMissingValues>();
block_missing_values.clear();
size_t num_rows = 0;
try
{
RowReadExtension info;
for (size_t rows = 0; rows < params.max_block_size; ++rows)
bool continue_reading = true;
for (size_t rows = 0; rows < params.max_block_size && continue_reading; ++rows)
{
try
{
++total_rows;
info.read_columns.clear();
if (!readRow(columns, info))
break;
continue_reading = readRow(columns, info);
for (size_t column_idx = 0; column_idx < info.read_columns.size(); ++column_idx)
{
@ -76,6 +78,18 @@ Chunk IRowInputFormat::generate()
block_missing_values.setBit(column_idx, column_size - 1);
}
}
/// Some formats may read a row AND say the read is finished.
/// For such a case, get the number of rows from the first column.
if (!columns.empty())
num_rows = columns.front()->size();
if (!continue_reading)
break;
/// The case when there are no columns. Just count rows.
if (columns.empty())
++num_rows;
}
catch (Exception & e)
{
@ -107,17 +121,13 @@ Chunk IRowInputFormat::generate()
syncAfterError();
/// Truncate all columns in block to minimal size (remove values that were appended to only part of the columns).
size_t min_size = std::numeric_limits<size_t>::max();
for (size_t column_idx = 0; column_idx < num_columns; ++column_idx)
min_size = std::min(min_size, columns[column_idx]->size());
/// Truncate all columns in block to initial size (remove values that were appended to only part of the columns).
for (size_t column_idx = 0; column_idx < num_columns; ++column_idx)
{
auto & column = columns[column_idx];
if (column->size() > min_size)
column->popBack(column->size() - min_size);
if (column->size() > num_rows)
column->popBack(column->size() - num_rows);
}
}
}
@ -157,7 +167,6 @@ Chunk IRowInputFormat::generate()
return {};
}
auto num_rows = columns.front()->size();
Chunk chunk(std::move(columns), num_rows);
//chunk.setChunkInfo(std::move(chunk_missing_values));
return chunk;
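
Not part of the diff: a minimal standalone sketch of the row-accounting idiom used above, assuming plain std::vector columns instead of IColumn. num_rows is advanced only after a row has been appended to every column, so on a parse error each column can be truncated back to num_rows and a half-written row never leaks into the resulting chunk.

// Standalone sketch (not ClickHouse code): std::vector<int> stands in for IColumn.
#include <cstddef>
#include <iostream>
#include <stdexcept>
#include <vector>

int main()
{
    std::vector<std::vector<int>> columns(3);
    size_t num_rows = 0; // rows known to be complete in every column

    for (size_t row = 0; row < 5; ++row)
    {
        try
        {
            for (size_t c = 0; c < columns.size(); ++c)
            {
                if (row == 3 && c == 1)
                    throw std::runtime_error("parse error in the middle of a row");
                columns[c].push_back(static_cast<int>(row));
            }
            num_rows = columns.front().size(); // count the row only once every column has it
        }
        catch (const std::exception & e)
        {
            // Truncate every column back to the last complete row (cf. popBack in the hunk above).
            for (auto & column : columns)
                if (column.size() > num_rows)
                    column.resize(num_rows);
            std::cout << "skipped broken row: " << e.what() << '\n';
        }
    }

    std::cout << "complete rows: " << num_rows << '\n'; // prints 4 (rows 0, 1, 2 and 4)
    return 0;
}
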

View File

@ -62,9 +62,9 @@ void ArrowBlockOutputFormat::prepareWriter(const std::shared_ptr<arrow::Schema>
// TODO: should we use arrow::ipc::IpcOptions::alignment?
if (stream)
writer_status = arrow::ipc::NewStreamWriter(arrow_ostream.get(), schema);
writer_status = arrow::ipc::MakeStreamWriter(arrow_ostream.get(), schema);
else
writer_status = arrow::ipc::NewFileWriter(arrow_ostream.get(), schema);
writer_status = arrow::ipc::MakeFileWriter(arrow_ostream.get(), schema);
if (!writer_status.ok())
throw Exception(ErrorCodes::UNKNOWN_EXCEPTION,

View File

@ -380,6 +380,7 @@ public:
const ASTPtr & /*partition*/,
bool /*final*/,
bool /*deduplicate*/,
const Names & /* deduplicate_by_columns */,
const Context & /*context*/)
{
throw Exception("Method optimize is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED);

View File

@ -652,7 +652,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
time_t time_of_merge,
const Context & context,
const ReservationPtr & space_reservation,
bool deduplicate)
bool deduplicate,
const Names & deduplicate_by_columns)
{
static const String TMP_PREFIX = "tmp_merge_";
@ -667,6 +668,13 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
const MergeTreeData::DataPartsVector & parts = future_part.parts;
LOG_DEBUG(log, "Merging {} parts: from {} to {} into {}", parts.size(), parts.front()->name, parts.back()->name, future_part.type.toString());
if (deduplicate)
{
if (deduplicate_by_columns.empty())
LOG_DEBUG(log, "DEDUPLICATE BY all columns");
else
LOG_DEBUG(log, "DEDUPLICATE BY ('{}')", fmt::join(deduplicate_by_columns, "', '"));
}
auto disk = space_reservation->getDisk();
String part_path = data.relative_data_path;
@ -891,7 +899,7 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor
BlockInputStreamPtr merged_stream = std::make_shared<PipelineExecutingBlockInputStream>(std::move(pipeline));
if (deduplicate)
merged_stream = std::make_shared<DistinctSortedBlockInputStream>(merged_stream, sort_description, SizeLimits(), 0 /*limit_hint*/, Names());
merged_stream = std::make_shared<DistinctSortedBlockInputStream>(merged_stream, sort_description, SizeLimits(), 0 /*limit_hint*/, deduplicate_by_columns);
if (need_remove_expired_values)
merged_stream = std::make_shared<TTLBlockInputStream>(merged_stream, data, metadata_snapshot, new_data_part, time_of_merge, force_ttl);
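
Not part of the diff: a self-contained sketch of what passing a non-empty deduplicate_by_columns list to DistinctSortedBlockInputStream effectively means, i.e. keeping the first row for every distinct key built from the selected columns. The row model, column names and data below are illustrative assumptions.

// Hypothetical sketch: deduplicate rows by a subset of columns; an empty list means "all columns".
#include <iostream>
#include <set>
#include <string>
#include <vector>

using Row = std::vector<std::string>;

std::vector<Row> deduplicateBy(const std::vector<Row> & rows, const std::vector<size_t> & key_columns)
{
    std::set<Row> seen;
    std::vector<Row> result;
    for (const auto & row : rows)
    {
        Row key;
        if (key_columns.empty())
            key = row; // empty list means deduplicate by all columns, as in the diff
        else
            for (size_t idx : key_columns)
                key.push_back(row[idx]);
        if (seen.insert(key).second) // first occurrence of this key is kept
            result.push_back(row);
    }
    return result;
}

int main()
{
    // columns: 0 = id, 1 = value, 2 = comment
    const std::vector<Row> rows = {{"1", "a", "x"}, {"1", "a", "y"}, {"2", "b", "x"}};
    const auto deduped = deduplicateBy(rows, {0, 1}); // analogous to DEDUPLICATE BY id, value
    std::cout << deduped.size() << " rows left\n";    // prints "2 rows left"
    return 0;
}
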

View File

@ -127,7 +127,8 @@ public:
time_t time_of_merge,
const Context & context,
const ReservationPtr & space_reservation,
bool deduplicate);
bool deduplicate,
const Names & deduplicate_by_columns);
/// Mutate a single data part with the specified commands. Will create and return a temporary part.
MergeTreeData::MutableDataPartPtr mutatePartToTemporaryPart(

View File

@ -6,6 +6,7 @@
#include <IO/ReadBufferFromString.h>
#include <IO/WriteBufferFromString.h>
#include <IO/ReadHelpers.h>
#include <IO/WriteHelpers.h>
namespace DB
@ -16,15 +17,29 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
}
enum FormatVersion : UInt8
{
FORMAT_WITH_CREATE_TIME = 2,
FORMAT_WITH_BLOCK_ID = 3,
FORMAT_WITH_DEDUPLICATE = 4,
FORMAT_WITH_UUID = 5,
FORMAT_WITH_DEDUPLICATE_BY_COLUMNS = 6,
FORMAT_LAST
};
void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const
{
UInt8 format_version = 4;
UInt8 format_version = FORMAT_WITH_DEDUPLICATE;
if (!deduplicate_by_columns.empty())
format_version = std::max<UInt8>(format_version, FORMAT_WITH_DEDUPLICATE_BY_COLUMNS);
/// Conditionally bump format_version only when uuid has been assigned.
/// If some other feature requires bumping format_version to >= 5, then this code becomes a no-op.
if (new_part_uuid != UUIDHelpers::Nil)
format_version = std::max(format_version, static_cast<UInt8>(5));
format_version = std::max<UInt8>(format_version, FORMAT_WITH_UUID);
out << "format version: " << format_version << "\n"
<< "create_time: " << LocalDateTime(create_time ? create_time : time(nullptr)) << "\n"
@ -50,6 +65,17 @@ void ReplicatedMergeTreeLogEntryData::writeText(WriteBuffer & out) const
if (new_part_uuid != UUIDHelpers::Nil)
out << "\ninto_uuid: " << new_part_uuid;
if (!deduplicate_by_columns.empty())
{
out << "\ndeduplicate_by_columns: ";
for (size_t i = 0; i < deduplicate_by_columns.size(); ++i)
{
out << quote << deduplicate_by_columns[i];
if (i != deduplicate_by_columns.size() - 1)
out << ",";
}
}
break;
case DROP_RANGE:
@ -129,10 +155,10 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in)
in >> "format version: " >> format_version >> "\n";
if (format_version < 1 || format_version > 5)
if (format_version < 1 || format_version >= FORMAT_LAST)
throw Exception("Unknown ReplicatedMergeTreeLogEntry format version: " + DB::toString(format_version), ErrorCodes::UNKNOWN_FORMAT_VERSION);
if (format_version >= 2)
if (format_version >= FORMAT_WITH_CREATE_TIME)
{
LocalDateTime create_time_dt;
in >> "create_time: " >> create_time_dt >> "\n";
@ -141,7 +167,7 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in)
in >> "source replica: " >> source_replica >> "\n";
if (format_version >= 3)
if (format_version >= FORMAT_WITH_BLOCK_ID)
{
in >> "block_id: " >> escape >> block_id >> "\n";
}
@ -167,7 +193,7 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in)
}
in >> new_part_name;
if (format_version >= 4)
if (format_version >= FORMAT_WITH_DEDUPLICATE)
{
in >> "\ndeduplicate: " >> deduplicate;
@ -184,6 +210,20 @@ void ReplicatedMergeTreeLogEntryData::readText(ReadBuffer & in)
}
else if (checkString("into_uuid: ", in))
in >> new_part_uuid;
else if (checkString("deduplicate_by_columns: ", in))
{
Strings new_deduplicate_by_columns;
for (;;)
{
String tmp_column_name;
in >> quote >> tmp_column_name;
new_deduplicate_by_columns.emplace_back(std::move(tmp_column_name));
if (!checkString(",", in))
break;
}
deduplicate_by_columns = std::move(new_deduplicate_by_columns);
}
else
trailing_newline_found = true;
}
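
Not part of the diff: a compact sketch of the version-gating pattern applied here, under simplified assumptions (std::stringstream instead of ReadBuffer/WriteBuffer, naive quoting, a single optional field). The writer bumps the format version only when the new field is present; the reader rejects unknown versions and parses the optional field only when its tag appears.

// Hypothetical illustration of the FORMAT_WITH_* gating idiom; not the ClickHouse API.
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

enum FormatVersion : unsigned { WITH_DEDUPLICATE = 4, WITH_DEDUPLICATE_BY_COLUMNS = 6, LAST };

struct Entry
{
    bool deduplicate = false;
    std::vector<std::string> deduplicate_by_columns;
};

std::string writeText(const Entry & e)
{
    unsigned version = WITH_DEDUPLICATE;
    if (!e.deduplicate_by_columns.empty())
        version = WITH_DEDUPLICATE_BY_COLUMNS; // bump only when the new field is actually used

    std::ostringstream out;
    out << "format version: " << version << "\n" << "deduplicate: " << e.deduplicate << "\n";
    if (!e.deduplicate_by_columns.empty())
    {
        out << "deduplicate_by_columns:";
        for (size_t i = 0; i < e.deduplicate_by_columns.size(); ++i)
            out << (i ? "," : " ") << '\'' << e.deduplicate_by_columns[i] << '\'';
        out << "\n";
    }
    return out.str();
}

Entry readText(const std::string & text)
{
    std::istringstream in(text);
    std::string line;
    unsigned version = 0;
    Entry e;
    while (std::getline(in, line))
    {
        if (line.rfind("format version: ", 0) == 0)
        {
            version = std::stoul(line.substr(16));
            if (version < 1 || version >= LAST)
                throw std::runtime_error("Unknown format version");
        }
        else if (line.rfind("deduplicate: ", 0) == 0 && version >= WITH_DEDUPLICATE)
            e.deduplicate = (line.substr(13) == "1");
        else if (line.rfind("deduplicate_by_columns: ", 0) == 0 && version >= WITH_DEDUPLICATE_BY_COLUMNS)
        {
            std::istringstream cols(line.substr(24));
            std::string col;
            while (std::getline(cols, col, ','))
                e.deduplicate_by_columns.push_back(col.substr(1, col.size() - 2)); // strip naive quotes
        }
    }
    return e;
}

int main()
{
    Entry e;
    e.deduplicate = true;
    e.deduplicate_by_columns = {"foo", "bar"};
    const std::string text = writeText(e);
    std::cout << text;
    const Entry back = readText(text);
    std::cout << "round-tripped columns: " << back.deduplicate_by_columns.size() << "\n"; // prints 2
    return 0;
}
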

View File

@ -81,6 +81,7 @@ struct ReplicatedMergeTreeLogEntryData
Strings source_parts;
bool deduplicate = false; /// Do deduplicate on merge
Strings deduplicate_by_columns = {}; // Which columns should be checked for duplicates, empty means 'all' (default).
MergeType merge_type = MergeType::REGULAR;
String column_name;
String index_name;
@ -111,10 +112,10 @@ struct ReplicatedMergeTreeLogEntryData
/// Version of metadata which will be set after this alter
/// Also present in MUTATE_PART command, to track mutations
/// required for complete alter execution.
int alter_version; /// May be equal to -1, if it's normal mutation, not metadata update.
int alter_version = -1; /// May be equal to -1, if it's normal mutation, not metadata update.
/// only ALTER METADATA command
bool have_mutation; /// If this alter requires additional mutation step, for data update
bool have_mutation = false; /// If this alter requires additional mutation step, for data update
String columns_str; /// New columns data corresponding to alter_version
String metadata_str; /// New metadata corresponding to alter_version

View File

@ -0,0 +1,348 @@
#include <Storages/MergeTree/ReplicatedMergeTreeLogEntry.h>
#include <IO/ReadBufferFromString.h>
#include <Core/iostream_debug_helpers.h>
#include <type_traits>
#include <regex>
#include <gtest/gtest.h>
namespace DB
{
std::ostream & operator<<(std::ostream & ostr, const MergeTreeDataPartType & type)
{
return ostr << type.toString();
}
std::ostream & operator<<(std::ostream & ostr, const UInt128 & v)
{
return ostr << v.toHexString();
}
template <typename T, typename Tag>
std::ostream & operator<<(std::ostream & ostr, const StrongTypedef<T, Tag> & v)
{
return ostr << v.toUnderType();
}
std::ostream & operator<<(std::ostream & ostr, const MergeType & v)
{
return ostr << toString(v);
}
}
namespace std
{
std::ostream & operator<<(std::ostream & ostr, const std::exception_ptr & exception)
{
try
{
if (exception)
{
std::rethrow_exception(exception);
}
return ostr << "<NULL EXCEPTION>";
}
catch (const std::exception& e)
{
return ostr << e.what();
}
}
template <typename T>
inline std::ostream& operator<<(std::ostream & ostr, const std::vector<T> & v)
{
ostr << "[";
for (size_t i = 0; i < v.size(); ++i)
{
ostr << i;
if (i != v.size() - 1)
ostr << ", ";
}
return ostr << "] (" << v.size() << ") items";
}
}
namespace
{
using namespace DB;
template <typename T>
void compareAttributes(::testing::AssertionResult & result, const char * name, const T & expected_value, const T & actual_value);
#define CMP_ATTRIBUTE(attribute) compareAttributes(result, #attribute, expected.attribute, actual.attribute)
::testing::AssertionResult compare(
const ReplicatedMergeTreeLogEntryData::ReplaceRangeEntry & expected,
const ReplicatedMergeTreeLogEntryData::ReplaceRangeEntry & actual)
{
auto result = ::testing::AssertionSuccess();
CMP_ATTRIBUTE(drop_range_part_name);
CMP_ATTRIBUTE(from_database);
CMP_ATTRIBUTE(from_table);
CMP_ATTRIBUTE(src_part_names);
CMP_ATTRIBUTE(new_part_names);
CMP_ATTRIBUTE(part_names_checksums);
CMP_ATTRIBUTE(columns_version);
return result;
}
template <typename T>
bool compare(const T & expected, const T & actual)
{
return expected == actual;
}
template <typename T>
::testing::AssertionResult compare(const std::shared_ptr<T> & expected, const std::shared_ptr<T> & actual)
{
if (!!expected != !!actual)
return ::testing::AssertionFailure()
<< "expected : " << static_cast<const void*>(expected.get())
<< "\nactual : " << static_cast<const void*>(actual.get());
if (expected && actual)
return compare(*expected, *actual);
return ::testing::AssertionSuccess();
}
template <typename T>
void compareAttributes(::testing::AssertionResult & result, const char * name, const T & expected_value, const T & actual_value)
{
const auto cmp_result = compare(expected_value, actual_value);
if (cmp_result == false)
{
if (result)
result = ::testing::AssertionFailure();
result << "\nMismatching attribute: \"" << name << "\"";
if constexpr (std::is_same_v<std::decay_t<decltype(cmp_result)>, ::testing::AssertionResult>)
result << "\n" << cmp_result.message();
else
result << "\n\texpected: " << expected_value
<< "\n\tactual : " << actual_value;
}
};
::testing::AssertionResult compare(const ReplicatedMergeTreeLogEntryData & expected, const ReplicatedMergeTreeLogEntryData & actual)
{
::testing::AssertionResult result = ::testing::AssertionSuccess();
CMP_ATTRIBUTE(znode_name);
CMP_ATTRIBUTE(type);
CMP_ATTRIBUTE(source_replica);
CMP_ATTRIBUTE(new_part_name);
CMP_ATTRIBUTE(new_part_type);
CMP_ATTRIBUTE(block_id);
CMP_ATTRIBUTE(actual_new_part_name);
CMP_ATTRIBUTE(new_part_uuid);
CMP_ATTRIBUTE(source_parts);
CMP_ATTRIBUTE(deduplicate);
CMP_ATTRIBUTE(deduplicate_by_columns);
CMP_ATTRIBUTE(merge_type);
CMP_ATTRIBUTE(column_name);
CMP_ATTRIBUTE(index_name);
CMP_ATTRIBUTE(detach);
CMP_ATTRIBUTE(replace_range_entry);
CMP_ATTRIBUTE(alter_version);
CMP_ATTRIBUTE(have_mutation);
CMP_ATTRIBUTE(columns_str);
CMP_ATTRIBUTE(metadata_str);
CMP_ATTRIBUTE(currently_executing);
CMP_ATTRIBUTE(removed_by_other_entry);
CMP_ATTRIBUTE(num_tries);
CMP_ATTRIBUTE(exception);
CMP_ATTRIBUTE(last_attempt_time);
CMP_ATTRIBUTE(num_postponed);
CMP_ATTRIBUTE(postpone_reason);
CMP_ATTRIBUTE(last_postpone_time);
CMP_ATTRIBUTE(create_time);
CMP_ATTRIBUTE(quorum);
return result;
}
}
class ReplicatedMergeTreeLogEntryDataTest : public ::testing::TestWithParam<std::tuple<ReplicatedMergeTreeLogEntryData, const char* /* serialized RE*/>>
{};
TEST_P(ReplicatedMergeTreeLogEntryDataTest, transcode)
{
const auto & [expected, match_regex] = GetParam();
const auto str = expected.toString();
if (match_regex)
{
try
{
// egrep since "." matches newline and we can also use "\n" explicitly
std::regex re(match_regex, std::regex::egrep);
EXPECT_TRUE(std::regex_match(str, re))
<< "Failed to match serialized ReplicatedMergeTreeLogEntryData: {\n"
<< str << "} \nwith regex: \"" << match_regex << "\"\n";
}
catch (const std::regex_error &e)
{
FAIL() << e.what()
<< " on regex: " << match_regex
<< " (" << strlen(match_regex) << " bytes)" << std::endl;
}
catch (...)
{
throw;
}
}
ReplicatedMergeTreeLogEntryData actual;
{
DB::ReadBufferFromString buffer(str);
EXPECT_NO_THROW(actual.readText(buffer)) << "While reading:\n" << str;
}
ASSERT_TRUE(compare(expected, actual)) << "Via text:\n" << str;
}
// Enabling this warning would ruin test brevity without adding anything else in return,
// since most of the fields have default constructors or will be zero-initialized as per the standard,
// so values are predictable and stable across runs.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"
INSTANTIATE_TEST_SUITE_P(Merge, ReplicatedMergeTreeLogEntryDataTest,
::testing::ValuesIn(std::initializer_list<std::tuple<ReplicatedMergeTreeLogEntryData, const char*>>{
{
{
// Basic: minimal set of attributes.
.type = ReplicatedMergeTreeLogEntryData::MERGE_PARTS,
.new_part_type = MergeTreeDataPartType::WIDE,
.create_time = 123, // 0 means 'now' which could cause flaky tests.
},
R"re(^format version: 4.+merge.+into.+deduplicate: 0.+$)re"
},
{
{
.type = ReplicatedMergeTreeLogEntryData::MERGE_PARTS,
.new_part_type = MergeTreeDataPartType::WIDE,
// Format version 4
.deduplicate = true,
.create_time = 123,
},
R"re(^format version: 4.+merge.+into.+deduplicate: 1.+$)re"
},
{
{
.type = ReplicatedMergeTreeLogEntryData::MERGE_PARTS,
.new_part_type = MergeTreeDataPartType::WIDE,
// Format version 5
.new_part_uuid = UUID(UInt128(123456789, 10111213141516)),
.create_time = 123,
},
R"re(^format version: 5.+merge.+into.+deduplicate: 0.+into_uuid: 00000000-075b-cd15-0000-093233447e0c.+$)re"
},
{
{
.type = ReplicatedMergeTreeLogEntryData::MERGE_PARTS,
.new_part_type = MergeTreeDataPartType::WIDE,
// Format version 6
.deduplicate = true,
.deduplicate_by_columns = {"foo", "bar", "qux"},
.create_time = 123,
},
R"re(^format version: 6.+merge.+into.+deduplicate: 1.+deduplicate_by_columns: 'foo','bar','qux'.*$)re"
},
{
{
.type = ReplicatedMergeTreeLogEntryData::MERGE_PARTS,
.new_part_type = MergeTreeDataPartType::WIDE,
// Mixing features
.new_part_uuid = UUID(UInt128(123456789, 10111213141516)),
.deduplicate = true,
.deduplicate_by_columns = {"foo", "bar", "qux"},
.create_time = 123,
},
R"re(^format version: 6.+merge.+into.+deduplicate: 1.+into_uuid: 00000000-075b-cd15-0000-093233447e0c.+deduplicate_by_columns: 'foo','bar','qux'.*$)re"
},
{
// Validate that exotic column names are serialized/deserialized properly
{
.type = ReplicatedMergeTreeLogEntryData::MERGE_PARTS,
.new_part_type = MergeTreeDataPartType::WIDE,
// Mixing features
.new_part_uuid = UUID(UInt128(123456789, 10111213141516)),
.deduplicate = true,
.deduplicate_by_columns = {"name with space", "\"column\"", "'column'", "колонка", "\u30ab\u30e9\u30e0", "\x01\x03 column \x10\x11\x12"},
.create_time = 123,
},
R"re(^format version: 6.+merge.+deduplicate_by_columns: 'name with space','"column"','\\'column\\'','колонка')re"
",'\u30ab\u30e9\u30e0','\x01\x03 column \x10\x11\x12'.*$"
},
}));
#pragma GCC diagnostic pop
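
A small aside, not from the diff: the reason the suppressed warning is harmless is that with C++20 designated initializers any member left out of the braces is value-initialized, so the test cases above can name only the fields they care about and still stay deterministic. A minimal sketch, assuming a C++20 compiler:

// Hypothetical example: members omitted from a designated initializer are value-initialized.
#include <cassert>
#include <string>

struct LogEntryLike
{
    int type = 0;
    bool deduplicate = false;
    std::string new_part_name;
    long create_time = 0;
};

int main()
{
    LogEntryLike e{.type = 2, .create_time = 123}; // only the fields the test cares about
    assert(!e.deduplicate);                        // everything else keeps its default value
    assert(e.new_part_name.empty());
    return 0;
}
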
// This is just an example of how to set all fields. Can't be used as is, since depending on the type,
// only some fields are serialized/deserialized, and even if everything works perfectly,
// some fields in the deserialized object would be unset (hence differ from expected).
// INSTANTIATE_TEST_SUITE_P(Full, ReplicatedMergeTreeLogEntryDataTest,
// ::testing::ValuesIn(std::initializer_list<ReplicatedMergeTreeLogEntryData>{
// {
// .znode_name = "znode name",
// .type = ReplicatedMergeTreeLogEntryData::MERGE_PARTS,
// .source_replica = "source replica",
// .new_part_name = "new part name",
// .new_part_type = MergeTreeDataPartType::WIDE,
// .block_id = "block id",
// .actual_new_part_name = "new part name",
// .new_part_uuid = UUID(UInt128(123456789, 10111213141516)),
// .source_parts = {"part1", "part2"},
// .deduplicate = true,
// .deduplicate_by_columns = {"col1", "col2"},
// .merge_type = MergeType::REGULAR,
// .column_name = "column name",
// .index_name = "index name",
// .detach = false,
// .replace_range_entry = std::make_shared<ReplicatedMergeTreeLogEntryData::ReplaceRangeEntry>(
// ReplicatedMergeTreeLogEntryData::ReplaceRangeEntry
// {
// .drop_range_part_name = "drop range part name",
// .from_database = "from database",
// .src_part_names = {"src part name1", "src part name2"},
// .new_part_names = {"new part name1", "new part name2"},
// .columns_version = 123456,
// }),
// .alter_version = 56789,
// .have_mutation = false,
// .columns_str = "columns str",
// .metadata_str = "metadata str",
// // Those attributes are not serialized to string, hence it makes no sense to set.
// // .currently_executing
// // .removed_by_other_entry
// // .num_tries
// // .exception
// // .last_attempt_time
// // .num_postponed
// // .postpone_reason
// // .last_postpone_time,
// .create_time = static_cast<time_t>(123456789),
// .quorum = 321,
// },
// }));

View File

@ -386,16 +386,17 @@ static void appendBlock(const Block & from, Block & to)
MemoryTracker::BlockerInThread temporarily_disable_memory_tracker;
MutableColumnPtr last_col;
try
{
for (size_t column_no = 0, columns = to.columns(); column_no < columns; ++column_no)
{
const IColumn & col_from = *from.getByPosition(column_no).column.get();
MutableColumnPtr col_to = IColumn::mutate(std::move(to.getByPosition(column_no).column));
last_col = IColumn::mutate(std::move(to.getByPosition(column_no).column));
col_to->insertRangeFrom(col_from, 0, rows);
last_col->insertRangeFrom(col_from, 0, rows);
to.getByPosition(column_no).column = std::move(col_to);
to.getByPosition(column_no).column = std::move(last_col);
}
}
catch (...)
@ -406,6 +407,16 @@ static void appendBlock(const Block & from, Block & to)
for (size_t column_no = 0, columns = to.columns(); column_no < columns; ++column_no)
{
ColumnPtr & col_to = to.getByPosition(column_no).column;
/// If there is no column, then the exception was thrown in the middle of append, in the insertRangeFrom()
if (!col_to)
{
col_to = std::move(last_col);
/// Suppress clang-tidy [bugprone-use-after-move]
last_col = {};
}
/// But if there is still nothing, abort
if (!col_to)
throw Exception("No column to rollback", ErrorCodes::LOGICAL_ERROR);
if (col_to->size() != old_rows)
col_to = col_to->cut(0, old_rows);
}
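
Not part of the diff: a standalone sketch of the rollback idiom in the appendBlock hunk above, with std::unique_ptr<std::vector<int>> standing in for a mutable column. The destination slot is moved into a local before the risky append, so if the append throws, the local still owns the data and can be moved back before truncating to the old row count.

// Hypothetical illustration; the names mirror the diff but none of this is ClickHouse API.
#include <iostream>
#include <memory>
#include <stdexcept>
#include <vector>

using ColumnPtr = std::unique_ptr<std::vector<int>>;

int main()
{
    std::vector<ColumnPtr> block;
    block.push_back(std::make_unique<std::vector<int>>(std::vector<int>{1, 2, 3}));
    const size_t old_rows = block[0]->size();

    ColumnPtr last_col;
    try
    {
        last_col = std::move(block[0]);      // the slot in `block` is now empty
        last_col->push_back(4);              // the risky append; imagine it throws half-way
        throw std::runtime_error("append failed");
        // On success the column would be moved back: block[0] = std::move(last_col);
    }
    catch (const std::exception & e)
    {
        if (!block[0])                       // the exception hit while the column was moved out
            block[0] = std::move(last_col);  // put it back so it can be rolled back
        block[0]->resize(old_rows);          // drop the partially appended rows
        std::cout << "rolled back after: " << e.what() << '\n';
    }

    std::cout << "rows: " << block[0]->size() << '\n'; // prints "rows: 3"
    return 0;
}
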
@ -583,7 +594,7 @@ void StorageBuffer::shutdown()
try
{
optimize(nullptr /*query*/, getInMemoryMetadataPtr(), {} /*partition*/, false /*final*/, false /*deduplicate*/, global_context);
optimize(nullptr /*query*/, getInMemoryMetadataPtr(), {} /*partition*/, false /*final*/, false /*deduplicate*/, {}, global_context);
}
catch (...)
{
@ -608,6 +619,7 @@ bool StorageBuffer::optimize(
const ASTPtr & partition,
bool final,
bool deduplicate,
const Names & /* deduplicate_by_columns */,
const Context & /*context*/)
{
if (partition)
@ -906,7 +918,7 @@ void StorageBuffer::alter(const AlterCommands & params, const Context & context,
/// Flush all buffers to storages, so that no non-empty blocks of the old
/// structure remain. Structure of empty blocks will be updated during first
/// insert.
optimize({} /*query*/, metadata_snapshot, {} /*partition_id*/, false /*final*/, false /*deduplicate*/, context);
optimize({} /*query*/, metadata_snapshot, {} /*partition_id*/, false /*final*/, false /*deduplicate*/, {}, context);
StorageInMemoryMetadata new_metadata = *metadata_snapshot;
params.apply(new_metadata, context);

View File

@ -87,6 +87,7 @@ public:
const ASTPtr & partition,
bool final,
bool deduplicate,
const Names & deduplicate_by_columns,
const Context & context) override;
bool supportsSampling() const override { return true; }

View File

@ -233,12 +233,13 @@ bool StorageMaterializedView::optimize(
const ASTPtr & partition,
bool final,
bool deduplicate,
const Names & deduplicate_by_columns,
const Context & context)
{
checkStatementCanBeForwarded();
auto storage_ptr = getTargetTable();
auto metadata_snapshot = storage_ptr->getInMemoryMetadataPtr();
return getTargetTable()->optimize(query, metadata_snapshot, partition, final, deduplicate, context);
return getTargetTable()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, context);
}
void StorageMaterializedView::alter(

View File

@ -46,6 +46,7 @@ public:
const ASTPtr & partition,
bool final,
bool deduplicate,
const Names & deduplicate_by_columns,
const Context & context) override;
void alter(const AlterCommands & params, const Context & context, TableLockHolder & table_lock_holder) override;

View File

@ -741,6 +741,7 @@ bool StorageMergeTree::merge(
const String & partition_id,
bool final,
bool deduplicate,
const Names & deduplicate_by_columns,
String * out_disable_reason,
bool optimize_skip_merged_partitions)
{
@ -758,10 +759,15 @@ bool StorageMergeTree::merge(
if (!merge_mutate_entry)
return false;
return mergeSelectedParts(metadata_snapshot, deduplicate, *merge_mutate_entry, table_lock_holder);
return mergeSelectedParts(metadata_snapshot, deduplicate, deduplicate_by_columns, *merge_mutate_entry, table_lock_holder);
}
bool StorageMergeTree::mergeSelectedParts(const StorageMetadataPtr & metadata_snapshot, bool deduplicate, MergeMutateSelectedEntry & merge_mutate_entry, TableLockHolder & table_lock_holder)
bool StorageMergeTree::mergeSelectedParts(
const StorageMetadataPtr & metadata_snapshot,
bool deduplicate,
const Names & deduplicate_by_columns,
MergeMutateSelectedEntry & merge_mutate_entry,
TableLockHolder & table_lock_holder)
{
auto & future_part = merge_mutate_entry.future_part;
Stopwatch stopwatch;
@ -786,7 +792,7 @@ bool StorageMergeTree::mergeSelectedParts(const StorageMetadataPtr & metadata_sn
{
new_part = merger_mutator.mergePartsToTemporaryPart(
future_part, metadata_snapshot, *(merge_list_entry), table_lock_holder, time(nullptr),
global_context, merge_mutate_entry.tagger->reserved_space, deduplicate);
global_context, merge_mutate_entry.tagger->reserved_space, deduplicate, deduplicate_by_columns);
merger_mutator.renameMergedTemporaryPart(new_part, future_part.parts, nullptr);
write_part_log({});
@ -953,7 +959,7 @@ std::optional<JobAndPool> StorageMergeTree::getDataProcessingJob()
return JobAndPool{[this, metadata_snapshot, merge_entry, mutate_entry, share_lock] () mutable
{
if (merge_entry)
mergeSelectedParts(metadata_snapshot, false, *merge_entry, share_lock);
mergeSelectedParts(metadata_snapshot, false, {}, *merge_entry, share_lock);
else if (mutate_entry)
mutateSelectedPart(metadata_snapshot, *mutate_entry, share_lock);
}, PoolType::MERGE_MUTATE};
@ -1036,8 +1042,17 @@ bool StorageMergeTree::optimize(
const ASTPtr & partition,
bool final,
bool deduplicate,
const Names & deduplicate_by_columns,
const Context & context)
{
if (deduplicate)
{
if (deduplicate_by_columns.empty())
LOG_DEBUG(log, "DEDUPLICATE BY all columns");
else
LOG_DEBUG(log, "DEDUPLICATE BY ('{}')", fmt::join(deduplicate_by_columns, "', '"));
}
String disable_reason;
if (!partition && final)
{
@ -1049,7 +1064,7 @@ bool StorageMergeTree::optimize(
for (const String & partition_id : partition_ids)
{
if (!merge(true, partition_id, true, deduplicate, &disable_reason, context.getSettingsRef().optimize_skip_merged_partitions))
if (!merge(true, partition_id, true, deduplicate, deduplicate_by_columns, &disable_reason, context.getSettingsRef().optimize_skip_merged_partitions))
{
constexpr const char * message = "Cannot OPTIMIZE table: {}";
if (disable_reason.empty())
@ -1068,7 +1083,7 @@ bool StorageMergeTree::optimize(
if (partition)
partition_id = getPartitionIDFromQuery(partition, context);
if (!merge(true, partition_id, final, deduplicate, &disable_reason, context.getSettingsRef().optimize_skip_merged_partitions))
if (!merge(true, partition_id, final, deduplicate, deduplicate_by_columns, &disable_reason, context.getSettingsRef().optimize_skip_merged_partitions))
{
constexpr const char * message = "Cannot OPTIMIZE table: {}";
if (disable_reason.empty())

View File

@ -70,6 +70,7 @@ public:
const ASTPtr & partition,
bool final,
bool deduplicate,
const Names & deduplicate_by_columns,
const Context & context) override;
void mutate(const MutationCommands & commands, const Context & context) override;
@ -106,7 +107,7 @@ private:
BackgroundMovesExecutor background_moves_executor;
/// For block numbers.
SimpleIncrement increment{0};
SimpleIncrement increment;
/// For clearOldParts, clearOldTemporaryDirectories.
AtomicStopwatch time_after_previous_cleanup;
@ -132,7 +133,7 @@ private:
* If aggressive - when selecting parts, don't take into account their size ratio and novelty (used for OPTIMIZE query).
* Returns true if merge is finished successfully.
*/
bool merge(bool aggressive, const String & partition_id, bool final, bool deduplicate, String * out_disable_reason = nullptr, bool optimize_skip_merged_partitions = false);
bool merge(bool aggressive, const String & partition_id, bool final, bool deduplicate, const Names & deduplicate_by_columns, String * out_disable_reason = nullptr, bool optimize_skip_merged_partitions = false);
ActionLock stopMergesAndWait();
@ -183,7 +184,8 @@ private:
TableLockHolder & table_lock_holder,
bool optimize_skip_merged_partitions = false,
SelectPartsDecision * select_decision_out = nullptr);
bool mergeSelectedParts(const StorageMetadataPtr & metadata_snapshot, bool deduplicate, MergeMutateSelectedEntry & entry, TableLockHolder & table_lock_holder);
bool mergeSelectedParts(const StorageMetadataPtr & metadata_snapshot, bool deduplicate, const Names & deduplicate_by_columns, MergeMutateSelectedEntry & entry, TableLockHolder & table_lock_holder);
std::shared_ptr<MergeMutateSelectedEntry> selectPartsToMutate(const StorageMetadataPtr & metadata_snapshot, String * disable_reason, TableLockHolder & table_lock_holder);
bool mutateSelectedPart(const StorageMetadataPtr & metadata_snapshot, MergeMutateSelectedEntry & entry, TableLockHolder & table_lock_holder);

View File

@ -122,9 +122,10 @@ public:
const ASTPtr & partition,
bool final,
bool deduplicate,
const Names & deduplicate_by_columns,
const Context & context) override
{
return getNested()->optimize(query, metadata_snapshot, partition, final, deduplicate, context);
return getNested()->optimize(query, metadata_snapshot, partition, final, deduplicate, deduplicate_by_columns, context);
}
void mutate(const MutationCommands & commands, const Context & context) override { getNested()->mutate(commands, context); }

View File

@ -1508,7 +1508,7 @@ bool StorageReplicatedMergeTree::tryExecuteMerge(const LogEntry & entry)
{
part = merger_mutator.mergePartsToTemporaryPart(
future_merged_part, metadata_snapshot, *merge_entry,
table_lock, entry.create_time, global_context, reserved_space, entry.deduplicate);
table_lock, entry.create_time, global_context, reserved_space, entry.deduplicate, entry.deduplicate_by_columns);
merger_mutator.renameMergedTemporaryPart(part, parts, &transaction);
@ -2712,6 +2712,7 @@ void StorageReplicatedMergeTree::mergeSelectingTask()
const auto storage_settings_ptr = getSettings();
const bool deduplicate = false; /// TODO: read deduplicate option from table config
const Names deduplicate_by_columns = {};
CreateMergeEntryResult create_result = CreateMergeEntryResult::Other;
try
@ -2762,6 +2763,7 @@ void StorageReplicatedMergeTree::mergeSelectingTask()
future_merged_part.uuid,
future_merged_part.type,
deduplicate,
deduplicate_by_columns,
nullptr,
merge_pred.getVersion(),
future_merged_part.merge_type);
@ -2851,6 +2853,7 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c
const UUID & merged_part_uuid,
const MergeTreeDataPartType & merged_part_type,
bool deduplicate,
const Names & deduplicate_by_columns,
ReplicatedMergeTreeLogEntryData * out_log_entry,
int32_t log_version,
MergeType merge_type)
@ -2888,6 +2891,7 @@ StorageReplicatedMergeTree::CreateMergeEntryResult StorageReplicatedMergeTree::c
entry.new_part_type = merged_part_type;
entry.merge_type = merge_type;
entry.deduplicate = deduplicate;
entry.deduplicate_by_columns = deduplicate_by_columns;
entry.merge_type = merge_type;
entry.create_time = time(nullptr);
@ -3862,6 +3866,7 @@ BlockOutputStreamPtr StorageReplicatedMergeTree::write(const ASTPtr & /*query*/,
const Settings & query_settings = context.getSettingsRef();
bool deduplicate = storage_settings_ptr->replicated_deduplication_window != 0 && query_settings.insert_deduplicate;
// TODO: should we also somehow pass list of columns to deduplicate on to the ReplicatedMergeTreeBlockOutputStream ?
return std::make_shared<ReplicatedMergeTreeBlockOutputStream>(
*this, metadata_snapshot, query_settings.insert_quorum,
query_settings.insert_quorum_timeout.totalMilliseconds(),
@ -3878,6 +3883,7 @@ bool StorageReplicatedMergeTree::optimize(
const ASTPtr & partition,
bool final,
bool deduplicate,
const Names & deduplicate_by_columns,
const Context & query_context)
{
assertNotReadonly();
@ -3935,7 +3941,8 @@ bool StorageReplicatedMergeTree::optimize(
ReplicatedMergeTreeLogEntryData merge_entry;
CreateMergeEntryResult create_result = createLogEntryToMergeParts(
zookeeper, future_merged_part.parts,
future_merged_part.name, future_merged_part.uuid, future_merged_part.type, deduplicate,
future_merged_part.name, future_merged_part.uuid, future_merged_part.type,
deduplicate, deduplicate_by_columns,
&merge_entry, can_merge.getVersion(), future_merged_part.merge_type);
if (create_result == CreateMergeEntryResult::MissingPart)
@ -3996,7 +4003,8 @@ bool StorageReplicatedMergeTree::optimize(
ReplicatedMergeTreeLogEntryData merge_entry;
CreateMergeEntryResult create_result = createLogEntryToMergeParts(
zookeeper, future_merged_part.parts,
future_merged_part.name, future_merged_part.uuid, future_merged_part.type, deduplicate,
future_merged_part.name, future_merged_part.uuid, future_merged_part.type,
deduplicate, deduplicate_by_columns,
&merge_entry, can_merge.getVersion(), future_merged_part.merge_type);
if (create_result == CreateMergeEntryResult::MissingPart)

View File

@ -120,6 +120,7 @@ public:
const ASTPtr & partition,
bool final,
bool deduplicate,
const Names & deduplicate_by_columns,
const Context & query_context) override;
void alter(const AlterCommands & commands, const Context & query_context, TableLockHolder & table_lock_holder) override;
@ -470,6 +471,7 @@ private:
const UUID & merged_part_uuid,
const MergeTreeDataPartType & merged_part_type,
bool deduplicate,
const Names & deduplicate_by_columns,
ReplicatedMergeTreeLogEntryData * out_log_entry,
int32_t log_version,
MergeType merge_type);

View File

@ -7,7 +7,7 @@ else ()
include (${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake)
endif ()
install (PROGRAMS clickhouse-test clickhouse-test-server DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
install (PROGRAMS clickhouse-test DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse)
install (
DIRECTORY queries performance config
DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/clickhouse-test
@ -17,30 +17,6 @@ install (
PATTERN ".gitignore" EXCLUDE
)
install (FILES server-test.xml DESTINATION ${CLICKHOUSE_ETC_DIR}/clickhouse-server COMPONENT clickhouse)
install (FILES client-test.xml DESTINATION ${CLICKHOUSE_ETC_DIR}/clickhouse-client COMPONENT clickhouse)
if (ENABLE_TESTS)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/CTestCustom.cmake ${ClickHouse_BINARY_DIR})
# maybe add --no-long ?
# if you want disable some tests: env TEST_OPT0='--skip compile'
if(TEST_CMAKE_PARALLEL)
# NUMBER_OF_LOGICAL_CORES
if (TEST_CMAKE_PARALLEL GREATER 1)
set(TOTAL_TESTS ${TEST_CMAKE_PARALLEL})
else()
set(TOTAL_TESTS ${NUMBER_OF_LOGICAL_CORES})
endif()
foreach(proc RANGE 1 ${TOTAL_TESTS})
add_test(NAME with_server${proc} COMMAND bash -c "env BUILD_DIR=${ClickHouse_BINARY_DIR} TEST_OPT0=--parallel=${proc}/${TOTAL_TESTS} ${CMAKE_CURRENT_SOURCE_DIR}/clickhouse-test-server")
endforeach()
else()
add_test(NAME with_server COMMAND bash -c "env BUILD_DIR=${ClickHouse_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/clickhouse-test-server")
endif()
endif ()
if (ENABLE_TEST_INTEGRATION)
add_subdirectory (integration)
endif ()

View File

@ -1,5 +0,0 @@
set(CTEST_CUSTOM_TESTS_IGNORE
example
example64
capnp-heavy-tests-run
)

View File

@ -417,6 +417,18 @@
"with_coverage": false
}
},
"Stress test (debug)": {
"required_build_properties": {
"compiler": "clang-11",
"package_type": "deb",
"build_type": "debug",
"sanitizer": "none",
"bundled": "bundled",
"splitted": "unsplitted",
"clang-tidy": "disable",
"with_coverage": false
}
},
"Integration tests (asan)": {
"required_build_properties": {
"compiler": "clang-11",

View File

@ -1,3 +0,0 @@
<yandex>
<insert_format_max_block_size>100000</insert_format_max_block_size>
</yandex>

View File

@ -251,7 +251,7 @@ stop_time = None
# def run_tests_array(all_tests, suite, suite_dir, suite_tmp_dir, run_total):
def run_tests_array(all_tests_with_params):
all_tests, suite, suite_dir, suite_tmp_dir, run_total = all_tests_with_params
all_tests, suite, suite_dir, suite_tmp_dir = all_tests_with_params
global exit_code
global SERVER_DIED
global stop_time
@ -688,7 +688,7 @@ def main(args):
prefix, suffix = item.split('_', 1)
try:
return reverse * int(prefix), suffix
return reverse * int(prefix)
except ValueError:
return 99997
@ -698,6 +698,7 @@ def main(args):
all_tests = [t for t in all_tests if any([re.search(r, t) for r in args.test])]
all_tests.sort(key=key_func)
jobs = args.jobs
parallel_tests = []
sequential_tests = []
for test in all_tests:
@ -706,35 +707,32 @@ def main(args):
else:
parallel_tests.append(test)
print("Found", len(parallel_tests), "parallel tests and", len(sequential_tests), "sequential tests")
run_n, run_total = args.parallel.split('/')
run_n = float(run_n)
run_total = float(run_total)
tests_n = len(parallel_tests)
if run_total > tests_n:
run_total = tests_n
if run_n > run_total:
continue
if jobs > 1 and len(parallel_tests) > 0:
print("Found", len(parallel_tests), "parallel tests and", len(sequential_tests), "sequential tests")
run_n, run_total = args.parallel.split('/')
run_n = float(run_n)
run_total = float(run_total)
tests_n = len(parallel_tests)
if run_total > tests_n:
run_total = tests_n
jobs = args.jobs
if jobs > tests_n:
jobs = tests_n
if jobs > run_total:
run_total = jobs
if jobs > tests_n:
jobs = tests_n
if jobs > run_total:
run_total = jobs
batch_size = len(parallel_tests) // jobs
parallel_tests_array = []
for i in range(0, len(parallel_tests), batch_size):
parallel_tests_array.append((parallel_tests[i:i+batch_size], suite, suite_dir, suite_tmp_dir, run_total))
batch_size = len(parallel_tests) // jobs
parallel_tests_array = []
for i in range(0, len(parallel_tests), batch_size):
parallel_tests_array.append((parallel_tests[i:i+batch_size], suite, suite_dir, suite_tmp_dir))
if jobs > 1:
with closing(multiprocessing.Pool(processes=jobs)) as pool:
pool.map(run_tests_array, parallel_tests_array)
run_tests_array((sequential_tests, suite, suite_dir, suite_tmp_dir, run_total))
run_tests_array((sequential_tests, suite, suite_dir, suite_tmp_dir))
total_tests_run += len(sequential_tests) + len(parallel_tests)
else:
run_tests_array((all_tests, suite, suite_dir, suite_tmp_dir, run_total))
run_tests_array((all_tests, suite, suite_dir, suite_tmp_dir))
total_tests_run += len(all_tests)
if args.hung_check:

View File

@ -1,166 +0,0 @@
#!/usr/bin/env bash
set -x
set -o errexit
set -o pipefail
CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && cd ../.. && pwd)
DATA_DIR=${DATA_DIR:=`mktemp -d /tmp/clickhouse.test..XXXXX`}
DATA_DIR_PATTERN=${DATA_DIR_PATTERN:=/tmp/clickhouse} # path from config file, will be replaced to temporary
LOG_DIR=${LOG_DIR:=$DATA_DIR/log}
export CLICKHOUSE_BINARY_NAME=${CLICKHOUSE_BINARY_NAME:="clickhouse"}
( [ -x "$ROOT_DIR/programs/${CLICKHOUSE_BINARY_NAME}-server" ] || [ -x "$ROOT_DIR/programs/${CLICKHOUSE_BINARY_NAME}" ] ) && BUILD_DIR=${BUILD_DIR:=$ROOT_DIR} # Build without separate build dir
[ -d "$ROOT_DIR/build${BUILD_TYPE}" ] && BUILD_DIR=${BUILD_DIR:=$ROOT_DIR/build${BUILD_TYPE}}
BUILD_DIR=${BUILD_DIR:=$ROOT_DIR}
[ -x ${CLICKHOUSE_BINARY_NAME}-server" ] && [ -x ${CLICKHOUSE_BINARY_NAME}-client" ] && BIN_DIR= # Allow run in /usr/bin
( [ -x "$BUILD_DIR/programs/${CLICKHOUSE_BINARY_NAME}" ] || [ -x "$BUILD_DIR/programs/${CLICKHOUSE_BINARY_NAME}-server" ] ) && BIN_DIR=${BIN_DIR:=$BUILD_DIR/programs/}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-server" ] && CLICKHOUSE_SERVER=${CLICKHOUSE_SERVER:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-server}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}" ] && CLICKHOUSE_SERVER=${CLICKHOUSE_SERVER:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME} server}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-client" ] && CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-client}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}" ] && CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME} client}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-extract-from-config" ] && CLICKHOUSE_EXTRACT=${CLICKHOUSE_EXTRACT:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-extract-from-config}
[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}" ] && CLICKHOUSE_EXTRACT=${CLICKHOUSE_EXTRACT:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME} extract-from-config}
[ -f "$CUR_DIR/server-test.xml" ] && CONFIG_DIR=${CONFIG_DIR=$CUR_DIR}/
CONFIG_CLIENT_DIR=${CONFIG_CLIENT_DIR=$CONFIG_DIR}
CONFIG_SERVER_DIR=${CONFIG_SERVER_DIR=$CONFIG_DIR}
[ ! -f "${CONFIG_CLIENT_DIR}client-test.xml" ] && CONFIG_CLIENT_DIR=${CONFIG_CLIENT_DIR:=/etc/clickhouse-client/}
[ ! -f "${CONFIG_SERVER_DIR}server-test.xml" ] && CONFIG_SERVER_DIR=${CONFIG_SERVER_DIR:=/etc/clickhouse-server/}
export CLICKHOUSE_CONFIG_CLIENT=${CLICKHOUSE_CONFIG_CLIENT:=${CONFIG_CLIENT_DIR}client-test.xml}
export CLICKHOUSE_CONFIG=${CLICKHOUSE_CONFIG:=${CONFIG_SERVER_DIR}server-test.xml}
CLICKHOUSE_CONFIG_USERS=${CONFIG_SERVER_DIR}users.xml
[ ! -f "$CLICKHOUSE_CONFIG_USERS" ] && CLICKHOUSE_CONFIG_USERS=$CUR_DIR/../programs/server/users.xml
CLICKHOUSE_CONFIG_USERS_D=${CONFIG_SERVER_DIR}users.d
[ ! -d "$CLICKHOUSE_CONFIG_USERS_D" ] && CLICKHOUSE_CONFIG_USERS_D=$CUR_DIR/../programs/server/users.d
[ -x "$CUR_DIR/clickhouse-test" ] && TEST_DIR=${TEST_DIR=$CUR_DIR/}
[ -d "$CUR_DIR/queries" ] && QUERIES_DIR=${QUERIES_DIR=$CUR_DIR/queries}
[ ! -d "$QUERIES_DIR" ] && [ -d "/usr/local/share/clickhouse-test/queries" ] && QUERIES_DIR=${QUERIES_DIR=/usr/local/share/clickhouse-test/queries}
[ ! -d "$QUERIES_DIR" ] && [ -d "/usr/share/clickhouse-test/queries" ] && QUERIES_DIR=${QUERIES_DIR=/usr/share/clickhouse-test/queries}
TEST_PORT_RANDOM=${TEST_PORT_RANDOM=1}
if [ "${TEST_PORT_RANDOM}" ]; then
CLICKHOUSE_PORT_BASE=${CLICKHOUSE_PORT_BASE:=$(( ( RANDOM % 50000 ) + 10000 ))}
CLICKHOUSE_PORT_TCP=${CLICKHOUSE_PORT_TCP:=$(($CLICKHOUSE_PORT_BASE + 1))}
CLICKHOUSE_PORT_HTTP=${CLICKHOUSE_PORT_HTTP:=$(($CLICKHOUSE_PORT_BASE + 2))}
CLICKHOUSE_PORT_INTERSERVER=${CLICKHOUSE_PORT_INTERSERVER:=$(($CLICKHOUSE_PORT_BASE + 3))}
CLICKHOUSE_PORT_TCP_SECURE=${CLICKHOUSE_PORT_TCP_SECURE:=$(($CLICKHOUSE_PORT_BASE + 4))}
CLICKHOUSE_PORT_HTTPS=${CLICKHOUSE_PORT_HTTPS:=$(($CLICKHOUSE_PORT_BASE + 5))}
CLICKHOUSE_PORT_ODBC_BRIDGE=${CLICKHOUSE_ODBC_BRIDGE:=$(($CLICKHOUSE_PORT_BASE + 6))}
fi
rm -rf $DATA_DIR || true
mkdir -p $LOG_DIR $DATA_DIR/etc || true
if [ "$DATA_DIR_PATTERN" != "$DATA_DIR" ]; then
cat $CLICKHOUSE_CONFIG | sed -e s!$DATA_DIR_PATTERN!$DATA_DIR! > $DATA_DIR/etc/server-config.xml
export CLICKHOUSE_CONFIG=$DATA_DIR/etc/server-config.xml
cp $CLICKHOUSE_CONFIG_USERS $DATA_DIR/etc
cp -R -L $CLICKHOUSE_CONFIG_USERS_D $DATA_DIR/etc
cat ${CONFIG_SERVER_DIR}/ints_dictionary.xml | sed -e s!9000!$CLICKHOUSE_PORT_TCP! > $DATA_DIR/etc/ints_dictionary.xml
cat ${CONFIG_SERVER_DIR}/strings_dictionary.xml | sed -e s!9000!$CLICKHOUSE_PORT_TCP! > $DATA_DIR/etc/strings_dictionary.xml
cat ${CONFIG_SERVER_DIR}/decimals_dictionary.xml | sed -e s!9000!$CLICKHOUSE_PORT_TCP! > $DATA_DIR/etc/decimals_dictionary.xml
fi
CLICKHOUSE_EXTRACT_CONFIG=${CLICKHOUSE_EXTRACT_CONFIG:="${CLICKHOUSE_EXTRACT} --config=$CLICKHOUSE_CONFIG"}
CLICKHOUSE_LOG=${CLICKHOUSE_LOG:=${LOG_DIR}clickhouse-server.log}
export CLICKHOUSE_PORT_TCP=${CLICKHOUSE_PORT_TCP:=`$CLICKHOUSE_EXTRACT_CONFIG --key=tcp_port || echo 9000`}
export CLICKHOUSE_PORT_HTTP=${CLICKHOUSE_PORT_HTTP:=`$CLICKHOUSE_EXTRACT_CONFIG --key=http_port || echo 8123`}
export CLICKHOUSE_PORT_INTERSERVER=${CLICKHOUSE_PORT_INTERSERVER:=`$CLICKHOUSE_EXTRACT_CONFIG --key=interserver_http_port || echo 9009`}
export CLICKHOUSE_PORT_TCP_SECURE=${CLICKHOUSE_PORT_TCP_SECURE:=`$CLICKHOUSE_EXTRACT_CONFIG --key=tcp_port_secure`}
export CLICKHOUSE_PORT_HTTPS=${CLICKHOUSE_PORT_HTTPS:=`$CLICKHOUSE_EXTRACT_CONFIG --key=https_port`}
export CLICKHOUSE_ODBC_BRIDGE=${CLICKHOUSE_ODBC_BRIDGE:=`$CLICKHOUSE_EXTRACT_CONFIG --key=odbc_bridge.port || echo 9018`}
DHPARAM=`$CLICKHOUSE_EXTRACT_CONFIG --key=openSSL.server.dhParamsFile`
PRIVATEKEY=`$CLICKHOUSE_EXTRACT_CONFIG --key=openSSL.server.privateKeyFile`
CERT=`$CLICKHOUSE_EXTRACT_CONFIG --key=openSSL.server.certificateFile`
# Do not generate in case broken extract-config
[ -n "$DHPARAM" ] && openssl dhparam -out $DHPARAM 256
[ -n "$PRIVATEKEY" ] && [ -n "$CERT" ] && openssl req -subj "/CN=localhost" -new -newkey rsa:2048 -days 365 -nodes -x509 -keyout $PRIVATEKEY -out $CERT
if [ "$TEST_GDB" ] || [ "$GDB" ]; then
echo -e "run \nset pagination off \nset logging file $LOG_DIR/server.gdb.log \nset logging on \nbacktrace \nthread apply all backtrace \nbacktrace \ndetach \nquit " > $DATA_DIR/gdb.cmd
GDB=${GDB:="gdb -x $DATA_DIR/gdb.cmd --args "}
fi
# Start a local clickhouse server which will be used to run tests
# TODO: fix change shard ports:
# --remote_servers.test_shard_localhost_secure.shard.replica.port=$CLICKHOUSE_PORT_TCP_SECURE \
# --remote_servers.test_shard_localhost.shard.replica.port=$CLICKHOUSE_PORT_TCP \
VERSION=`$CLICKHOUSE_CLIENT --version-clean`
# If run from compile dir - use in-place compile binary and headers
[ -n "$BIN_DIR" ] && INTERNAL_COMPILER_PARAMS="--compiler_executable_root=${INTERNAL_COMPILER_BIN_ROOT:=$BUILD_DIR/programs/} --compiler_headers=$BUILD_DIR/programs/clang/headers/$VERSION/ --compiler_headers_root=$BUILD_DIR/programs/clang/headers/$VERSION/"
$GDB $CLICKHOUSE_SERVER --config-file=$CLICKHOUSE_CONFIG --log=$CLICKHOUSE_LOG $TEST_SERVER_PARAMS -- \
--http_port=$CLICKHOUSE_PORT_HTTP \
--tcp_port=$CLICKHOUSE_PORT_TCP \
--https_port=$CLICKHOUSE_PORT_HTTPS \
--tcp_port_secure=$CLICKHOUSE_PORT_TCP_SECURE \
--interserver_http_port=$CLICKHOUSE_PORT_INTERSERVER \
--odbc_bridge.port=$CLICKHOUSE_ODBC_BRIDGE \
$INTERNAL_COMPILER_PARAMS \
$TEST_SERVER_CONFIG_PARAMS \
2>&1 > $LOG_DIR/server.stdout.log &
CH_PID=$!
sleep ${TEST_SERVER_STARTUP_WAIT:=5}
if [ "$GDB" ]; then
# Long symbols read
sleep ${TEST_GDB_SLEEP:=60}
fi
tail -n50 $LOG_DIR/*.log || true
# Define needed stuff to kill test clickhouse server after tests completion
function finish {
kill $CH_PID || true
wait
tail -n 50 $LOG_DIR/*.log || true
if [ "$GDB" ]; then
cat $LOG_DIR/server.gdb.log || true
fi
rm -rf $DATA_DIR
}
trap finish EXIT SIGINT SIGQUIT SIGTERM
# Do tests
if [ -n "$*" ]; then
$*
else
TEST_RUN=${TEST_RUN=1}
TEST_DICT=${TEST_DICT=1}
CLICKHOUSE_CLIENT_QUERY="${CLICKHOUSE_CLIENT} --config ${CLICKHOUSE_CONFIG_CLIENT} --port $CLICKHOUSE_PORT_TCP -m -n -q"
$CLICKHOUSE_CLIENT_QUERY 'SELECT * from system.build_options; SELECT * FROM system.clusters;'
CLICKHOUSE_TEST="env ${TEST_DIR}clickhouse-test --force-color --binary ${BIN_DIR}${CLICKHOUSE_BINARY_NAME} --configclient $CLICKHOUSE_CONFIG_CLIENT --configserver $CLICKHOUSE_CONFIG --tmp $DATA_DIR/tmp --queries $QUERIES_DIR $TEST_OPT0 $TEST_OPT"
if [ "${TEST_RUN_STRESS}" ]; then
# Running test in parallel will fail some results (tests can create/fill/drop same tables)
TEST_NPROC=${TEST_NPROC:=$(( `nproc || sysctl -n hw.ncpu || echo 2` * 2))}
for i in `seq 1 ${TEST_NPROC}`; do
$CLICKHOUSE_TEST --order=random --testname --tmp=$DATA_DIR/tmp/tmp${i} &
done
fi
if [ "${TEST_RUN_PARALLEL}" ]; then
# Running test in parallel will fail some results (tests can create/fill/drop same tables)
TEST_NPROC=${TEST_NPROC:=$(( `nproc || sysctl -n hw.ncpu || echo 2` * 2))}
for i in `seq 1 ${TEST_NPROC}`; do
$CLICKHOUSE_TEST --testname --tmp=$DATA_DIR/tmp/tmp${i} --database=test${i} --parallel=${i}/${TEST_NPROC} &
done
for job in `jobs -p`; do
#echo wait $job
wait $job || let "FAIL+=1"
done
#echo $FAIL
if [ "$FAIL" != "0" ]; then
return $FAIL
fi
else
( [ "$TEST_RUN" ] && $CLICKHOUSE_TEST ) || ${TEST_TRUE:=false}
fi
$CLICKHOUSE_CLIENT_QUERY "SELECT event, value FROM system.events; SELECT metric, value FROM system.metrics; SELECT metric, value FROM system.asynchronous_metrics;"
$CLICKHOUSE_CLIENT_QUERY "SELECT 'Still alive'"
fi

View File

@ -1,17 +0,0 @@
<!-- Config for connecting to test server -->
<yandex>
<tcp_port>59000</tcp_port>
<tcp_port_secure>59440</tcp_port_secure>
<openSSL>
<client>
<loadDefaultCAFile>true</loadDefaultCAFile>
<cacheSessions>true</cacheSessions>
<disableProtocols>sslv2,sslv3</disableProtocols>
<preferServerCiphers>true</preferServerCiphers>
<verificationMode>none</verificationMode>
<invalidCertificateHandler>
<name>AcceptCertificateHandler</name>
</invalidCertificateHandler>
</client>
</openSSL>
</yandex>

View File

@ -0,0 +1,36 @@
<test>
<settings>
<allow_experimental_map_type>1</allow_experimental_map_type>
</settings>
<substitutions>
<substitution>
<name>key_suffix</name>
<values>
<value>''</value>
<value>'-miss'</value>
</values>
</substitution>
</substitutions>
<create_query>
CREATE TABLE column_map_test
ENGINE = MergeTree ORDER BY number
AS
SELECT number, map
FROM
(
SELECT
number,
arrayMap(x -> toString(x), range(100)) AS keys,
arrayMap(x -> toString(x * x), range(100)) AS values,
cast((keys, values), 'Map(String, String)') AS map
FROM numbers(10000)
)
</create_query>
<query>SELECT count() FROM column_map_test WHERE NOT ignore(arrayMap(x -> map[CONCAT(toString(x), {key_suffix})], range(0, 100, 10)))</query>
<drop_query>DROP TABLE IF EXISTS column_map_test</drop_query>
</test>

View File

@ -1,54 +0,0 @@
#!/usr/bin/env perl
package parquet_create_table_columns;
use strict;
no warnings 'experimental';
use feature 'signatures';
use JSON::XS;
#use Data::Dumper;
sub file_read($file) {
open my $f, '<', $file or return;
local $/ = undef;
my $ret = <$f>;
close $f;
return $ret;
}
our $type_parquet_logical_to_clickhouse = {
DECIMAL => 'Decimal128(1)',
TIMESTAMP_MICROS => 'DateTime',
TIMESTAMP_MILLIS => 'DateTime',
};
our $type_parquet_physical_to_clickhouse = {
BOOLEAN => 'UInt8',
INT32 => 'Int32',
INT64 => 'Int64',
FLOAT => 'Float32',
DOUBLE => 'Float64',
BYTE_ARRAY => 'String',
FIXED_LEN_BYTE_ARRAY => 'String', # Maybe FixedString?
INT96 => 'Int64', # TODO!
};
sub columns ($json) {
my @list;
my %uniq;
for my $column (@{$json->{Columns}}) {
#warn Data::Dumper::Dumper $column;
my $name = $column->{'Name'};
my $type = $type_parquet_logical_to_clickhouse->{$column->{'LogicalType'}} || $type_parquet_physical_to_clickhouse->{$column->{'PhysicalType'}};
unless ($type) {
warn "Unknown type [$column->{'PhysicalType'}:$column->{'LogicalType'}] of column [$name]";
}
$type = "Nullable($type)";
$name .= $column->{'Id'} if $uniq{$name}++; # Names can be non-unique
push @list, {name => $name, type => $type};
}
print join ', ', map {"`$_->{name}` $_->{type}"} @list;
}
sub columns_file ($file) {
return columns(JSON::XS::decode_json(file_read($file)));
}
columns_file(shift) unless caller;

View File

@ -13,134 +13,220 @@
=== Try load data from alltypes_plain.snappy.parquet
6 1 0 0 0 0 0 0 04/01/09 0 1238544000
7 0 1 1 1 10 1.1 10.1 04/01/09 1 1238544060
=== Try load data from binary.parquet
\0







\b
\t
\n
=== Try load data from byte_array_decimal.parquet
1.0
2.0
3.0
4.0
5.0
6.0
7.0
8.0
9.0
10.0
11.0
12.0
13.0
14.0
15.0
16.0
17.0
18.0
19.0
20.0
21.0
22.0
23.0
24.0
1.00
2.00
3.00
4.00
5.00
6.00
7.00
8.00
9.00
10.00
11.00
12.00
13.00
14.00
15.00
16.00
17.00
18.00
19.00
20.00
21.00
22.00
23.00
24.00
=== Try load data from datapage_v2.snappy.parquet
Code: 33. DB::Ex---tion: Error while reading Parquet data: IOError: Not yet implemented: Unsupported encoding.: data for INSERT was parsed from stdin
=== Try load data from dict-page-offset-zero.parquet
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
1552
=== Try load data from fixed_length_decimal.parquet
1.00
2.00
3.00
4.00
5.00
6.00
7.00
8.00
9.00
10.00
11.00
12.00
13.00
14.00
15.00
16.00
17.00
18.00
19.00
20.00
21.00
22.00
23.00
24.00
=== Try load data from fixed_length_decimal_1.parquet
1.0
2.0
3.0
4.0
5.0
6.0
7.0
8.0
9.0
10.0
11.0
12.0
13.0
14.0
15.0
16.0
17.0
18.0
19.0
20.0
21.0
22.0
23.0
24.0
1.00
2.00
3.00
4.00
5.00
6.00
7.00
8.00
9.00
10.00
11.00
12.00
13.00
14.00
15.00
16.00
17.00
18.00
19.00
20.00
21.00
22.00
23.00
24.00
=== Try load data from fixed_length_decimal_legacy.parquet
1.0
2.0
3.0
4.0
5.0
6.0
7.0
8.0
9.0
10.0
11.0
12.0
13.0
14.0
15.0
16.0
17.0
18.0
19.0
20.0
21.0
22.0
23.0
24.0
1.00
2.00
3.00
4.00
5.00
6.00
7.00
8.00
9.00
10.00
11.00
12.00
13.00
14.00
15.00
16.00
17.00
18.00
19.00
20.00
21.00
22.00
23.00
24.00
=== Try load data from hadoop_lz4_compressed.parquet
1593604800 abc 42
1593604800 def 7.7
1593604801 abc 42.125
1593604801 def 7.7
=== Try load data from int32_decimal.parquet
1.0
2.0
3.0
4.0
5.0
6.0
7.0
8.0
9.0
10.0
11.0
12.0
13.0
14.0
15.0
16.0
17.0
18.0
19.0
20.0
21.0
22.0
23.0
24.0
1.00
2.00
3.00
4.00
5.00
6.00
7.00
8.00
9.00
10.00
11.00
12.00
13.00
14.00
15.00
16.00
17.00
18.00
19.00
20.00
21.00
22.00
23.00
24.00
=== Try load data from int64_decimal.parquet
1.0
2.0
3.0
4.0
5.0
6.0
7.0
8.0
9.0
10.0
11.0
12.0
13.0
14.0
15.0
16.0
17.0
18.0
19.0
20.0
21.0
22.0
23.0
24.0
1.00
2.00
3.00
4.00
5.00
6.00
7.00
8.00
9.00
10.00
11.00
12.00
13.00
14.00
15.00
16.00
17.00
18.00
19.00
20.00
21.00
22.00
23.00
24.00
=== Try load data from list_columns.parquet
Code: 70. DB::Ex---tion: The type "list" of an input column "int64_list" is not supported for conversion from a Parquet data format: data for INSERT was parsed from stdin
=== Try load data from nation.dict-malformed.parquet
0 ALGERIA 0 haggle. carefully final deposits detect slyly agai
1 ARGENTINA 1 al foxes promise slyly according to the regular accounts. bold requests alon
@ -168,23 +254,25 @@ Code: 33. DB::Ex---tion: Error while reading Parquet data: IOError: Not yet impl
23 UNITED KINGDOM 3 eans boost carefully special requests. accounts are. carefull
24 UNITED STATES 1 y final packages. slow foxes cajole quickly. quickly silent platelets breach ironic accounts. unusual pinto be
=== Try load data from nested_lists.snappy.parquet
Code: 8. DB::Ex---tion: Column "element" is not presented in input data: data for INSERT was parsed from stdin
Code: 70. DB::Ex---tion: The type "list" of an input column "a" is not supported for conversion from a Parquet data format: data for INSERT was parsed from stdin
=== Try load data from nested_maps.snappy.parquet
Code: 33. DB::Ex---tion: Error while reading Parquet data: NotImplemented: Reading lists of structs from Parquet files not yet supported: key_value: list<key_value: struct<key: string not null, value: struct<key_value: list<key_value: struct<key: int32 not null, value: bool not null> not null> not null>> not null> not null: data for INSERT was parsed from stdin
Code: 70. DB::Ex---tion: The type "map" of an input column "a" is not supported for conversion from a Parquet data format: data for INSERT was parsed from stdin
=== Try load data from non_hadoop_lz4_compressed.parquet
1593604800 abc 42
1593604800 def 7.7
1593604801 abc 42.125
1593604801 def 7.7
=== Try load data from nonnullable.impala.parquet
Code: 8. DB::Ex---tion: Column "element" is not presented in input data: data for INSERT was parsed from stdin
../contrib/arrow/cpp/src/arrow/array/array_nested.cc:192: Check failed: (self->list_type_->value_type()->id()) == (data->child_data[0]->type->id())
=== Try load data from nullable.impala.parquet
Code: 8. DB::Ex---tion: Column "element" is not presented in input data: data for INSERT was parsed from stdin
../contrib/arrow/cpp/src/arrow/array/array_nested.cc:192: Check failed: (self->list_type_->value_type()->id()) == (data->child_data[0]->type->id())
=== Try load data from nulls.snappy.parquet
Code: 8. DB::Ex---tion: Column "b_c_int" is not presented in input data: data for INSERT was parsed from stdin
=== Try load data from repeated_no_annotation.parquet
Code: 8. DB::Ex---tion: Column "number" is not presented in input data: data for INSERT was parsed from stdin
Code: 70. DB::Ex---tion: The type "struct" of an input column "b_struct" is not supported for conversion from a Parquet data format: data for INSERT was parsed from stdin
=== Try load data from single_nan.parquet
\N
=== Try load data from userdata1.parquet
1454486129 1 Amanda Jordan ajordan0@com.com Female 1.197.201.2 6759521864920116 Indonesia 3/8/1971 49756.53 Internal Auditor 1E+02
1454519043 2 Albert Freeman afreeman1@is.gd Male 218.111.175.34 Canada 1/16/1968 150280.17 Accountant IV

View File

@ -5,8 +5,6 @@
# TODO: Add more files.
#
# To regenerate data install perl JSON::XS module: sudo apt install libjson-xs-perl
# Also 5 sample files from
# wget https://github.com/Teradata/kylo/raw/master/samples/sample-data/parquet/userdata1.parquet
# ...
@ -19,38 +17,46 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
. "$CUR_DIR"/../shell_config.sh
CB_DIR=$(dirname "$CLICKHOUSE_CLIENT_BINARY")
[ "$CB_DIR" == "." ] && ROOT_DIR=$CUR_DIR/../../../..
[ "$CB_DIR" != "." ] && BUILD_DIR=$CB_DIR/../..
[ -z "$ROOT_DIR" ] && ROOT_DIR=$CB_DIR/../../..
[ "$CB_DIR" == "." ] && ROOT_DIR=$CUR_DIR/../../..
[ -z "$ROOT_DIR" ] && ROOT_DIR=$CB_DIR/../..
DATA_DIR=$CUR_DIR/data_parquet
[ -n "$ROOT_DIR" ] && [ -z "$PARQUET_READER" ] && PARQUET_READER="$ROOT_DIR"/contrib/arrow/cpp/build/release/parquet-reader
# To update:
# cp $ROOT_DIR/contrib/arrow/cpp/submodules/parquet-testing/data/*.parquet $ROOT_DIR/contrib/arrow/python/pyarrow/tests/data/parquet/*.parquet $CUR_DIR/data_parquet/
# BUG! nulls.snappy.parquet - parquet-reader shows wrong structure. Actual structure is {"type":"struct","fields":[{"name":"b_struct","type":{"type":"struct","fields":[{"name":"b_c_int","type":"integer","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]}
# Why repeated_no_annotation.parquet fails is unclear; the ClickHouse Parquet reader
# doesn't support such complex types, so I didn't dig into the issue.
# There is a failure due to parsing nested arrays or nested maps with NULLs:
# ../contrib/arrow/cpp/src/arrow/array/array_nested.cc:192: Check failed: (self->list_type_->value_type()->id()) == (data->child_data[0]->type->id())
for NAME in $(find "$DATA_DIR"/*.parquet -print0 | xargs -0 -n 1 basename | sort); do
# Strange behaviour for repeated_no_annotation.parquet around __builtin_expect, so this file was disabled:
# debug:
# ../contrib/arrow/cpp/src/arrow/array/array_nested.cc:193: Check failed: self->list_type_->value_type()->Equals(data->child_data[0]->type)
# release:
# Code: 349. DB::Ex---tion: Can not insert NULL data into non-nullable column "phoneNumbers": data for INSERT was parsed from stdin
for NAME in $(find "$DATA_DIR"/*.parquet -print0 | xargs -0 -n 1 basename | LC_ALL=C sort); do
echo === Try load data from "$NAME"
JSON=$DATA_DIR/$NAME.json
COLUMNS_FILE=$DATA_DIR/$NAME.columns
# If you want to change or add a .parquet file, run: rm data_parquet/*.json data_parquet/*.columns
[ -n "$BUILD_DIR" ] && [ ! -s "$COLUMNS_FILE" ] && [ ! -s "$JSON" ] && "$BUILD_DIR"/contrib/arrow-cmake/parquet-reader --json "$DATA_DIR"/"$NAME" > "$JSON"
[ -n "$BUILD_DIR" ] && [ ! -s "$COLUMNS_FILE" ] && "$CUR_DIR"/00900_parquet_create_table_columns.pl "$JSON" > "$COLUMNS_FILE"
[ -n "$PARQUET_READER" ] && [ ! -s "$COLUMNS_FILE" ] && [ ! -s "$JSON" ] && "$PARQUET_READER" --json "$DATA_DIR"/"$NAME" > "$JSON"
[ ! -s "$COLUMNS_FILE" ] && "$CUR_DIR"/helpers/00900_parquet_create_table_columns.py "$JSON" > "$COLUMNS_FILE"
# Debug only:
# [ -n "$BUILD_DIR" ] && $BUILD_DIR/contrib/arrow-cmake/parquet-reader $DATA_DIR/$NAME > $DATA_DIR/$NAME.dump
# [ -n "$PARQUET_READER" ] && $PARQUET_READER $DATA_DIR/$NAME > $DATA_DIR/$NAME.dump
#COLUMNS=`$CUR_DIR/00900_parquet_create_table_columns.pl $JSON` 2>&1 || continue
# COLUMNS=`$CUR_DIR/00900_parquet_create_table_columns.py $JSON` 2>&1 || continue
COLUMNS=$(cat "$COLUMNS_FILE") || continue
${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load"
${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load ($COLUMNS) ENGINE = Memory"
# Some files are broken, an exception is ok.
# Some files contain unsupported data structures, an exception is ok.
cat "$DATA_DIR"/"$NAME" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO parquet_load FORMAT Parquet" 2>&1 | sed 's/Exception/Ex---tion/'
${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load LIMIT 100"

View File

@@ -103,8 +103,10 @@ done
sleep 1
counter=0
have_undone_mutations_query="select * from system.mutations where table like 'concurrent_mutate_mt_%' and is_done=0 and database='${CLICKHOUSE_DATABASE}'"
have_all_tables_query="select count() FROM system.tables WHERE name LIKE 'concurrent_mutate_mt_%' and database='${CLICKHOUSE_DATABASE}'"
while [[ $($CLICKHOUSE_CLIENT --query "select * from system.mutations where table like 'concurrent_mutate_mt_%' and is_done=0" 2>&1) ]]; do
while true ; do
if [ "$counter" -gt 120 ]
then
break
@@ -113,7 +115,13 @@ while [[ $($CLICKHOUSE_CLIENT --query "select * from system.mutations where tabl
for i in $(seq $REPLICAS); do
$CLICKHOUSE_CLIENT --query "ATTACH TABLE concurrent_mutate_mt_$i" 2> /dev/null
done
counter=$(($counter + 1))
# no active mutations and all tables attached
if [[ -z $($CLICKHOUSE_CLIENT --query "$have_undone_mutations_query" 2>&1) && $($CLICKHOUSE_CLIENT --query "$have_all_tables_query" 2>&1) == "$REPLICAS" ]]; then
break
fi
done
for i in $(seq $REPLICAS); do
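The reworked wait above is a bounded polling loop: keep re-attaching the replicas, and stop either once there are no undone mutations and every table is attached, or after 120 one-second iterations. The same pattern can be factored into a small helper; this is only a sketch, and wait_until / mutations_done are invented names, not part of the test:

# Generic bounded polling helper (sketch).
wait_until() {
    local attempts=$1
    shift
    local i
    for ((i = 0; i < attempts; i++)); do
        if "$@"; then
            return 0          # predicate succeeded
        fi
        sleep 1
    done
    return 1                  # gave up after $attempts tries
}

# Hypothetical predicate: the query returns nothing once all mutations are done.
mutations_done() {
    [[ -z $($CLICKHOUSE_CLIENT --query "$have_undone_mutations_query" 2>&1) ]]
}

wait_until 120 mutations_done || echo "timed out waiting for mutations"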

View File

@@ -2,4 +2,6 @@ Instruction check fail. The CPU does not support SSSE3 instruction set.
Instruction check fail. The CPU does not support SSE4.1 instruction set.
Instruction check fail. The CPU does not support SSE4.2 instruction set.
Instruction check fail. The CPU does not support POPCNT instruction set.
MADV_DONTNEED does not zeroed page. jemalloc will be broken
<jemalloc>: MADV_DONTNEED does not work (memset will be used instead)
<jemalloc>: (This is the expected behaviour if you are running under QEMU)
1

View File

@@ -13,7 +13,7 @@ ${CLICKHOUSE_CLIENT} --format Null -n <<<'SELECT sleepEachRow(1) FROM numbers(5)
yes 'SELECT 1' 2>/dev/null | {
head -n1000
} | {
xargs -i ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&max_memory_usage_for_user=$((1<<30))" -d '{}'
xargs -I{} ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&max_memory_usage_for_user=$((1<<30))" -d '{}'
} | grep -x -c 1
wait
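The only change here is the xargs placeholder flag: GNU xargs documents lowercase -i as deprecated, while -I{} is the portable spelling and behaves the same way, substituting each input line into the command. A quick illustration (generic, not tied to the test):

# -I{} replaces every occurrence of {} with the current input line.
printf '1\n2\n3\n' | xargs -I{} echo "payload={}"
# payload=1
# payload=2
# payload=3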

View File

@@ -20,9 +20,7 @@ attempt to parse with input_format_allow_errors_ratio=0.3
1 0
2 0
3 0
4 0
5 0
6 0
Return code: 0
******************
attempt to parse with input_format_allow_errors_num=1
@@ -34,7 +32,5 @@ attempt to parse with input_format_allow_errors_num=2
1 0
2 0
3 0
4 0
5 0
6 0
Return code: 0

View File

@@ -11,34 +11,34 @@ cat "$SAMPLE_FILE"
echo '******************'
echo 'attempt to parse w/o flags'
cat "$SAMPLE_FILE" | clickhouse-local --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' 2>"$STD_ERROR_CAPTURED"
cat "$SAMPLE_FILE" | ${CLICKHOUSE_LOCAL} --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' 2>"$STD_ERROR_CAPTURED"
echo "Return code: $?"
expected_error_message='is not like Int64'
cat "$STD_ERROR_CAPTURED" | grep -q "$expected_error_message" && echo "OK: stderr contains a message '$expected_error_message'" || echo "FAILED: Error message is wrong"
echo '******************'
echo 'attempt to parse with input_format_allow_errors_ratio=0.1'
cat "$SAMPLE_FILE" | clickhouse-local --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_ratio=0.1 2>"$STD_ERROR_CAPTURED"
cat "$SAMPLE_FILE" | ${CLICKHOUSE_LOCAL} --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_ratio=0.1 2>"$STD_ERROR_CAPTURED"
echo "Return code: $?"
expected_error_message='Already have 1 errors out of 5 rows, which is 0.2'
cat "$STD_ERROR_CAPTURED" | grep -q "$expected_error_message" && echo "OK: stderr contains a message '$expected_error_message'" || echo "FAILED: Error message is wrong"
echo '******************'
echo 'attempt to parse with input_format_allow_errors_ratio=0.3'
cat "$SAMPLE_FILE" | clickhouse-local --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_ratio=0.3 2>"$STD_ERROR_CAPTURED"
cat "$SAMPLE_FILE" | ${CLICKHOUSE_LOCAL} --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_ratio=0.3 2>"$STD_ERROR_CAPTURED"
echo "Return code: $?"
cat "$STD_ERROR_CAPTURED"
echo '******************'
echo 'attempt to parse with input_format_allow_errors_num=1'
cat "$SAMPLE_FILE" | clickhouse-local --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_num=1 2>"$STD_ERROR_CAPTURED"
cat "$SAMPLE_FILE" | ${CLICKHOUSE_LOCAL} --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_num=1 2>"$STD_ERROR_CAPTURED"
echo "Return code: $?"
expected_error_message='Already have 2 errors out of 7 rows'
cat "$STD_ERROR_CAPTURED" | grep -q "$expected_error_message" && echo "OK: stderr contains a message '$expected_error_message'" || echo "FAILED: Error message is wrong"
echo '******************'
echo 'attempt to parse with input_format_allow_errors_num=2'
cat "$SAMPLE_FILE" | clickhouse-local --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_num=2 2>"$STD_ERROR_CAPTURED"
cat "$SAMPLE_FILE" | ${CLICKHOUSE_LOCAL} --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_num=2 2>"$STD_ERROR_CAPTURED"
echo "Return code: $?"
cat "$STD_ERROR_CAPTURED"

View File

@@ -5,7 +5,7 @@ CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none
. "$CURDIR"/../shell_config.sh
function test()
function test_func()
{
ENGINE=$1
MAX_MEM=4096
@@ -32,9 +32,9 @@ function test()
$CLICKHOUSE_CLIENT --query "DROP TABLE log";
}
test TinyLog | grep -v -P '^(Memory limit|0\t0|File not found|[1-9]000000\t)'
test StripeLog | grep -v -P '^(Memory limit|0\t0|File not found|[1-9]000000\t)'
test Log | grep -v -P '^(Memory limit|0\t0|File not found|[1-9]000000\t)'
test_func TinyLog | grep -v -P '^(Memory limit|0\t0|File not found|[1-9]000000\t)'
test_func StripeLog | grep -v -P '^(Memory limit|0\t0|File not found|[1-9]000000\t)'
test_func Log | grep -v -P '^(Memory limit|0\t0|File not found|[1-9]000000\t)'
rm "${CLICKHOUSE_TMP}/insert_result"
rm "${CLICKHOUSE_TMP}/select_result"

View File

@@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# Regression test for a UAF in ThreadPool.
# (Triggered under TSAN)
for i in {1..10}; do
for _ in {1..10}; do
${CLICKHOUSE_LOCAL} -q 'select * from numbers_mt(100000000) settings max_threads=100 FORMAT Null'
# Binding to a specific CPU is not required, but it makes the test more reliable.
taskset --cpu-list 0 ${CLICKHOUSE_LOCAL} -q 'select * from numbers_mt(100000000) settings max_threads=100 FORMAT Null'

Some files were not shown because too many files have changed in this diff.