diff --git a/.github/workflows/anchore-analysis.yml b/.github/workflows/anchore-analysis.yml index 01cd0e271c8..50eaf45e2ef 100644 --- a/.github/workflows/anchore-analysis.yml +++ b/.github/workflows/anchore-analysis.yml @@ -1,8 +1,8 @@ # This workflow checks out code, performs an Anchore container image # vulnerability and compliance scan, and integrates the results with -# GitHub Advanced Security code scanning feature. For more information on +# GitHub Advanced Security code scanning feature. For more information on # the Anchore scan action usage and parameters, see -# https://github.com/anchore/scan-action. For more information on +# https://github.com/anchore/scan-action. For more information on # Anchore container image scanning in general, see # https://docs.anchore.com. @@ -28,18 +28,12 @@ jobs: perl -pi -e 's|=\$version||g' Dockerfile docker build . --file Dockerfile --tag localbuild/testimage:latest - name: Run the local Anchore scan action itself with GitHub Advanced Security code scanning integration enabled - uses: anchore/scan-action@master + uses: anchore/scan-action@v2 + id: scan with: - image-reference: "localbuild/testimage:latest" - dockerfile-path: "docker/server/Dockerfile" + image: "localbuild/testimage:latest" acs-report-enable: true - fail-build: true - - name: Upload artifact - uses: actions/upload-artifact@v1.0.0 - with: - name: AnchoreReports - path: ./anchore-reports/ - name: Upload Anchore Scan Report uses: github/codeql-action/upload-sarif@v1 with: - sarif_file: results.sarif + sarif_file: ${{ steps.scan.outputs.sarif }} diff --git a/.gitmodules b/.gitmodules index c7710529cd8..03cd3ef94ae 100644 --- a/.gitmodules +++ b/.gitmodules @@ -53,7 +53,8 @@ url = https://github.com/ClickHouse-Extras/Turbo-Base64.git [submodule "contrib/arrow"] path = contrib/arrow - url = https://github.com/apache/arrow + url = https://github.com/ClickHouse-Extras/arrow + branch = clickhouse-arrow-2.0.0 [submodule "contrib/thrift"] path = contrib/thrift url = https://github.com/apache/thrift.git diff --git a/base/common/ReplxxLineReader.cpp b/base/common/ReplxxLineReader.cpp index 111428447af..28c7990c353 100644 --- a/base/common/ReplxxLineReader.cpp +++ b/base/common/ReplxxLineReader.cpp @@ -6,6 +6,12 @@ #include #include #include +#include +#include +#include +#include +#include +#include namespace { @@ -83,6 +89,8 @@ ReplxxLineReader::ReplxxLineReader( /// it also binded to M-p/M-n). 
rx.bind_key(Replxx::KEY::meta('N'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::COMPLETE_NEXT, code); }); rx.bind_key(Replxx::KEY::meta('P'), [this](char32_t code) { return rx.invoke(Replxx::ACTION::COMPLETE_PREVIOUS, code); }); + + rx.bind_key(Replxx::KEY::meta('E'), [this](char32_t) { openEditor(); return Replxx::ACTION_RESULT::CONTINUE; }); } ReplxxLineReader::~ReplxxLineReader() @@ -127,7 +135,114 @@ void ReplxxLineReader::addToHistory(const String & line) rx.print("Unlock of history file failed: %s\n", errnoToString(errno).c_str()); } +int ReplxxLineReader::execute(const std::string & command) +{ + std::vector argv0("sh", &("sh"[3])); + std::vector argv1("-c", &("-c"[3])); + std::vector argv2(command.data(), command.data() + command.size() + 1); + + const char * filename = "/bin/sh"; + char * const argv[] = {argv0.data(), argv1.data(), argv2.data(), nullptr}; + + static void * real_vfork = dlsym(RTLD_DEFAULT, "vfork"); + if (!real_vfork) + { + rx.print("Cannot find symbol vfork in myself: %s\n", errnoToString(errno).c_str()); + return -1; + } + + pid_t pid = reinterpret_cast(real_vfork)(); + + if (-1 == pid) + { + rx.print("Cannot vfork: %s\n", errnoToString(errno).c_str()); + return -1; + } + + if (0 == pid) + { + sigset_t mask; + sigemptyset(&mask); + sigprocmask(0, nullptr, &mask); + sigprocmask(SIG_UNBLOCK, &mask, nullptr); + + execv(filename, argv); + _exit(-1); + } + + int status = 0; + if (-1 == waitpid(pid, &status, 0)) + { + rx.print("Cannot waitpid: %s\n", errnoToString(errno).c_str()); + return -1; + } + return status; +} + +void ReplxxLineReader::openEditor() +{ + char filename[] = "clickhouse_replxx_XXXXXX.sql"; + int fd = ::mkstemps(filename, 4); + if (-1 == fd) + { + rx.print("Cannot create temporary file to edit query: %s\n", errnoToString(errno).c_str()); + return; + } + + String editor = std::getenv("EDITOR"); + if (editor.empty()) + editor = "vim"; + + replxx::Replxx::State state(rx.get_state()); + + size_t bytes_written = 0; + const char * begin = state.text(); + size_t offset = strlen(state.text()); + while (bytes_written != offset) + { + ssize_t res = ::write(fd, begin + bytes_written, offset - bytes_written); + if ((-1 == res || 0 == res) && errno != EINTR) + { + rx.print("Cannot write to temporary query file %s: %s\n", filename, errnoToString(errno).c_str()); + return; + } + bytes_written += res; + } + + if (0 != ::close(fd)) + { + rx.print("Cannot close temporary query file %s: %s\n", filename, errnoToString(errno).c_str()); + return; + } + + if (0 == execute(editor + " " + filename)) + { + try + { + std::ifstream t(filename); + std::string str; + t.seekg(0, std::ios::end); + str.reserve(t.tellg()); + t.seekg(0, std::ios::beg); + str.assign((std::istreambuf_iterator(t)), std::istreambuf_iterator()); + rx.set_state(replxx::Replxx::State(str.c_str(), str.size())); + } + catch (...) 
+ { + rx.print("Cannot read from temporary query file %s: %s\n", filename, errnoToString(errno).c_str()); + return; + } + } + + if (bracketed_paste_enabled) + enableBracketedPaste(); + + if (0 != ::unlink(filename)) + rx.print("Cannot remove temporary query file %s: %s\n", filename, errnoToString(errno).c_str()); +} + void ReplxxLineReader::enableBracketedPaste() { + bracketed_paste_enabled = true; rx.enable_bracketed_paste(); }; diff --git a/base/common/ReplxxLineReader.h b/base/common/ReplxxLineReader.h index 1fbfd53457b..9aa32a1e26d 100644 --- a/base/common/ReplxxLineReader.h +++ b/base/common/ReplxxLineReader.h @@ -22,10 +22,13 @@ public: private: InputStatus readOneLine(const String & prompt) override; void addToHistory(const String & line) override; + int execute(const std::string & command); + void openEditor(); replxx::Replxx rx; replxx::Replxx::highlighter_callback_t highlighter; // used to call flock() to synchronize multiple clients using same history file int history_file_fd = -1; + bool bracketed_paste_enabled = false; }; diff --git a/base/common/defines.h b/base/common/defines.h index 6dc61155649..39df4698b88 100644 --- a/base/common/defines.h +++ b/base/common/defines.h @@ -61,6 +61,20 @@ # endif #endif +#if defined(ADDRESS_SANITIZER) +# define BOOST_USE_ASAN 1 +# define BOOST_USE_UCONTEXT 1 +#endif + +#if defined(THREAD_SANITIZER) +# define BOOST_USE_TSAN 1 +# define BOOST_USE_UCONTEXT 1 +#endif + +#if defined(ARCADIA_BUILD) && defined(BOOST_USE_UCONTEXT) +# undef BOOST_USE_UCONTEXT +#endif + /// TODO: Strangely enough, there is no way to detect UB sanitizer. /// Explicitly allow undefined behaviour for certain functions. Use it as a function attribute. diff --git a/base/common/wide_integer_impl.h b/base/common/wide_integer_impl.h index 9a80660ea85..a34e757eaa5 100644 --- a/base/common/wide_integer_impl.h +++ b/base/common/wide_integer_impl.h @@ -5,9 +5,11 @@ /// (See at http://www.boost.org/LICENSE_1_0.txt) #include "throwError.h" +#include #include -#include #include +#include + namespace wide { @@ -239,6 +241,14 @@ struct integer::_impl template constexpr static void set_multiplier(integer & self, T t) noexcept { constexpr uint64_t max_int = std::numeric_limits::max(); + + /// Implementation-specific behaviour on overflow (if we don't check here, a stack overflow will be triggered in bigint_cast).
+ if (!std::isfinite(t)) + { + self = 0; + return; + } + const T alpha = t / max_int; if (alpha <= max_int) diff --git a/base/daemon/BaseDaemon.cpp b/base/daemon/BaseDaemon.cpp index 331f9da56dd..f25bcdb91e1 100644 --- a/base/daemon/BaseDaemon.cpp +++ b/base/daemon/BaseDaemon.cpp @@ -4,6 +4,11 @@ #include #include #include +#include +#include +#if defined(__linux__) + #include +#endif #include #include #include @@ -12,7 +17,6 @@ #include #include -#include #include #include #include @@ -22,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -470,7 +473,6 @@ BaseDaemon::~BaseDaemon() void BaseDaemon::terminate() { - getTaskManager().cancelAll(); if (::raise(SIGTERM) != 0) throw Poco::SystemException("cannot terminate process"); } @@ -478,22 +480,11 @@ void BaseDaemon::terminate() void BaseDaemon::kill() { dumpCoverageReportIfPossible(); - pid.reset(); + pid_file.reset(); if (::raise(SIGKILL) != 0) throw Poco::SystemException("cannot kill process"); } -void BaseDaemon::sleep(double seconds) -{ - wakeup_event.reset(); - wakeup_event.tryWait(seconds * 1000); -} - -void BaseDaemon::wakeup() -{ - wakeup_event.set(); -} - std::string BaseDaemon::getDefaultCorePath() const { return "/opt/cores/"; @@ -564,7 +555,6 @@ void BaseDaemon::initialize(Application & self) { closeFDs(); - task_manager = std::make_unique(); ServerApplication::initialize(self); /// now highest priority (lowest value) is PRIO_APPLICATION = -100, we want higher! @@ -648,10 +638,6 @@ void BaseDaemon::initialize(Application & self) throw Poco::OpenFileException("Cannot attach stdout to " + stdout_path); } - /// Create pid file. - if (config().has("pid")) - pid.emplace(config().getString("pid"), DB::StatusFile::write_pid); - /// Change path for logging. if (!log_path.empty()) { @@ -667,9 +653,17 @@ void BaseDaemon::initialize(Application & self) throw Poco::Exception("Cannot change directory to /tmp"); } - // sensitive data masking rules are not used here + /// sensitive data masking rules are not used here buildLoggers(config(), logger(), self.commandName()); + /// After initialized loggers but before initialized signal handling. + if (should_setup_watchdog) + setupWatchdog(); + + /// Create pid file. + if (config().has("pid")) + pid_file.emplace(config().getString("pid"), DB::StatusFile::write_pid); + if (is_daemon) { /** Change working directory to the directory to write core dumps. 
@@ -704,54 +698,71 @@ void BaseDaemon::initialize(Application & self) } +static void addSignalHandler(const std::vector & signals, signal_function handler, std::vector * out_handled_signals) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO; + +#if defined(OS_DARWIN) + sigemptyset(&sa.sa_mask); + for (auto signal : signals) + sigaddset(&sa.sa_mask, signal); +#else + if (sigemptyset(&sa.sa_mask)) + throw Poco::Exception("Cannot set signal handler."); + + for (auto signal : signals) + if (sigaddset(&sa.sa_mask, signal)) + throw Poco::Exception("Cannot set signal handler."); +#endif + + for (auto signal : signals) + if (sigaction(signal, &sa, nullptr)) + throw Poco::Exception("Cannot set signal handler."); + + if (out_handled_signals) + std::copy(signals.begin(), signals.end(), std::back_inserter(*out_handled_signals)); +}; + + +static void blockSignals(const std::vector & signals) +{ + sigset_t sig_set; + +#if defined(OS_DARWIN) + sigemptyset(&sig_set); + for (auto signal : signals) + sigaddset(&sig_set, signal); +#else + if (sigemptyset(&sig_set)) + throw Poco::Exception("Cannot block signal."); + + for (auto signal : signals) + if (sigaddset(&sig_set, signal)) + throw Poco::Exception("Cannot block signal."); +#endif + + if (pthread_sigmask(SIG_BLOCK, &sig_set, nullptr)) + throw Poco::Exception("Cannot block signal."); +}; + + void BaseDaemon::initializeTerminationAndSignalProcessing() { SentryWriter::initialize(config()); std::set_terminate(terminate_handler); /// We want to avoid SIGPIPE when working with sockets and pipes, and just handle return value/errno instead. - { - sigset_t sig_set; - if (sigemptyset(&sig_set) || sigaddset(&sig_set, SIGPIPE) || pthread_sigmask(SIG_BLOCK, &sig_set, nullptr)) - throw Poco::Exception("Cannot block signal."); - } + blockSignals({SIGPIPE}); /// Setup signal handlers. - auto add_signal_handler = - [this](const std::vector & signals, signal_function handler) - { - struct sigaction sa; - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = handler; - sa.sa_flags = SA_SIGINFO; - - { -#if defined(OS_DARWIN) - sigemptyset(&sa.sa_mask); - for (auto signal : signals) - sigaddset(&sa.sa_mask, signal); -#else - if (sigemptyset(&sa.sa_mask)) - throw Poco::Exception("Cannot set signal handler."); - - for (auto signal : signals) - if (sigaddset(&sa.sa_mask, signal)) - throw Poco::Exception("Cannot set signal handler."); -#endif - - for (auto signal : signals) - if (sigaction(signal, &sa, nullptr)) - throw Poco::Exception("Cannot set signal handler."); - - std::copy(signals.begin(), signals.end(), std::back_inserter(handled_signals)); - } - }; - /// SIGTSTP is added for debugging purposes. To output a stack trace of any running thread at anytime. 
- add_signal_handler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP}, signalHandler); - add_signal_handler({SIGHUP, SIGUSR1}, closeLogsSignalHandler); - add_signal_handler({SIGINT, SIGQUIT, SIGTERM}, terminateRequestedSignalHandler); + addSignalHandler({SIGABRT, SIGSEGV, SIGILL, SIGBUS, SIGSYS, SIGFPE, SIGPIPE, SIGTSTP}, signalHandler, &handled_signals); + addSignalHandler({SIGHUP, SIGUSR1}, closeLogsSignalHandler, &handled_signals); + addSignalHandler({SIGINT, SIGQUIT, SIGTERM}, terminateRequestedSignalHandler, &handled_signals); #if defined(SANITIZER) __sanitizer_set_death_callback(sanitizerDeathCallback); @@ -786,23 +797,6 @@ void BaseDaemon::logRevision() const + ", PID " + std::to_string(getpid())); } -/// Makes server shutdown if at least one Poco::Task have failed. -void BaseDaemon::exitOnTaskError() -{ - Poco::Observer obs(*this, &BaseDaemon::handleNotification); - getTaskManager().addObserver(obs); -} - -/// Used for exitOnTaskError() -void BaseDaemon::handleNotification(Poco::TaskFailedNotification *_tfn) -{ - task_failed = true; - Poco::AutoPtr fn(_tfn); - Poco::Logger * lg = &(logger()); - LOG_ERROR(lg, "Task '{}' failed. Daemon is shutting down. Reason - {}", fn->task()->name(), fn->reason().displayText()); - ServerApplication::terminate(); -} - void BaseDaemon::defineOptions(Poco::Util::OptionSet & new_options) { new_options.addOption( @@ -863,13 +857,144 @@ void BaseDaemon::onInterruptSignals(int signal_id) if (sigint_signals_counter >= 2) { LOG_INFO(&logger(), "Received second signal Interrupt. Immediately terminate."); - kill(); + call_default_signal_handler(signal_id); + /// If the above did not help. + _exit(128 + signal_id); } } void BaseDaemon::waitForTerminationRequest() { + /// NOTE: as we already process signals via pipe, we don't have to block them with sigprocmask in threads std::unique_lock lock(signal_handler_mutex); signal_event.wait(lock, [this](){ return terminate_signals_counter > 0; }); } + + +void BaseDaemon::shouldSetupWatchdog(char * argv0_) +{ + should_setup_watchdog = true; + argv0 = argv0_; +} + + +void BaseDaemon::setupWatchdog() +{ + /// Initialize in advance to avoid double initialization in forked processes. + DateLUT::instance(); + + std::string original_process_name; + if (argv0) + original_process_name = argv0; + + while (true) + { + static pid_t pid = -1; + pid = fork(); + + if (-1 == pid) + throw Poco::Exception("Cannot fork"); + + if (0 == pid) + { + logger().information("Forked a child process to watch"); +#if defined(__linux__) + if (0 != prctl(PR_SET_PDEATHSIG, SIGKILL)) + logger().warning("Cannot do prctl to ask termination with parent."); +#endif + return; + } + + /// Change short thread name and process name. + setThreadName("clckhouse-watch"); /// 15 characters + + if (argv0) + { + const char * new_process_name = "clickhouse-watchdog"; + memset(argv0, 0, original_process_name.size()); + memcpy(argv0, new_process_name, std::min(strlen(new_process_name), original_process_name.size())); + } + + logger().information(fmt::format("Will watch for the process with pid {}", pid)); + + /// Forward signals to the child process. + addSignalHandler( + {SIGHUP, SIGUSR1, SIGINT, SIGQUIT, SIGTERM}, + [](int sig, siginfo_t *, void *) + { + /// Forward all signals except INT as it can be sent by the terminal to the process group when the user presses Ctrl+C, + /// and we process double delivery of this signal as immediate termination.
+ if (sig == SIGINT) + return; + + const char * error_message = "Cannot forward signal to the child process.\n"; + if (0 != ::kill(pid, sig)) + { + auto res = write(STDERR_FILENO, error_message, strlen(error_message)); + (void)res; + } + }, + nullptr); + + int status = 0; + do + { + if (-1 != waitpid(pid, &status, WUNTRACED | WCONTINUED) || errno == ECHILD) + { + if (WIFSTOPPED(status)) + logger().warning(fmt::format("Child process was stopped by signal {}.", WSTOPSIG(status))); + else if (WIFCONTINUED(status)) + logger().warning(fmt::format("Child process was continued.")); + else + break; + } + else if (errno != EINTR) + throw Poco::Exception("Cannot waitpid, errno: " + std::string(strerror(errno))); + } while (true); + + if (errno == ECHILD) + { + logger().information("Child process no longer exists."); + _exit(status); + } + + if (WIFEXITED(status)) + { + logger().information(fmt::format("Child process exited normally with code {}.", WEXITSTATUS(status))); + _exit(status); + } + + if (WIFSIGNALED(status)) + { + int sig = WTERMSIG(status); + + if (sig == SIGKILL) + { + logger().fatal(fmt::format("Child process was terminated by signal {} (KILL)." + " If it is not done by 'forcestop' command or manually," + " the possible cause is OOM Killer (see 'dmesg' and look at the '/var/log/kern.log' for the details).", sig)); + } + else + { + logger().fatal(fmt::format("Child process was terminated by signal {}.", sig)); + + if (sig == SIGINT || sig == SIGTERM || sig == SIGQUIT) + _exit(status); + } + } + else + { + logger().fatal("Child process was not exited normally by unknown reason."); + } + + /// Automatic restart is not enabled but you can play with it. +#if 1 + _exit(status); +#else + logger().information("Will restart."); + if (argv0) + memcpy(argv0, original_process_name.c_str(), original_process_name.size()); +#endif + } +} diff --git a/base/daemon/BaseDaemon.h b/base/daemon/BaseDaemon.h index f4d3f3dfe98..090d4997606 100644 --- a/base/daemon/BaseDaemon.h +++ b/base/daemon/BaseDaemon.h @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -26,9 +25,6 @@ #include -namespace Poco { class TaskManager; } - - /// \brief Base class for applications that can run as daemons. /// /// \code @@ -52,31 +48,26 @@ public: BaseDaemon(); ~BaseDaemon() override; - /// Загружает конфигурацию и "строит" логгеры на запись в файлы + /// Load configuration, prepare loggers, etc. void initialize(Poco::Util::Application &) override; - /// Читает конфигурацию void reloadConfiguration(); - /// Определяет параметр командной строки + /// Process command line parameters void defineOptions(Poco::Util::OptionSet & new_options) override; - /// Заставляет демон завершаться, если хотя бы одна задача завершилась неудачно - void exitOnTaskError(); + /// Graceful shutdown + static void terminate(); - /// Завершение демона ("мягкое") - void terminate(); - - /// Завершение демона ("жёсткое") + /// Forceful shutdown void kill(); - /// Получен ли сигнал на завершение? + /// Cancellation request has been received. 
bool isCancelled() const { return is_cancelled; } - /// Получение ссылки на экземпляр демона static BaseDaemon & instance() { return dynamic_cast(Poco::Util::Application::instance()); @@ -85,12 +76,6 @@ public: /// return none if daemon doesn't exist, reference to the daemon otherwise static std::optional> tryGetInstance() { return tryGetInstance(); } - /// Спит заданное количество секунд или до события wakeup - void sleep(double seconds); - - /// Разбудить - void wakeup(); - /// В Graphite компоненты пути(папки) разделяются точкой. /// У нас принят путь формата root_path.hostname_yandex_ru.key /// root_path по умолчанию one_min @@ -131,24 +116,23 @@ public: /// also doesn't close global internal pipes for signal handling static void closeFDs(); + /// If this method is called after initialization and before run, + /// will fork child process and setup watchdog that will print diagnostic info, if the child terminates. + /// argv0 is needed to change process name (consequently, it is needed for scripts involving "pgrep", "pidof" to work correctly). + void shouldSetupWatchdog(char * argv0_); + protected: - /// Возвращает TaskManager приложения - /// все методы task_manager следует вызывать из одного потока - /// иначе возможен deadlock, т.к. joinAll выполняется под локом, а любой метод тоже берет лок - Poco::TaskManager & getTaskManager() { return *task_manager; } - virtual void logRevision() const; - /// Используется при exitOnTaskError() - void handleNotification(Poco::TaskFailedNotification *); - /// thread safe virtual void handleSignal(int signal_id); /// initialize termination process and signal handlers virtual void initializeTerminationAndSignalProcessing(); - /// реализация обработки сигналов завершения через pipe не требует блокировки сигнала с помощью sigprocmask во всех потоках + /// fork the main process and watch if it was killed + void setupWatchdog(); + void waitForTerminationRequest() #if defined(POCO_CLICKHOUSE_PATCH) || POCO_VERSION >= 0x02000000 // in old upstream poco not vitrual override @@ -162,21 +146,13 @@ protected: virtual std::string getDefaultCorePath() const; - std::unique_ptr task_manager; - - std::optional pid; + std::optional pid_file; std::atomic_bool is_cancelled{false}; - /// Флаг устанавливается по сообщению из Task (при аварийном завершении). - bool task_failed = false; - bool log_to_console = false; - /// Событие, чтобы проснуться во время ожидания - Poco::Event wakeup_event; - - /// Поток, в котором принимается сигнал HUP/USR1 для закрытия логов. + /// A thread that acts on HUP and USR1 signal (close logs). Poco::Thread signal_listener_thread; std::unique_ptr signal_listener; @@ -194,6 +170,9 @@ protected: String build_id_info; std::vector handled_signals; + + bool should_setup_watchdog = false; + char * argv0 = nullptr; }; diff --git a/base/glibc-compatibility/musl/__polevll.c b/base/glibc-compatibility/musl/__polevll.c new file mode 100644 index 00000000000..ce1a84046b8 --- /dev/null +++ b/base/glibc-compatibility/musl/__polevll.c @@ -0,0 +1,93 @@ +/* origin: OpenBSD /usr/src/lib/libm/src/polevll.c */ +/* + * Copyright (c) 2008 Stephen L. Moshier + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. 
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* + * Evaluate polynomial + * + * + * SYNOPSIS: + * + * int N; + * long double x, y, coef[N+1], polevl[]; + * + * y = polevll( x, coef, N ); + * + * + * DESCRIPTION: + * + * Evaluates polynomial of degree N: + * + * 2 N + * y = C + C x + C x +...+ C x + * 0 1 2 N + * + * Coefficients are stored in reverse order: + * + * coef[0] = C , ..., coef[N] = C . + * N 0 + * + * The function p1evll() assumes that coef[N] = 1.0 and is + * omitted from the array. Its calling arguments are + * otherwise the same as polevll(). + * + * + * SPEED: + * + * In the interest of speed, there are no checks for out + * of bounds arithmetic. This routine is used by most of + * the functions in the library. Depending on available + * equipment features, the user may wish to rewrite the + * program in microcode or assembly language. + * + */ + +#include "libm.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +#else +/* + * Polynomial evaluator: + * P[0] x^n + P[1] x^(n-1) + ... + P[n] + */ +long double __polevll(long double x, const long double *P, int n) +{ + long double y; + + y = *P++; + do { + y = y * x + *P++; + } while (--n); + + return y; +} + +/* + * Polynomial evaluator: + * x^n + P[0] x^(n-1) + P[1] x^(n-2) + ... + P[n] + */ +long double __p1evll(long double x, const long double *P, int n) +{ + long double y; + + n -= 1; + y = x + *P++; + do { + y = y * x + *P++; + } while (--n); + + return y; +} +#endif diff --git a/base/glibc-compatibility/musl/mkstemps.c b/base/glibc-compatibility/musl/mkstemps.c new file mode 100644 index 00000000000..2a0cbb7fc42 --- /dev/null +++ b/base/glibc-compatibility/musl/mkstemps.c @@ -0,0 +1,44 @@ +#include +#include +#include +#include +#include +#include +#include + +/* This assumes that a check for the + template size has already been made */ +static char * __randname(char * template) +{ + int i; + struct timespec ts; + unsigned long r; + + clock_gettime(CLOCK_REALTIME, &ts); + r = (ts.tv_nsec * 65537) ^ ((((intptr_t)(&ts)) / 16) + ((intptr_t)template)); + for (i = 0; i < 6; i++, r >>= 5) + template[i] = 'A' + (r & 15) + (r & 16) * 2; + + return template; +} + +int mkstemps(char * template, int len) +{ + size_t l = strlen(template); + if (l < 6 || len > l - 6 || memcmp(template + l - len - 6, "XXXXXX", 6)) + { + errno = EINVAL; + return -1; + } + + int fd, retries = 100; + do + { + __randname(template + l - len - 6); + if ((fd = open(template, O_RDWR | O_CREAT | O_EXCL, 0600)) >= 0) + return fd; + } while (--retries && errno == EEXIST); + + memcpy(template + l - len - 6, "XXXXXX", 6); + return -1; +} diff --git a/base/glibc-compatibility/musl/powf.c b/base/glibc-compatibility/musl/powf.c new file mode 100644 index 00000000000..de8fab54554 --- /dev/null +++ b/base/glibc-compatibility/musl/powf.c @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2017-2018, Arm Limited. + * SPDX-License-Identifier: MIT + */ + +#include +#include +#include "libm.h" +#include "exp2f_data.h" +#include "powf_data.h" + +/* +POWF_LOG2_POLY_ORDER = 5 +EXP2F_TABLE_BITS = 5 + +ULP error: 0.82 (~ 0.5 + relerr*2^24) +relerr: 1.27 * 2^-26 (Relative error ~= 128*Ln2*relerr_log2 + relerr_exp2) +relerr_log2: 1.83 * 2^-33 (Relative error of logx.) 
+relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).) +*/ + +#define N (1 << POWF_LOG2_TABLE_BITS) +#define T __powf_log2_data.tab +#define A __powf_log2_data.poly +#define OFF 0x3f330000 + +/* Subnormal input is normalized so ix has negative biased exponent. + Output is multiplied by N (POWF_SCALE) if TOINT_INTRINICS is set. */ +static inline double_t log2_inline(uint32_t ix) +{ + double_t z, r, r2, r4, p, q, y, y0, invc, logc; + uint32_t iz, top, tmp; + int k, i; + + /* x = 2^k z; where z is in range [OFF,2*OFF] and exact. + The range is split into N subintervals. + The ith subinterval contains z and c is near its center. */ + tmp = ix - OFF; + i = (tmp >> (23 - POWF_LOG2_TABLE_BITS)) % N; + top = tmp & 0xff800000; + iz = ix - top; + k = (int32_t)top >> (23 - POWF_SCALE_BITS); /* arithmetic shift */ + invc = T[i].invc; + logc = T[i].logc; + z = (double_t)asfloat(iz); + + /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */ + r = z * invc - 1; + y0 = logc + (double_t)k; + + /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. */ + r2 = r * r; + y = A[0] * r + A[1]; + p = A[2] * r + A[3]; + r4 = r2 * r2; + q = A[4] * r + y0; + q = p * r2 + q; + y = y * r4 + q; + return y; +} + +#undef N +#undef T +#define N (1 << EXP2F_TABLE_BITS) +#define T __exp2f_data.tab +#define SIGN_BIAS (1 << (EXP2F_TABLE_BITS + 11)) + +/* The output of log2 and thus the input of exp2 is either scaled by N + (in case of fast toint intrinsics) or not. The unscaled xd must be + in [-1021,1023], sign_bias sets the sign of the result. */ +static inline float exp2_inline(double_t xd, uint32_t sign_bias) +{ + uint64_t ki, ski, t; + double_t kd, z, r, r2, y, s; + +#if TOINT_INTRINSICS +#define C __exp2f_data.poly_scaled + /* N*x = k + r with r in [-1/2, 1/2] */ + kd = roundtoint(xd); /* k */ + ki = converttoint(xd); +#else +#define C __exp2f_data.poly +#define SHIFT __exp2f_data.shift_scaled + /* x = k/N + r with r in [-1/(2N), 1/(2N)] */ + kd = eval_as_double(xd + SHIFT); + ki = asuint64(kd); + kd -= SHIFT; /* k/N */ +#endif + r = xd - kd; + + /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ + t = T[ki % N]; + ski = ki + sign_bias; + t += ski << (52 - EXP2F_TABLE_BITS); + s = asdouble(t); + z = C[0] * r + C[1]; + r2 = r * r; + y = C[2] * r + 1; + y = z * r2 + y; + y = y * s; + return eval_as_float(y); +} + +/* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is + the bit representation of a non-zero finite floating-point value. */ +static inline int checkint(uint32_t iy) +{ + int e = iy >> 23 & 0xff; + if (e < 0x7f) + return 0; + if (e > 0x7f + 23) + return 2; + if (iy & ((1 << (0x7f + 23 - e)) - 1)) + return 0; + if (iy & (1 << (0x7f + 23 - e))) + return 1; + return 2; +} + +static inline int zeroinfnan(uint32_t ix) +{ + return 2 * ix - 1 >= 2u * 0x7f800000 - 1; +} + +float powf(float x, float y) +{ + uint32_t sign_bias = 0; + uint32_t ix, iy; + + ix = asuint(x); + iy = asuint(y); + if (predict_false(ix - 0x00800000 >= 0x7f800000 - 0x00800000 || + zeroinfnan(iy))) { + /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */ + if (predict_false(zeroinfnan(iy))) { + if (2 * iy == 0) + return issignalingf_inline(x) ? x + y : 1.0f; + if (ix == 0x3f800000) + return issignalingf_inline(y) ? x + y : 1.0f; + if (2 * ix > 2u * 0x7f800000 || + 2 * iy > 2u * 0x7f800000) + return x + y; + if (2 * ix == 2 * 0x3f800000) + return 1.0f; + if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000)) + return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. 
*/ + return y * y; + } + if (predict_false(zeroinfnan(ix))) { + float_t x2 = x * x; + if (ix & 0x80000000 && checkint(iy) == 1) + x2 = -x2; + /* Without the barrier some versions of clang hoist the 1/x2 and + thus division by zero exception can be signaled spuriously. */ + return iy & 0x80000000 ? fp_barrierf(1 / x2) : x2; + } + /* x and y are non-zero finite. */ + if (ix & 0x80000000) { + /* Finite x < 0. */ + int yint = checkint(iy); + if (yint == 0) + return __math_invalidf(x); + if (yint == 1) + sign_bias = SIGN_BIAS; + ix &= 0x7fffffff; + } + if (ix < 0x00800000) { + /* Normalize subnormal x so exponent becomes negative. */ + ix = asuint(x * 0x1p23f); + ix &= 0x7fffffff; + ix -= 23 << 23; + } + } + double_t logx = log2_inline(ix); + double_t ylogx = y * logx; /* cannot overflow, y is single prec. */ + if (predict_false((asuint64(ylogx) >> 47 & 0xffff) >= + asuint64(126.0 * POWF_SCALE) >> 47)) { + /* |y*log(x)| >= 126. */ + if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE) + return __math_oflowf(sign_bias); + if (ylogx <= -150.0 * POWF_SCALE) + return __math_uflowf(sign_bias); + } + return exp2_inline(ylogx, sign_bias); +} diff --git a/base/glibc-compatibility/musl/powf_data.c b/base/glibc-compatibility/musl/powf_data.c new file mode 100644 index 00000000000..13e1d9a06a9 --- /dev/null +++ b/base/glibc-compatibility/musl/powf_data.c @@ -0,0 +1,34 @@ +/* + * Data definition for powf. + * + * Copyright (c) 2017-2018, Arm Limited. + * SPDX-License-Identifier: MIT + */ + +#include "powf_data.h" + +const struct powf_log2_data __powf_log2_data = { + .tab = { + { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * POWF_SCALE }, + { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * POWF_SCALE }, + { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * POWF_SCALE }, + { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * POWF_SCALE }, + { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * POWF_SCALE }, + { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * POWF_SCALE }, + { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * POWF_SCALE }, + { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * POWF_SCALE }, + { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * POWF_SCALE }, + { 0x1p+0, 0x0p+0 * POWF_SCALE }, + { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * POWF_SCALE }, + { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * POWF_SCALE }, + { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * POWF_SCALE }, + { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * POWF_SCALE }, + { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * POWF_SCALE }, + { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * POWF_SCALE }, + }, + .poly = { + 0x1.27616c9496e0bp-2 * POWF_SCALE, -0x1.71969a075c67ap-2 * POWF_SCALE, + 0x1.ec70a6ca7baddp-2 * POWF_SCALE, -0x1.7154748bef6c8p-1 * POWF_SCALE, + 0x1.71547652ab82bp0 * POWF_SCALE, + } +}; diff --git a/base/glibc-compatibility/musl/powf_data.h b/base/glibc-compatibility/musl/powf_data.h new file mode 100644 index 00000000000..5b136e28374 --- /dev/null +++ b/base/glibc-compatibility/musl/powf_data.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2017-2018, Arm Limited. 
+ * SPDX-License-Identifier: MIT + */ +#ifndef _POWF_DATA_H +#define _POWF_DATA_H + +#include "libm.h" +#include "exp2f_data.h" + +#define POWF_LOG2_TABLE_BITS 4 +#define POWF_LOG2_POLY_ORDER 5 +#if TOINT_INTRINSICS +#define POWF_SCALE_BITS EXP2F_TABLE_BITS +#else +#define POWF_SCALE_BITS 0 +#endif +#define POWF_SCALE ((double)(1 << POWF_SCALE_BITS)) +extern hidden const struct powf_log2_data { + struct { + double invc, logc; + } tab[1 << POWF_LOG2_TABLE_BITS]; + double poly[POWF_LOG2_POLY_ORDER]; +} __powf_log2_data; + +#endif diff --git a/base/glibc-compatibility/musl/powl.c b/base/glibc-compatibility/musl/powl.c new file mode 100644 index 00000000000..508c65c9c7c --- /dev/null +++ b/base/glibc-compatibility/musl/powl.c @@ -0,0 +1,525 @@ +/* origin: OpenBSD /usr/src/lib/libm/src/ld80/e_powl.c */ +/* + * Copyright (c) 2008 Stephen L. Moshier + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ +/* powl.c + * + * Power function, long double precision + * + * + * SYNOPSIS: + * + * long double x, y, z, powl(); + * + * z = powl( x, y ); + * + * + * DESCRIPTION: + * + * Computes x raised to the yth power. Analytically, + * + * x**y = exp( y log(x) ). + * + * Following Cody and Waite, this program uses a lookup table + * of 2**-i/32 and pseudo extended precision arithmetic to + * obtain several extra bits of accuracy in both the logarithm + * and the exponential. + * + * + * ACCURACY: + * + * The relative error of pow(x,y) can be estimated + * by y dl ln(2), where dl is the absolute error of + * the internally computed base 2 logarithm. At the ends + * of the approximation interval the logarithm equal 1/32 + * and its relative error is about 1 lsb = 1.1e-19. Hence + * the predicted relative error in the result is 2.3e-21 y . + * + * Relative error: + * arithmetic domain # trials peak rms + * + * IEEE +-1000 40000 2.8e-18 3.7e-19 + * .001 < x < 1000, with log(x) uniformly distributed. + * -1000 < y < 1000, y uniformly distributed. + * + * IEEE 0,8700 60000 6.5e-18 1.0e-18 + * 0.99 < x < 1.01, 0 < y < 8700, uniformly distributed. 
+ * + * + * ERROR MESSAGES: + * + * message condition value returned + * pow overflow x**y > MAXNUM INFINITY + * pow underflow x**y < 1/MAXNUM 0.0 + * pow domain x<0 and y noninteger 0.0 + * + */ + +#include "libm.h" + +#if LDBL_MANT_DIG == 53 && LDBL_MAX_EXP == 1024 +long double powl(long double x, long double y) +{ + return pow(x, y); +} +#elif LDBL_MANT_DIG == 64 && LDBL_MAX_EXP == 16384 + +/* Table size */ +#define NXT 32 + +/* log(1+x) = x - .5x^2 + x^3 * P(z)/Q(z) + * on the domain 2^(-1/32) - 1 <= x <= 2^(1/32) - 1 + */ +static const long double P[] = { + 8.3319510773868690346226E-4L, + 4.9000050881978028599627E-1L, + 1.7500123722550302671919E0L, + 1.4000100839971580279335E0L, +}; +static const long double Q[] = { +/* 1.0000000000000000000000E0L,*/ + 5.2500282295834889175431E0L, + 8.4000598057587009834666E0L, + 4.2000302519914740834728E0L, +}; +/* A[i] = 2^(-i/32), rounded to IEEE long double precision. + * If i is even, A[i] + B[i/2] gives additional accuracy. + */ +static const long double A[33] = { + 1.0000000000000000000000E0L, + 9.7857206208770013448287E-1L, + 9.5760328069857364691013E-1L, + 9.3708381705514995065011E-1L, + 9.1700404320467123175367E-1L, + 8.9735453750155359320742E-1L, + 8.7812608018664974155474E-1L, + 8.5930964906123895780165E-1L, + 8.4089641525371454301892E-1L, + 8.2287773907698242225554E-1L, + 8.0524516597462715409607E-1L, + 7.8799042255394324325455E-1L, + 7.7110541270397041179298E-1L, + 7.5458221379671136985669E-1L, + 7.3841307296974965571198E-1L, + 7.2259040348852331001267E-1L, + 7.0710678118654752438189E-1L, + 6.9195494098191597746178E-1L, + 6.7712777346844636413344E-1L, + 6.6261832157987064729696E-1L, + 6.4841977732550483296079E-1L, + 6.3452547859586661129850E-1L, + 6.2092890603674202431705E-1L, + 6.0762367999023443907803E-1L, + 5.9460355750136053334378E-1L, + 5.8186242938878875689693E-1L, + 5.6939431737834582684856E-1L, + 5.5719337129794626814472E-1L, + 5.4525386633262882960438E-1L, + 5.3357020033841180906486E-1L, + 5.2213689121370692017331E-1L, + 5.1094857432705833910408E-1L, + 5.0000000000000000000000E-1L, +}; +static const long double B[17] = { + 0.0000000000000000000000E0L, + 2.6176170809902549338711E-20L, +-1.0126791927256478897086E-20L, + 1.3438228172316276937655E-21L, + 1.2207982955417546912101E-20L, +-6.3084814358060867200133E-21L, + 1.3164426894366316434230E-20L, +-1.8527916071632873716786E-20L, + 1.8950325588932570796551E-20L, + 1.5564775779538780478155E-20L, + 6.0859793637556860974380E-21L, +-2.0208749253662532228949E-20L, + 1.4966292219224761844552E-20L, + 3.3540909728056476875639E-21L, +-8.6987564101742849540743E-22L, +-1.2327176863327626135542E-20L, + 0.0000000000000000000000E0L, +}; + +/* 2^x = 1 + x P(x), + * on the interval -1/32 <= x <= 0 + */ +static const long double R[] = { + 1.5089970579127659901157E-5L, + 1.5402715328927013076125E-4L, + 1.3333556028915671091390E-3L, + 9.6181291046036762031786E-3L, + 5.5504108664798463044015E-2L, + 2.4022650695910062854352E-1L, + 6.9314718055994530931447E-1L, +}; + +#define MEXP (NXT*16384.0L) +/* The following if denormal numbers are supported, else -MEXP: */ +#define MNEXP (-NXT*(16384.0L+64.0L)) +/* log2(e) - 1 */ +#define LOG2EA 0.44269504088896340735992L + +#define F W +#define Fa Wa +#define Fb Wb +#define G W +#define Ga Wa +#define Gb u +#define H W +#define Ha Wb +#define Hb Wb + +static const long double MAXLOGL = 1.1356523406294143949492E4L; +static const long double MINLOGL = -1.13994985314888605586758E4L; +static const long double LOGE2L = 6.9314718055994530941723E-1L; +static const long 
double huge = 0x1p10000L; +/* XXX Prevent gcc from erroneously constant folding this. */ +static const volatile long double twom10000 = 0x1p-10000L; + +static long double reducl(long double); +static long double powil(long double, int); + +long double __polevll(long double x, const long double *P, int n); +long double __p1evll(long double x, const long double *P, int n); + +long double powl(long double x, long double y) +{ + /* double F, Fa, Fb, G, Ga, Gb, H, Ha, Hb */ + int i, nflg, iyflg, yoddint; + long e; + volatile long double z=0; + long double w=0, W=0, Wa=0, Wb=0, ya=0, yb=0, u=0; + + /* make sure no invalid exception is raised by nan comparision */ + if (isnan(x)) { + if (!isnan(y) && y == 0.0) + return 1.0; + return x; + } + if (isnan(y)) { + if (x == 1.0) + return 1.0; + return y; + } + if (x == 1.0) + return 1.0; /* 1**y = 1, even if y is nan */ + if (x == -1.0 && !isfinite(y)) + return 1.0; /* -1**inf = 1 */ + if (y == 0.0) + return 1.0; /* x**0 = 1, even if x is nan */ + if (y == 1.0) + return x; + if (y >= LDBL_MAX) { + if (x > 1.0 || x < -1.0) + return INFINITY; + if (x != 0.0) + return 0.0; + } + if (y <= -LDBL_MAX) { + if (x > 1.0 || x < -1.0) + return 0.0; + if (x != 0.0 || y == -INFINITY) + return INFINITY; + } + if (x >= LDBL_MAX) { + if (y > 0.0) + return INFINITY; + return 0.0; + } + + w = floorl(y); + + /* Set iyflg to 1 if y is an integer. */ + iyflg = 0; + if (w == y) + iyflg = 1; + + /* Test for odd integer y. */ + yoddint = 0; + if (iyflg) { + ya = fabsl(y); + ya = floorl(0.5 * ya); + yb = 0.5 * fabsl(w); + if( ya != yb ) + yoddint = 1; + } + + if (x <= -LDBL_MAX) { + if (y > 0.0) { + if (yoddint) + return -INFINITY; + return INFINITY; + } + if (y < 0.0) { + if (yoddint) + return -0.0; + return 0.0; + } + } + nflg = 0; /* (x<0)**(odd int) */ + if (x <= 0.0) { + if (x == 0.0) { + if (y < 0.0) { + if (signbit(x) && yoddint) + /* (-0.0)**(-odd int) = -inf, divbyzero */ + return -1.0/0.0; + /* (+-0.0)**(negative) = inf, divbyzero */ + return 1.0/0.0; + } + if (signbit(x) && yoddint) + return -0.0; + return 0.0; + } + if (iyflg == 0) + return (x - x) / (x - x); /* (x<0)**(non-int) is NaN */ + /* (x<0)**(integer) */ + if (yoddint) + nflg = 1; /* negate result */ + x = -x; + } + /* (+integer)**(integer) */ + if (iyflg && floorl(x) == x && fabsl(y) < 32768.0) { + w = powil(x, (int)y); + return nflg ? -w : w; + } + + /* separate significand from exponent */ + x = frexpl(x, &i); + e = i; + + /* find significand in antilog table A[] */ + i = 1; + if (x <= A[17]) + i = 17; + if (x <= A[i+8]) + i += 8; + if (x <= A[i+4]) + i += 4; + if (x <= A[i+2]) + i += 2; + if (x >= A[1]) + i = -1; + i += 1; + + /* Find (x - A[i])/A[i] + * in order to compute log(x/A[i]): + * + * log(x) = log( a x/a ) = log(a) + log(x/a) + * + * log(x/a) = log(1+v), v = x/a - 1 = (x-a)/a + */ + x -= A[i]; + x -= B[i/2]; + x /= A[i]; + + /* rational approximation for log(1+v): + * + * log(1+v) = v - v**2/2 + v**3 P(v) / Q(v) + */ + z = x*x; + w = x * (z * __polevll(x, P, 3) / __p1evll(x, Q, 3)); + w = w - 0.5*z; + + /* Convert to base 2 logarithm: + * multiply by log2(e) = 1 + LOG2EA + */ + z = LOG2EA * w; + z += w; + z += LOG2EA * x; + z += x; + + /* Compute exponent term of the base 2 logarithm. */ + w = -i; + w /= NXT; + w += e; + /* Now base 2 log of x is w + z. */ + + /* Multiply base 2 log by y, in extended precision. 
*/ + + /* separate y into large part ya + * and small part yb less than 1/NXT + */ + ya = reducl(y); + yb = y - ya; + + /* (w+z)(ya+yb) + * = w*ya + w*yb + z*y + */ + F = z * y + w * yb; + Fa = reducl(F); + Fb = F - Fa; + + G = Fa + w * ya; + Ga = reducl(G); + Gb = G - Ga; + + H = Fb + Gb; + Ha = reducl(H); + w = (Ga + Ha) * NXT; + + /* Test the power of 2 for overflow */ + if (w > MEXP) + return huge * huge; /* overflow */ + if (w < MNEXP) + return twom10000 * twom10000; /* underflow */ + + e = w; + Hb = H - Ha; + + if (Hb > 0.0) { + e += 1; + Hb -= 1.0/NXT; /*0.0625L;*/ + } + + /* Now the product y * log2(x) = Hb + e/NXT. + * + * Compute base 2 exponential of Hb, + * where -0.0625 <= Hb <= 0. + */ + z = Hb * __polevll(Hb, R, 6); /* z = 2**Hb - 1 */ + + /* Express e/NXT as an integer plus a negative number of (1/NXT)ths. + * Find lookup table entry for the fractional power of 2. + */ + if (e < 0) + i = 0; + else + i = 1; + i = e/NXT + i; + e = NXT*i - e; + w = A[e]; + z = w * z; /* 2**-e * ( 1 + (2**Hb-1) ) */ + z = z + w; + z = scalbnl(z, i); /* multiply by integer power of 2 */ + + if (nflg) + z = -z; + return z; +} + + +/* Find a multiple of 1/NXT that is within 1/NXT of x. */ +static long double reducl(long double x) +{ + long double t; + + t = x * NXT; + t = floorl(t); + t = t / NXT; + return t; +} + +/* + * Positive real raised to integer power, long double precision + * + * + * SYNOPSIS: + * + * long double x, y, powil(); + * int n; + * + * y = powil( x, n ); + * + * + * DESCRIPTION: + * + * Returns argument x>0 raised to the nth power. + * The routine efficiently decomposes n as a sum of powers of + * two. The desired power is a product of two-to-the-kth + * powers of x. Thus to compute the 32767 power of x requires + * 28 multiplications instead of 32767 multiplications. + * + * + * ACCURACY: + * + * Relative error: + * arithmetic x domain n domain # trials peak rms + * IEEE .001,1000 -1022,1023 50000 4.3e-17 7.8e-18 + * IEEE 1,2 -1022,1023 20000 3.9e-17 7.6e-18 + * IEEE .99,1.01 0,8700 10000 3.6e-16 7.2e-17 + * + * Returns MAXNUM on overflow, zero on underflow. + */ + +static long double powil(long double x, int nn) +{ + long double ww, y; + long double s; + int n, e, sign, lx; + + if (nn == 0) + return 1.0; + + if (nn < 0) { + sign = -1; + n = -nn; + } else { + sign = 1; + n = nn; + } + + /* Overflow detection */ + + /* Calculate approximate logarithm of answer */ + s = x; + s = frexpl( s, &lx); + e = (lx - 1)*n; + if ((e == 0) || (e > 64) || (e < -64)) { + s = (s - 7.0710678118654752e-1L) / (s + 7.0710678118654752e-1L); + s = (2.9142135623730950L * s - 0.5 + lx) * nn * LOGE2L; + } else { + s = LOGE2L * e; + } + + if (s > MAXLOGL) + return huge * huge; /* overflow */ + + if (s < MINLOGL) + return twom10000 * twom10000; /* underflow */ + /* Handle tiny denormal answer, but with less accuracy + * since roundoff error in 1.0/x will be amplified. + * The precise demarcation should be the gradual underflow threshold. 
+ */ + if (s < -MAXLOGL+2.0) { + x = 1.0/x; + sign = -sign; + } + + /* First bit of the power */ + if (n & 1) + y = x; + else + y = 1.0; + + ww = x; + n >>= 1; + while (n) { + ww = ww * ww; /* arg to the 2-to-the-kth power */ + if (n & 1) /* if that bit is set, then include in product */ + y *= ww; + n >>= 1; + } + + if (sign < 0) + y = 1.0/y; + return y; +} +#elif LDBL_MANT_DIG == 113 && LDBL_MAX_EXP == 16384 +// TODO: broken implementation to make things compile +long double powl(long double x, long double y) +{ + return pow(x, y); +} +#endif diff --git a/base/glibc-compatibility/musl/timerfd.c b/base/glibc-compatibility/musl/timerfd.c new file mode 100644 index 00000000000..0f9adb54389 --- /dev/null +++ b/base/glibc-compatibility/musl/timerfd.c @@ -0,0 +1,17 @@ +#include +#include "syscall.h" + +int timerfd_create(int clockid, int flags) +{ + return syscall(SYS_timerfd_create, clockid, flags); +} + +int timerfd_settime(int fd, int flags, const struct itimerspec *new, struct itimerspec *old) +{ + return syscall(SYS_timerfd_settime, fd, flags, new, old); +} + +int timerfd_gettime(int fd, struct itimerspec *cur) +{ + return syscall(SYS_timerfd_gettime, fd, cur); +} diff --git a/cmake/find/parquet.cmake b/cmake/find/parquet.cmake index 6d05fa17aec..eb1b529fbfe 100644 --- a/cmake/find/parquet.cmake +++ b/cmake/find/parquet.cmake @@ -141,11 +141,6 @@ if(NOT EXTERNAL_PARQUET_FOUND AND NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT O else() set(USE_INTERNAL_PARQUET_LIBRARY 1) - if(USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE) - set(ARROW_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src") - set(PARQUET_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src" ${ClickHouse_BINARY_DIR}/contrib/arrow/cpp/src) - endif() - if(MAKE_STATIC_LIBRARIES) set(FLATBUFFERS_LIBRARY flatbuffers) set(ARROW_LIBRARY arrow_static) @@ -155,9 +150,6 @@ if(NOT EXTERNAL_PARQUET_FOUND AND NOT MISSING_INTERNAL_PARQUET_LIBRARY AND NOT O set(FLATBUFFERS_LIBRARY flatbuffers_shared) set(ARROW_LIBRARY arrow_shared) set(PARQUET_LIBRARY parquet_shared) - if(USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE) - list(APPEND PARQUET_LIBRARY boost::regex) - endif() set(THRIFT_LIBRARY thrift) endif() diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index ded6c16ba85..9fccee89c1d 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -163,51 +163,21 @@ if(USE_INTERNAL_SNAPPY_LIBRARY) endif() if (USE_INTERNAL_PARQUET_LIBRARY) -if (USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE) # We don't use arrow's CMake files because they use too many dependencies and download some libraries at compile time - # But this mode can be used for updating auto-generated parquet files: - # cmake -DUSE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE=1 -DUSE_STATIC_LIBRARIES=0 - # copy {BUILD_DIR}/contrib/arrow/cpp/src/parquet/*.cpp,*.h -> /contrib/arrow-cmake/cpp/src/parquet/ + # But you can update auto-generated parquet files manually: + # cd {BUILD_DIR}/contrib/arrow/cpp/src/parquet && mkdir -p build && cd build + # cmake .. -DARROW_COMPUTE=ON -DARROW_PARQUET=ON -DARROW_SIMD_LEVEL=NONE -DARROW_VERBOSE_THIRDPARTY_BUILD=ON + # -DARROW_BUILD_SHARED=1 -DARROW_BUILD_UTILITIES=OFF -DARROW_BUILD_INTEGRATION=OFF + # -DBoost_FOUND=1 -DARROW_TEST_LINKAGE="shared" + # make -j8 + # copy {BUILD_DIR}/contrib/arrow/cpp/src/parquet/*.cpp,*.h -> {BUILD_DIR}/contrib/arrow-cmake/cpp/src/parquet/ # Also useful parquet reader: - # cd contrib/arrow/cpp/build && mkdir -p build && cmake ..
-DPARQUET_BUILD_EXECUTABLES=1 && make -j8 - # contrib/arrow/cpp/build/debug/parquet-reader some_file.parquet + # cd {BUILD_DIR}/contrib/arrow/cpp && mkdir -p build && cd build + # cmake .. -DARROW_PARQUET=1 -DARROW_WITH_SNAPPY=1 -DPARQUET_BUILD_EXECUTABLES=1 + # make -j8 + # {BUILD_DIR}/contrib/arrow/cpp/build/release/parquet-reader some_file.parquet - set (ARROW_COMPUTE ON CACHE INTERNAL "") - set (ARROW_PARQUET ON CACHE INTERNAL "") - set (ARROW_VERBOSE_THIRDPARTY_BUILD ON CACHE INTERNAL "") - set (ARROW_BUILD_SHARED 1 CACHE INTERNAL "") - set (ARROW_BUILD_UTILITIES OFF CACHE INTERNAL "") - set (ARROW_BUILD_INTEGRATION OFF CACHE INTERNAL "") - set (ARROW_BOOST_HEADER_ONLY ON CACHE INTERNAL "") - set (Boost_FOUND 1 CACHE INTERNAL "") - if (MAKE_STATIC_LIBRARIES) - set (PARQUET_ARROW_LINKAGE "static" CACHE INTERNAL "") - set (ARROW_TEST_LINKAGE "static" CACHE INTERNAL "") - set (ARROW_BUILD_STATIC ${MAKE_STATIC_LIBRARIES} CACHE INTERNAL "") - else () - set (PARQUET_ARROW_LINKAGE "shared" CACHE INTERNAL "") - set (ARROW_TEST_LINKAGE "shared" CACHE INTERNAL "") - endif () - - if (CMAKE_BUILD_TYPE_UC STREQUAL "RELWITHDEBINFO") - set (_save_build_type ${CMAKE_BUILD_TYPE}) - set (CMAKE_BUILD_TYPE Release) - string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC) - endif () - - # Because Arrow uses CMAKE_SOURCE_DIR as a project path - # Hopefully will be fixed in https://github.com/apache/arrow/pull/2676 - set (CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/cmake_modules") - add_subdirectory (arrow/cpp) - - if (_save_build_type) - set (CMAKE_BUILD_TYPE ${_save_build_type}) - unset (_save_build_type) - string (TOUPPER ${CMAKE_BUILD_TYPE} CMAKE_BUILD_TYPE_UC) - endif () - -else() add_subdirectory(arrow-cmake) # The library is large - avoid bloat. 
@@ -215,7 +185,6 @@ else() target_compile_options (${THRIFT_LIBRARY} PRIVATE -g0) target_compile_options (${PARQUET_LIBRARY} PRIVATE -g0) endif() -endif() if (USE_INTERNAL_AVRO_LIBRARY) add_subdirectory(avro-cmake) diff --git a/contrib/arrow b/contrib/arrow index 3cbcb7b62c2..744bdfe188f 160000 --- a/contrib/arrow +++ b/contrib/arrow @@ -1 +1 @@ -Subproject commit 3cbcb7b62c2f2d02851bff837758637eb592a64b +Subproject commit 744bdfe188f018e5e05f5deebd4e9ee0a7706cf4 diff --git a/contrib/arrow-cmake/CMakeLists.txt b/contrib/arrow-cmake/CMakeLists.txt index 442f2534f6a..4b402a9db79 100644 --- a/contrib/arrow-cmake/CMakeLists.txt +++ b/contrib/arrow-cmake/CMakeLists.txt @@ -144,15 +144,16 @@ set(ORC_SRCS set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/arrow) -configure_file("${LIBRARY_DIR}/util/config.h.cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cpp/src/arrow/util/config.h") +configure_file("${LIBRARY_DIR}/util/config.h.cmake" "${CMAKE_CURRENT_BINARY_DIR}/cpp/src/arrow/util/config.h") # arrow/cpp/src/arrow/CMakeLists.txt set(ARROW_SRCS - ${LIBRARY_DIR}/array.cc ${LIBRARY_DIR}/buffer.cc - ${LIBRARY_DIR}/device.cc ${LIBRARY_DIR}/builder.cc + ${LIBRARY_DIR}/chunked_array.cc ${LIBRARY_DIR}/compare.cc + ${LIBRARY_DIR}/datum.cc + ${LIBRARY_DIR}/device.cc ${LIBRARY_DIR}/extension_type.cc ${LIBRARY_DIR}/memory_pool.cc ${LIBRARY_DIR}/pretty_print.cc @@ -167,11 +168,12 @@ set(ARROW_SRCS ${LIBRARY_DIR}/type.cc ${LIBRARY_DIR}/visitor.cc - ${LIBRARY_DIR}/tensor/coo_converter.cc - ${LIBRARY_DIR}/tensor/csc_converter.cc - ${LIBRARY_DIR}/tensor/csf_converter.cc - ${LIBRARY_DIR}/tensor/csr_converter.cc - + ${LIBRARY_DIR}/array/array_base.cc + ${LIBRARY_DIR}/array/array_binary.cc + ${LIBRARY_DIR}/array/array_decimal.cc + ${LIBRARY_DIR}/array/array_dict.cc + ${LIBRARY_DIR}/array/array_nested.cc + ${LIBRARY_DIR}/array/array_primitive.cc ${LIBRARY_DIR}/array/builder_adaptive.cc ${LIBRARY_DIR}/array/builder_base.cc ${LIBRARY_DIR}/array/builder_binary.cc @@ -181,17 +183,50 @@ set(ARROW_SRCS ${LIBRARY_DIR}/array/builder_primitive.cc ${LIBRARY_DIR}/array/builder_union.cc ${LIBRARY_DIR}/array/concatenate.cc - ${LIBRARY_DIR}/array/dict_internal.cc + ${LIBRARY_DIR}/array/data.cc ${LIBRARY_DIR}/array/diff.cc + ${LIBRARY_DIR}/array/util.cc ${LIBRARY_DIR}/array/validate.cc - ${LIBRARY_DIR}/csv/converter.cc + ${LIBRARY_DIR}/compute/api_scalar.cc + ${LIBRARY_DIR}/compute/api_vector.cc + ${LIBRARY_DIR}/compute/cast.cc + ${LIBRARY_DIR}/compute/exec.cc + ${LIBRARY_DIR}/compute/function.cc + ${LIBRARY_DIR}/compute/kernel.cc + ${LIBRARY_DIR}/compute/registry.cc + + ${LIBRARY_DIR}/compute/kernels/aggregate_basic.cc + ${LIBRARY_DIR}/compute/kernels/aggregate_mode.cc + ${LIBRARY_DIR}/compute/kernels/aggregate_var_std.cc + ${LIBRARY_DIR}/compute/kernels/codegen_internal.cc + ${LIBRARY_DIR}/compute/kernels/scalar_arithmetic.cc + ${LIBRARY_DIR}/compute/kernels/scalar_boolean.cc + ${LIBRARY_DIR}/compute/kernels/scalar_cast_boolean.cc + ${LIBRARY_DIR}/compute/kernels/scalar_cast_internal.cc + ${LIBRARY_DIR}/compute/kernels/scalar_cast_nested.cc + ${LIBRARY_DIR}/compute/kernels/scalar_cast_numeric.cc + ${LIBRARY_DIR}/compute/kernels/scalar_cast_string.cc + ${LIBRARY_DIR}/compute/kernels/scalar_cast_temporal.cc + ${LIBRARY_DIR}/compute/kernels/scalar_compare.cc + ${LIBRARY_DIR}/compute/kernels/scalar_fill_null.cc + ${LIBRARY_DIR}/compute/kernels/scalar_nested.cc + ${LIBRARY_DIR}/compute/kernels/scalar_set_lookup.cc + ${LIBRARY_DIR}/compute/kernels/scalar_string.cc + ${LIBRARY_DIR}/compute/kernels/scalar_validity.cc + 
${LIBRARY_DIR}/compute/kernels/vector_hash.cc + ${LIBRARY_DIR}/compute/kernels/vector_nested.cc + ${LIBRARY_DIR}/compute/kernels/vector_selection.cc + ${LIBRARY_DIR}/compute/kernels/vector_sort.cc + ${LIBRARY_DIR}/compute/kernels/util_internal.cc + ${LIBRARY_DIR}/csv/chunker.cc ${LIBRARY_DIR}/csv/column_builder.cc + ${LIBRARY_DIR}/csv/column_decoder.cc + ${LIBRARY_DIR}/csv/converter.cc ${LIBRARY_DIR}/csv/options.cc ${LIBRARY_DIR}/csv/parser.cc ${LIBRARY_DIR}/csv/reader.cc - ${LIBRARY_DIR}/csv/column_decoder.cc ${LIBRARY_DIR}/ipc/dictionary.cc ${LIBRARY_DIR}/ipc/feather.cc @@ -202,14 +237,25 @@ set(ARROW_SRCS ${LIBRARY_DIR}/ipc/writer.cc ${LIBRARY_DIR}/io/buffered.cc + ${LIBRARY_DIR}/io/caching.cc ${LIBRARY_DIR}/io/compressed.cc ${LIBRARY_DIR}/io/file.cc ${LIBRARY_DIR}/io/interfaces.cc ${LIBRARY_DIR}/io/memory.cc ${LIBRARY_DIR}/io/slow.cc + ${LIBRARY_DIR}/tensor/coo_converter.cc + ${LIBRARY_DIR}/tensor/csf_converter.cc + ${LIBRARY_DIR}/tensor/csx_converter.cc + ${LIBRARY_DIR}/util/basic_decimal.cc + ${LIBRARY_DIR}/util/bit_block_counter.cc + ${LIBRARY_DIR}/util/bit_run_reader.cc ${LIBRARY_DIR}/util/bit_util.cc + ${LIBRARY_DIR}/util/bitmap.cc + ${LIBRARY_DIR}/util/bitmap_builders.cc + ${LIBRARY_DIR}/util/bitmap_ops.cc + ${LIBRARY_DIR}/util/bpacking.cc ${LIBRARY_DIR}/util/compression.cc ${LIBRARY_DIR}/util/compression_lz4.cc ${LIBRARY_DIR}/util/compression_snappy.cc @@ -217,8 +263,12 @@ set(ARROW_SRCS ${LIBRARY_DIR}/util/compression_zstd.cc ${LIBRARY_DIR}/util/cpu_info.cc ${LIBRARY_DIR}/util/decimal.cc + ${LIBRARY_DIR}/util/delimiting.cc + ${LIBRARY_DIR}/util/formatting.cc + ${LIBRARY_DIR}/util/future.cc ${LIBRARY_DIR}/util/int_util.cc ${LIBRARY_DIR}/util/io_util.cc + ${LIBRARY_DIR}/util/iterator.cc ${LIBRARY_DIR}/util/key_value_metadata.cc ${LIBRARY_DIR}/util/logging.cc ${LIBRARY_DIR}/util/memory.cc @@ -226,27 +276,15 @@ set(ARROW_SRCS ${LIBRARY_DIR}/util/string.cc ${LIBRARY_DIR}/util/task_group.cc ${LIBRARY_DIR}/util/thread_pool.cc + ${LIBRARY_DIR}/util/time.cc ${LIBRARY_DIR}/util/trie.cc ${LIBRARY_DIR}/util/utf8.cc - ${LIBRARY_DIR}/util/future.cc - ${LIBRARY_DIR}/util/formatting.cc - ${LIBRARY_DIR}/util/parsing.cc - ${LIBRARY_DIR}/util/time.cc - ${LIBRARY_DIR}/util/delimiting.cc - ${LIBRARY_DIR}/util/iterator.cc + ${LIBRARY_DIR}/util/value_parsing.cc ${LIBRARY_DIR}/vendored/base64.cpp ${ORC_SRCS} ) -set(ARROW_SRCS ${ARROW_SRCS} - ${LIBRARY_DIR}/compute/context.cc - ${LIBRARY_DIR}/compute/kernels/boolean.cc - ${LIBRARY_DIR}/compute/kernels/cast.cc - ${LIBRARY_DIR}/compute/kernels/hash.cc - ${LIBRARY_DIR}/compute/kernels/util_internal.cc - ) - if (SNAPPY_INCLUDE_DIR AND SNAPPY_LIBRARY) set(ARROW_WITH_SNAPPY 1) endif () @@ -289,7 +327,8 @@ if (USE_INTERNAL_PROTOBUF_LIBRARY) add_dependencies(${ARROW_LIBRARY} protoc) endif () -target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/cpp/src) +target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src) +target_include_directories(${ARROW_LIBRARY} SYSTEM PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/cpp/src) target_link_libraries(${ARROW_LIBRARY} PRIVATE ${DOUBLE_CONVERSION_LIBRARIES} ${Protobuf_LIBRARY}) target_link_libraries(${ARROW_LIBRARY} PRIVATE lz4) if (ARROW_WITH_SNAPPY) @@ -319,19 +358,26 @@ set(LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/parquet) set(GEN_LIBRARY_DIR ${ClickHouse_SOURCE_DIR}/contrib/arrow/cpp/src/generated) # arrow/cpp/src/parquet/CMakeLists.txt set(PARQUET_SRCS + 
${LIBRARY_DIR}/arrow/path_internal.cc ${LIBRARY_DIR}/arrow/reader.cc ${LIBRARY_DIR}/arrow/reader_internal.cc ${LIBRARY_DIR}/arrow/schema.cc + ${LIBRARY_DIR}/arrow/schema_internal.cc ${LIBRARY_DIR}/arrow/writer.cc - ${LIBRARY_DIR}/arrow/path_internal.cc ${LIBRARY_DIR}/bloom_filter.cc ${LIBRARY_DIR}/column_reader.cc ${LIBRARY_DIR}/column_scanner.cc ${LIBRARY_DIR}/column_writer.cc ${LIBRARY_DIR}/deprecated_io.cc ${LIBRARY_DIR}/encoding.cc + ${LIBRARY_DIR}/encryption.cc + ${LIBRARY_DIR}/encryption_internal.cc ${LIBRARY_DIR}/file_reader.cc ${LIBRARY_DIR}/file_writer.cc + ${LIBRARY_DIR}/internal_file_decryptor.cc + ${LIBRARY_DIR}/internal_file_encryptor.cc + ${LIBRARY_DIR}/level_conversion.cc + ${LIBRARY_DIR}/level_comparison.cc ${LIBRARY_DIR}/metadata.cc ${LIBRARY_DIR}/murmur3.cc ${LIBRARY_DIR}/platform.cc @@ -340,10 +386,6 @@ set(PARQUET_SRCS ${LIBRARY_DIR}/schema.cc ${LIBRARY_DIR}/statistics.cc ${LIBRARY_DIR}/types.cc - ${LIBRARY_DIR}/encryption.cc - ${LIBRARY_DIR}/encryption_internal.cc - ${LIBRARY_DIR}/internal_file_decryptor.cc - ${LIBRARY_DIR}/internal_file_encryptor.cc ${GEN_LIBRARY_DIR}/parquet_constants.cpp ${GEN_LIBRARY_DIR}/parquet_types.cpp diff --git a/contrib/arrow-cmake/cpp/src/arrow/util/config.h b/contrib/arrow-cmake/cpp/src/arrow/util/config.h deleted file mode 100644 index bf8ea581922..00000000000 --- a/contrib/arrow-cmake/cpp/src/arrow/util/config.h +++ /dev/null @@ -1,26 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#define ARROW_VERSION_MAJOR -#define ARROW_VERSION_MINOR -#define ARROW_VERSION_PATCH -#define ARROW_VERSION ((ARROW_VERSION_MAJOR * 1000) + ARROW_VERSION_MINOR) * 1000 + ARROW_VERSION_PATCH - -#define ARROW_SO_VERSION "" -#define ARROW_FULL_SO_VERSION "" - -/* #undef GRPCPP_PP_INCLUDE */ diff --git a/contrib/arrow-cmake/cpp/src/parquet/parquet_version.h b/contrib/arrow-cmake/cpp/src/parquet/parquet_version.h index 7404db1d381..ec9fb32b2a5 100644 --- a/contrib/arrow-cmake/cpp/src/parquet/parquet_version.h +++ b/contrib/arrow-cmake/cpp/src/parquet/parquet_version.h @@ -22,8 +22,8 @@ #define PARQUET_VERSION_MINOR 5 #define PARQUET_VERSION_PATCH 1 -#define PARQUET_SO_VERSION 0 -#define PARQUET_FULL_SO_VERSION 0.17 +#define PARQUET_SO_VERSION "200" +#define PARQUET_FULL_SO_VERSION "200.0.0" // define the parquet created by version #define CREATED_BY_VERSION "parquet-cpp version 1.5.1-SNAPSHOT" diff --git a/contrib/boost-cmake/CMakeLists.txt b/contrib/boost-cmake/CMakeLists.txt index fd860c9f9b0..f7c1ce22e90 100644 --- a/contrib/boost-cmake/CMakeLists.txt +++ b/contrib/boost-cmake/CMakeLists.txt @@ -11,10 +11,11 @@ if (NOT USE_INTERNAL_BOOST_LIBRARY) iostreams program_options regex + context ) if(Boost_INCLUDE_DIR AND Boost_FILESYSTEM_LIBRARY AND Boost_FILESYSTEM_LIBRARY AND - Boost_PROGRAM_OPTIONS_LIBRARY AND Boost_REGEX_LIBRARY AND Boost_SYSTEM_LIBRARY) + Boost_PROGRAM_OPTIONS_LIBRARY AND Boost_REGEX_LIBRARY AND Boost_SYSTEM_LIBRARY AND Boost_CONTEXT_LIBRARY) set(EXTERNAL_BOOST_FOUND 1) @@ -27,18 +28,21 @@ if (NOT USE_INTERNAL_BOOST_LIBRARY) add_library (_boost_program_options INTERFACE) add_library (_boost_regex INTERFACE) add_library (_boost_system INTERFACE) + add_library (_boost_context INTERFACE) target_link_libraries (_boost_filesystem INTERFACE ${Boost_FILESYSTEM_LIBRARY}) target_link_libraries (_boost_iostreams INTERFACE ${Boost_IOSTREAMS_LIBRARY}) target_link_libraries (_boost_program_options INTERFACE ${Boost_PROGRAM_OPTIONS_LIBRARY}) target_link_libraries (_boost_regex INTERFACE ${Boost_REGEX_LIBRARY}) target_link_libraries (_boost_system INTERFACE ${Boost_SYSTEM_LIBRARY}) + target_link_libraries (_boost_context INTERFACE ${Boost_CONTEXT_LIBRARY}) add_library (boost::filesystem ALIAS _boost_filesystem) add_library (boost::iostreams ALIAS _boost_iostreams) add_library (boost::program_options ALIAS _boost_program_options) add_library (boost::regex ALIAS _boost_regex) add_library (boost::system ALIAS _boost_system) + add_library (boost::context ALIAS _boost_context) else() set(EXTERNAL_BOOST_FOUND 0) message (${RECONFIGURE_MESSAGE_LEVEL} "Can't find system boost") @@ -142,4 +146,57 @@ if (NOT EXTERNAL_BOOST_FOUND) add_library (_boost_system ${SRCS_SYSTEM}) add_library (boost::system ALIAS _boost_system) target_include_directories (_boost_system PRIVATE ${LIBRARY_DIR}) + + # context + enable_language(ASM) + SET(ASM_OPTIONS "-x assembler-with-cpp") + + if (SANITIZE AND (SANITIZE STREQUAL "address" OR SANITIZE STREQUAL "thread")) + add_compile_definitions(BOOST_USE_UCONTEXT) + + if (SANITIZE STREQUAL "address") + add_compile_definitions(BOOST_USE_ASAN) + elseif (SANITIZE STREQUAL "thread") + add_compile_definitions(BOOST_USE_TSAN) + endif() + + set (SRCS_CONTEXT + ${LIBRARY_DIR}/libs/context/src/fiber.cpp + ${LIBRARY_DIR}/libs/context/src/continuation.cpp + ${LIBRARY_DIR}/libs/context/src/dummy.cpp + ${LIBRARY_DIR}/libs/context/src/execution_context.cpp + ${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp + ) + elseif (ARCH_ARM) + set (SRCS_CONTEXT + 
${LIBRARY_DIR}/libs/context/src/asm/jump_arm64_aapcs_elf_gas.S + ${LIBRARY_DIR}/libs/context/src/asm/make_arm64_aapcs_elf_gas.S + ${LIBRARY_DIR}/libs/context/src/asm/ontop_arm64_aapcs_elf_gas.S + ${LIBRARY_DIR}/libs/context/src/dummy.cpp + ${LIBRARY_DIR}/libs/context/src/execution_context.cpp + ${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp + ) + elseif(OS_DARWIN) + set (SRCS_CONTEXT + ${LIBRARY_DIR}/libs/context/src/asm/jump_x86_64_sysv_macho_gas.S + ${LIBRARY_DIR}/libs/context/src/asm/make_x86_64_sysv_macho_gas.S + ${LIBRARY_DIR}/libs/context/src/asm/ontop_x86_64_sysv_macho_gas.S + ${LIBRARY_DIR}/libs/context/src/dummy.cpp + ${LIBRARY_DIR}/libs/context/src/execution_context.cpp + ${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp + ) + else() + set (SRCS_CONTEXT + ${LIBRARY_DIR}/libs/context/src/asm/jump_x86_64_sysv_elf_gas.S + ${LIBRARY_DIR}/libs/context/src/asm/make_x86_64_sysv_elf_gas.S + ${LIBRARY_DIR}/libs/context/src/asm/ontop_x86_64_sysv_elf_gas.S + ${LIBRARY_DIR}/libs/context/src/dummy.cpp + ${LIBRARY_DIR}/libs/context/src/execution_context.cpp + ${LIBRARY_DIR}/libs/context/src/posix/stack_traits.cpp + ) + endif() + + add_library (_boost_context ${SRCS_CONTEXT}) + add_library (boost::context ALIAS _boost_context) + target_include_directories (_boost_context PRIVATE ${LIBRARY_DIR}) endif () diff --git a/contrib/grpc-cmake/CMakeLists.txt b/contrib/grpc-cmake/CMakeLists.txt index efb0f1c4f43..97ca3fab4db 100644 --- a/contrib/grpc-cmake/CMakeLists.txt +++ b/contrib/grpc-cmake/CMakeLists.txt @@ -54,6 +54,26 @@ else () set(CARES_SHARED ON CACHE BOOL "" FORCE) endif () +# Disable looking for libnsl on platforms that have gethostbyname in glibc. +# +# c-ares searches for gethostbyname in the libnsl library; however, the version +# shipped with gRPC does this incorrectly [1], since it uses +# CHECK_LIBRARY_EXISTS(), which will return TRUE even if the function exists in +# another dependent library. The upstream already contains the correct macro [2], +# but it is not included in gRPC (even upstream gRPC, not the one that is +# shipped with clickhouse). +# +# [1]: https://github.com/c-ares/c-ares/blob/e982924acee7f7313b4baa4ee5ec000c5e373c30/CMakeLists.txt#L125 +# [2]: https://github.com/c-ares/c-ares/blob/44fbc813685a1fa8aa3f27fcd7544faf612d376a/CMakeLists.txt#L146 +# +# And if for some reason you have libnsl [3] installed, clickhouse will refuse +# to start without it, even though it is a completely different library. +# +# [3]: https://packages.debian.org/bullseye/libnsl2 +if (NOT CMAKE_SYSTEM_NAME STREQUAL "SunOS") + set(HAVE_LIBNSL OFF CACHE BOOL "" FORCE) +endif() + # We don't want to build C# extensions.
set(gRPC_BUILD_CSHARP_EXT OFF) diff --git a/contrib/jemalloc b/contrib/jemalloc index 93e27e435ca..e6891d97461 160000 --- a/contrib/jemalloc +++ b/contrib/jemalloc @@ -1 +1 @@ -Subproject commit 93e27e435cac846028da20cd9b0841fbc9110bd2 +Subproject commit e6891d9746143bf2cf617493d880ba5a0b9a3efd diff --git a/contrib/poco b/contrib/poco index 08974cc024b..2c32e17c7df 160000 --- a/contrib/poco +++ b/contrib/poco @@ -1 +1 @@ -Subproject commit 08974cc024b2e748f5b1d45415396706b3521d0f +Subproject commit 2c32e17c7dfee1f8bf24227b697cdef5fddf0823 diff --git a/contrib/rocksdb-cmake/CMakeLists.txt b/contrib/rocksdb-cmake/CMakeLists.txt index f99401ce75d..77a30776a4a 100644 --- a/contrib/rocksdb-cmake/CMakeLists.txt +++ b/contrib/rocksdb-cmake/CMakeLists.txt @@ -2,12 +2,6 @@ set(ROCKSDB_SOURCE_DIR "${ClickHouse_SOURCE_DIR}/contrib/rocksdb") list(APPEND CMAKE_MODULE_PATH "${ROCKSDB_SOURCE_DIR}/cmake/modules/") -find_program(CCACHE_FOUND ccache) -if(CCACHE_FOUND) - set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) - set_property(GLOBAL PROPERTY RULE_LAUNCH_LINK ccache) -endif(CCACHE_FOUND) - if (SANITIZE STREQUAL "undefined") set(WITH_UBSAN ON) elseif (SANITIZE STREQUAL "address") diff --git a/debian/clickhouse-test.install b/debian/clickhouse-test.install index 5d92a2767ef..042a4e02be1 100644 --- a/debian/clickhouse-test.install +++ b/debian/clickhouse-test.install @@ -1,5 +1,2 @@ usr/bin/clickhouse-test -usr/bin/clickhouse-test-server usr/share/clickhouse-test/* -etc/clickhouse-client/client-test.xml -etc/clickhouse-server/server-test.xml diff --git a/debian/rules b/debian/rules index 837f81dd503..30015ba0afd 100755 --- a/debian/rules +++ b/debian/rules @@ -62,7 +62,7 @@ ifndef DISABLE_NINJA NINJA=$(shell which ninja) ifneq ($(NINJA),) CMAKE_FLAGS += -GNinja - export MAKE=$(NINJA) + export MAKE=$(NINJA) $(NINJA_FLAGS) endif endif @@ -93,7 +93,7 @@ override_dh_auto_build: override_dh_auto_test: ifeq (,$(filter nocheck,$(DEB_BUILD_OPTIONS))) - cd $(BUILDDIR) && ctest $(THREADS_COUNT) -V -E with_server + cd $(BUILDDIR) && ctest $(THREADS_COUNT) -V endif override_dh_clean: diff --git a/docker/packager/unbundled/Dockerfile b/docker/packager/unbundled/Dockerfile index 2f501f76e68..c6ebe95d44a 100644 --- a/docker/packager/unbundled/Dockerfile +++ b/docker/packager/unbundled/Dockerfile @@ -21,6 +21,7 @@ RUN apt-get update \ libboost-thread-dev \ libboost-iostreams-dev \ libboost-regex-dev \ + libboost-context-dev \ zlib1g-dev \ liblz4-dev \ libdouble-conversion-dev \ diff --git a/docker/server/alpine-build.sh b/docker/server/alpine-build.sh index 30101225b3e..c9ba03f7f35 100755 --- a/docker/server/alpine-build.sh +++ b/docker/server/alpine-build.sh @@ -47,13 +47,13 @@ cp "${DOCKER_BUILD_FOLDER}/entrypoint.alpine.sh" "${CONTAINER_ROOT_FOLDER}/ ## get glibc components from ubuntu 20.04 and put them to expected place docker pull ubuntu:20.04 ubuntu20image=$(docker create --rm ubuntu:20.04) -docker cp -L ${ubuntu20image}:/lib/x86_64-linux-gnu/libc.so.6 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L ${ubuntu20image}:/lib/x86_64-linux-gnu/libdl.so.2 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L ${ubuntu20image}:/lib/x86_64-linux-gnu/libm.so.6 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L ${ubuntu20image}:/lib/x86_64-linux-gnu/libpthread.so.0 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L ${ubuntu20image}:/lib/x86_64-linux-gnu/librt.so.1 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L ${ubuntu20image}:/lib/x86_64-linux-gnu/libnss_dns.so.2 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L 
${ubuntu20image}:/lib/x86_64-linux-gnu/libresolv.so.2 "${CONTAINER_ROOT_FOLDER}/lib" -docker cp -L ${ubuntu20image}:/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64" +docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libc.so.6 "${CONTAINER_ROOT_FOLDER}/lib" +docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libdl.so.2 "${CONTAINER_ROOT_FOLDER}/lib" +docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libm.so.6 "${CONTAINER_ROOT_FOLDER}/lib" +docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libpthread.so.0 "${CONTAINER_ROOT_FOLDER}/lib" +docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/librt.so.1 "${CONTAINER_ROOT_FOLDER}/lib" +docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libnss_dns.so.2 "${CONTAINER_ROOT_FOLDER}/lib" +docker cp -L "${ubuntu20image}":/lib/x86_64-linux-gnu/libresolv.so.2 "${CONTAINER_ROOT_FOLDER}/lib" +docker cp -L "${ubuntu20image}":/lib64/ld-linux-x86-64.so.2 "${CONTAINER_ROOT_FOLDER}/lib64" -docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "yandex/clickhouse-server:${VERSION}-alpine" --pull \ No newline at end of file +docker build "$DOCKER_BUILD_FOLDER" -f Dockerfile.alpine -t "yandex/clickhouse-server:${VERSION}-alpine" --pull diff --git a/docker/server/entrypoint.alpine.sh b/docker/server/entrypoint.alpine.sh index e2edda9ca26..f0cc62d276d 100755 --- a/docker/server/entrypoint.alpine.sh +++ b/docker/server/entrypoint.alpine.sh @@ -26,17 +26,17 @@ fi CLICKHOUSE_CONFIG="${CLICKHOUSE_CONFIG:-/etc/clickhouse-server/config.xml}" # port is needed to check if clickhouse-server is ready for connections -HTTP_PORT="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=http_port)" +HTTP_PORT="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=http_port)" # get CH directories locations -DATA_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=path || true)" -TMP_DIR="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=tmp_path || true)" -USER_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=user_files_path || true)" -LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.log || true)" -LOG_DIR="$(dirname $LOG_PATH || true)" -ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=logger.errorlog || true)" -ERROR_LOG_DIR="$(dirname $ERROR_LOG_PATH || true)" -FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file $CLICKHOUSE_CONFIG --key=format_schema_path || true)" +DATA_DIR="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=path || true)" +TMP_DIR="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=tmp_path || true)" +USER_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=user_files_path || true)" +LOG_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=logger.log || true)" +LOG_DIR="$(dirname "${LOG_PATH}" || true)" +ERROR_LOG_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=logger.errorlog || true)" +ERROR_LOG_DIR="$(dirname "${ERROR_LOG_PATH}" || true)" +FORMAT_SCHEMA_PATH="$(clickhouse extract-from-config --config-file "${CLICKHOUSE_CONFIG}" --key=format_schema_path || true)" CLICKHOUSE_USER="${CLICKHOUSE_USER:-default}" CLICKHOUSE_PASSWORD="${CLICKHOUSE_PASSWORD:-}" @@ -92,7 +92,7 @@ fi if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then # Listen only on 
localhost until the initialization is done - $gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG -- --listen_host=127.0.0.1 & + $gosu /usr/bin/clickhouse-server --config-file="${CLICKHOUSE_CONFIG}" -- --listen_host=127.0.0.1 & pid="$!" # check if clickhouse is ready to accept connections @@ -107,7 +107,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then sleep 1 done - if [ ! -z "$CLICKHOUSE_PASSWORD" ]; then + if [ -n "$CLICKHOUSE_PASSWORD" ]; then printf -v WITH_PASSWORD '%s %q' "--password" "$CLICKHOUSE_PASSWORD" fi @@ -130,7 +130,7 @@ if [ -n "$(ls /docker-entrypoint-initdb.d/)" ] || [ -n "$CLICKHOUSE_DB" ]; then . "$f" fi ;; - *.sql) echo "$0: running $f"; cat "$f" | "$clickhouseclient" ; echo ;; + *.sql) echo "$0: running $f"; "$clickhouseclient" < "$f" ; echo ;; *.sql.gz) echo "$0: running $f"; gunzip -c "$f" | "$clickhouseclient"; echo ;; *) echo "$0: ignoring $f" ;; esac @@ -145,7 +145,7 @@ fi # if no args passed to `docker run` or first argument start with `--`, then the user is passing clickhouse-server arguments if [[ $# -lt 1 ]] || [[ "$1" == "--"* ]]; then - exec $gosu /usr/bin/clickhouse-server --config-file=$CLICKHOUSE_CONFIG "$@" + exec $gosu /usr/bin/clickhouse-server --config-file="${CLICKHOUSE_CONFIG}" "$@" fi # Otherwise, we assume the user want to run his own process, for example a `bash` shell to explore this image diff --git a/docker/test/stateless/run.sh b/docker/test/stateless/run.sh index 8793f3d22b4..e6e987e1d94 100755 --- a/docker/test/stateless/run.sh +++ b/docker/test/stateless/run.sh @@ -12,7 +12,32 @@ dpkg -i package_folder/clickhouse-test_*.deb # install test configs /usr/share/clickhouse-test/config/install.sh -service clickhouse-server start && sleep 5 +# For flaky check we also enable thread fuzzer +if [ "$NUM_TRIES" -gt "1" ]; then + export THREAD_FUZZER_CPU_TIME_PERIOD_US=1000 + export THREAD_FUZZER_SLEEP_PROBABILITY=0.1 + export THREAD_FUZZER_SLEEP_TIME_US=100000 + + export THREAD_FUZZER_pthread_mutex_lock_BEFORE_MIGRATE_PROBABILITY=1 + export THREAD_FUZZER_pthread_mutex_lock_AFTER_MIGRATE_PROBABILITY=1 + export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_MIGRATE_PROBABILITY=1 + export THREAD_FUZZER_pthread_mutex_unlock_AFTER_MIGRATE_PROBABILITY=1 + + export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_PROBABILITY=0.001 + export THREAD_FUZZER_pthread_mutex_lock_BEFORE_SLEEP_TIME_US=10000 + export THREAD_FUZZER_pthread_mutex_lock_AFTER_SLEEP_TIME_US=10000 + export THREAD_FUZZER_pthread_mutex_unlock_BEFORE_SLEEP_TIME_US=10000 + export THREAD_FUZZER_pthread_mutex_unlock_AFTER_SLEEP_TIME_US=10000 + + # simpliest way to forward env variables to server + sudo -E -u clickhouse /usr/bin/clickhouse-server --config /etc/clickhouse-server/config.xml --daemon + sleep 5 +else + service clickhouse-server start && sleep 5 +fi if grep -q -- "--use-skip-list" /usr/bin/clickhouse-test; then SKIP_LIST_OPT="--use-skip-list" diff --git a/docs/_description_templates/template-server-setting.md b/docs/_description_templates/template-server-setting.md new file mode 100644 index 00000000000..36a2bcacfba --- /dev/null +++ b/docs/_description_templates/template-server-setting.md @@ -0,0 +1,33 @@ +## server_setting_name {#server_setting_name} + +Description. + +Describe what is configured in this section of settings. 
+ +Possible value: ... + +Default value: ... + +Settings: (Optional) + +If the section contains several settings, list them here. Specify possible values and default values: + +- setting_1 — Description. +- setting_2 — Description. + +**Example:** + +```xml + + ... + ... + +``` + +**Additional Info** (Optional) + +The name of an additional section can be any, for example, **Usage**. + +**See Also** (Optional) + +- [link](#) diff --git a/docs/en/getting-started/example-datasets/index.md b/docs/en/getting-started/example-datasets/index.md index ed795a6c4de..00bedd4aa78 100644 --- a/docs/en/getting-started/example-datasets/index.md +++ b/docs/en/getting-started/example-datasets/index.md @@ -12,6 +12,7 @@ The list of documented datasets: - [GitHub Events](../../getting-started/example-datasets/github-events.md) - [Anonymized Yandex.Metrica Dataset](../../getting-started/example-datasets/metrica.md) +- [Recipes](../../getting-started/example-datasets/recipes.md) - [Star Schema Benchmark](../../getting-started/example-datasets/star-schema.md) - [WikiStat](../../getting-started/example-datasets/wikistat.md) - [Terabyte of Click Logs from Criteo](../../getting-started/example-datasets/criteo.md) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 093e4e50dfe..7f0a61ccd42 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -324,7 +324,7 @@ Consider the table: CREATE TABLE table_with_enum_column_for_tsv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory(); ``` -When the `input_format_tsv_enum_as_number` setting is enabled: +When the `input_format_tsv_enum_as_number` setting is enabled: ```sql SET input_format_tsv_enum_as_number = 1; @@ -1248,7 +1248,7 @@ Consider the table: CREATE TABLE table_with_enum_column_for_csv_insert (Id Int32,Value Enum('first' = 1, 'second' = 2)) ENGINE=Memory(); ``` -When the `input_format_csv_enum_as_number` setting is enabled: +When the `input_format_csv_enum_as_number` setting is enabled: ```sql SET input_format_csv_enum_as_number = 1; @@ -1841,7 +1841,7 @@ Default value: 0. Enables or disables synchronous data insertion into a [Distributed](../../engines/table-engines/special/distributed.md#distributed) table. -By default, when inserting data into a `Distributed` table, the ClickHouse server sends data to cluster nodes in asynchronous mode. When `insert_distributed_sync=1`, the data is processed synchronously, and the `INSERT` operation succeeds only after all the data is saved on all shards (at least one replica for each shard if `internal_replication` is true). +By default, when inserting data into a `Distributed` table, the ClickHouse server sends data to cluster nodes in asynchronous mode. When `insert_distributed_sync=1`, the data is processed synchronously, and the `INSERT` operation succeeds only after all the data is saved on all shards (at least one replica for each shard if `internal_replication` is true). Possible values: @@ -1855,6 +1855,18 @@ Default value: `0`. - [Distributed Table Engine](../../engines/table-engines/special/distributed.md#distributed) - [Managing Distributed Tables](../../sql-reference/statements/system.md#query-language-system-distributed) +## insert_distributed_one_random_shard {#insert_distributed_one_random_shard} + +Enables or disables random shard insertion into a [Distributed](../../engines/table-engines/special/distributed.md#distributed) table when there is no distributed key. 
+ +By default, when inserting data into a `Distributed` table with more than one shard, the ClickHouse server will reject any insertion request if there is no distributed key. When `insert_distributed_one_random_shard = 1`, insertions are allowed and data is forwarded randomly among all shards. + +Possible values: + +- 0 — Insertion is rejected if there are multiple shards and no distributed key is given. +- 1 — Insertion is done randomly among all available shards when no distributed key is given. + +Default value: `0`. ## use_compact_format_in_distributed_parts_names {#use_compact_format_in_distributed_parts_names} @@ -2109,8 +2121,8 @@ Enables [ORDER BY](../../sql-reference/statements/select/order-by.md#optimize_re Possible values: -- 0 — `ORDER BY` optimization is disabled. -- 1 — `ORDER BY` optimization is enabled. +- 0 — `ORDER BY` optimization is disabled. +- 1 — `ORDER BY` optimization is enabled. Default value: `1`. @@ -2124,8 +2136,8 @@ Allows to execute `ALTER TABLE ... UPDATE|DELETE` queries ([mutations](../../sql Possible values: -- 0 - Mutations execute asynchronously. -- 1 - The query waits for all mutations to complete on the current server. +- 0 - Mutations execute asynchronously. +- 1 - The query waits for all mutations to complete on the current server. - 2 - The query waits for all mutations to complete on all replicas (if they exist). Default value: `0`. @@ -2137,11 +2149,11 @@ Default value: `0`. ## ttl_only_drop_parts {#ttl_only_drop_parts} -Enables or disables complete dropping of data parts where all rows are expired in [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. +Enables or disables complete dropping of data parts where all rows are expired in [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md) tables. -When `ttl_only_drop_parts` is disabled (by default), the ClickHouse server only deletes expired rows according to their TTL. +When `ttl_only_drop_parts` is disabled (by default), the ClickHouse server only deletes expired rows according to their TTL. -When `ttl_only_drop_parts` is enabled, the ClickHouse server drops a whole part when all rows in it are expired. +When `ttl_only_drop_parts` is enabled, the ClickHouse server drops a whole part when all rows in it are expired. Dropping whole parts instead of partial cleaning TTL-d rows allows having shorter `merge_with_ttl_timeout` times and lower impact on system performance. @@ -2152,14 +2164,14 @@ Possible values: Default value: `0`. -**See Also** +**See Also** - [CREATE TABLE query clauses and settings](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-query-clauses) (`merge_with_ttl_timeout` setting) - [Table TTL](../../engines/table-engines/mergetree-family/mergetree.md#mergetree-table-ttl) ## lock_acquire_timeout {#lock_acquire_timeout} -Defines how many seconds a locking request waits before failing. +Defines how many seconds a locking request waits before failing. Locking timeout is used to protect from deadlocks while executing read/write operations with tables. When the timeout expires and the locking request fails, the ClickHouse server throws an exception "Locking attempt timed out! Possible deadlock avoided. Client should retry." with error code `DEADLOCK_AVOIDED`. @@ -2179,11 +2191,11 @@ When the setting is enabled and the argument of `CAST` function is `Nullable`, t Possible values: - 0 — The `CAST` result has exactly the destination type specified.
-- 1 — If the argument type is `Nullable`, the `CAST` result is transformed to `Nullable(DestinationDataType)`. +- 1 — If the argument type is `Nullable`, the `CAST` result is transformed to `Nullable(DestinationDataType)`. Default value: `0`. -**Examples** +**Examples** The following query results in the destination data type exactly: @@ -2215,17 +2227,17 @@ Result: └───┴───────────────────────────────────────────────────┘ ``` -**See Also** +**See Also** - [CAST](../../sql-reference/functions/type-conversion-functions.md#type_conversion_function-cast) function ## output_format_pretty_max_value_width {#output_format_pretty_max_value_width} -Limits the width of value displayed in [Pretty](../../interfaces/formats.md#pretty) formats. If the value width exceeds the limit, the value is cut. +Limits the width of value displayed in [Pretty](../../interfaces/formats.md#pretty) formats. If the value width exceeds the limit, the value is cut. Possible values: -- Positive integer. +- Positive integer. - 0 — The value is cut completely. Default value: `10000` symbols. @@ -2352,7 +2364,7 @@ Default value: `0`. ## persistent {#persistent} -Disables persistency for the [Set](../../engines/table-engines/special/set.md#set) and [Join](../../engines/table-engines/special/join.md#join) table engines. +Disables persistency for the [Set](../../engines/table-engines/special/set.md#set) and [Join](../../engines/table-engines/special/join.md#join) table engines. Reduces the I/O overhead. Suitable for scenarios that pursue performance and do not require persistence. @@ -2426,7 +2438,7 @@ Result: [ {"number":"0"}, {"number":"1"}, -{"number":"2"} +{"number":"2"} ] ``` @@ -2447,7 +2459,6 @@ Result: {"number":"2"} ``` -======= ## allow_nullable_key {#allow-nullable-key} Allows using of the [Nullable](../../sql-reference/data-types/nullable.md#data_type-nullable)-typed values in a sorting and a primary key for [MergeTree](../../engines/table-engines/mergetree-family/mergetree.md#table_engines-mergetree) tables. diff --git a/docs/en/sql-reference/statements/alter/column.md b/docs/en/sql-reference/statements/alter/column.md index 69aa086c069..825399422d1 100644 --- a/docs/en/sql-reference/statements/alter/column.md +++ b/docs/en/sql-reference/statements/alter/column.md @@ -29,12 +29,12 @@ These actions are described in detail below. ## ADD COLUMN {#alter_add-column} ``` sql -ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after] +ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after | FIRST] ``` Adds a new column to the table with the specified `name`, `type`, [`codec`](../../../sql-reference/statements/create/table.md#codecs) and `default_expr` (see the section [Default expressions](../../../sql-reference/statements/create/table.md#create-default-values)). -If the `IF NOT EXISTS` clause is included, the query won’t return an error if the column already exists. If you specify `AFTER name_after` (the name of another column), the column is added after the specified one in the list of table columns. Otherwise, the column is added to the end of the table. Note that there is no way to add a column to the beginning of a table. For a chain of actions, `name_after` can be the name of a column that is added in one of the previous actions. +If the `IF NOT EXISTS` clause is included, the query won’t return an error if the column already exists. If you specify `AFTER name_after` (the name of another column), the column is added after the specified one in the list of table columns. 
If you want to add a column to the beginning of the table use the `FIRST` clause. Otherwise, the column is added to the end of the table. For a chain of actions, `name_after` can be the name of a column that is added in one of the previous actions. Adding a column just changes the table structure, without performing any actions with data. The data doesn’t appear on the disk after `ALTER`. If the data is missing for a column when reading from the table, it is filled in with default values (by performing the default expression if there is one, or using zeros or empty strings). The column appears on the disk after merging data parts (see [MergeTree](../../../engines/table-engines/mergetree-family/mergetree.md)). @@ -43,9 +43,24 @@ This approach allows us to complete the `ALTER` query instantly, without increas Example: ``` sql -ALTER TABLE visits ADD COLUMN browser String AFTER user_id +ALTER TABLE alter_test ADD COLUMN Added1 UInt32 FIRST; +ALTER TABLE alter_test ADD COLUMN Added2 UInt32 AFTER NestedColumn; +ALTER TABLE alter_test ADD COLUMN Added3 UInt32 AFTER ToDrop; +DESC alter_test FORMAT TSV; ``` +``` text +Added1 UInt32 +CounterID UInt32 +StartDate Date +UserID UInt32 +VisitID UInt32 +NestedColumn.A Array(UInt8) +NestedColumn.S Array(String) +Added2 UInt32 +ToDrop UInt32 +Added3 UInt32 +``` ## DROP COLUMN {#alter_drop-column} ``` sql @@ -99,7 +114,7 @@ ALTER TABLE visits COMMENT COLUMN browser 'The table shows the browser used for ## MODIFY COLUMN {#alter_modify-column} ``` sql -MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL] +MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL] [AFTER name_after | FIRST] ``` This query changes the `name` column properties: @@ -114,6 +129,8 @@ This query changes the `name` column properties: If the `IF EXISTS` clause is specified, the query won’t return an error if the column doesn’t exist. +The query also can change the order of the columns using `FIRST | AFTER` clause, see [ADD COLUMN](#alter_add-column) description. + When changing the type, values are converted as if the [toType](../../../sql-reference/functions/type-conversion-functions.md) functions were applied to them. If only the default expression is changed, the query doesn’t do anything complex, and is completed almost instantly. Example: @@ -124,15 +141,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String) Changing the column type is the only complex action – it changes the contents of files with data. For large tables, this may take a long time. -There are several processing stages: - -- Preparing temporary (new) files with modified data. -- Renaming old files. -- Renaming the temporary (new) files to the old names. -- Deleting the old files. - -Only the first stage takes time. If there is a failure at this stage, the data is not changed. -If there is a failure during one of the successive stages, data can be restored manually. The exception is if the old files were deleted from the file system but the data for the new files did not get written to the disk and was lost. +The `ALTER` query is atomic. For MergeTree tables it is also lock-free. The `ALTER` query for changing columns is replicated. The instructions are saved in ZooKeeper, then each replica applies them. All `ALTER` queries are run in the same order. The query waits for the appropriate actions to be completed on the other replicas. However, a query to change columns in a replicated table can be interrupted, and all actions will be performed asynchronously. 
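The ALTER documentation above now allows `FIRST | AFTER` in `MODIFY COLUMN` as well, but only `ADD COLUMN` gets a worked example. Below is a minimal, illustrative sketch of reordering an existing column, reusing the `alter_test` table and column names from the example above; the order reported by `DESC` will depend on the actual table definition.

``` sql
-- Move an existing column to the front of the table.
-- The column type is repeated here; per the syntax above it is optional.
ALTER TABLE alter_test MODIFY COLUMN ToDrop UInt32 FIRST;

-- Move it back behind another column.
ALTER TABLE alter_test MODIFY COLUMN ToDrop UInt32 AFTER VisitID;

DESC alter_test FORMAT TSV;
```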
diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md index 482e4999cea..ce29272b451 100644 --- a/docs/ru/interfaces/formats.md +++ b/docs/ru/interfaces/formats.md @@ -9,7 +9,6 @@ ClickHouse может принимать (`INSERT`) и отдавать (`SELECT Поддерживаемые форматы и возможность использовать их в запросах `INSERT` и `SELECT` перечислены в таблице ниже. -======= | Формат | INSERT | SELECT | |-----------------------------------------------------------------------------------------|--------|--------| | [TabSeparated](#tabseparated) | ✔ | ✔ | @@ -1252,7 +1251,7 @@ SELECT * FROM line_as_string; ## RawBLOB {#rawblob} -В этом формате все входные данные считываются в одно значение. Парсить можно только таблицу с одним полем типа [String](../sql-reference/data-types/string.md) или подобным ему. +В этом формате все входные данные считываются в одно значение. Парсить можно только таблицу с одним полем типа [String](../sql-reference/data-types/string.md) или подобным ему. Результат выводится в бинарном виде без разделителей и экранирования. При выводе более одного значения формат неоднозначен и будет невозможно прочитать данные снова. Ниже приведено сравнение форматов `RawBLOB` и [TabSeparatedRaw](#tabseparatedraw). @@ -1272,7 +1271,7 @@ SELECT * FROM line_as_string; - строки представлены как длина в формате varint (unsigned [LEB128](https://en.wikipedia.org/wiki/LEB128)), а затем байты строки. При передаче на вход `RawBLOB` пустых данных, ClickHouse бросает исключение: - + ``` text Code: 108. DB::Exception: No data to insert ``` diff --git a/docs/ru/sql-reference/statements/alter/column.md b/docs/ru/sql-reference/statements/alter/column.md index a8fe0ccf642..7fe1b4e4e78 100644 --- a/docs/ru/sql-reference/statements/alter/column.md +++ b/docs/ru/sql-reference/statements/alter/column.md @@ -18,12 +18,12 @@ toc_title: "\u041c\u0430\u043d\u0438\u043f\u0443\u043b\u044f\u0446\u0438\u0438\u ## ADD COLUMN {#alter_add-column} ``` sql -ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after] +ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after | FIRST] ``` Добавляет в таблицу новый столбец с именем `name`, типом `type`, [кодеком](../create/table.md#codecs) `codec` и выражением для умолчания `default_expr` (смотрите раздел [Значения по умолчанию](../create/index.md#create-default-values)). -Если указано `IF NOT EXISTS`, запрос не будет возвращать ошибку, если столбец уже существует. Если указано `AFTER name_after` (имя другого столбца), то столбец добавляется (в список столбцов таблицы) после указанного. Иначе, столбец добавляется в конец таблицы. Обратите внимание, ClickHouse не позволяет добавлять столбцы в начало таблицы. Для цепочки действий, `name_after` может быть именем столбца, который добавляется в одном из предыдущих действий. +Если указано `IF NOT EXISTS`, запрос не будет возвращать ошибку, если столбец уже существует. Если указано `AFTER name_after` (имя другого столбца), то столбец добавляется (в список столбцов таблицы) после указанного. Если вы хотите добавить столбец в начало таблицы, используйте `FIRST`. Иначе столбец добавляется в конец таблицы. Для цепочки действий `name_after` может быть именем столбца, который добавляется в одном из предыдущих действий. Добавление столбца всего лишь меняет структуру таблицы, и не производит никаких действий с данными - соответствующие данные не появляются на диске после ALTER-а. 
При чтении из таблицы, если для какого-либо столбца отсутствуют данные, то он заполняется значениями по умолчанию (выполняя выражение по умолчанию, если такое есть, или нулями, пустыми строками). Также, столбец появляется на диске при слиянии кусков данных (см. [MergeTree](../../../sql-reference/statements/alter/index.md)). @@ -32,7 +32,23 @@ ADD COLUMN [IF NOT EXISTS] name [type] [default_expr] [codec] [AFTER name_after] Пример: ``` sql -ALTER TABLE visits ADD COLUMN browser String AFTER user_id +ALTER TABLE alter_test ADD COLUMN Added1 UInt32 FIRST; +ALTER TABLE alter_test ADD COLUMN Added2 UInt32 AFTER NestedColumn; +ALTER TABLE alter_test ADD COLUMN Added3 UInt32 AFTER ToDrop; +DESC alter_test FORMAT TSV; +``` + +``` text +Added1 UInt32 +CounterID UInt32 +StartDate Date +UserID UInt32 +VisitID UInt32 +NestedColumn.A Array(UInt8) +NestedColumn.S Array(String) +Added2 UInt32 +ToDrop UInt32 +Added3 UInt32 ``` ## DROP COLUMN {#alter_drop-column} @@ -88,7 +104,7 @@ ALTER TABLE visits COMMENT COLUMN browser 'Столбец показывает, ## MODIFY COLUMN {#alter_modify-column} ``` sql -MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL] +MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL] [AFTER name_after | FIRST] ``` Запрос изменяет следующие свойства столбца `name`: @@ -103,6 +119,8 @@ MODIFY COLUMN [IF EXISTS] name [type] [default_expr] [TTL] Если указано `IF EXISTS`, запрос не возвращает ошибку, если столбца не существует. +Запрос также может изменять порядок столбцов при помощи `FIRST | AFTER`, смотрите описание [ADD COLUMN](#alter_add-column). + При изменении типа, значения преобразуются так, как если бы к ним была применена функция [toType](../../../sql-reference/statements/alter/index.md). Если изменяется только выражение для умолчания, запрос не делает никакой сложной работы и выполняется мгновенно. Пример запроса: @@ -113,15 +131,7 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String) Изменение типа столбца - это единственное действие, которое выполняет сложную работу - меняет содержимое файлов с данными. Для больших таблиц, выполнение может занять длительное время. -Выполнение производится в несколько стадий: - -- подготовка временных (новых) файлов с изменёнными данными; -- переименование старых файлов; -- переименование временных (новых) файлов в старые; -- удаление старых файлов. - -Из них, длительной является только первая стадия. Если на этой стадии возникнет сбой, то данные не поменяются. -Если на одной из следующих стадий возникнет сбой, то данные будет можно восстановить вручную. За исключением случаев, когда старые файлы удалены из файловой системы, а данные для новых файлов не доехали на диск и потеряны. +Выполнение запроса ALTER атомарно. Запрос `ALTER` на изменение столбцов реплицируется. Соответствующие инструкции сохраняются в ZooKeeper, и затем каждая реплика их применяет. Все запросы `ALTER` выполняются в одном и том же порядке. Запрос ждёт выполнения соответствующих действий на всех репликах. Но при этом, запрос на изменение столбцов в реплицируемой таблице можно прервать, и все действия будут осуществлены асинхронно. @@ -137,4 +147,4 @@ ALTER TABLE visits MODIFY COLUMN browser Array(String) Для таблиц, которые не хранят данные самостоятельно (типа [Merge](../../../sql-reference/statements/alter/index.md) и [Distributed](../../../sql-reference/statements/alter/index.md)), `ALTER` всего лишь меняет структуру таблицы, но не меняет структуру подчинённых таблиц. 
Для примера, при ALTER-е таблицы типа `Distributed`, вам также потребуется выполнить запрос `ALTER` для таблиц на всех удалённых серверах. -[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/alter/column/) \ No newline at end of file +[Оригинальная статья](https://clickhouse.tech/docs/ru/query_language/alter/column/) diff --git a/docs/tools/README.md b/docs/tools/README.md index d4b6a2201f8..3c8862f1079 100644 --- a/docs/tools/README.md +++ b/docs/tools/README.md @@ -28,8 +28,8 @@ Follow the instructions on it's official website: = args.stable_releases_limit) and (lts_count >= args.lts_releases_limit): - break - - payload = (name, tag.get('tarball_url'), is_lts,) - logging.debug(payload) - if is_lts: - if lts_count < args.lts_releases_limit: - seen_lts[major_version] = payload - try: - del seen_stable[major_version] - except KeyError: - pass - lts_count += 1 - else: - if stable_count < args.stable_releases_limit: - if major_version not in seen_stable: - seen_stable[major_version] = payload - stable_count += 1 - - logging.debug( - f'Stables: {stable_count}/{args.stable_releases_limit} LTS: {lts_count}/{args.lts_releases_limit}' - ) - else: - logging.fatal('Unexpected GitHub response: %s', str(candidates)) - sys.exit(1) - - logging.info('Found LTS releases: %s', ', '.join(list(seen_lts.keys()))) - logging.info('Found stable releases: %s', ', '.join(list(seen_stable.keys()))) - return sorted(list(seen_lts.items()) + list(seen_stable.items())) - - -def process_release(args, callback, release): - name, (full_name, tarball_url, is_lts,) = release - logging.info(f'Building docs for {full_name}') - buf = io.BytesIO(requests.get(tarball_url).content) - tar = tarfile.open(mode='r:gz', fileobj=buf) - with util.temp_dir() as base_dir: - tar.extractall(base_dir) - args = copy.copy(args) - args.version_prefix = name - args.is_stable_release = True - args.docs_dir = os.path.join(base_dir, os.listdir(base_dir)[0], 'docs') - callback(args) - - -def build_releases(args, callback): - for release in args.stable_releases: - process_release(args, callback, release) - - def get_events(args): events = [] skip = True @@ -118,12 +36,7 @@ def get_events(args): if __name__ == '__main__': - class DummyArgs(object): - lts_releases_limit = 1 - stable_releases_limit = 3 logging.basicConfig( level=logging.DEBUG, stream=sys.stderr ) - for item in choose_latest_releases(DummyArgs()): - print(item) diff --git a/docs/tools/mdx_clickhouse.py b/docs/tools/mdx_clickhouse.py index b4c255066ff..387ade9bbdf 100755 --- a/docs/tools/mdx_clickhouse.py +++ b/docs/tools/mdx_clickhouse.py @@ -145,24 +145,9 @@ class PatchedMacrosPlugin(macros.plugin.MacrosPlugin): if self.skip_git_log: return markdown src_path = page.file.abs_src_path - try: - git_log = subprocess.check_output(f'git log --follow --date=iso8601 "{src_path}"', shell=True) - except subprocess.CalledProcessError: - return markdown - max_date = None - min_date = None - for line in git_log.decode('utf-8').split('\n'): - if line.startswith('Date:'): - line = line.replace('Date:', '').strip().replace(' ', 'T', 1).replace(' ', '') - current_date = datetime.datetime.fromisoformat(line[:-2] + ':' + line[-2:]) - if (not max_date) or current_date > max_date: - max_date = current_date - if (not min_date) or current_date < min_date: - min_date = current_date - if min_date: - page.meta['published_date'] = min_date - if max_date: - page.meta['modified_date'] = max_date + + # There was a code that determined the minimum and maximum modification dates for a page. 
+ # It was removed due to being obnoxiously slow. return markdown def render_impl(self, markdown): diff --git a/docs/tools/purge_cache_for_changed_files.py b/docs/tools/purge_cache_for_changed_files.py deleted file mode 100644 index 6cfc9d18a57..00000000000 --- a/docs/tools/purge_cache_for_changed_files.py +++ /dev/null @@ -1,78 +0,0 @@ -#!/usr/bin/env python3 - -import subprocess -import requests -import os -import time - -FNAME_START = "+++" - -CLOUDFLARE_URL = "https://api.cloudflare.com/client/v4/zones/4fc6fb1d46e87851605aa7fa69ca6fe0/purge_cache" - -# we have changes in revision and commit sha on all pages -# so such changes have to be ignored -MIN_CHANGED_WORDS = 4 - - -def collect_changed_files(): - proc = subprocess.Popen("git diff HEAD~1 --word-diff=porcelain | grep -e '^+[^+]\|^\-[^\-]\|^\+\+\+'", stdout=subprocess.PIPE, shell=True) - changed_files = [] - current_file_name = "" - changed_words = [] - while True: - line = proc.stdout.readline().decode("utf-8").strip() - if not line: - break - if FNAME_START in line: - if changed_words: - if len(changed_words) > MIN_CHANGED_WORDS: - changed_files.append(current_file_name) - changed_words = [] - current_file_name = line[6:] - else: - changed_words.append(line) - return changed_files - - -def filter_and_transform_changed_files(changed_files, base_domain): - result = [] - for f in changed_files: - if f.endswith(".html"): - result.append(base_domain + f.replace("index.html", "")) - return result - - -def convert_to_dicts(changed_files, batch_size): - result = [] - current_batch = {"files": []} - for f in changed_files: - if len(current_batch["files"]) >= batch_size: - result.append(current_batch) - current_batch = {"files": []} - current_batch["files"].append(f) - - if current_batch["files"]: - result.append(current_batch) - return result - - -def post_data(prepared_batches, token): - headers = {"Authorization": "Bearer {}".format(token)} - for batch in prepared_batches: - print(("Pugring cache for", ", ".join(batch["files"]))) - response = requests.post(CLOUDFLARE_URL, json=batch, headers=headers) - response.raise_for_status() - time.sleep(3) - - -if __name__ == "__main__": - token = os.getenv("CLOUDFLARE_TOKEN") - if not token: - raise Exception("Env variable CLOUDFLARE_TOKEN is empty") - base_domain = os.getenv("BASE_DOMAIN", "https://content.clickhouse.tech/") - changed_files = collect_changed_files() - print(("Found", len(changed_files), "changed files")) - filtered_files = filter_and_transform_changed_files(changed_files, base_domain) - print(("Files rest after filtering", len(filtered_files))) - prepared_batches = convert_to_dicts(filtered_files, 25) - post_data(prepared_batches, token) diff --git a/docs/tools/redirects.py b/docs/tools/redirects.py index 2f5ebc8a620..20e3ec7aa6f 100644 --- a/docs/tools/redirects.py +++ b/docs/tools/redirects.py @@ -30,9 +30,8 @@ def build_redirect_html(args, base_prefix, lang, output_dir, from_path, to_path) output_dir, lang, from_path.replace('/index.md', '/index.html').replace('.md', '/index.html') ) - version_prefix = f'/{args.version_prefix}/' if args.version_prefix else '/' target_path = to_path.replace('/index.md', '/').replace('.md', '/') - to_url = f'/{base_prefix}{version_prefix}{lang}/{target_path}' + to_url = f'/{base_prefix}/{lang}/{target_path}' to_url = to_url.strip() write_redirect_html(out_path, to_url) diff --git a/docs/tools/release.sh b/docs/tools/release.sh index e539e01e808..389b63ace7f 100755 --- a/docs/tools/release.sh +++ b/docs/tools/release.sh @@ -7,19 +7,22 @@ 
PUBLISH_DIR="${BASE_DIR}/../publish" BASE_DOMAIN="${BASE_DOMAIN:-content.clickhouse.tech}" GIT_TEST_URI="${GIT_TEST_URI:-git@github.com:ClickHouse/clickhouse-website-content.git}" GIT_PROD_URI="git@github.com:ClickHouse/clickhouse-website-content.git" -EXTRA_BUILD_ARGS="${EXTRA_BUILD_ARGS:---enable-stable-releases --minify --verbose}" -HISTORY_SIZE="${HISTORY_SIZE:-5}" +EXTRA_BUILD_ARGS="${EXTRA_BUILD_ARGS:---minify --verbose}" if [[ -z "$1" ]] then source "${BASE_DIR}/venv/bin/activate" python3 "${BASE_DIR}/build.py" ${EXTRA_BUILD_ARGS} - rm -rf "${PUBLISH_DIR}" || true - git clone "${GIT_TEST_URI}" "${PUBLISH_DIR}" - cd "${PUBLISH_DIR}" + rm -rf "${PUBLISH_DIR}" + mkdir "${PUBLISH_DIR}" && cd "${PUBLISH_DIR}" + + # Will make a repository with website content as the only commit. + git init + git remote add origin "${GIT_TEST_URI}" git config user.email "robot-clickhouse@yandex-team.ru" git config user.name "robot-clickhouse" - git rm -rf * + + # Add files. cp -R "${BUILD_DIR}"/* . echo -n "${BASE_DOMAIN}" > CNAME echo -n "" > README.md @@ -27,16 +30,16 @@ then cp "${BASE_DIR}/../../LICENSE" . git add * git add ".nojekyll" - git commit -a -m "add new release at $(date)" - NEW_ROOT_COMMIT=$(git rev-parse "HEAD~${HISTORY_SIZE}") - git checkout --orphan temp "${NEW_ROOT_COMMIT}" - git commit -m "root commit" - git rebase --onto temp "${NEW_ROOT_COMMIT}" master - git branch -D temp - git push -f origin master + + # Push to GitHub rewriting the existing contents. + git commit --quiet -m "Add new release at $(date)" + git push --force origin master + if [[ ! -z "${CLOUDFLARE_TOKEN}" ]] then sleep 1m - python3 "${BASE_DIR}/purge_cache_for_changed_files.py" + # https://api.cloudflare.com/#zone-purge-files-by-cache-tags,-host-or-prefix + POST_DATA='{"hosts":["content.clickhouse.tech"]}' + curl -X POST "https://api.cloudflare.com/client/v4/zones/4fc6fb1d46e87851605aa7fa69ca6fe0/purge_cache" -H "Authorization: Bearer ${CLOUDFLARE_TOKEN}" -H "Content-Type:application/json" --data "${POST_DATA}" fi fi diff --git a/docs/tools/single_page.py b/docs/tools/single_page.py index 004409fe281..05d50e768e2 100644 --- a/docs/tools/single_page.py +++ b/docs/tools/single_page.py @@ -111,10 +111,7 @@ def build_single_page_version(lang, args, nav, cfg): if not args.test_only: mkdocs.commands.build.build(cfg) - if args.version_prefix: - single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, args.version_prefix, lang, 'single') - else: - single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, lang, 'single') + single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, lang, 'single') if os.path.exists(single_page_output_path): shutil.rmtree(single_page_output_path) @@ -157,10 +154,9 @@ def build_single_page_version(lang, args, nav, cfg): if args.save_raw_single_page: shutil.copytree(test_dir, args.save_raw_single_page) - if not args.version_prefix: # maybe enable in future - logging.info(f'Running tests for {lang}') - test.test_single_page( - os.path.join(test_dir, 'single', 'index.html'), lang) + logging.info(f'Running tests for {lang}') + test.test_single_page( + os.path.join(test_dir, 'single', 'index.html'), lang) if not args.skip_pdf: single_page_index_html = os.path.join(test_dir, 'single', 'index.html') diff --git a/docs/tools/translate/translate.py b/docs/tools/translate/translate.py index 343ab09f12a..605ff78f424 100755 --- a/docs/tools/translate/translate.py +++ b/docs/tools/translate/translate.py @@ -11,8 +11,6 @@ import googletrans import requests 
import yaml -import typograph_ru - translator = googletrans.Translator() default_target_language = os.environ.get('TARGET_LANGUAGE', 'ru') @@ -25,8 +23,6 @@ def translate_impl(text, target_language=None): target_language = target_language or default_target_language if target_language == 'en': return text - elif target_language == 'typograph_ru': - return typograph_ru.typograph(text) elif is_yandex: text = text.replace('‘', '\'') text = text.replace('’', '\'') @@ -59,25 +55,10 @@ def translate(text, target_language=None): ) -def translate_toc(root, lang): - global is_yandex - is_yandex = True - if isinstance(root, dict): - result = [] - for key, value in root.items(): - key = translate(key, lang) if key != 'hidden' and not key.isupper() else key - result.append((key, translate_toc(value, lang),)) - return dict(result) - elif isinstance(root, list): - return [translate_toc(item, lang) for item in root] - elif isinstance(root, str): - return root - - def translate_po(): import babel.messages.pofile base_dir = os.path.join(os.path.dirname(__file__), '..', '..', '..', 'website', 'locale') - for lang in ['en', 'zh', 'es', 'fr', 'ru', 'ja', 'tr', 'fa']: + for lang in ['en', 'zh', 'es', 'fr', 'ru', 'ja']: po_path = os.path.join(base_dir, lang, 'LC_MESSAGES', 'messages.po') with open(po_path, 'r') as f: po_file = babel.messages.pofile.read_po(f, locale=lang, domain='messages') diff --git a/docs/tools/website.py b/docs/tools/website.py index 4cce69bd869..6927fbd87bb 100644 --- a/docs/tools/website.py +++ b/docs/tools/website.py @@ -232,6 +232,7 @@ def minify_website(args): f"'{args.output_dir}/docs/en/**/*.html' '{args.website_dir}/js/**/*.js' > {css_out}" else: command = f'cat {css_in} > {css_out}' + logging.info(command) output = subprocess.check_output(command, shell=True) logging.debug(output) diff --git a/docs/zh/sql-reference/data-types/datetime64.md b/docs/zh/sql-reference/data-types/datetime64.md index 2442972965a..46e8e9a5fa4 100644 --- a/docs/zh/sql-reference/data-types/datetime64.md +++ b/docs/zh/sql-reference/data-types/datetime64.md @@ -7,9 +7,9 @@ toc_title: DateTime64 # Datetime64 {#data_type-datetime64} -允许存储时间instant间,可以表示为日历日期和一天中的时间,具有定义的亚秒精度 +此类型允许以日期(date)加时间(time)的形式来存储一个时刻的时间值,具有定义的亚秒精度 -刻度尺寸(精度):10-精度 秒 +时间刻度大小(精度):10-精度 秒 语法: @@ -17,11 +17,11 @@ toc_title: DateTime64 DateTime64(precision, [timezone]) ``` -在内部,存储数据作为一些 ‘ticks’ 自纪元开始(1970-01-01 00:00:00UTC)作为Int64. 刻度分辨率由precision参数确定。 此外,该 `DateTime64` 类型可以存储时区是相同的整个列,影响如何的值 `DateTime64` 类型值以文本格式显示,以及如何解析指定为字符串的值 (‘2020-01-01 05:00:01.000’). 时区不存储在表的行中(或resultset中),而是存储在列元数据中。 查看详细信息 [日期时间](datetime.md). +在内部,此类型以Int64类型将数据存储为自Linux纪元开始(1970-01-01 00:00:00UTC)的时间刻度数(ticks)。时间刻度的分辨率由precision参数确定。此外,`DateTime64` 类型可以像存储其他数据列一样存储时区信息,时区会影响 `DateTime64` 类型的值如何以文本格式显示,以及如何解析以字符串形式指定的时间数据 (‘2020-01-01 05:00:01.000’)。时区不存储在表的行中(也不在resultset中),而是存储在列的元数据中。详细信息请参考 [DateTime](datetime.md) 数据类型. -## 例 {#examples} +## 示例 {#examples} -**1.** 创建一个表 `DateTime64`-输入列并将数据插入其中: +**1.** 创建一个具有 `DateTime64` 类型列的表,并向其中插入数据: ``` sql CREATE TABLE dt @@ -47,10 +47,10 @@ SELECT * FROM dt └─────────────────────────┴──────────┘ ``` -- 将日期时间作为整数插入时,将其视为适当缩放的Unix时间戳(UTC)。 `1546300800000` (精度为3)表示 `'2019-01-01 00:00:00'` UTC. 然而,作为 `timestamp` 列有 `Europe/Moscow` (UTC+3)指定的时区,当输出为字符串时,该值将显示为 `'2019-01-01 03:00:00'` -- 当插入字符串值作为日期时间时,它被视为处于列时区。 `'2019-01-01 00:00:00'` 将被视为 `Europe/Moscow` 时区并存储为 `1546290000000`. +- 将日期时间作为integer类型插入时,它会被视为适当缩放的Unix时间戳(UTC)。`1546300800000` (精度为3)表示 `'2019-01-01 00:00:00'` UTC. 
不过,因为 `timestamp` 列指定了 `Europe/Moscow` (UTC+3)的时区,当作为字符串输出时,它将显示为 `'2019-01-01 03:00:00'` +- 当把字符串作为日期时间插入时,它会被赋予时区信息。 `'2019-01-01 00:00:00'` 将被认为处于 `Europe/Moscow` 时区并被存储为 `1546290000000`. -**2.** 过滤 `DateTime64` 值 +**2.** 过滤 `DateTime64` 类型的值 ``` sql SELECT * FROM dt WHERE timestamp = toDateTime64('2019-01-01 00:00:00', 3, 'Europe/Moscow') @@ -62,9 +62,9 @@ SELECT * FROM dt WHERE timestamp = toDateTime64('2019-01-01 00:00:00', 3, 'Europ └─────────────────────────┴──────────┘ ``` -不像 `DateTime`, `DateTime64` 值不转换为 `String` 自动 +与 `DateTime` 不同, `DateTime64` 类型的值不会自动从 `String` 类型的值转换过来 -**3.** 获取一个时区 `DateTime64`-类型值: +**3.** 获取 `DateTime64` 类型值的时区信息: ``` sql SELECT toDateTime64(now(), 3, 'Europe/Moscow') AS column, toTypeName(column) AS x @@ -97,8 +97,9 @@ FROM dt - [类型转换函数](../../sql-reference/functions/type-conversion-functions.md) - [用于处理日期和时间的函数](../../sql-reference/functions/date-time-functions.md) - [用于处理数组的函数](../../sql-reference/functions/array-functions.md) -- [该 `date_time_input_format` 设置](../../operations/settings/settings.md#settings-date_time_input_format) -- [该 `timezone` 服务器配置参数](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) -- [使用日期和时间的操作员](../../sql-reference/operators/index.md#operators-datetime) +- [`date_time_input_format` 配置](../../operations/settings/settings.md#settings-date_time_input_format) +- [`date_time_output_format` 配置](../../operations/settings/settings.md#settings-date_time_output_format) +- [`timezone` 服务器配置参数](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone) +- [用于处理日期和时间的算子](../../sql-reference/operators/index.md#operators-datetime) - [`Date` 数据类型](date.md) - [`DateTime` 数据类型](datetime.md) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index 9d0227a51e3..a8c4f070bea 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -949,6 +949,11 @@ private: TestHint test_hint(test_mode, all_queries_text); if (test_hint.clientError() || test_hint.serverError()) processTextAsSingleQuery("SET send_logs_level = 'none'"); + + // Echo all queries if asked; makes for a more readable reference + // file. + if (test_hint.echoQueries()) + echo_queries = true; } /// Several queries separated by ';'. 
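The `Client.cpp` change above reads a new `echo` flag from the test hint parsed out of the full query text and enables query echoing, so each query appears next to its result in the test's reference output. As a rough sketch of how a test might use this, assuming (as the snippet suggests) that the hint comment in the query text enables echoing for all queries in the file; the file name is hypothetical:

``` sql
-- 01234_echo_hint_example.sql (hypothetical test file, run in client test mode)
-- { echo }
SELECT 1;
SELECT number FROM numbers(3);
```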
diff --git a/programs/client/QueryFuzzer.cpp b/programs/client/QueryFuzzer.cpp index a8e32d87db5..53ede4a3d92 100644 --- a/programs/client/QueryFuzzer.cpp +++ b/programs/client/QueryFuzzer.cpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -28,6 +29,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int TOO_DEEP_RECURSION; +} + Field QueryFuzzer::getRandomField(int type) { switch (type) @@ -205,14 +211,88 @@ void QueryFuzzer::replaceWithTableLike(ASTPtr & ast) ast = new_ast; } -void QueryFuzzer::fuzzColumnLikeExpressionList(ASTPtr ast) +void QueryFuzzer::fuzzOrderByElement(ASTOrderByElement * elem) +{ + switch (fuzz_rand() % 10) + { + case 0: + elem->direction = -1; + break; + case 1: + elem->direction = 1; + break; + case 2: + elem->nulls_direction = -1; + elem->nulls_direction_was_explicitly_specified = true; + break; + case 3: + elem->nulls_direction = 1; + elem->nulls_direction_was_explicitly_specified = true; + break; + case 4: + elem->nulls_direction = elem->direction; + elem->nulls_direction_was_explicitly_specified = false; + break; + default: + // do nothing + break; + } +} + +void QueryFuzzer::fuzzOrderByList(IAST * ast) { if (!ast) { return; } - auto * impl = assert_cast(ast.get()); + auto * list = assert_cast(ast); + + // Remove element + if (fuzz_rand() % 50 == 0 && list->children.size() > 1) + { + // Don't remove last element -- this leads to questionable + // constructs such as empty select. + list->children.erase(list->children.begin() + + fuzz_rand() % list->children.size()); + } + + // Add element + if (fuzz_rand() % 50 == 0) + { + auto pos = list->children.empty() + ? list->children.begin() + : list->children.begin() + fuzz_rand() % list->children.size(); + auto col = getRandomColumnLike(); + if (col) + { + auto elem = std::make_shared(); + elem->children.push_back(col); + elem->direction = 1; + elem->nulls_direction = 1; + elem->nulls_direction_was_explicitly_specified = false; + elem->with_fill = false; + + list->children.insert(pos, elem); + } + else + { + fprintf(stderr, "no random col!\n"); + } + } + + // We don't have to recurse here to fuzz the children, this is handled by + // the generic recursion into IAST.children. +} + +void QueryFuzzer::fuzzColumnLikeExpressionList(IAST * ast) +{ + if (!ast) + { + return; + } + + auto * impl = assert_cast(ast); // Remove element if (fuzz_rand() % 50 == 0 && impl->children.size() > 1) @@ -252,11 +332,44 @@ void QueryFuzzer::fuzz(ASTs & asts) } } +struct ScopedIncrement +{ + size_t & counter; + + explicit ScopedIncrement(size_t & counter_) : counter(counter_) { ++counter; } + ~ScopedIncrement() { --counter; } +}; + void QueryFuzzer::fuzz(ASTPtr & ast) { if (!ast) return; + // Check for exceeding max depth. + ScopedIncrement depth_increment(current_ast_depth); + if (current_ast_depth > 500) + { + // The AST is too deep (see the comment for current_ast_depth). Throw + // an exception to fail fast and not use this query as an etalon, or we'll + // end up in a very slow and useless loop. It also makes sense to set it + // lower than the default max parse depth on the server (1000), so that + // we don't get the useless error about parse depth from the server either. + throw Exception(ErrorCodes::TOO_DEEP_RECURSION, + "AST depth exceeded while fuzzing ({})", current_ast_depth); + } + + // Check for loops. + auto [_, inserted] = debug_visited_nodes.insert(ast.get()); + if (!inserted) + { + fmt::print(stderr, "The AST node '{}' was already visited before." 
+ " Depth {}, {} visited nodes, current top AST:\n{}\n", + static_cast(ast.get()), current_ast_depth, + debug_visited_nodes.size(), (*debug_top_ast)->dumpTree()); + assert(false); + } + + // The fuzzing. if (auto * with_union = typeid_cast(ast.get())) { fuzz(with_union->list_of_selects); @@ -281,17 +394,28 @@ void QueryFuzzer::fuzz(ASTPtr & ast) { fuzz(expr_list->children); } + else if (auto * order_by_element = typeid_cast(ast.get())) + { + fuzzOrderByElement(order_by_element); + } else if (auto * fn = typeid_cast(ast.get())) { - fuzzColumnLikeExpressionList(fn->arguments); - fuzzColumnLikeExpressionList(fn->parameters); + fuzzColumnLikeExpressionList(fn->arguments.get()); + fuzzColumnLikeExpressionList(fn->parameters.get()); + + if (fn->is_window_function) + { + fuzzColumnLikeExpressionList(fn->window_partition_by); + fuzzOrderByList(fn->window_order_by); + } fuzz(fn->children); } else if (auto * select = typeid_cast(ast.get())) { - fuzzColumnLikeExpressionList(select->select()); - fuzzColumnLikeExpressionList(select->groupBy()); + fuzzColumnLikeExpressionList(select->select().get()); + fuzzColumnLikeExpressionList(select->groupBy().get()); + fuzzOrderByList(select->orderBy().get()); fuzz(select->children); } @@ -416,6 +540,10 @@ void QueryFuzzer::collectFuzzInfoRecurse(const ASTPtr ast) void QueryFuzzer::fuzzMain(ASTPtr & ast) { + current_ast_depth = 0; + debug_visited_nodes.clear(); + debug_top_ast = * + collectFuzzInfoMain(ast); fuzz(ast); diff --git a/programs/client/QueryFuzzer.h b/programs/client/QueryFuzzer.h index 0c7cec8dc84..e9d3f150283 100644 --- a/programs/client/QueryFuzzer.h +++ b/programs/client/QueryFuzzer.h @@ -12,6 +12,9 @@ namespace DB { +class ASTExpressionList; +class ASTOrderByElement; + /* * This is an AST-based query fuzzer that makes random modifications to query * AST, changing numbers, list of columns, functions, etc. It remembers part of @@ -23,6 +26,13 @@ struct QueryFuzzer { pcg64 fuzz_rand{randomSeed()}; + // We add elements to expression lists with fixed probability. Some elements + // are so large, that the expected number of elements we add to them is + // one or higher, hence this process might never finish. Put some limit on the + // total depth of AST to prevent this. + // This field is reset for each fuzzMain() call. + size_t current_ast_depth = 0; + // These arrays hold parts of queries that we can substitute into the query // we are currently fuzzing. We add some part from each new query we are asked // to fuzz, and keep this state between queries, so the fuzzing output becomes @@ -36,6 +46,12 @@ struct QueryFuzzer std::unordered_map table_like_map; std::vector table_like; + // Some debug fields for detecting problematic ASTs with loops. + // These are reset for each fuzzMain call. + std::unordered_set debug_visited_nodes; + ASTPtr * debug_top_ast; + + // This is the only function you have to call -- it will modify the passed // ASTPtr to point to new AST with some random changes. 
void fuzzMain(ASTPtr & ast); @@ -46,7 +62,9 @@ struct QueryFuzzer ASTPtr getRandomColumnLike(); void replaceWithColumnLike(ASTPtr & ast); void replaceWithTableLike(ASTPtr & ast); - void fuzzColumnLikeExpressionList(ASTPtr ast); + void fuzzOrderByElement(ASTOrderByElement * elem); + void fuzzOrderByList(IAST * ast); + void fuzzColumnLikeExpressionList(IAST * ast); void fuzz(ASTs & asts); void fuzz(ASTPtr & ast); void collectFuzzInfoMain(const ASTPtr ast); diff --git a/programs/client/TestHint.h b/programs/client/TestHint.h index 65666f4304c..f1998588261 100644 --- a/programs/client/TestHint.h +++ b/programs/client/TestHint.h @@ -19,6 +19,7 @@ namespace ErrorCodes /// Checks expected server and client error codes in testmode. /// To enable it add special comment after the query: "-- { serverError 60 }" or "-- { clientError 20 }". +/// Also you can enable echoing all queries by writing "-- { echo }". class TestHint { public: @@ -84,12 +85,14 @@ public: int serverError() const { return server_error; } int clientError() const { return client_error; } + bool echoQueries() const { return echo; } private: bool enabled = false; const String & query; int server_error = 0; int client_error = 0; + bool echo = false; void parse(const String & hint) { @@ -107,6 +110,8 @@ private: ss >> server_error; else if (item == "clientError") ss >> client_error; + else if (item == "echo") + echo = true; } } diff --git a/programs/install/Install.cpp b/programs/install/Install.cpp index 9e3942e126d..2fba335bc66 100644 --- a/programs/install/Install.cpp +++ b/programs/install/Install.cpp @@ -10,6 +10,10 @@ #include #endif +#if defined(OS_DARWIN) + #include +#endif + #include #include #include @@ -147,9 +151,24 @@ int mainEntryClickHouseInstall(int argc, char ** argv) try { /// We need to copy binary to the binary directory. - /// The binary is currently run. We need to obtain its path from procfs. + /// The binary is currently run. We need to obtain its path from procfs (on Linux). 
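The `Install.cpp` hunk that follows adds a macOS branch, since `/proc/self/exe` exists only on Linux. As a hedged aside (a standalone sketch, not part of the diff), this is the usual two-call `_NSGetExecutablePath` pattern that branch relies on: the first call with a too-small buffer fails but reports the required size, the second call fills the buffer.

``` cpp
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

#if defined(__APPLE__)
#include <mach-o/dyld.h>
#endif

// On non-Apple platforms this sketch just falls back to the /proc/self/exe
// convention mentioned in the comment above.
int main()
{
#if defined(__APPLE__)
    uint32_t size = 0;
    _NSGetExecutablePath(nullptr, &size);       // returns -1 and sets `size` to the required length
    std::vector<char> buf(size);
    if (_NSGetExecutablePath(buf.data(), &size) != 0)
    {
        std::fprintf(stderr, "cannot obtain executable path\n");
        return 1;
    }
    std::string self_path(buf.data());
#else
    std::string self_path = "/proc/self/exe";   // Linux: a symlink to the running binary
#endif
    std::printf("%s\n", self_path.c_str());
    return 0;
}
```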
+#if defined(OS_DARWIN) + uint32_t path_length = 0; + _NSGetExecutablePath(nullptr, &path_length); + if (path_length <= 1) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot obtain path to the binary"); + + std::string path(path_length, std::string::value_type()); + auto res = _NSGetExecutablePath(&path[0], &path_length); + if (res != 0) + throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot obtain path to the binary"); + + fs::path binary_self_path(path); +#else fs::path binary_self_path = "/proc/self/exe"; +#endif + if (!fs::exists(binary_self_path)) throw Exception(ErrorCodes::FILE_DOESNT_EXIST, "Cannot obtain path to the binary from {}, file doesn't exist", binary_self_path.string()); diff --git a/programs/main.cpp b/programs/main.cpp index fad2d35f3bd..dee02c55832 100644 --- a/programs/main.cpp +++ b/programs/main.cpp @@ -308,53 +308,11 @@ void checkRequiredInstructions() } } -#ifdef __linux__ -/// clickhouse uses jemalloc as a production allocator -/// and jemalloc relies on working MADV_DONTNEED, -/// which doesn't work under qemu -/// -/// but do this only under for linux, since only it return zeroed pages after MADV_DONTNEED -/// (and jemalloc assumes this too, see contrib/jemalloc-cmake/include_linux_x86_64/jemalloc/internal/jemalloc_internal_defs.h.in) -void checkRequiredMadviseFlags() -{ - size_t size = 1 << 16; - void * addr = mmap(nullptr, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); - if (addr == MAP_FAILED) - { - writeError("Can not mmap pages for MADV_DONTNEED check\n"); - _Exit(1); - } - memset(addr, 'A', size); - - if (!madvise(addr, size, MADV_DONTNEED)) - { - /// Suboptimal, but should be simple. - for (size_t i = 0; i < size; ++i) - { - if (reinterpret_cast(addr)[i] != 0) - { - writeError("MADV_DONTNEED does not zeroed page. 
jemalloc will be broken\n"); - _Exit(1); - } - } - } - - if (munmap(addr, size)) - { - writeError("Can not munmap pages for MADV_DONTNEED check\n"); - _Exit(1); - } -} -#endif - struct Checker { Checker() { checkRequiredInstructions(); -#ifdef __linux__ - checkRequiredMadviseFlags(); -#endif } } checker; diff --git a/programs/odbc-bridge/ODBCBridge.cpp b/programs/odbc-bridge/ODBCBridge.cpp index 3b26e192a07..9deefaf7895 100644 --- a/programs/odbc-bridge/ODBCBridge.cpp +++ b/programs/odbc-bridge/ODBCBridge.cpp @@ -89,7 +89,7 @@ void ODBCBridge::defineOptions(Poco::Util::OptionSet & options) { options.addOption(Poco::Util::Option("http-port", "", "port to listen").argument("http-port", true).binding("http-port")); options.addOption( - Poco::Util::Option("listen-host", "", "hostname to listen, default localhost").argument("listen-host").binding("listen-host")); + Poco::Util::Option("listen-host", "", "hostname or address to listen, default 127.0.0.1").argument("listen-host").binding("listen-host")); options.addOption( Poco::Util::Option("http-timeout", "", "http timeout for socket, default 1800").argument("http-timeout").binding("http-timeout")); @@ -161,7 +161,7 @@ void ODBCBridge::initialize(Application & self) BaseDaemon::logRevision(); log = &logger(); - hostname = config().getString("listen-host", "localhost"); + hostname = config().getString("listen-host", "127.0.0.1"); port = config().getUInt("http-port"); if (port > 0xFFFF) throw Exception("Out of range 'http-port': " + std::to_string(port), ErrorCodes::ARGUMENT_OUT_OF_BOUND); diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index e246be6c343..76765c0374c 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -103,6 +104,12 @@ namespace CurrentMetrics int mainEntryClickHouseServer(int argc, char ** argv) { DB::Server app; + + /// Do not fork separate process from watchdog if we attached to terminal. + /// Otherwise it breaks gdb usage. 
+ if (argc > 0 && !isatty(STDIN_FILENO) && !isatty(STDOUT_FILENO) && !isatty(STDERR_FILENO)) + app.shouldSetupWatchdog(argv[0]); + try { return app.run(argc, argv); @@ -366,6 +373,7 @@ void checkForUsersNotInMainConfig( int Server::main(const std::vector & /*args*/) { Poco::Logger * log = &logger(); + UseSSL use_ssl; MainThreadStatus::getInstance(); @@ -770,7 +778,7 @@ int Server::main(const std::vector & /*args*/) http_params->setTimeout(settings.http_receive_timeout); http_params->setKeepAliveTimeout(keep_alive_timeout); - std::vector servers_to_start_before_tables; + auto servers_to_start_before_tables = std::make_shared>(); std::vector listen_hosts = DB::getMultipleValuesFromConfig(config(), "", "listen_host"); @@ -792,7 +800,7 @@ int Server::main(const std::vector & /*args*/) auto address = socketBindListen(socket, listen_host, port); socket.setReceiveTimeout(settings.receive_timeout); socket.setSendTimeout(settings.send_timeout); - servers_to_start_before_tables.emplace_back( + servers_to_start_before_tables->emplace_back( port_name, std::make_unique( new TestKeeperTCPHandlerFactory(*this), server_pool, socket, new Poco::Net::TCPServerParams)); @@ -801,7 +809,7 @@ int Server::main(const std::vector & /*args*/) }); } - for (auto & server : servers_to_start_before_tables) + for (auto & server : *servers_to_start_before_tables) server.start(); SCOPE_EXIT({ @@ -816,11 +824,11 @@ int Server::main(const std::vector & /*args*/) LOG_DEBUG(log, "Shut down storages."); - if (!servers_to_start_before_tables.empty()) + if (!servers_to_start_before_tables->empty()) { LOG_DEBUG(log, "Waiting for current connections to servers for tables to finish."); int current_connections = 0; - for (auto & server : servers_to_start_before_tables) + for (auto & server : *servers_to_start_before_tables) { server.stop(); current_connections += server.currentConnections(); @@ -832,7 +840,7 @@ int Server::main(const std::vector & /*args*/) LOG_INFO(log, "Closed all listening sockets."); if (current_connections > 0) - current_connections = waitServersToFinish(servers_to_start_before_tables, config().getInt("shutdown_wait_unfinished", 5)); + current_connections = waitServersToFinish(*servers_to_start_before_tables, config().getInt("shutdown_wait_unfinished", 5)); if (current_connections) LOG_INFO(log, "Closed connections to servers for tables. But {} remain. Probably some tables of other users cannot finish their connections after context shutdown.", current_connections); @@ -978,7 +986,7 @@ int Server::main(const std::vector & /*args*/) LOG_INFO(log, "TaskStats is not implemented for this OS. IO accounting will be disabled."); #endif - std::vector servers; + auto servers = std::make_shared>(); { /// This object will periodically calculate some metrics. 
AsynchronousMetrics async_metrics( @@ -996,7 +1004,7 @@ int Server::main(const std::vector & /*args*/) socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); - servers.emplace_back(port_name, std::make_unique( + servers->emplace_back(port_name, std::make_unique( createHandlerFactory(*this, async_metrics, "HTTPHandler-factory"), server_pool, socket, http_params)); LOG_INFO(log, "Listening for http://{}", address.toString()); @@ -1011,7 +1019,7 @@ int Server::main(const std::vector & /*args*/) auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); - servers.emplace_back(port_name, std::make_unique( + servers->emplace_back(port_name, std::make_unique( createHandlerFactory(*this, async_metrics, "HTTPSHandler-factory"), server_pool, socket, http_params)); LOG_INFO(log, "Listening for https://{}", address.toString()); @@ -1030,7 +1038,7 @@ int Server::main(const std::vector & /*args*/) auto address = socketBindListen(socket, listen_host, port); socket.setReceiveTimeout(settings.receive_timeout); socket.setSendTimeout(settings.send_timeout); - servers.emplace_back(port_name, std::make_unique( + servers->emplace_back(port_name, std::make_unique( new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ false), server_pool, socket, @@ -1047,7 +1055,7 @@ int Server::main(const std::vector & /*args*/) auto address = socketBindListen(socket, listen_host, port); socket.setReceiveTimeout(settings.receive_timeout); socket.setSendTimeout(settings.send_timeout); - servers.emplace_back(port_name, std::make_unique( + servers->emplace_back(port_name, std::make_unique( new TCPHandlerFactory(*this, /* secure */ false, /* proxy protocol */ true), server_pool, socket, @@ -1065,7 +1073,7 @@ int Server::main(const std::vector & /*args*/) auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); socket.setReceiveTimeout(settings.receive_timeout); socket.setSendTimeout(settings.send_timeout); - servers.emplace_back(port_name, std::make_unique( + servers->emplace_back(port_name, std::make_unique( new TCPHandlerFactory(*this, /* secure */ true, /* proxy protocol */ false), server_pool, socket, @@ -1086,7 +1094,7 @@ int Server::main(const std::vector & /*args*/) auto address = socketBindListen(socket, listen_host, port); socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); - servers.emplace_back(port_name, std::make_unique( + servers->emplace_back(port_name, std::make_unique( createHandlerFactory(*this, async_metrics, "InterserverIOHTTPHandler-factory"), server_pool, socket, http_params)); LOG_INFO(log, "Listening for replica communication (interserver): http://{}", address.toString()); @@ -1100,7 +1108,7 @@ int Server::main(const std::vector & /*args*/) auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); - servers.emplace_back(port_name, std::make_unique( + servers->emplace_back(port_name, std::make_unique( createHandlerFactory(*this, async_metrics, "InterserverIOHTTPSHandler-factory"), server_pool, socket, http_params)); LOG_INFO(log, "Listening for secure replica communication (interserver): https://{}", address.toString()); @@ -1118,7 +1126,7 @@ int Server::main(const std::vector & /*args*/) auto address = 
socketBindListen(socket, listen_host, port, /* secure = */ true); socket.setReceiveTimeout(Poco::Timespan()); socket.setSendTimeout(settings.send_timeout); - servers.emplace_back(port_name, std::make_unique( + servers->emplace_back(port_name, std::make_unique( new MySQLHandlerFactory(*this), server_pool, socket, @@ -1134,7 +1142,7 @@ int Server::main(const std::vector & /*args*/) auto address = socketBindListen(socket, listen_host, port, /* secure = */ true); socket.setReceiveTimeout(Poco::Timespan()); socket.setSendTimeout(settings.send_timeout); - servers.emplace_back(port_name, std::make_unique( + servers->emplace_back(port_name, std::make_unique( new PostgreSQLHandlerFactory(*this), server_pool, socket, @@ -1148,7 +1156,7 @@ int Server::main(const std::vector & /*args*/) createServer(listen_host, port_name, listen_try, [&](UInt16 port) { Poco::Net::SocketAddress server_address(listen_host, port); - servers.emplace_back(port_name, std::make_unique(*this, makeSocketAddress(listen_host, port, log))); + servers->emplace_back(port_name, std::make_unique(*this, makeSocketAddress(listen_host, port, log))); LOG_INFO(log, "Listening for gRPC protocol: " + server_address.toString()); }); #endif @@ -1161,14 +1169,14 @@ int Server::main(const std::vector & /*args*/) auto address = socketBindListen(socket, listen_host, port); socket.setReceiveTimeout(settings.http_receive_timeout); socket.setSendTimeout(settings.http_send_timeout); - servers.emplace_back(port_name, std::make_unique( + servers->emplace_back(port_name, std::make_unique( createHandlerFactory(*this, async_metrics, "PrometheusHandler-factory"), server_pool, socket, http_params)); LOG_INFO(log, "Listening for Prometheus: http://{}", address.toString()); }); } - if (servers.empty()) + if (servers->empty()) throw Exception("No servers started (add valid listen_host and 'tcp_port' or 'http_port' to configuration file.)", ErrorCodes::NO_ELEMENTS_IN_CONFIG); @@ -1176,7 +1184,7 @@ int Server::main(const std::vector & /*args*/) async_metrics.start(); global_context->enableNamedSessions(); - for (auto & server : servers) + for (auto & server : *servers) server.start(); { @@ -1208,7 +1216,7 @@ int Server::main(const std::vector & /*args*/) is_cancelled = true; int current_connections = 0; - for (auto & server : servers) + for (auto & server : *servers) { server.stop(); current_connections += server.currentConnections(); @@ -1223,7 +1231,7 @@ int Server::main(const std::vector & /*args*/) global_context->getProcessList().killAllQueries(); if (current_connections) - current_connections = waitServersToFinish(servers, config().getInt("shutdown_wait_unfinished", 5)); + current_connections = waitServersToFinish(*servers, config().getInt("shutdown_wait_unfinished", 5)); if (current_connections) LOG_INFO(log, "Closed connections. But {} remain." 
diff --git a/src/AggregateFunctions/AggregateFunctionAvg.h b/src/AggregateFunctions/AggregateFunctionAvg.h index d07ff5db2f2..a46796b9c8d 100644 --- a/src/AggregateFunctions/AggregateFunctionAvg.h +++ b/src/AggregateFunctions/AggregateFunctionAvg.h @@ -127,10 +127,10 @@ public: void insertResultInto(AggregateDataPtr place, IColumn & to, Arena *) const override { if constexpr (IsDecimalNumber || IsDecimalNumber) - static_cast &>(to).getData().push_back( + assert_cast &>(to).getData().push_back( this->data(place).divideIfAnyDecimal(num_scale, denom_scale)); else - static_cast &>(to).getData().push_back(this->data(place).divide()); + assert_cast &>(to).getData().push_back(this->data(place).divide()); } private: UInt32 num_scale; diff --git a/src/AggregateFunctions/IAggregateFunction.h b/src/AggregateFunctions/IAggregateFunction.h index b591bd3acd7..d5e931ccc73 100644 --- a/src/AggregateFunctions/IAggregateFunction.h +++ b/src/AggregateFunctions/IAggregateFunction.h @@ -104,9 +104,12 @@ public: return false; } - /// Inserts results into a column. - /// This method must be called once, from single thread. - /// After this method was called for state, you can't do anything with state but destroy. + /// Inserts results into a column. This method might modify the state (e.g. + /// sort an array), so must be called once, from single thread. The state + /// must remain valid though, and the subsequent calls to add/merge/ + /// insertResultInto must work correctly. This kind of call sequence occurs + /// in `runningAccumulate`, or when calculating an aggregate function as a + /// window function. virtual void insertResultInto(AggregateDataPtr place, IColumn & to, Arena * arena) const = 0; /// Used for machine learning methods. Predict result from trained model. diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 50e6d037b54..5f655f1f466 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -363,7 +363,7 @@ endif () if (USE_PARQUET) dbms_target_link_libraries(PRIVATE ${PARQUET_LIBRARY}) - if (NOT USE_INTERNAL_PARQUET_LIBRARY OR USE_INTERNAL_PARQUET_LIBRARY_NATIVE_CMAKE) + if (NOT USE_INTERNAL_PARQUET_LIBRARY) dbms_target_include_directories (SYSTEM BEFORE PRIVATE ${PARQUET_INCLUDE_DIR} ${ARROW_INCLUDE_DIR}) if (USE_STATIC_LIBRARIES) dbms_target_link_libraries(PRIVATE ${ARROW_LIBRARY}) @@ -436,6 +436,8 @@ if (USE_ROCKSDB) dbms_target_include_directories(SYSTEM BEFORE PUBLIC ${ROCKSDB_INCLUDE_DIR}) endif() +dbms_target_link_libraries(PRIVATE _boost_context) + if (ENABLE_TESTS AND USE_GTEST) macro (grep_gtest_sources BASE_DIR DST_VAR) # Cold match files that are not in tests/ directories diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index 8f4a64766cd..ef114490c51 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -742,8 +742,11 @@ std::optional Connection::checkPacket(size_t timeout_microseconds) } -Packet Connection::receivePacket() +Packet Connection::receivePacket(std::function async_callback) { + in->setAsyncCallback(std::move(async_callback)); + SCOPE_EXIT(in->setAsyncCallback({})); + try { Packet res; diff --git a/src/Client/Connection.h b/src/Client/Connection.h index 265a9913b8e..83e8f3ba206 100644 --- a/src/Client/Connection.h +++ b/src/Client/Connection.h @@ -18,6 +18,7 @@ #include #include +#include #include @@ -171,7 +172,8 @@ public: std::optional checkPacket(size_t timeout_microseconds = 0); /// Receive packet from server. - Packet receivePacket(); + /// Each time read blocks and async_callback is set, it will be called. 
You can poll socket inside it. + Packet receivePacket(std::function async_callback = {}); /// If not connected yet, or if connection is broken - then connect. If cannot connect - throw an exception. void forceConnected(const ConnectionTimeouts & timeouts); @@ -226,7 +228,7 @@ private: String server_display_name; std::unique_ptr socket; - std::shared_ptr in; + std::shared_ptr in; std::shared_ptr out; std::optional last_input_packet_type; diff --git a/src/Client/MultiplexedConnections.cpp b/src/Client/MultiplexedConnections.cpp index a99b0f9d7cc..ed7aad0a515 100644 --- a/src/Client/MultiplexedConnections.cpp +++ b/src/Client/MultiplexedConnections.cpp @@ -237,7 +237,7 @@ std::string MultiplexedConnections::dumpAddressesUnlocked() const return buf.str(); } -Packet MultiplexedConnections::receivePacketUnlocked() +Packet MultiplexedConnections::receivePacketUnlocked(std::function async_callback) { if (!sent_query) throw Exception("Cannot receive packets: no query sent.", ErrorCodes::LOGICAL_ERROR); @@ -249,7 +249,7 @@ Packet MultiplexedConnections::receivePacketUnlocked() if (current_connection == nullptr) throw Exception("Logical error: no available replica", ErrorCodes::NO_AVAILABLE_REPLICA); - Packet packet = current_connection->receivePacket(); + Packet packet = current_connection->receivePacket(std::move(async_callback)); switch (packet.type) { diff --git a/src/Client/MultiplexedConnections.h b/src/Client/MultiplexedConnections.h index eaec7f744bc..46312ae339d 100644 --- a/src/Client/MultiplexedConnections.h +++ b/src/Client/MultiplexedConnections.h @@ -69,7 +69,7 @@ public: private: /// Internal version of `receivePacket` function without locking. - Packet receivePacketUnlocked(); + Packet receivePacketUnlocked(std::function async_callback = {}); /// Internal version of `dumpAddresses` function without locking. std::string dumpAddressesUnlocked() const; @@ -105,6 +105,8 @@ private: /// A mutex for the sendCancel function to execute safely /// in separate thread. mutable std::mutex cancel_mutex; + + friend class RemoteQueryExecutorReadContext; }; } diff --git a/src/Common/CounterInFile.h b/src/Common/CounterInFile.h index 48414bd09cc..8cd4534d413 100644 --- a/src/Common/CounterInFile.h +++ b/src/Common/CounterInFile.h @@ -87,7 +87,7 @@ public: { /// A more understandable error message. if (e.code() == DB::ErrorCodes::CANNOT_READ_ALL_DATA || e.code() == DB::ErrorCodes::ATTEMPT_TO_READ_AFTER_EOF) - throw DB::Exception("File " + path + " is empty. You must fill it manually with appropriate value.", e.code()); + throw DB::ParsingException("File " + path + " is empty. You must fill it manually with appropriate value.", e.code()); else throw; } diff --git a/src/Common/Exception.cpp b/src/Common/Exception.cpp index d9bbb170dcc..b782471a4e8 100644 --- a/src/Common/Exception.cpp +++ b/src/Common/Exception.cpp @@ -450,5 +450,49 @@ ExecutionStatus ExecutionStatus::fromCurrentException(const std::string & start_ return ExecutionStatus(getCurrentExceptionCode(), msg); } +ParsingException::ParsingException() +{ + Exception::message(Exception::message() + "{}"); +} + +ParsingException::ParsingException(const std::string & msg, int code) + : Exception(msg, code) +{ + Exception::message(Exception::message() + "{}"); +} + +ParsingException::ParsingException(int code, const std::string & message) + : Exception(message, code) +{ + Exception::message(Exception::message() + "{}"); +} + + +/// We use additional field formatted_message_ to make this method const. 
+std::string ParsingException::displayText() const +{ + try + { + if (line_number_ == -1) + formatted_message_ = fmt::format(message(), ""); + else + formatted_message_ = fmt::format(message(), fmt::format(": (at row {})\n", line_number_)); + } + catch (...) + {} + + if (!formatted_message_.empty()) + { + std::string result = name(); + result.append(": "); + result.append(formatted_message_); + return result; + } + else + { + return Exception::displayText(); + } +} + } diff --git a/src/Common/Exception.h b/src/Common/Exception.h index 3da2e2fb0d0..4b04de5d8a2 100644 --- a/src/Common/Exception.h +++ b/src/Common/Exception.h @@ -96,6 +96,38 @@ private: }; +/// Special class of exceptions, used mostly in ParallelParsingInputFormat for +/// more convinient calculation of problem line number. +class ParsingException : public Exception +{ +public: + ParsingException(); + ParsingException(const std::string & msg, int code); + ParsingException(int code, const std::string & message); + + // Format message with fmt::format, like the logging functions. + template + ParsingException(int code, const std::string & fmt, Args&&... args) + : Exception(fmt::format(fmt, std::forward(args)...), code) + { + Exception::message(Exception::message() + "{}"); + } + + + std::string displayText() const override; + + int getLineNumber() { return line_number_; } + void setLineNumber(int line_number) { line_number_ = line_number;} + +private: + ssize_t line_number_{-1}; + mutable std::string formatted_message_; + + const char * name() const throw() override { return "DB::ParsingException"; } + const char * className() const throw() override { return "DB::ParsingException"; } +}; + + using Exceptions = std::vector; diff --git a/src/Common/Fiber.h b/src/Common/Fiber.h new file mode 100644 index 00000000000..e4efc0bdb6a --- /dev/null +++ b/src/Common/Fiber.h @@ -0,0 +1,5 @@ +#pragma once +#include +#include + +using Fiber = boost::context::fiber; diff --git a/src/Common/FiberStack.h b/src/Common/FiberStack.h new file mode 100644 index 00000000000..2917a64a692 --- /dev/null +++ b/src/Common/FiberStack.h @@ -0,0 +1,74 @@ +#pragma once +#include +#include +#include +#include + +#include +#include +#include + +#if defined(BOOST_USE_VALGRIND) +#include +#endif + +namespace DB::ErrorCodes +{ + extern const int CANNOT_ALLOCATE_MEMORY; +} + +/// This is an implementation of allocator for fiber stack. +/// The reference implementation is protected_fixedsize_stack from boost::context. +/// This implementation additionally track memory usage. It is the main reason why it is needed. 
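Before the class body below: `FiberStack` mirrors `boost::context::protected_fixedsize_stack` and additionally reports the allocated stack to the memory tracker. A hedged sketch (requires Boost.Context; it uses the stock allocator rather than the new class) of how such a stack allocator is handed to `boost::context::fiber` and how `resume()` transfers control back and forth:

``` cpp
#include <boost/context/fiber.hpp>
#include <boost/context/protected_fixedsize_stack.hpp>
#include <cstdio>
#include <memory>

namespace ctx = boost::context;

int main()
{
    ctx::fiber routine(
        std::allocator_arg,
        ctx::protected_fixedsize_stack(128 * 1024),   // same default size as the allocator below
        [](ctx::fiber && sink)
        {
            std::puts("inside the fiber");
            sink = std::move(sink).resume();          // yield back to the caller
            std::puts("fiber resumed once more");
            return std::move(sink);
        });

    routine = std::move(routine).resume();            // run until the fiber yields
    std::puts("back in main()");
    routine = std::move(routine).resume();            // let the fiber finish
    return 0;
}
```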
+class FiberStack +{ +private: + size_t stack_size; + size_t page_size = 0; +public: + static constexpr size_t default_stack_size = 128 * 1024; /// 64KB was not enough for tests + + explicit FiberStack(size_t stack_size_ = default_stack_size) : stack_size(stack_size_) + { + page_size = ::sysconf(_SC_PAGESIZE); + } + + boost::context::stack_context allocate() + { + size_t num_pages = 1 + (stack_size - 1) / page_size; + size_t num_bytes = (num_pages + 1) * page_size; /// Add one page at bottom that will be used as guard-page + + void * vp = ::mmap(nullptr, num_bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (MAP_FAILED == vp) + DB::throwFromErrno(fmt::format("FiberStack: Cannot mmap {}.", ReadableSize(num_bytes)), DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + + if (-1 == ::mprotect(vp, page_size, PROT_NONE)) + { + ::munmap(vp, num_bytes); + DB::throwFromErrno("FiberStack: cannot protect guard page", DB::ErrorCodes::CANNOT_ALLOCATE_MEMORY); + } + + /// Do not count guard page in memory usage. + CurrentMemoryTracker::alloc(num_pages * page_size); + + boost::context::stack_context sctx; + sctx.size = num_bytes; + sctx.sp = static_cast< char * >(vp) + sctx.size; +#if defined(BOOST_USE_VALGRIND) + sctx.valgrind_stack_id = VALGRIND_STACK_REGISTER(sctx.sp, vp); +#endif + return sctx; + } + + void deallocate(boost::context::stack_context & sctx) + { +#if defined(BOOST_USE_VALGRIND) + VALGRIND_STACK_DEREGISTER(sctx.valgrind_stack_id); +#endif + void * vp = static_cast< char * >(sctx.sp) - sctx.size; + ::munmap(vp, sctx.size); + + /// Do not count guard page in memory usage. + CurrentMemoryTracker::free(sctx.size - page_size); + } +}; diff --git a/src/Common/SimpleIncrement.h b/src/Common/SimpleIncrement.h index a91ef06d45b..4896d2192f4 100644 --- a/src/Common/SimpleIncrement.h +++ b/src/Common/SimpleIncrement.h @@ -8,9 +8,7 @@ */ struct SimpleIncrement { - std::atomic value; - - SimpleIncrement(UInt64 start = 0) : value(start) {} + std::atomic value{0}; void set(UInt64 new_value) { diff --git a/src/Common/ThreadFuzzer.cpp b/src/Common/ThreadFuzzer.cpp index a32e50c44b2..88ff53534e6 100644 --- a/src/Common/ThreadFuzzer.cpp +++ b/src/Common/ThreadFuzzer.cpp @@ -10,6 +10,7 @@ #include #include +#include #include #include diff --git a/src/Common/ThreadPool.cpp b/src/Common/ThreadPool.cpp index 7b2c2108629..7fc0d65aa5b 100644 --- a/src/Common/ThreadPool.cpp +++ b/src/Common/ThreadPool.cpp @@ -55,6 +55,13 @@ void ThreadPoolImpl::setMaxThreads(size_t value) max_threads = value; } +template +size_t ThreadPoolImpl::getMaxThreads() const +{ + std::lock_guard lock(mutex); + return max_threads; +} + template void ThreadPoolImpl::setMaxFreeThreads(size_t value) { diff --git a/src/Common/ThreadPool.h b/src/Common/ThreadPool.h index 8dd6cbbe02c..0ae023e4ebd 100644 --- a/src/Common/ThreadPool.h +++ b/src/Common/ThreadPool.h @@ -71,6 +71,7 @@ public: void setMaxThreads(size_t value); void setMaxFreeThreads(size_t value); void setQueueSize(size_t value); + size_t getMaxThreads() const; private: mutable std::mutex mutex; diff --git a/src/Common/TimerDescriptor.cpp b/src/Common/TimerDescriptor.cpp new file mode 100644 index 00000000000..f4c3ec35588 --- /dev/null +++ b/src/Common/TimerDescriptor.cpp @@ -0,0 +1,84 @@ +#if defined(OS_LINUX) +#include +#include + +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_CREATE_TIMER; + extern const int CANNOT_SET_TIMER_PERIOD; + extern const int CANNOT_FCNTL; + extern const int CANNOT_READ_FROM_SOCKET; +} + 
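A brief, hedged aside before the constructor below (Linux only, a standalone sketch rather than the new wrapper itself): the `timerfd` API exposes a timer as an ordinary file descriptor, armed with `timerfd_settime` and read as an 8-byte expiration counter, which is what `reset()`, `drain()` and `setRelative()` below build on.

``` cpp
#include <sys/timerfd.h>
#include <unistd.h>
#include <cstdint>
#include <cstdio>

int main()
{
    int fd = timerfd_create(CLOCK_MONOTONIC, 0);
    if (fd == -1)
        return perror("timerfd_create"), 1;

    itimerspec spec{};
    spec.it_value.tv_sec = 1;            // fire once, 1 second from now (it_interval stays zero)
    if (timerfd_settime(fd, 0 /* relative timer */, &spec, nullptr) == -1)
        return perror("timerfd_settime"), 1;

    uint64_t expirations = 0;            // a blocking read returns the number of expirations
    if (read(fd, &expirations, sizeof(expirations)) == sizeof(expirations))
        std::printf("timer fired %llu time(s)\n", static_cast<unsigned long long>(expirations));

    close(fd);
    return 0;
}
```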
+TimerDescriptor::TimerDescriptor(int clockid, int flags) +{ + timer_fd = timerfd_create(clockid, flags); + if (timer_fd == -1) + throw Exception(ErrorCodes::CANNOT_CREATE_TIMER, "Cannot create timer_fd descriptor"); + + if (-1 == fcntl(timer_fd, F_SETFL, O_NONBLOCK)) + throwFromErrno("Cannot set O_NONBLOCK for timer_fd", ErrorCodes::CANNOT_FCNTL); +} + +TimerDescriptor::~TimerDescriptor() +{ + /// Do not check for result cause cannot throw exception. + close(timer_fd); +} + +void TimerDescriptor::reset() const +{ + itimerspec spec; + spec.it_interval.tv_nsec = 0; + spec.it_interval.tv_sec = 0; + spec.it_value.tv_sec = 0; + spec.it_value.tv_nsec = 0; + + if (-1 == timerfd_settime(timer_fd, 0 /*relative timer */, &spec, nullptr)) + throwFromErrno("Cannot reset timer_fd", ErrorCodes::CANNOT_SET_TIMER_PERIOD); + + /// Drain socket. + /// It may be possible that alarm happened and socket is readable. + drain(); +} + +void TimerDescriptor::drain() const +{ + /// It is expected that socket returns 8 bytes when readable. + /// Read in loop anyway cause signal may interrupt read call. + uint64_t buf; + while (true) + { + ssize_t res = ::read(timer_fd, &buf, sizeof(buf)); + if (res < 0) + { + if (errno == EAGAIN) + break; + + if (errno != EINTR) + throwFromErrno("Cannot drain timer_fd", ErrorCodes::CANNOT_READ_FROM_SOCKET); + } + } +} + +void TimerDescriptor::setRelative(const Poco::Timespan & timespan) const +{ + itimerspec spec; + spec.it_interval.tv_nsec = 0; + spec.it_interval.tv_sec = 0; + spec.it_value.tv_sec = timespan.totalSeconds(); + spec.it_value.tv_nsec = timespan.useconds(); + + if (-1 == timerfd_settime(timer_fd, 0 /*relative timer */, &spec, nullptr)) + throwFromErrno("Cannot set time for timer_fd", ErrorCodes::CANNOT_SET_TIMER_PERIOD); +} + +} +#endif diff --git a/src/Common/TimerDescriptor.h b/src/Common/TimerDescriptor.h new file mode 100644 index 00000000000..ddb8f2a1367 --- /dev/null +++ b/src/Common/TimerDescriptor.h @@ -0,0 +1,31 @@ +#pragma once +#if defined(OS_LINUX) +#include + +namespace DB +{ + +/// Wrapper over timerfd. 
+class TimerDescriptor +{ +private: + int timer_fd; + +public: + explicit TimerDescriptor(int clockid, int flags); + ~TimerDescriptor(); + + TimerDescriptor(const TimerDescriptor &) = delete; + TimerDescriptor & operator=(const TimerDescriptor &) = delete; + TimerDescriptor(TimerDescriptor &&) = default; + TimerDescriptor & operator=(TimerDescriptor &&) = default; + + int getDescriptor() const { return timer_fd; } + + void reset() const; + void drain() const; + void setRelative(const Poco::Timespan & timespan) const; +}; + +} +#endif diff --git a/src/Common/XDBCBridgeHelper.h b/src/Common/XDBCBridgeHelper.h index d7d3a6ba4cc..8da735c6fe3 100644 --- a/src/Common/XDBCBridgeHelper.h +++ b/src/Common/XDBCBridgeHelper.h @@ -76,7 +76,7 @@ public: const Context & context; const Configuration & config; - static constexpr inline auto DEFAULT_HOST = "localhost"; + static constexpr inline auto DEFAULT_HOST = "127.0.0.1"; static constexpr inline auto DEFAULT_PORT = BridgeHelperMixin::DEFAULT_PORT; static constexpr inline auto PING_HANDLER = "/ping"; static constexpr inline auto MAIN_HANDLER = "/"; diff --git a/src/Common/ZooKeeper/tests/CMakeLists.txt b/src/Common/ZooKeeper/tests/CMakeLists.txt index 7092720e7bc..bbfa3e1f137 100644 --- a/src/Common/ZooKeeper/tests/CMakeLists.txt +++ b/src/Common/ZooKeeper/tests/CMakeLists.txt @@ -4,9 +4,6 @@ target_link_libraries(zkutil_test_commands PRIVATE clickhouse_common_zookeeper) add_executable(zkutil_test_commands_new_lib zkutil_test_commands_new_lib.cpp) target_link_libraries(zkutil_test_commands_new_lib PRIVATE clickhouse_common_zookeeper string_utils) -add_executable(zkutil_expiration_test zkutil_expiration_test.cpp) -target_link_libraries(zkutil_expiration_test PRIVATE clickhouse_common_zookeeper) - add_executable(zkutil_test_async zkutil_test_async.cpp) target_link_libraries(zkutil_test_async PRIVATE clickhouse_common_zookeeper) diff --git a/src/Common/ZooKeeper/tests/nozk.sh b/src/Common/ZooKeeper/tests/nozk.sh deleted file mode 100755 index c4e409f735b..00000000000 --- a/src/Common/ZooKeeper/tests/nozk.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env bash - -# Добавляет в файрвол правила, не пропускающие пакеты до серверов ZooKeeper. -# Используется для тестирования поведения программ при потере соединения с ZooKeeper. -# yeszk.sh производит обратные изменения. 
- -# Чтобы посмотреть, какие правила сейчас есть, используйте sudo iptables -L и sudo ip6tables -L - -sudo iptables -A OUTPUT -p tcp --dport 2181 -j DROP -sudo ip6tables -A OUTPUT -p tcp --dport 2181 -j DROP - -# You could also test random drops: -#sudo iptables -A OUTPUT -p tcp --dport 2181 -j REJECT --reject-with tcp-reset -m statistic --mode random --probability 0.1 -#sudo ip6tables -A OUTPUT -p tcp --dport 2181 -j REJECT --reject-with tcp-reset -m statistic --mode random --probability 0.1 - diff --git a/src/Common/ZooKeeper/tests/yeszk.sh b/src/Common/ZooKeeper/tests/yeszk.sh deleted file mode 100755 index 4f186a90183..00000000000 --- a/src/Common/ZooKeeper/tests/yeszk.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/usr/bin/env bash - -# Выполняет действия, обратные nozk.sh - -cat nozk.sh | sed 's/-A/-D/g' | bash - diff --git a/src/Common/ZooKeeper/tests/zkutil_expiration_test.cpp b/src/Common/ZooKeeper/tests/zkutil_expiration_test.cpp deleted file mode 100644 index e09c72a4d6c..00000000000 --- a/src/Common/ZooKeeper/tests/zkutil_expiration_test.cpp +++ /dev/null @@ -1,70 +0,0 @@ -#include -#include -#include -#include -#include - - -/// Проверяет, какие ошибки выдает ZooKeeper при попытке сделать какую-нибудь операцию через разное время после истечения сессии. -/// Спойлер: multi иногда падает с segfault, а до этого фейлится с marshalling error. -/// create всегда фейлится с invalid zhandle state. - -int main(int argc, char ** argv) -{ - try - { - if (argc != 2) - { - std::cerr << "usage: " << argv[0] << " hosts" << std::endl; - return 2; - } - - Poco::AutoPtr channel = new Poco::ConsoleChannel(std::cerr); - Poco::Logger::root().setChannel(channel); - Poco::Logger::root().setLevel("trace"); - - zkutil::ZooKeeper zk(argv[1]); - std::string unused; - zk.tryCreate("/test", "", zkutil::CreateMode::Persistent, unused); - - std::cerr << "Please run `./nozk.sh && sleep 40s && ./yeszk.sh`" << std::endl; - - time_t time0 = time(nullptr); - - while (true) - { - { - Coordination::Requests ops; - ops.emplace_back(zkutil::makeCreateRequest("/test/zk_expiration_test", "hello", zkutil::CreateMode::Persistent)); - ops.emplace_back(zkutil::makeRemoveRequest("/test/zk_expiration_test", -1)); - - Coordination::Responses responses; - Coordination::Error code = zk.tryMultiNoThrow(ops, responses); - - std::cout << time(nullptr) - time0 << "s: " << Coordination::errorMessage(code) << std::endl; - try - { - if (code != Coordination::Error::ZOK) - std::cout << "Path: " << zkutil::KeeperMultiException(code, ops, responses).getPathForFirstFailedOp() << std::endl; - } - catch (...) - { - std::cout << DB::getCurrentExceptionMessage(false) << std::endl; - } - - } - - sleep(1); - } - } - catch (Coordination::Exception &) - { - std::cerr << "KeeperException: " << DB::getCurrentExceptionMessage(true) << std::endl; - return 1; - } - catch (...) 
- { - std::cerr << "Some exception: " << DB::getCurrentExceptionMessage(true) << std::endl; - return 2; - } -} diff --git a/src/Common/ya.make b/src/Common/ya.make index c8be43f98a2..07175029276 100644 --- a/src/Common/ya.make +++ b/src/Common/ya.make @@ -75,6 +75,7 @@ SRCS( ThreadPool.cpp ThreadProfileEvents.cpp ThreadStatus.cpp + TimerDescriptor.cpp TraceCollector.cpp UTF8Helpers.cpp UnicodeBar.cpp diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 0719eb1afc9..b09e960da36 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -405,16 +405,21 @@ class IColumn; M(Bool, optimize_skip_merged_partitions, false, "Skip partitions with one part with level > 0 in optimize final", 0) \ M(Bool, optimize_on_insert, true, "Do the same transformation for inserted block of data as if merge was done on this block.", 0) \ M(Bool, allow_experimental_map_type, false, "Allow data type Map", 0) \ + M(Bool, allow_experimental_window_functions, false, "Allow experimental window functions", 0) \ \ M(Bool, use_antlr_parser, false, "Parse incoming queries using ANTLR-generated parser", 0) \ \ + M(Bool, async_socket_for_remote, true, "Asynchronously read from socket executing remote query", 0) \ + \ /** Obsolete settings that do nothing but left for compatibility reasons. Remove each one after half a year of obsolescence. */ \ \ M(UInt64, max_memory_usage_for_all_queries, 0, "Obsolete. Will be removed after 2020-10-20", 0) \ M(UInt64, multiple_joins_rewriter_version, 0, "Obsolete setting, does nothing. Will be removed after 2021-03-31", 0) \ M(Bool, enable_debug_queries, false, "Enabled debug queries, but now is obsolete", 0) \ M(Bool, allow_experimental_database_atomic, true, "Obsolete setting, does nothing. Will be removed after 2021-02-12", 0) \ - M(UnionMode, union_default_mode, UnionMode::DISTINCT, "Set default Union Mode in SelectWithUnion query. Possible values: empty string, 'ALL', 'DISTINCT'. If empty, query without Union Mode will throw exception.", 0) + M(UnionMode, union_default_mode, UnionMode::DISTINCT, "Set default Union Mode in SelectWithUnion query. Possible values: empty string, 'ALL', 'DISTINCT'. If empty, query without Union Mode will throw exception.", 0) \ + M(Bool, optimize_aggregators_of_group_by_keys, true, "Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section", 0) \ + M(Bool, optimize_group_by_function_keys, true, "Eliminates functions of other keys in GROUP BY section", 0) \ // End of COMMON_SETTINGS // Please add settings related to formats into the FORMAT_FACTORY_SETTINGS below. @@ -426,10 +431,10 @@ class IColumn; M(Bool, output_format_csv_crlf_end_of_line, false, "If it is set true, end of line in CSV format will be \\r\\n instead of \\n.", 0) \ M(Bool, input_format_csv_unquoted_null_literal_as_null, false, "Consider unquoted NULL literal as \\N", 0) \ M(Bool, input_format_csv_enum_as_number, false, "Treat inserted enum values in CSV formats as enum indices \\N", 0) \ + M(Bool, input_format_csv_arrays_as_nested_csv, false, R"(When reading Array from CSV, expect that its elements were serialized in nested CSV and then put into string. Example: "[""Hello"", ""world"", ""42"""" TV""]". 
Braces around array can be omitted.)", 0) \ M(Bool, input_format_skip_unknown_fields, false, "Skip columns with unknown names from input data (it works for JSONEachRow, CSVWithNames, TSVWithNames and TSKV formats).", 0) \ M(Bool, input_format_with_names_use_header, true, "For TSVWithNames and CSVWithNames input formats this controls whether format parser is to assume that column data appear in the input exactly as they are specified in the header.", 0) \ M(Bool, input_format_import_nested_json, false, "Map nested JSON data to nested tables (it works for JSONEachRow format).", 0) \ - M(Bool, optimize_aggregators_of_group_by_keys, true, "Eliminates min/max/any/anyLast aggregators of GROUP BY keys in SELECT section", 0) \ M(Bool, input_format_defaults_for_omitted_fields, true, "For input data calculate default expressions for omitted fields (it works for JSONEachRow, CSV and TSV formats).", IMPORTANT) \ M(Bool, input_format_tsv_empty_as_default, false, "Treat empty fields in TSV input as default values.", 0) \ M(Bool, input_format_tsv_enum_as_number, false, "Treat inserted enum values in TSV formats as enum indices \\N", 0) \ @@ -438,7 +443,6 @@ class IColumn; M(DateTimeInputFormat, date_time_input_format, FormatSettings::DateTimeInputFormat::Basic, "Method to read DateTime from text input formats. Possible values: 'basic' and 'best_effort'.", 0) \ M(DateTimeOutputFormat, date_time_output_format, FormatSettings::DateTimeOutputFormat::Simple, "Method to write DateTime to text output. Possible values: 'simple', 'iso', 'unix_timestamp'.", 0) \ \ - M(Bool, optimize_group_by_function_keys, true, "Eliminates functions of other keys in GROUP BY section", 0) \ M(Bool, input_format_values_interpret_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser and try to interpret it as SQL expression.", 0) \ M(Bool, input_format_values_deduce_templates_of_expressions, true, "For Values format: if the field could not be parsed by streaming parser, run SQL parser, deduce template of the SQL expression, try to parse all rows using template and then interpret expression for all rows.", 0) \ M(Bool, input_format_values_accurate_types_of_literals, true, "For Values format: when parsing and interpreting expressions using template, check actual type of literal to avoid possible overflow and precision issues.", 0) \ @@ -486,7 +490,9 @@ class IColumn; \ M(Bool, output_format_enable_streaming, false, "Enable streaming in output formats that support it.", 0) \ M(Bool, output_format_write_statistics, true, "Write statistics about read rows, bytes, time elapsed in suitable output formats.", 0) \ - M(Bool, output_format_pretty_row_numbers, false, "Add row numbers before each row for pretty output format", 0) + M(Bool, output_format_pretty_row_numbers, false, "Add row numbers before each row for pretty output format", 0) \ + M(Bool, insert_distributed_one_random_shard, false, "If setting is enabled, inserting into distributed table will choose a random shard to write when there is no sharding key", 0) \ + // End of FORMAT_FACTORY_SETTINGS // Please add settings non-related to formats into the COMMON_SETTINGS above. 
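The settings above are declared through an X-macro list, `M(type, name, default, description, flags)`, which is expanded several times to generate struct members, defaults and help text. A simplified, hedged sketch of that pattern (generic names, not the real ClickHouse `SettingsTraits` machinery; the flags argument is omitted):

``` cpp
#include <cstddef>
#include <iostream>

#define APPLY_FOR_EXAMPLE_SETTINGS(M) \
    M(bool,        async_socket_for_remote, true, "Asynchronously read from socket executing remote query") \
    M(std::size_t, max_threads,             8,    "Maximum number of threads")

struct ExampleSettings
{
    // Expand the list once to declare members with their defaults.
#define DECLARE(TYPE, NAME, DEFAULT, DESCRIPTION) TYPE NAME = DEFAULT;
    APPLY_FOR_EXAMPLE_SETTINGS(DECLARE)
#undef DECLARE
};

int main()
{
    ExampleSettings settings;
    settings.max_threads = 16;

    // Expand the same list again to print name, current value and description.
#define PRINT(TYPE, NAME, DEFAULT, DESCRIPTION) \
    std::cout << #NAME << " = " << settings.NAME << "  -- " << DESCRIPTION << '\n';
    APPLY_FOR_EXAMPLE_SETTINGS(PRINT)
#undef PRINT
    return 0;
}
```

The single list keeps member declarations, defaults and documentation in one place, which is why moving a setting between the COMMON and FORMAT_FACTORY groups (as the hunk above does) is just a matter of moving one `M(...)` line.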
diff --git a/src/Core/SortDescription.cpp b/src/Core/SortDescription.cpp index 4a5952c3bc2..cb7378cf096 100644 --- a/src/Core/SortDescription.cpp +++ b/src/Core/SortDescription.cpp @@ -37,5 +37,12 @@ void dumpSortDescription(const SortDescription & description, const Block & head } } +std::string dumpSortDescription(const SortDescription & description) +{ + WriteBufferFromOwnString wb; + dumpSortDescription(description, Block{}, wb); + return wb.str(); +} + } diff --git a/src/Core/SortDescription.h b/src/Core/SortDescription.h index 79ac7ddf142..1450393ebd8 100644 --- a/src/Core/SortDescription.h +++ b/src/Core/SortDescription.h @@ -72,4 +72,6 @@ class Block; /// Outputs user-readable description into `out`. void dumpSortDescription(const SortDescription & description, const Block & header, WriteBuffer & out); +std::string dumpSortDescription(const SortDescription & description); + } diff --git a/src/DataStreams/NativeBlockInputStream.cpp b/src/DataStreams/NativeBlockInputStream.cpp index ae2d6886fa8..b182d5e0588 100644 --- a/src/DataStreams/NativeBlockInputStream.cpp +++ b/src/DataStreams/NativeBlockInputStream.cpp @@ -106,7 +106,7 @@ Block NativeBlockInputStream::readImpl() if (istr.eof()) { if (use_index) - throw Exception("Input doesn't contain all data for index.", ErrorCodes::CANNOT_READ_ALL_DATA); + throw ParsingException("Input doesn't contain all data for index.", ErrorCodes::CANNOT_READ_ALL_DATA); return res; } diff --git a/src/DataStreams/ParallelParsingBlockInputStream.cpp b/src/DataStreams/ParallelParsingBlockInputStream.cpp index 19b04d36fc1..b7a0c3cab99 100644 --- a/src/DataStreams/ParallelParsingBlockInputStream.cpp +++ b/src/DataStreams/ParallelParsingBlockInputStream.cpp @@ -126,8 +126,11 @@ void ParallelParsingBlockInputStream::segmentatorThreadFunction(ThreadGroupStatu // Segmentating the original input. unit.segment.resize(0); - const bool have_more_data = file_segmentation_engine(original_buffer, - unit.segment, min_chunk_bytes); + auto [have_more_data, currently_read_rows] = file_segmentation_engine( + original_buffer, unit.segment, min_chunk_bytes); + + unit.offset = successfully_read_rows_count; + successfully_read_rows_count += currently_read_rows; unit.is_last = !have_more_data; unit.status = READY_TO_PARSE; @@ -142,7 +145,7 @@ void ParallelParsingBlockInputStream::segmentatorThreadFunction(ThreadGroupStatu } catch (...) { - onBackgroundException(); + onBackgroundException(successfully_read_rows_count); } } @@ -157,11 +160,11 @@ void ParallelParsingBlockInputStream::parserThreadFunction(ThreadGroupStatusPtr setThreadName("ChunkParser"); + const auto current_unit_number = current_ticket_number % processing_units.size(); + auto & unit = processing_units[current_unit_number]; + try { - const auto current_unit_number = current_ticket_number % processing_units.size(); - auto & unit = processing_units[current_unit_number]; - /* * This is kind of suspicious -- the input_process_creator contract with * respect to multithreaded use is not clear, but we hope that it is @@ -195,19 +198,22 @@ void ParallelParsingBlockInputStream::parserThreadFunction(ThreadGroupStatusPtr } catch (...) 
{ - onBackgroundException(); + onBackgroundException(unit.offset); } } -void ParallelParsingBlockInputStream::onBackgroundException() +void ParallelParsingBlockInputStream::onBackgroundException(size_t offset) { - tryLogCurrentException(__PRETTY_FUNCTION__); - std::unique_lock lock(mutex); if (!background_exception) { background_exception = std::current_exception(); + + if (ParsingException * e = exception_cast(background_exception)) + if (e->getLineNumber() != -1) + e->setLineNumber(e->getLineNumber() + offset); } + tryLogCurrentException(__PRETTY_FUNCTION__); finished = true; reader_condvar.notify_all(); segmentator_condvar.notify_all(); diff --git a/src/DataStreams/ParallelParsingBlockInputStream.h b/src/DataStreams/ParallelParsingBlockInputStream.h index c882acd9ddd..749de83b583 100644 --- a/src/DataStreams/ParallelParsingBlockInputStream.h +++ b/src/DataStreams/ParallelParsingBlockInputStream.h @@ -149,6 +149,8 @@ private: BlockExt block_ext; Memory<> segment; std::atomic status; + /// Needed for better exception message. + size_t offset = 0; bool is_last{false}; }; @@ -159,6 +161,10 @@ private: std::deque processing_units; + /// Compute it to have a more understandable error message. + size_t successfully_read_rows_count{0}; + + void scheduleParserThreadForUnitWithNumber(size_t ticket_number); void finishAndWait(); @@ -169,7 +175,7 @@ private: // threads. This function is used by segmentator and parsed threads. // readImpl() is called from the main thread, so the exception handling // is different. - void onBackgroundException(); + void onBackgroundException(size_t offset); }; } diff --git a/src/DataStreams/RemoteQueryExecutor.cpp b/src/DataStreams/RemoteQueryExecutor.cpp index c38f42893af..51d229a1126 100644 --- a/src/DataStreams/RemoteQueryExecutor.cpp +++ b/src/DataStreams/RemoteQueryExecutor.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -11,6 +12,7 @@ #include #include #include +#include namespace DB { @@ -192,68 +194,119 @@ Block RemoteQueryExecutor::read() Packet packet = multiplexed_connections->receivePacket(); - switch (packet.type) - { - case Protocol::Server::Data: - /// If the block is not empty and is not a header block - if (packet.block && (packet.block.rows() > 0)) - return adaptBlockStructure(packet.block, header); - break; /// If the block is empty - we will receive other packets before EndOfStream. - - case Protocol::Server::Exception: - got_exception_from_replica = true; - packet.exception->rethrow(); - break; - - case Protocol::Server::EndOfStream: - if (!multiplexed_connections->hasActiveConnections()) - { - finished = true; - return Block(); - } - break; - - case Protocol::Server::Progress: - /** We use the progress from a remote server. - * We also include in ProcessList, - * and we use it to check - * constraints (for example, the minimum speed of query execution) - * and quotas (for example, the number of lines to read). - */ - if (progress_callback) - progress_callback(packet.progress); - break; - - case Protocol::Server::ProfileInfo: - /// Use own (client-side) info about read bytes, it is more correct info than server-side one. 
- if (profile_info_callback) - profile_info_callback(packet.profile_info); - break; - - case Protocol::Server::Totals: - totals = packet.block; - break; - - case Protocol::Server::Extremes: - extremes = packet.block; - break; - - case Protocol::Server::Log: - /// Pass logs from remote server to client - if (auto log_queue = CurrentThread::getInternalTextLogsQueue()) - log_queue->pushBlock(std::move(packet.block)); - break; - - default: - got_unknown_packet_from_replica = true; - throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from one of the following replicas: {}", - toString(packet.type), - multiplexed_connections->dumpAddresses()); - } + if (auto block = processPacket(std::move(packet))) + return *block; } } -void RemoteQueryExecutor::finish() +std::variant RemoteQueryExecutor::read(std::unique_ptr & read_context [[maybe_unused]]) +{ + +#if defined(OS_LINUX) + if (!sent_query) + { + sendQuery(); + + if (context.getSettingsRef().skip_unavailable_shards && (0 == multiplexed_connections->size())) + return Block(); + } + + if (!read_context) + { + std::lock_guard lock(was_cancelled_mutex); + if (was_cancelled) + return Block(); + + read_context = std::make_unique(*multiplexed_connections); + } + + do + { + if (!read_context->resumeRoutine()) + return Block(); + + if (read_context->is_read_in_progress) + { + read_context->setTimer(); + return read_context->epoll_fd; + } + else + { + if (auto data = processPacket(std::move(read_context->packet))) + return std::move(*data); + } + } + while (true); +#else + return read(); +#endif +} + +std::optional RemoteQueryExecutor::processPacket(Packet packet) +{ + switch (packet.type) + { + case Protocol::Server::Data: + /// If the block is not empty and is not a header block + if (packet.block && (packet.block.rows() > 0)) + return adaptBlockStructure(packet.block, header); + break; /// If the block is empty - we will receive other packets before EndOfStream. + + case Protocol::Server::Exception: + got_exception_from_replica = true; + packet.exception->rethrow(); + break; + + case Protocol::Server::EndOfStream: + if (!multiplexed_connections->hasActiveConnections()) + { + finished = true; + return Block(); + } + break; + + case Protocol::Server::Progress: + /** We use the progress from a remote server. + * We also include in ProcessList, + * and we use it to check + * constraints (for example, the minimum speed of query execution) + * and quotas (for example, the number of lines to read). + */ + if (progress_callback) + progress_callback(packet.progress); + break; + + case Protocol::Server::ProfileInfo: + /// Use own (client-side) info about read bytes, it is more correct info than server-side one. 
+ if (profile_info_callback) + profile_info_callback(packet.profile_info); + break; + + case Protocol::Server::Totals: + totals = packet.block; + break; + + case Protocol::Server::Extremes: + extremes = packet.block; + break; + + case Protocol::Server::Log: + /// Pass logs from remote server to client + if (auto log_queue = CurrentThread::getInternalTextLogsQueue()) + log_queue->pushBlock(std::move(packet.block)); + break; + + default: + got_unknown_packet_from_replica = true; + throw Exception(ErrorCodes::UNKNOWN_PACKET_FROM_SERVER, "Unknown packet {} from one of the following replicas: {}", + toString(packet.type), + multiplexed_connections->dumpAddresses()); + } + + return {}; +} + +void RemoteQueryExecutor::finish(std::unique_ptr * read_context) { /** If one of: * - nothing started to do; @@ -270,7 +323,7 @@ void RemoteQueryExecutor::finish() */ /// Send the request to abort the execution of the request, if not already sent. - tryCancel("Cancelling query because enough data has been read"); + tryCancel("Cancelling query because enough data has been read", read_context); /// Get the remaining packets so that there is no out of sync in the connections to the replicas. Packet packet = multiplexed_connections->drain(); @@ -299,7 +352,7 @@ void RemoteQueryExecutor::finish() } } -void RemoteQueryExecutor::cancel() +void RemoteQueryExecutor::cancel(std::unique_ptr * read_context) { { std::lock_guard lock(external_tables_mutex); @@ -313,7 +366,7 @@ void RemoteQueryExecutor::cancel() if (!isQueryPending() || hasThrownException()) return; - tryCancel("Cancelling query"); + tryCancel("Cancelling query", read_context); } void RemoteQueryExecutor::sendScalars() @@ -365,7 +418,7 @@ void RemoteQueryExecutor::sendExternalTables() multiplexed_connections->sendExternalTablesData(external_tables_data); } -void RemoteQueryExecutor::tryCancel(const char * reason) +void RemoteQueryExecutor::tryCancel(const char * reason, std::unique_ptr * read_context) { { /// Flag was_cancelled is atomic because it is checked in read(). @@ -375,6 +428,10 @@ void RemoteQueryExecutor::tryCancel(const char * reason) return; was_cancelled = true; + + if (read_context && *read_context) + (*read_context)->cancel(); + multiplexed_connections->sendCancel(); } diff --git a/src/DataStreams/RemoteQueryExecutor.h b/src/DataStreams/RemoteQueryExecutor.h index cec92a5f0e9..46d9d067563 100644 --- a/src/DataStreams/RemoteQueryExecutor.h +++ b/src/DataStreams/RemoteQueryExecutor.h @@ -5,6 +5,9 @@ #include #include #include +#include +#include +#include namespace DB { @@ -20,10 +23,14 @@ using ProgressCallback = std::function; struct BlockStreamProfileInfo; using ProfileInfoCallback = std::function; +class RemoteQueryExecutorReadContext; + /// This class allows one to launch queries on remote replicas of one shard and get results class RemoteQueryExecutor { public: + using ReadContext = RemoteQueryExecutorReadContext; + /// Takes already set connection. RemoteQueryExecutor( Connection & connection, @@ -53,13 +60,17 @@ public: /// Read next block of data. Returns empty block if query is finished. Block read(); + /// Async variant of read. Returns ready block or file descriptor which may be used for polling. + /// ReadContext is an internal read state. Pass empty ptr first time, reuse created one for every call. + std::variant read(std::unique_ptr & read_context); + /// Receive all remain packets and finish query. /// It should be cancelled after read returned empty block. 
- void finish(); + void finish(std::unique_ptr * read_context = nullptr); /// Cancel query execution. Sends Cancel packet and ignore others. /// This method may be called from separate thread. - void cancel(); + void cancel(std::unique_ptr * read_context = nullptr); /// Get totals and extremes if any. Block getTotals() { return std::move(totals); } @@ -153,13 +164,16 @@ private: void sendExternalTables(); /// If wasn't sent yet, send request to cancel all connections to replicas - void tryCancel(const char * reason); + void tryCancel(const char * reason, std::unique_ptr * read_context); /// Returns true if query was sent bool isQueryPending() const; /// Returns true if exception was thrown bool hasThrownException() const; + + /// Process packet for read and return data block if possible. + std::optional processPacket(Packet packet); }; } diff --git a/src/DataStreams/RemoteQueryExecutorReadContext.h b/src/DataStreams/RemoteQueryExecutorReadContext.h new file mode 100644 index 00000000000..f8c64954b83 --- /dev/null +++ b/src/DataStreams/RemoteQueryExecutorReadContext.h @@ -0,0 +1,272 @@ +#pragma once + +#if defined(OS_LINUX) + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_READ_FROM_SOCKET; + extern const int CANNOT_OPEN_FILE; + extern const int SOCKET_TIMEOUT; +} + +class RemoteQueryExecutorReadContext +{ +public: + using Self = RemoteQueryExecutorReadContext; + + bool is_read_in_progress = false; + Packet packet; + + std::exception_ptr exception; + FiberStack stack; + boost::context::fiber fiber; + /// This mutex for fiber is needed because fiber could be destroyed in cancel method from another thread. + std::mutex fiber_lock; + + Poco::Timespan receive_timeout; + MultiplexedConnections & connections; + Poco::Net::Socket * last_used_socket = nullptr; + + /// Here we have three descriptors we are going to wait: + /// * socket_fd is a descriptor of connection. It may be changed in case of reading from several replicas. + /// * timer is a timerfd descriptor to manually check socket timeout + /// * pipe_fd is a pipe we use to cancel query and socket polling by executor. + /// We put those descriptors into our own epoll_fd which is used by external executor. 
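+    /// A minimal usage sketch of the asynchronous API (illustrative only; `executor`, `waitUntilReadable`
+    /// and `process` are assumed helper names, not APIs defined in this patch):
+    ///
+    ///     std::unique_ptr<RemoteQueryExecutorReadContext> read_context;
+    ///     while (true)
+    ///     {
+    ///         auto result = executor.read(read_context);    /// Either a ready Block or a pollable fd.
+    ///         if (const int * fd = std::get_if<int>(&result))
+    ///         {
+    ///             waitUntilReadable(*fd);                    /// e.g. add the fd to an event loop or epoll_wait on it.
+    ///             continue;                                  /// Resume once the socket, timer or cancel pipe fires.
+    ///         }
+    ///         Block block = std::get<Block>(result);
+    ///         if (!block)
+    ///             break;                                     /// Empty block: query finished or was cancelled.
+    ///         process(block);
+    ///     }
+    ///     executor.finish(&read_context);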
+ TimerDescriptor timer{CLOCK_MONOTONIC, 0}; + int socket_fd = -1; + int epoll_fd; + int pipe_fd[2]; + + explicit RemoteQueryExecutorReadContext(MultiplexedConnections & connections_) : connections(connections_) + { + epoll_fd = epoll_create(2); + if (-1 == epoll_fd) + throwFromErrno("Cannot create epoll descriptor", ErrorCodes::CANNOT_OPEN_FILE); + + if (-1 == pipe2(pipe_fd, O_NONBLOCK)) + throwFromErrno("Cannot create pipe", ErrorCodes::CANNOT_OPEN_FILE); + + { + epoll_event socket_event; + socket_event.events = EPOLLIN | EPOLLPRI; + socket_event.data.fd = pipe_fd[0]; + + if (-1 == epoll_ctl(epoll_fd, EPOLL_CTL_ADD, pipe_fd[0], &socket_event)) + throwFromErrno("Cannot add pipe descriptor to epoll", ErrorCodes::CANNOT_OPEN_FILE); + } + + { + epoll_event timer_event; + timer_event.events = EPOLLIN | EPOLLPRI; + timer_event.data.fd = timer.getDescriptor(); + + if (-1 == epoll_ctl(epoll_fd, EPOLL_CTL_ADD, timer_event.data.fd, &timer_event)) + throwFromErrno("Cannot add timer descriptor to epoll", ErrorCodes::CANNOT_OPEN_FILE); + } + + auto routine = Routine{connections, *this}; + fiber = boost::context::fiber(std::allocator_arg_t(), stack, std::move(routine)); + } + + void setSocket(Poco::Net::Socket & socket) + { + int fd = socket.impl()->sockfd(); + if (fd == socket_fd) + return; + + epoll_event socket_event; + socket_event.events = EPOLLIN | EPOLLPRI; + socket_event.data.fd = fd; + + if (socket_fd != -1) + { + if (-1 == epoll_ctl(epoll_fd, EPOLL_CTL_DEL, socket_fd, &socket_event)) + throwFromErrno("Cannot remove socket descriptor to epoll", ErrorCodes::CANNOT_OPEN_FILE); + } + + socket_fd = fd; + + if (-1 == epoll_ctl(epoll_fd, EPOLL_CTL_ADD, socket_fd, &socket_event)) + throwFromErrno("Cannot add socket descriptor to epoll", ErrorCodes::CANNOT_OPEN_FILE); + + receive_timeout = socket.impl()->getReceiveTimeout(); + } + + bool checkTimeout() const + { + try + { + return checkTimeoutImpl(); + } + catch (DB::Exception & e) + { + if (last_used_socket) + e.addMessage(" while reading from socket ({})", last_used_socket->peerAddress().toString()); + throw; + } + } + + bool checkTimeoutImpl() const + { + epoll_event events[3]; + events[0].data.fd = events[1].data.fd = events[2].data.fd = -1; + + /// Wait for epoll_fd will not block if it was polled externally. + int num_events = epoll_wait(epoll_fd, events, 3, 0); + if (num_events == -1) + throwFromErrno("Failed to epoll_wait", ErrorCodes::CANNOT_READ_FROM_SOCKET); + + bool is_socket_ready = false; + bool is_pipe_alarmed = false; + bool has_timer_alarm = false; + + for (int i = 0; i < num_events; ++i) + { + if (events[i].data.fd == socket_fd) + is_socket_ready = true; + if (events[i].data.fd == timer.getDescriptor()) + has_timer_alarm = true; + if (events[i].data.fd == pipe_fd[0]) + is_pipe_alarmed = true; + } + + if (is_pipe_alarmed) + return false; + + if (has_timer_alarm && !is_socket_ready) + { + /// Socket receive timeout. Drain it in case or error, or it may be hide by timeout exception. + timer.drain(); + throw NetException("Timeout exceeded", ErrorCodes::SOCKET_TIMEOUT); + } + + return true; + } + + void setTimer() const + { + /// Did not get packet yet. Init timeout for the next async reading. 
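+        /// If receive_timeout is zero, the timer is left unset and the read waits until data or cancellation arrives.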
+ timer.reset(); + + if (receive_timeout.totalMicroseconds()) + timer.setRelative(receive_timeout); + } + + bool resumeRoutine() + { + if (is_read_in_progress && !checkTimeout()) + return false; + + { + std::lock_guard guard(fiber_lock); + if (!fiber) + return false; + + fiber = std::move(fiber).resume(); + } + + if (exception) + std::rethrow_exception(std::move(exception)); + + return true; + } + + void cancel() + { + std::lock_guard guard(fiber_lock); + /// It is safe to just destroy fiber - we are not in the process of reading from socket. + boost::context::fiber to_destroy = std::move(fiber); + + /// Send something to pipe to cancel executor waiting. + uint64_t buf = 0; + while (-1 == write(pipe_fd[1], &buf, sizeof(buf))) + { + if (errno == EAGAIN) + break; + + if (errno != EINTR) + throwFromErrno("Cannot write to pipe", ErrorCodes::CANNOT_READ_FROM_SOCKET); + } + } + + ~RemoteQueryExecutorReadContext() + { + /// socket_fd is closed by Poco::Net::Socket + /// timer_fd is closed by TimerDescriptor + close(epoll_fd); + } + + struct Routine + { + MultiplexedConnections & connections; + Self & read_context; + + struct ReadCallback + { + Self & read_context; + Fiber & fiber; + + void operator()(Poco::Net::Socket & socket) + { + try + { + read_context.setSocket(socket); + } + catch (DB::Exception & e) + { + e.addMessage(" while reading from socket ({})", socket.peerAddress().toString()); + throw; + } + + read_context.is_read_in_progress = true; + fiber = std::move(fiber).resume(); + read_context.is_read_in_progress = false; + } + }; + + Fiber operator()(Fiber && sink) const + { + try + { + while (true) + { + read_context.packet = connections.receivePacketUnlocked(ReadCallback{read_context, sink}); + sink = std::move(sink).resume(); + } + } + catch (const boost::context::detail::forced_unwind &) + { + /// This exception is thrown by fiber implementation in case if fiber is being deleted but hasn't exited + /// It should not be caught or it will segfault. + /// Other exceptions must be caught + throw; + } + catch (...) + { + read_context.exception = std::current_exception(); + } + + return std::move(sink); + } + }; +}; +} +#else +namespace DB +{ +class RemoteQueryExecutorReadContext +{ +public: + void cancel() {} +}; + +} +#endif diff --git a/src/DataStreams/ya.make b/src/DataStreams/ya.make index 776578af131..858bf7081e7 100644 --- a/src/DataStreams/ya.make +++ b/src/DataStreams/ya.make @@ -6,6 +6,7 @@ LIBRARY() PEERDIR( clickhouse/src/Common contrib/libs/poco/MongoDB + contrib/restricted/boost/libs ) NO_COMPILER_WARNINGS() diff --git a/src/DataStreams/ya.make.in b/src/DataStreams/ya.make.in index d6a683daa66..1624ddb799f 100644 --- a/src/DataStreams/ya.make.in +++ b/src/DataStreams/ya.make.in @@ -5,6 +5,7 @@ LIBRARY() PEERDIR( clickhouse/src/Common contrib/libs/poco/MongoDB + contrib/restricted/boost/libs ) NO_COMPILER_WARNINGS() diff --git a/src/DataTypes/DataTypeArray.cpp b/src/DataTypes/DataTypeArray.cpp index ed570beeda6..9cd56d0e2b5 100644 --- a/src/DataTypes/DataTypeArray.cpp +++ b/src/DataTypes/DataTypeArray.cpp @@ -272,7 +272,7 @@ void DataTypeArray::deserializeBinaryBulkWithMultipleStreams( /// Check consistency between offsets and elements subcolumns. /// But if elements column is empty - it's ok for columns of Nested types that was added by ALTER. 
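    /// For example, for Array values [[1, 2], [3]] the offsets are {2, 3}, so last_offset == 3
    /// must match the number of nested elements that were read.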
if (!nested_column.empty() && nested_column.size() != last_offset) - throw Exception("Cannot read all array values: read just " + toString(nested_column.size()) + " of " + toString(last_offset), + throw ParsingException("Cannot read all array values: read just " + toString(nested_column.size()) + " of " + toString(last_offset), ErrorCodes::CANNOT_READ_ALL_DATA); } @@ -300,7 +300,7 @@ static void serializeTextImpl(const IColumn & column, size_t row_num, WriteBuffe template -static void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && read_nested) +static void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && read_nested, bool allow_unenclosed) { ColumnArray & column_array = assert_cast(column); ColumnArray::Offsets & offsets = column_array.getOffsets(); @@ -308,7 +308,12 @@ static void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && r IColumn & nested_column = column_array.getData(); size_t size = 0; - assertChar('[', istr); + + bool has_braces = false; + if (checkChar('[', istr)) + has_braces = true; + else if (!allow_unenclosed) + throw Exception(ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT, "Array does not start with '[' character"); try { @@ -320,7 +325,9 @@ static void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && r if (*istr.position() == ',') ++istr.position(); else - throw Exception("Cannot read array from text", ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT); + throw ParsingException(ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT, + "Cannot read array from text, expected comma or end of array, found '{}'", + *istr.position()); } first = false; @@ -335,7 +342,11 @@ static void deserializeTextImpl(IColumn & column, ReadBuffer & istr, Reader && r skipWhitespaceIfAny(istr); } - assertChar(']', istr); + + if (has_braces) + assertChar(']', istr); + else /// If array is not enclosed in braces, we read until EOF. + assertEOF(istr); } catch (...) 
{ @@ -364,7 +375,7 @@ void DataTypeArray::deserializeText(IColumn & column, ReadBuffer & istr, const F [&](IColumn & nested_column) { nested->deserializeAsTextQuoted(nested_column, istr, settings); - }); + }, false); } void DataTypeArray::serializeTextJSON(const IColumn & column, size_t row_num, WriteBuffer & ostr, const FormatSettings & settings) const @@ -390,7 +401,11 @@ void DataTypeArray::serializeTextJSON(const IColumn & column, size_t row_num, Wr void DataTypeArray::deserializeTextJSON(IColumn & column, ReadBuffer & istr, const FormatSettings & settings) const { - deserializeTextImpl(column, istr, [&](IColumn & nested_column) { nested->deserializeAsTextJSON(nested_column, istr, settings); }); + deserializeTextImpl(column, istr, + [&](IColumn & nested_column) + { + nested->deserializeAsTextJSON(nested_column, istr, settings); + }, false); } @@ -429,7 +444,23 @@ void DataTypeArray::deserializeTextCSV(IColumn & column, ReadBuffer & istr, cons String s; readCSV(s, istr, settings.csv); ReadBufferFromString rb(s); - deserializeText(column, rb, settings); + + if (settings.csv.input_format_arrays_as_nested_csv) + { + deserializeTextImpl(column, rb, + [&](IColumn & nested_column) + { + nested->deserializeAsTextCSV(nested_column, rb, settings); + }, true); + } + else + { + deserializeTextImpl(column, rb, + [&](IColumn & nested_column) + { + nested->deserializeAsTextQuoted(nested_column, rb, settings); + }, true); + } } diff --git a/src/DataTypes/DataTypeNullable.cpp b/src/DataTypes/DataTypeNullable.cpp index ed501939901..a0fc8baaf7e 100644 --- a/src/DataTypes/DataTypeNullable.cpp +++ b/src/DataTypes/DataTypeNullable.cpp @@ -235,7 +235,7 @@ ReturnType DataTypeNullable::deserializeTextEscaped(IColumn & column, ReadBuffer /// Little tricky, because we cannot discriminate null from first character. if (istr.eof()) - throw Exception("Unexpected end of stream, while parsing value of Nullable type", ErrorCodes::CANNOT_READ_ALL_DATA); + throw ParsingException("Unexpected end of stream, while parsing value of Nullable type", ErrorCodes::CANNOT_READ_ALL_DATA); /// This is not null, surely. if (*istr.position() != '\\') @@ -250,7 +250,7 @@ ReturnType DataTypeNullable::deserializeTextEscaped(IColumn & column, ReadBuffer ++istr.position(); if (istr.eof()) - throw Exception("Unexpected end of stream, while parsing value of Nullable type, after backslash", ErrorCodes::CANNOT_READ_ALL_DATA); + throw ParsingException("Unexpected end of stream, while parsing value of Nullable type, after backslash", ErrorCodes::CANNOT_READ_ALL_DATA); return safeDeserialize(column, *nested_data_type, [&istr] @@ -405,11 +405,11 @@ ReturnType DataTypeNullable::deserializeTextCSV(IColumn & column, ReadBuffer & i /// or if someone uses 'U' or 'L' as delimiter in CSV. /// In the first case we cannot continue reading anyway. The second case seems to be unlikely. 
if (settings.csv.delimiter == 'U' || settings.csv.delimiter == 'L') - throw DB::Exception("Enabled setting input_format_csv_unquoted_null_literal_as_null may not work correctly " + throw DB::ParsingException("Enabled setting input_format_csv_unquoted_null_literal_as_null may not work correctly " "with format_csv_delimiter = 'U' or 'L' for large input.", ErrorCodes::CANNOT_READ_ALL_DATA); WriteBufferFromOwnString parsed_value; nested_data_type->serializeAsTextCSV(nested, nested.size() - 1, parsed_value, settings); - throw DB::Exception("Error while parsing \"" + std::string(null_literal, null_prefix_len) + throw DB::ParsingException("Error while parsing \"" + std::string(null_literal, null_prefix_len) + std::string(istr.position(), std::min(size_t{10}, istr.available())) + "\" as Nullable(" + nested_data_type->getName() + ") at position " + std::to_string(istr.count()) + ": expected \"NULL\" or " + nested_data_type->getName() + ", got \"" + std::string(null_literal, buf.count()) + "\", which was deserialized as \"" diff --git a/src/Dictionaries/ExternalQueryBuilder.h b/src/Dictionaries/ExternalQueryBuilder.h index 4c0e876b5db..9f9ccd65001 100644 --- a/src/Dictionaries/ExternalQueryBuilder.h +++ b/src/Dictionaries/ExternalQueryBuilder.h @@ -2,6 +2,7 @@ #include #include +#include #include #include @@ -16,11 +17,11 @@ class WriteBuffer; */ struct ExternalQueryBuilder { - const DictionaryStructure & dict_struct; - std::string db; - std::string schema; - std::string table; - const std::string & where; + const DictionaryStructure dict_struct; + const std::string db; + const std::string schema; + const std::string table; + const std::string where; IdentifierQuotingStyle quoting_style; diff --git a/src/Formats/FormatFactory.cpp b/src/Formats/FormatFactory.cpp index da63151613e..877f62dfb34 100644 --- a/src/Formats/FormatFactory.cpp +++ b/src/Formats/FormatFactory.cpp @@ -64,6 +64,7 @@ FormatSettings getFormatSettings(const Context & context, format_settings.csv.empty_as_default = settings.input_format_defaults_for_omitted_fields; format_settings.csv.input_format_enum_as_number = settings.input_format_csv_enum_as_number; format_settings.csv.unquoted_null_literal_as_null = settings.input_format_csv_unquoted_null_literal_as_null; + format_settings.csv.input_format_arrays_as_nested_csv = settings.input_format_csv_arrays_as_nested_csv; format_settings.custom.escaping_rule = settings.format_custom_escaping_rule; format_settings.custom.field_delimiter = settings.format_custom_field_delimiter; format_settings.custom.result_after_delimiter = settings.format_custom_result_after_delimiter; @@ -162,7 +163,7 @@ BlockInputStreamPtr FormatFactory::getInput( // (segmentator + two parsers + reader). bool parallel_parsing = settings.input_format_parallel_parsing && file_segmentation_engine && settings.max_threads >= 4; - if (settings.min_chunk_bytes_for_parallel_parsing * settings.max_threads * 2 > settings.max_memory_usage) + if (settings.max_memory_usage && settings.min_chunk_bytes_for_parallel_parsing * settings.max_threads * 2 > settings.max_memory_usage) parallel_parsing = false; if (parallel_parsing && name == "JSONEachRow") @@ -258,7 +259,6 @@ InputFormatPtr FormatFactory::getInputFormat( auto format = input_getter(buf, sample, params, format_settings); - /// It's a kludge. Because I cannot remove context from values format. 
if (auto * values = typeid_cast(format.get())) values->setContext(context); diff --git a/src/Formats/FormatFactory.h b/src/Formats/FormatFactory.h index 0fe6f19f0b7..d78cc2e2740 100644 --- a/src/Formats/FormatFactory.h +++ b/src/Formats/FormatFactory.h @@ -54,7 +54,7 @@ public: * Reads at least min_chunk_bytes and some more until the end of the chunk, depends on the format. * Used in ParallelParsingBlockInputStream. */ - using FileSegmentationEngine = std::function( ReadBuffer & buf, DB::Memory<> & memory, size_t min_chunk_bytes)>; diff --git a/src/Formats/FormatSettings.h b/src/Formats/FormatSettings.h index 8fe3756e012..3f031fa2311 100644 --- a/src/Formats/FormatSettings.h +++ b/src/Formats/FormatSettings.h @@ -71,6 +71,7 @@ struct FormatSettings bool empty_as_default = false; bool crlf_end_of_line = false; bool input_format_enum_as_number = false; + bool input_format_arrays_as_nested_csv = false; } csv; struct Custom diff --git a/src/Formats/JSONEachRowUtils.cpp b/src/Formats/JSONEachRowUtils.cpp index a1d9b4a5fff..6017f3983c6 100644 --- a/src/Formats/JSONEachRowUtils.cpp +++ b/src/Formats/JSONEachRowUtils.cpp @@ -4,13 +4,14 @@ namespace DB { -bool fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) +std::pair fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) { skipWhitespaceIfAny(in); char * pos = in.position(); size_t balance = 0; bool quotes = false; + size_t number_of_rows = 0; while (loadAtPosition(in, memory, pos) && (balance || memory.size() + static_cast(pos - in.position()) < min_chunk_size)) { @@ -57,11 +58,14 @@ bool fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memor quotes = true; ++pos; } + + if (balance == 0) + ++number_of_rows; } } saveUpToPosition(in, memory, pos); - return loadAtPosition(in, memory, pos); + return {loadAtPosition(in, memory, pos), number_of_rows}; } } diff --git a/src/Formats/JSONEachRowUtils.h b/src/Formats/JSONEachRowUtils.h index 92679fe3589..adf85f37a22 100644 --- a/src/Formats/JSONEachRowUtils.h +++ b/src/Formats/JSONEachRowUtils.h @@ -3,6 +3,6 @@ namespace DB { -bool fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size); +std::pair fileSegmentationEngineJSONEachRowImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size); } diff --git a/src/Functions/ExtractString.h b/src/Functions/ExtractString.h new file mode 100644 index 00000000000..c4251f8c4a6 --- /dev/null +++ b/src/Functions/ExtractString.h @@ -0,0 +1,166 @@ +#pragma once +#include +#include +#include + +#include +#include +#include +#include +#include + +#ifdef __SSE4_2__ +# include +#endif + +namespace DB +{ +// used by FunctionsStringSimilarity and FunctionsStringHash +// includes extracting ASCII ngram, UTF8 ngram, ASCII word and UTF8 word +template +struct ExtractStringImpl +{ + /// Padding form ColumnsString. It is a number of bytes we can always read starting from pos if pos < end. + static constexpr size_t default_padding = 16; + + /// Functions are read `default_padding - (N - 1)` bytes into the buffer. Window of size N is used. + /// Read copies `N - 1` last bytes from buffer into beginning, and then reads new bytes. 
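+    /// For example, with N = 4 and default_padding = 16: each call keeps the 3 trailing code points of the
+    /// previous read at the start of the buffer, reads 16 fresh bytes after them and advances pos by 13,
+    /// so 4-grams that span two reads are still produced.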
+    static constexpr size_t buffer_size = default_padding + N - 1;
+
+    // the length of code_points = buffer_size
+    // pos: the current position we start reading from
+    // end: the end location of the string
+    static ALWAYS_INLINE size_t readASCIICodePoints(UInt8 * code_points, const char *& pos, const char * end)
+    {
+        /// Offset before which we copy some data.
+        constexpr size_t padding_offset = default_padding - N + 1;
+        /// We have an array like this for ASCII (N == 4, other cases are similar)
+        /// |a0|a1|a2|a3|a4|a5|a6|a7|a8|a9|a10|a11|a12|a13|a14|a15|a16|a17|a18|
+        /// And we copy ^^^^^^^^^^^^^^^ these bytes to the start
+        /// Actually it is enough to copy 3 bytes, but memcpy for 4 bytes translates into 1 instruction
+        memcpy(code_points, code_points + padding_offset, roundUpToPowerOfTwoOrZero(N - 1) * sizeof(UInt8));
+        /// Now we have an array
+        /// |a13|a14|a15|a16|a4|a5|a6|a7|a8|a9|a10|a11|a12|a13|a14|a15|a16|a17|a18|
+        ///                  ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+        /// Do an unaligned read of 16 bytes and copy them as shown above.
+        /// 16 is also chosen to do two `movups`.
+        /// Such copying allows us to keep 3 code points from the previous read and produce the 4-grams that span two reads.
+        memcpy(code_points + (N - 1), pos, default_padding * sizeof(UInt8));
+
+        if constexpr (CaseInsensitive)
+        {
+            /// We really need template lambdas with C++20 to do it inline
+            unrollLowering(code_points, std::make_index_sequence());
+        }
+        pos += padding_offset;
+        if (pos > end)
+            return default_padding - (pos - end);
+        return default_padding;
+    }
+
+    // read an ASCII word
+    static ALWAYS_INLINE inline size_t readOneASCIIWord(PaddedPODArray & word_buf, const char *& pos, const char * end)
+    {
+        // skip separators
+        while (pos < end && !isAlphaNumericASCII(*pos))
+            ++pos;
+
+        // the word starts from here
+        const char * word_start = pos;
+        while (pos < end && isAlphaNumericASCII(*pos))
+            ++pos;
+
+        word_buf.assign(word_start, pos);
+        if (CaseInsensitive)
+        {
+            std::transform(word_buf.begin(), word_buf.end(), word_buf.begin(), [](UInt8 c) { return std::tolower(c); });
+        }
+        return word_buf.size();
+    }
+
+    static ALWAYS_INLINE inline size_t readUTF8CodePoints(UInt32 * code_points, const char *& pos, const char * end)
+    {
+        memcpy(code_points, code_points + default_padding - N + 1, roundUpToPowerOfTwoOrZero(N - 1) * sizeof(UInt32));
+
+        size_t num = N - 1;
+        while (num < default_padding && pos < end)
+        {
+            code_points[num++] = readOneUTF8Code(pos, end);
+        }
+        return num;
+    }
+
+    // read one UTF8 word from pos into word_buf
+    static ALWAYS_INLINE inline size_t readOneUTF8Word(PaddedPODArray & word_buf, const char *& pos, const char * end)
+    {
+        // skip UTF8 separators
+        while (pos < end && isUTF8Sep(*pos))
+            ++pos;
+        word_buf.clear();
+        // collect the word's UTF8 code points
+        while (pos < end && !isUTF8Sep(*pos))
+        {
+            word_buf.push_back(readOneUTF8Code(pos, end));
+        }
+        return word_buf.size();
+    }
+
+private:
+    template
+    static ALWAYS_INLINE inline void unrollLowering(Container & cont, const std::index_sequence &)
+    {
+        ((cont[Offset + I] = std::tolower(cont[Offset + I])), ...);
+    }
+
+    // we use any ASCII non-alphanumeric character as a UTF8 separator
+    static ALWAYS_INLINE inline bool isUTF8Sep(const UInt8 c) { return c < 128 && !isAlphaNumericASCII(c); }
+
+    // read one UTF8 character and return it
+    static ALWAYS_INLINE inline UInt32 readOneUTF8Code(const char *& pos, const char * end)
+    {
+        size_t length = UTF8::seqLength(*pos);
+
+        if (pos + length > end)
+            length = end - pos;
+        UInt32 res;
+        switch (length)
+ { + case 1: + res = 0; + memcpy(&res, pos, 1); + break; + case 2: + res = 0; + memcpy(&res, pos, 2); + break; + case 3: + res = 0; + memcpy(&res, pos, 3); + break; + default: + memcpy(&res, pos, 4); + } + + if constexpr (CaseInsensitive) + { + switch (length) + { + case 4: + res &= ~(1u << (5 + 3 * CHAR_BIT)); + [[fallthrough]]; + case 3: + res &= ~(1u << (5 + 2 * CHAR_BIT)); + [[fallthrough]]; + case 2: + res &= ~(1u); + res &= ~(1u << (5 + CHAR_BIT)); + [[fallthrough]]; + default: + res &= ~(1u << 5); + } + } + pos += length; + return res; + } +}; +} diff --git a/src/Functions/FunctionsComparison.h b/src/Functions/FunctionsComparison.h index e674f8690ff..957c7e0ab3e 100644 --- a/src/Functions/FunctionsComparison.h +++ b/src/Functions/FunctionsComparison.h @@ -1216,7 +1216,10 @@ public: { return res; } - else if (isColumnedAsDecimal(left_type) || isColumnedAsDecimal(right_type)) + else if ((isColumnedAsDecimal(left_type) || isColumnedAsDecimal(right_type)) + // Comparing Date and DateTime64 requires implicit conversion, + // otherwise Date is treated as number. + && !(date_and_datetime && (isDate(left_type) || isDate(right_type)))) { // compare if (!allowDecimalComparison(left_type, right_type) && !date_and_datetime) diff --git a/src/Functions/FunctionsStringHash.cpp b/src/Functions/FunctionsStringHash.cpp new file mode 100644 index 00000000000..d57be67ef7f --- /dev/null +++ b/src/Functions/FunctionsStringHash.cpp @@ -0,0 +1,626 @@ +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + +struct Hash +{ + static UInt64 crc32u64(UInt64 crc [[maybe_unused]], UInt64 val [[maybe_unused]]) + { +#ifdef __SSE4_2__ + return _mm_crc32_u64(crc, val); +#elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) + return __crc32cd(crc, val); +#else + throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED); +#endif + } + + static UInt64 crc32u32(UInt64 crc [[maybe_unused]], UInt32 val [[maybe_unused]]) + { +#ifdef __SSE4_2__ + return _mm_crc32_u32(crc, val); +#elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) + return __crc32cw(crc, val); +#else + throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED); +#endif + } + + static UInt64 crc32u8(UInt64 crc [[maybe_unused]], UInt8 val [[maybe_unused]]) + { +#ifdef __SSE4_2__ + return _mm_crc32_u8(crc, val); +#elif defined(__aarch64__) && defined(__ARM_FEATURE_CRC32) + return __crc32cb(crc, val); +#else + throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED); +#endif + } + + static ALWAYS_INLINE inline UInt64 ngramASCIIHash(const UInt8 * code_points) + { + return crc32u64(-1ULL, unalignedLoad(code_points)); + } + + static ALWAYS_INLINE inline UInt64 ngramUTF8Hash(const UInt32 * code_points) + { + UInt64 crc = -1ULL; + crc = crc32u64(crc, code_points[0]); + crc = crc32u64(crc, code_points[1]); + crc = crc32u64(crc, code_points[2]); + return crc; + } + + static ALWAYS_INLINE inline UInt64 wordShinglesHash(const UInt64 * hashes, size_t size, size_t offset) + { + UInt64 crc1 = -1ULL; + UInt64 crc2 = -1ULL; + + for (size_t i = offset; i < size; i += 2) + crc1 = crc32u64(crc1, hashes[i]); + for (size_t i = offset + 1; i < size; i += 2) + crc2 = crc32u64(crc2, hashes[i]); + + if ((size - offset) & 1) + { + for (size_t i = 0; i < offset; i += 2) + 
crc2 = crc32u64(crc2, hashes[i]); + for (size_t i = 1; i < offset; i += 2) + crc1 = crc32u64(crc1, hashes[i]); + } + else + { + for (size_t i = 0; i < offset; i += 2) + crc1 = crc32u64(crc1, hashes[i]); + for (size_t i = 1; i < offset; i += 2) + crc2 = crc32u64(crc2, hashes[i]); + } + + return crc1 | (crc2 << 32u); + } + + static ALWAYS_INLINE inline UInt64 hashSum(const UInt8 * hashes [[maybe_unused]], size_t K [[maybe_unused]]) + { + UInt64 crc1 = -1ULL; + UInt64 crc2 = -1ULL; + + for (size_t i = 0; i < K; i += 2) + crc1 = crc32u8(crc1, hashes[i]); + for (size_t i = 1; i < K; i += 2) + crc2 = crc32u8(crc2, hashes[i]); + + return crc1 | (crc2 << 32u); + } + + static ALWAYS_INLINE inline UInt64 hashSum(const UInt32 * hashes [[maybe_unused]], size_t K [[maybe_unused]]) + { + UInt64 crc1 = -1ULL; + UInt64 crc2 = -1ULL; + + for (size_t i = 0; i < K; i += 2) + crc1 = crc32u32(crc1, hashes[i]); + for (size_t i = 1; i < K; i += 2) + crc2 = crc32u32(crc2, hashes[i]); + + return crc1 | (crc2 << 32u); + } + + static ALWAYS_INLINE inline UInt64 hashSum(const UInt64 * hashes, size_t K) + { + UInt64 crc1 = -1ULL; + UInt64 crc2 = -1ULL; + + for (size_t i = 0; i < K; i += 2) + crc1 = crc32u64(crc1, hashes[i]); + for (size_t i = 1; i < K; i += 2) + crc2 = crc32u64(crc2, hashes[i]); + + return crc1 | (crc2 << 32u); + } +}; + +// Simhash String -> UInt64 +// N: the length of ngram or words shingles +// CodePoint: UInt8(ASCII) or UInt32(UTF8) +// UTF8: means ASCII or UTF8, these two parameters CodePoint and UTF8 can only be (UInt8, false) or (UInt32, true) +// Ngram: means ngram(true) or words shingles(false) +// CaseInsensitive: means should we consider about letter case or not +template +struct SimhashImpl +{ + using StrOp = ExtractStringImpl; + // we made an assumption that the size of one word cann't exceed 128, which may not true + // if some word's size exceed 128, it would be cut up to several word + static constexpr size_t max_string_size = 1u << 15; + static constexpr size_t simultaneously_codepoints_num = StrOp::buffer_size; + + // Simhash ngram calculate function: String ->UInt64 + // this function extracting ngram from input string, and maintain a 64-dimensions vector + // for each ngram, calculate a 64 bit hash value, and update the vector according the hash value + // finally return a 64 bit value(UInt64), i'th bit is 1 means vector[i] > 0, otherwise, vector[i] < 0 + static ALWAYS_INLINE inline UInt64 ngramCalculateHashValue( + const char * data, + size_t size, + size_t (*read_code_points)(CodePoint *, const char *&, const char *), + UInt64 (*hash_functor)(const CodePoint *)) + { + const char * start = data; + const char * end = data + size; + // fingerprint vector, all dimensions initialized to zero at the first + Int64 finger_vec[64] = {}; + CodePoint cp[simultaneously_codepoints_num] = {}; + + size_t found = read_code_points(cp, start, end); + size_t iter = N - 1; + + do + { + for (; iter + N <= found; ++iter) + { + // for each ngram, we can calculate an 64 bit hash + // then update finger_vec according to this hash value + // if the i'th bit is 1, finger_vec[i] plus 1, otherwise minus 1 + UInt64 hash_value = hash_functor(cp + iter); + std::bitset<64> bits(hash_value); + for (size_t i = 0; i < 64; ++i) + { + finger_vec[i] += ((bits.test(i)) ? 
1 : -1); + } + } + iter = 0; + } while (start < end && (found = read_code_points(cp, start, end))); + + // finally, we return a 64 bit value according to finger_vec + // if finger_vec[i] > 0, the i'th bit of the value is 1, otherwise 0 + std::bitset<64> res_bit(0u); + for (size_t i = 0; i < 64; ++i) + { + if (finger_vec[i] > 0) + res_bit.set(i); + } + return res_bit.to_ullong(); + } + + // Simhash word shingle calculate funtion: String -> UInt64 + // this function extracting n word shingle from input string, and maintain a 64-dimensions vector as well + // for each word shingle, calculate a 64 bit hash value, and update the vector according the hash value + // finally return a 64 bit value(UInt64), i'th bit is 1 means vector[i] > 0, otherwise, vector[i] < 0 + // + // word shingle hash value calculate: + // 1. at the first, extracts N word shingles and calculate N hash values, store into an array, use this N hash values + // to calculate the first word shingle hash value + // 2. next, we extrac one word each time, and calculate a new hash value of the new word,then use the latest N hash + // values to caculate the next word shingle hash value + static ALWAYS_INLINE inline UInt64 wordShinglesCalculateHashValue( + const char * data, + size_t size, + size_t (*read_one_word)(PaddedPODArray &, const char *&, const char *), + UInt64 (*hash_functor)(const UInt64 *, size_t, size_t)) + { + const char * start = data; + const char * end = data + size; + + // Also, a 64 bit vector initialized to zero + Int64 finger_vec[64] = {}; + // a array to store N word hash values + UInt64 nword_hashes[N] = {}; + // word buffer to store one word + PaddedPODArray word_buf; + // get first word shingle + for (size_t i = 0; i < N && start < end; ++i) + { + read_one_word(word_buf, start, end); + if (!word_buf.empty()) + { + // for each word, calculate a hash value and stored into the array + nword_hashes[i++] = Hash::hashSum(word_buf.data(), word_buf.size()); + } + } + + // calculate the first word shingle hash value + UInt64 hash_value = hash_functor(nword_hashes, N, 0); + std::bitset<64> first_bits(hash_value); + for (size_t i = 0; i < 64; ++i) + { + finger_vec[i] += ((first_bits.test(i)) ? 1 : -1); + } + + size_t offset = 0; + while (start < end && read_one_word(word_buf, start, end)) + { + // we need to store the new word hash value to the oldest location. + // for example, N = 5, array |a0|a1|a2|a3|a4|, now , a0 is the oldest location, + // so we need to store new word hash into location of a0, then ,this array become + // |a5|a1|a2|a3|a4|, next time, a1 become the oldest location, we need to store new + // word hash value into locaion of a1, then array become |a5|a6|a2|a3|a4| + nword_hashes[offset] = Hash::hashSum(word_buf.data(), word_buf.size()); + offset = (offset + 1) % N; + // according to the word hash storation way, in order to not lose the word shingle's + // sequence information, when calculation word shingle hash value, we need provide the offset + // inforation, which is the offset of the first word's hash value of the word shingle + hash_value = hash_functor(nword_hashes, N, offset); + std::bitset<64> bits(hash_value); + for (size_t i = 0; i < 64; ++i) + { + finger_vec[i] += ((bits.test(i)) ? 
1 : -1); + } + } + + std::bitset<64> res_bit(0u); + for (size_t i = 0; i < 64; ++i) + { + if (finger_vec[i] > 0) + res_bit.set(i); + } + return res_bit.to_ullong(); + } + + static void apply(const ColumnString::Chars & data, const ColumnString::Offsets & offsets, PaddedPODArray & res) + { + for (size_t i = 0; i < offsets.size(); ++i) + { + const char * one_data = reinterpret_cast(&data[offsets[i - 1]]); + const size_t data_size = offsets[i] - offsets[i - 1] - 1; + if (data_size <= max_string_size) + { + if constexpr (Ngram) + { + if constexpr (!UTF8) + res[i] = ngramCalculateHashValue(one_data, data_size, StrOp::readASCIICodePoints, Hash::ngramASCIIHash); + else + res[i] = ngramCalculateHashValue(one_data, data_size, StrOp::readUTF8CodePoints, Hash::ngramUTF8Hash); + } + else + { + if constexpr (!UTF8) + res[i] = wordShinglesCalculateHashValue(one_data, data_size, StrOp::readOneASCIIWord, Hash::wordShinglesHash); + else + res[i] = wordShinglesCalculateHashValue(one_data, data_size, StrOp::readOneUTF8Word, Hash::wordShinglesHash); + } + } + else + res[i] = -1ull; + } + } +}; + +template +class FixedHeap +{ +public: + FixedHeap() = delete; + + explicit FixedHeap(F f_) : f(f_), data_t(std::make_shared>(K, v)) + { + std::make_heap(data_t->begin(), data_t->end(), f); + } + + void insertAndReplace(UInt64 new_v) + { + data_t->push_back(new_v); + std::push_heap(data_t->begin(), data_t->end(), f); + std::pop_heap(data_t->begin(), data_t->end(), f); + data_t->pop_back(); + } + + const UInt64 * data() { return data_t->data(); } + +private: + F f; + std::shared_ptr> data_t; +}; + + +// Minhash: String -> Tuple(UInt64, UInt64) +// for each string, we extract ngram or word shingle, +// for each ngram or word shingle, calculate a hash value, +// then we take the K minimum hash values to calculate a hashsum, +// and take the K maximum hash values to calculate another hashsum, +// return this two hashsum: Tuple(hashsum1, hashsum2) +// +// N: the length of ngram or words shingles +// K: the number of minimum hashes and maximum hashes that we keep +// CodePoint: UInt8(ASCII) or UInt32(UTF8) +// UTF8: means ASCII or UTF8, these two parameters CodePoint and UTF8 can only be (UInt8, false) or (UInt32, true) +// Ngram: means ngram(true) or words shingles(false) +// CaseInsensitive: means should we consider about letter case or not +template +struct MinhashImpl +{ + using Less = std::less; + using Greater = std::greater; + using MaxHeap = FixedHeap, K, -1ULL>; + using MinHeap = FixedHeap, K, 0>; + using StrOp = ExtractStringImpl; + static constexpr size_t max_string_size = 1u << 15; + static constexpr size_t simultaneously_codepoints_num = StrOp::buffer_size; + + // Minhash ngram calculate function, String -> Tuple(UInt64, UInt64) + // we extract ngram from input string, and calculate a hash value for each ngram + // then we take the K minimum hash values to calculate a hashsum, + // and take the K maximum hash values to calculate another hashsum, + // return this two hashsum: Tuple(hashsum1, hashsum2) + static ALWAYS_INLINE inline std::tuple ngramCalculateHashValue( + const char * data, + size_t size, + size_t (*read_code_points)(CodePoint *, const char *&, const char *), + UInt64 (*hash_functor)(const CodePoint *)) + { + const char * start = data; + const char * end = data + size; + // we just maintain the K minimu and K maximum hash values + MaxHeap k_minimum_hashes(Less{}); + MinHeap k_maximum_hashes(Greater{}); + CodePoint cp[simultaneously_codepoints_num] = {}; + + size_t found = read_code_points(cp, start, 
end); + size_t iter = N - 1; + + do + { + for (; iter + N <= found; ++iter) + { + auto new_hash = hash_functor(cp + iter); + // insert the new hash value into array used to store K minimum value + // and K maximum value + k_minimum_hashes.insertAndReplace(new_hash); + k_maximum_hashes.insertAndReplace(new_hash); + } + iter = 0; + } while (start < end && (found = read_code_points(cp, start, end))); + + // calculate hashsum of the K minimum hash values and K maximum hash values + UInt64 res1 = Hash::hashSum(k_minimum_hashes.data(), K); + UInt64 res2 = Hash::hashSum(k_maximum_hashes.data(), K); + return std::make_tuple(res1, res2); + } + + // Minhash word shingle hash value calculate function: String ->Tuple(UInt64, UInt64) + // for each word shingle, we calculate a hash value, but in fact, we just maintain the + // K minimum and K maximum hash value + static ALWAYS_INLINE inline std::tuple wordShinglesCalculateHashValue( + const char * data, + size_t size, + size_t (*read_one_word)(PaddedPODArray &, const char *&, const char *), + UInt64 (*hash_functor)(const UInt64 *, size_t, size_t)) + { + const char * start = data; + const char * end = start + size; + // also we just store the K minimu and K maximum hash values + MaxHeap k_minimum_hashes(Less{}); + MinHeap k_maximum_hashes(Greater{}); + // array to store n word hashes + UInt64 nword_hashes[N] = {}; + // word buffer to store one word + PaddedPODArray word_buf; + // how word shingle hash value calculation and word hash storation is same as we + // have descripted in Simhash wordShinglesCalculateHashValue function + for (size_t i = 0; i < N && start < end; ++i) + { + read_one_word(word_buf, start, end); + if (!word_buf.empty()) + { + nword_hashes[i++] = Hash::hashSum(word_buf.data(), word_buf.size()); + } + } + + auto new_hash = hash_functor(nword_hashes, N, 0); + k_minimum_hashes.insertAndReplace(new_hash); + k_maximum_hashes.insertAndReplace(new_hash); + + size_t offset = 0; + while (start < end && read_one_word(word_buf, start, end)) + { + nword_hashes[offset] = Hash::hashSum(word_buf.data(), word_buf.size()); + offset = (offset + 1) % N; + new_hash = hash_functor(nword_hashes, N, offset); + k_minimum_hashes.insertAndReplace(new_hash); + k_maximum_hashes.insertAndReplace(new_hash); + } + + // calculate hashsum + UInt64 res1 = Hash::hashSum(k_minimum_hashes.data(), K); + UInt64 res2 = Hash::hashSum(k_maximum_hashes.data(), K); + return std::make_tuple(res1, res2); + } + + static void apply( + const ColumnString::Chars & data, + const ColumnString::Offsets & offsets, + PaddedPODArray & res1, + PaddedPODArray & res2) + { + for (size_t i = 0; i < offsets.size(); ++i) + { + const char * one_data = reinterpret_cast(&data[offsets[i - 1]]); + const size_t data_size = offsets[i] - offsets[i - 1] - 1; + if (data_size <= max_string_size) + { + if constexpr (Ngram) + { + if constexpr (!UTF8) + std::tie(res1[i], res2[i]) = ngramCalculateHashValue(one_data, data_size, StrOp::readASCIICodePoints, Hash::ngramASCIIHash); + else + std::tie(res1[i], res2[i]) = ngramCalculateHashValue(one_data, data_size, StrOp::readUTF8CodePoints, Hash::ngramUTF8Hash); + } + else + { + if constexpr (!UTF8) + std::tie(res1[i], res2[i]) = wordShinglesCalculateHashValue(one_data, data_size, StrOp::readOneASCIIWord, Hash::wordShinglesHash); + else + std::tie(res1[i], res2[i]) = wordShinglesCalculateHashValue(one_data, data_size, StrOp::readOneUTF8Word, Hash::wordShinglesHash); + } + } + else + std::tie(res1[i], res2[i]) = std::make_tuple(-1ull, -1ull); + } + } +}; + +struct 
NameNgramSimhash +{ + static constexpr auto name = "ngramSimhash"; +}; + +struct NameNgramSimhashCaseInsensitive +{ + static constexpr auto name = "ngramSimhashCaseInsensitive"; +}; + +struct NameNgramSimhashUTF8 +{ + static constexpr auto name = "ngramSimhashUTF8"; +}; + +struct NameNgramSimhashCaseInsensitiveUTF8 +{ + static constexpr auto name = "ngramSimhashCaseInsensitiveUTF8"; +}; + +struct NameWordShingleSimhash +{ + static constexpr auto name = "wordShingleSimhash"; +}; + +struct NameWordShingleSimhashCaseInsensitive +{ + static constexpr auto name = "wordShingleSimhashCaseInsensitive"; +}; + +struct NameWordShingleSimhashUTF8 +{ + static constexpr auto name = "wordShingleSimhashUTF8"; +}; + +struct NameWordShingleSimhashCaseInsensitiveUTF8 +{ + static constexpr auto name = "wordShingleSimhashCaseInsensitiveUTF8"; +}; + +struct NameNgramMinhash +{ + static constexpr auto name = "ngramMinhash"; +}; + +struct NameNgramMinhashCaseInsensitive +{ + static constexpr auto name = "ngramMinhashCaseInsensitive"; +}; + +struct NameNgramMinhashUTF8 +{ + static constexpr auto name = "ngramMinhashUTF8"; +}; + +struct NameNgramMinhashCaseInsensitiveUTF8 +{ + static constexpr auto name = "ngramMinhashCaseInsensitiveUTF8"; +}; + +struct NameWordShingleMinhash +{ + static constexpr auto name = "wordShingleMinhash"; +}; + +struct NameWordShingleMinhashCaseInsensitive +{ + static constexpr auto name = "wordShingleMinhashCaseInsensitive"; +}; + +struct NameWordShingleMinhashUTF8 +{ + static constexpr auto name = "wordShingleMinhashUTF8"; +}; + +struct NameWordShingleMinhashCaseInsensitiveUTF8 +{ + static constexpr auto name = "wordShingleMinhashCaseInsensitiveUTF8"; +}; + +// Simhash +using FunctionNgramSimhash = FunctionsStringHash, NameNgramSimhash, true>; + +using FunctionNgramSimhashCaseInsensitive + = FunctionsStringHash, NameNgramSimhashCaseInsensitive, true>; + +using FunctionNgramSimhashUTF8 = FunctionsStringHash, NameNgramSimhashUTF8, true>; + +using FunctionNgramSimhashCaseInsensitiveUTF8 + = FunctionsStringHash, NameNgramSimhashCaseInsensitiveUTF8, true>; + +using FunctionWordShingleSimhash = FunctionsStringHash, NameWordShingleSimhash, true>; + +using FunctionWordShingleSimhashCaseInsensitive + = FunctionsStringHash, NameWordShingleSimhashCaseInsensitive, true>; + +using FunctionWordShingleSimhashUTF8 = FunctionsStringHash, NameWordShingleSimhashUTF8, true>; + +using FunctionWordShingleSimhashCaseInsensitiveUTF8 + = FunctionsStringHash, NameWordShingleSimhashCaseInsensitiveUTF8, true>; + +// Minhash +using FunctionNgramMinhash = FunctionsStringHash, NameNgramMinhash, false>; + +using FunctionNgramMinhashCaseInsensitive + = FunctionsStringHash, NameNgramMinhashCaseInsensitive, false>; + +using FunctionNgramMinhashUTF8 = FunctionsStringHash, NameNgramMinhashUTF8, false>; + +using FunctionNgramMinhashCaseInsensitiveUTF8 + = FunctionsStringHash, NameNgramMinhashCaseInsensitiveUTF8, false>; + +using FunctionWordShingleMinhash = FunctionsStringHash, NameWordShingleMinhash, false>; + +using FunctionWordShingleMinhashCaseInsensitive + = FunctionsStringHash, NameWordShingleMinhashCaseInsensitive, false>; + +using FunctionWordShingleMinhashUTF8 + = FunctionsStringHash, NameWordShingleMinhashUTF8, false>; + +using FunctionWordShingleMinhashCaseInsensitiveUTF8 + = FunctionsStringHash, NameWordShingleMinhashCaseInsensitiveUTF8, false>; + +void registerFunctionsStringHash(FunctionFactory & factory) +{ + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + 
factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); + factory.registerFunction(); +} +} + diff --git a/src/Functions/FunctionsStringHash.h b/src/Functions/FunctionsStringHash.h new file mode 100644 index 00000000000..979f2bd8a9d --- /dev/null +++ b/src/Functions/FunctionsStringHash.h @@ -0,0 +1,83 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +// FunctionStringHash +// Simhash: String -> UInt64 +// Minhash: String -> (UInt64, UInt64) +template +class FunctionsStringHash : public IFunction +{ +public: + static constexpr auto name = Name::name; + + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 1; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isString(arguments[0])) + throw Exception(ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT, + "Function {} expect single String argument, got {}", getName(), arguments[0]->getName()); + + auto type = std::make_shared(); + if constexpr (is_simhash) + return type; + + return std::make_shared(DataTypes{type, type}); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t) const override + { + const ColumnPtr & column = arguments[0].column; + + if constexpr (is_simhash) + { + // non const string, const case is handled by useDefaultImplementationForConstants. 
+ auto col_res = ColumnVector::create(); + auto & vec_res = col_res->getData(); + vec_res.resize(column->size()); + const ColumnString * col_str_vector = checkAndGetColumn(&*column); + Impl::apply(col_str_vector->getChars(), col_str_vector->getOffsets(), vec_res); + return col_res; + } + else // Min hash + { + // non const string + auto col_h1 = ColumnVector::create(); + auto col_h2 = ColumnVector::create(); + auto & vec_h1 = col_h1->getData(); + auto & vec_h2 = col_h2->getData(); + vec_h1.resize(column->size()); + vec_h2.resize(column->size()); + const ColumnString * col_str_vector = checkAndGetColumn(&*column); + Impl::apply(col_str_vector->getChars(), col_str_vector->getOffsets(), vec_h1, vec_h2); + MutableColumns tuple_columns; + tuple_columns.emplace_back(std::move(col_h1)); + tuple_columns.emplace_back(std::move(col_h2)); + return ColumnTuple::create(std::move(tuple_columns)); + } + } +}; +} + diff --git a/src/Functions/bitHammingDistance.cpp b/src/Functions/bitHammingDistance.cpp new file mode 100644 index 00000000000..9b9ff5b6c07 --- /dev/null +++ b/src/Functions/bitHammingDistance.cpp @@ -0,0 +1,160 @@ +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + + +template +struct BitHammingDistanceImpl +{ + using ResultType = UInt8; + + static void NO_INLINE vectorVector(const PaddedPODArray & a, const PaddedPODArray & b, PaddedPODArray & c) + { + size_t size = a.size(); + for (size_t i = 0; i < size; ++i) + c[i] = apply(a[i], b[i]); + } + + static void NO_INLINE vectorConstant(const PaddedPODArray & a, B b, PaddedPODArray & c) + { + size_t size = a.size(); + for (size_t i = 0; i < size; ++i) + c[i] = apply(a[i], b); + } + + static void NO_INLINE constantVector(A a, const PaddedPODArray & b, PaddedPODArray & c) + { + size_t size = b.size(); + for (size_t i = 0; i < size; ++i) + c[i] = apply(a, b[i]); + } + +private: + static inline UInt8 apply(UInt64 a, UInt64 b) + { + UInt64 res = a ^ b; + return __builtin_popcountll(res); + } +}; + +template +bool castType(const IDataType * type, F && f) +{ + return castTypeToEither< + DataTypeInt8, + DataTypeInt16, + DataTypeInt32, + DataTypeInt64, + DataTypeUInt8, + DataTypeUInt16, + DataTypeUInt32, + DataTypeUInt64>(type, std::forward(f)); +} + +template +static bool castBothTypes(const IDataType * left, const IDataType * right, F && f) +{ + return castType(left, [&](const auto & left_) { return castType(right, [&](const auto & right_) { return f(left_, right_); }); }); +} + +// bitHammingDistance function: (Integer, Integer) -> UInt8 +class FunctionBitHammingDistance : public IFunction +{ +public: + static constexpr auto name = "bitHammingDistance"; + using ResultType = UInt8; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 2; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isInteger(arguments[0])) + throw Exception( + "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + if (!isInteger(arguments[1])) + throw Exception( + "Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + return std::make_shared(); + } + + bool useDefaultImplementationForConstants() const override { return true; } + + 
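+    // For example: bitHammingDistance(3, 5) = popcount(3 ^ 5) = popcount(0b110) = 2.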
ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const auto * left_generic = arguments[0].type.get(); + const auto * right_generic = arguments[1].type.get(); + ColumnPtr result_column; + bool valid = castBothTypes(left_generic, right_generic, [&](const auto & left, const auto & right) + { + using LeftDataType = std::decay_t; + using RightDataType = std::decay_t; + using T0 = typename LeftDataType::FieldType; + using T1 = typename RightDataType::FieldType; + using ColVecT0 = ColumnVector; + using ColVecT1 = ColumnVector; + using ColVecResult = ColumnVector; + + using OpImpl = BitHammingDistanceImpl; + + const auto * const col_left_raw = arguments[0].column.get(); + const auto * const col_right_raw = arguments[1].column.get(); + + typename ColVecResult::MutablePtr col_res = nullptr; + col_res = ColVecResult::create(); + + auto & vec_res = col_res->getData(); + vec_res.resize(input_rows_count); + + if (auto col_left_const = checkAndGetColumnConst(col_left_raw)) + { + if (auto col_right = checkAndGetColumn(col_right_raw)) + { + // constant integer - non-constant integer + OpImpl::constantVector(col_left_const->template getValue(), col_right->getData(), vec_res); + } + else + return false; + } + else if (auto col_left = checkAndGetColumn(col_left_raw)) + { + if (auto col_right = checkAndGetColumn(col_right_raw)) + // non-constant integer - non-constant integer + OpImpl::vectorVector(col_left->getData(), col_right->getData(), vec_res); + else if (auto col_right_const = checkAndGetColumnConst(col_right_raw)) + // non-constant integer - constant integer + OpImpl::vectorConstant(col_left->getData(), col_right_const->template getValue(), vec_res); + else + return false; + } + else + return false; + + result_column = std::move(col_res); + return true; + }); + if (!valid) + throw Exception(getName() + "'s arguments do not match the expected data types", ErrorCodes::ILLEGAL_COLUMN); + + return result_column; + } +}; + +void registerFunctionBitHammingDistance(FunctionFactory & factory) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/registerFunctions.cpp b/src/Functions/registerFunctions.cpp index 3f75746f861..d827cc40a86 100644 --- a/src/Functions/registerFunctions.cpp +++ b/src/Functions/registerFunctions.cpp @@ -42,7 +42,9 @@ void registerFunctionsNull(FunctionFactory &); void registerFunctionsJSON(FunctionFactory &); void registerFunctionsConsistentHashing(FunctionFactory & factory); void registerFunctionsUnixTimestamp64(FunctionFactory & factory); - +void registerFunctionBitHammingDistance(FunctionFactory & factory); +void registerFunctionTupleHammingDistance(FunctionFactory & factory); +void registerFunctionsStringHash(FunctionFactory & factory); #if !defined(ARCADIA_BUILD) void registerFunctionBayesAB(FunctionFactory &); #endif @@ -57,7 +59,6 @@ void registerFunctionAESDecryptMysql(FunctionFactory & factory); #endif - void registerFunctions() { auto & factory = FunctionFactory::instance(); @@ -99,6 +100,9 @@ void registerFunctions() registerFunctionsIntrospection(factory); registerFunctionsConsistentHashing(factory); registerFunctionsUnixTimestamp64(factory); + registerFunctionBitHammingDistance(factory); + registerFunctionTupleHammingDistance(factory); + registerFunctionsStringHash(factory); #if !defined(ARCADIA_BUILD) registerFunctionBayesAB(factory); diff --git a/src/Functions/tupleHammingDistance.cpp b/src/Functions/tupleHammingDistance.cpp new file mode 100644 index 00000000000..67d5f73065b 
--- /dev/null +++ b/src/Functions/tupleHammingDistance.cpp @@ -0,0 +1,220 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int ILLEGAL_COLUMN; + extern const int ILLEGAL_TYPE_OF_ARGUMENT; +} + +template +struct TupleHammingDistanceImpl +{ + using ResultType = UInt8; + + static void NO_INLINE vectorVector( + const PaddedPODArray & a1, + const PaddedPODArray & b1, + const PaddedPODArray & a2, + const PaddedPODArray & b2, + PaddedPODArray & c) + { + size_t size = a1.size(); + for (size_t i = 0; i < size; ++i) + c[i] = apply(a1[i], a2[i]) + apply(b1[i], b2[i]); + } + + static void NO_INLINE + vectorConstant(const PaddedPODArray & a1, const PaddedPODArray & b1, UInt64 a2, UInt64 b2, PaddedPODArray & c) + { + size_t size = a1.size(); + for (size_t i = 0; i < size; ++i) + c[i] = apply(a1[i], a2) + apply(b1[i], b2); + } + + static void NO_INLINE + constantVector(UInt64 a1, UInt64 b1, const PaddedPODArray & a2, const PaddedPODArray & b2, PaddedPODArray & c) + { + size_t size = a2.size(); + for (size_t i = 0; i < size; ++i) + c[i] = apply(a1, a2[i]) + apply(b1, b2[i]); + } + + static ResultType constantConstant(UInt64 a1, UInt64 b1, UInt64 a2, UInt64 b2) { return apply(a1, a2) + apply(b1, b2); } + +private: + static inline UInt8 apply(UInt64 a, UInt64 b) { return a != b; } +}; + +template +bool castType(const IDataType * type, F && f) +{ + return castTypeToEither< + DataTypeInt8, + DataTypeInt16, + DataTypeInt32, + DataTypeInt64, + DataTypeUInt8, + DataTypeUInt16, + DataTypeUInt32, + DataTypeUInt64>(type, std::forward(f)); +} + +template +static bool castBothTypes(const IDataType * left, const IDataType * right, F && f) +{ + return castType(left, [&](const auto & left_) { return castType(right, [&](const auto & right_) { return f(left_, right_); }); }); +} + +// tupleHammingDistance function: (Tuple(Integer, Integer), Tuple(Integer, Integer))->0/1/2 +// in order to avoid code bloating, for non-constant tuple, we make sure that the elements +// in the tuple should have same data type, and for constant tuple, elements can be any integer +// data type, we cast all of them into UInt64 +class FunctionTupleHammingDistance : public IFunction +{ +public: + static constexpr auto name = "tupleHammingDistance"; + using ResultType = UInt8; + static FunctionPtr create(const Context &) { return std::make_shared(); } + + String getName() const override { return name; } + + size_t getNumberOfArguments() const override { return 2; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (!isTuple(arguments[0])) + throw Exception( + "Illegal type " + arguments[0]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + if (!isTuple(arguments[1])) + throw Exception( + "Illegal type " + arguments[1]->getName() + " of argument of function " + getName(), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + return std::make_shared(); + } + + ColumnPtr executeImpl(const ColumnsWithTypeAndName & arguments, const DataTypePtr &, size_t input_rows_count) const override + { + const ColumnWithTypeAndName & arg1 = arguments[0]; + const ColumnWithTypeAndName & arg2 = arguments[1]; + const DataTypeTuple & type1 = static_cast(*arg1.type); + const DataTypeTuple & type2 = static_cast(*arg2.type); + const auto & left_elems = type1.getElements(); + const auto & right_elems = type2.getElements(); + if (left_elems.size() != 2 || right_elems.size() != 2) + throw Exception( + "Illegal column 
of arguments of function " + getName() + ", tuple should have exactly two elements.", + ErrorCodes::ILLEGAL_COLUMN); + + ColumnPtr result_column; + + bool valid = castBothTypes(left_elems[0].get(), right_elems[0].get(), [&](const auto & left, const auto & right) + { + using LeftDataType = std::decay_t; + using RightDataType = std::decay_t; + using T0 = typename LeftDataType::FieldType; + using T1 = typename RightDataType::FieldType; + using ColVecT0 = ColumnVector; + using ColVecT1 = ColumnVector; + using ColVecResult = ColumnVector; + + using OpImpl = TupleHammingDistanceImpl; + + // we can not useDefaultImplementationForConstants, + // because with that, tupleHammingDistance((10, 300), (10, 20)) does not work, + // since 10 has data type UInt8, and 300 has data type UInt16 + if (const ColumnConst * const_col_left = checkAndGetColumnConst(arg1.column.get())) + { + if (const ColumnConst * const_col_right = checkAndGetColumnConst(arg2.column.get())) + { + auto cols1 = convertConstTupleToConstantElements(*const_col_left); + auto cols2 = convertConstTupleToConstantElements(*const_col_right); + Field a1, b1, a2, b2; + cols1[0]->get(0, a1); + cols1[1]->get(0, b1); + cols2[0]->get(0, a2); + cols2[1]->get(0, b2); + auto res = OpImpl::constantConstant(a1.get(), b1.get(), a2.get(), b2.get()); + result_column = DataTypeUInt8().createColumnConst(const_col_left->size(), toField(res)); + return true; + } + } + + typename ColVecResult::MutablePtr col_res = nullptr; + col_res = ColVecResult::create(); + auto & vec_res = col_res->getData(); + vec_res.resize(input_rows_count); + // constant tuple - non-constant tuple + if (const ColumnConst * const_col_left = checkAndGetColumnConst(arg1.column.get())) + { + if (const ColumnTuple * col_right = typeid_cast(arg2.column.get())) + { + auto const_cols = convertConstTupleToConstantElements(*const_col_left); + Field a1, b1; + const_cols[0]->get(0, a1); + const_cols[1]->get(0, b1); + auto col_r1 = checkAndGetColumn(&col_right->getColumn(0)); + auto col_r2 = checkAndGetColumn(&col_right->getColumn(1)); + if (col_r1 && col_r2) + OpImpl::constantVector(a1.get(), b1.get(), col_r1->getData(), col_r2->getData(), vec_res); + else + return false; + } + else + return false; + } + else if (const ColumnTuple * col_left = typeid_cast(arg1.column.get())) + { + auto col_l1 = checkAndGetColumn(&col_left->getColumn(0)); + auto col_l2 = checkAndGetColumn(&col_left->getColumn(1)); + if (col_l1 && col_l2) + { + // non-constant tuple - constant tuple + if (const ColumnConst * const_col_right = checkAndGetColumnConst(arg2.column.get())) + { + auto const_cols = convertConstTupleToConstantElements(*const_col_right); + Field a2, b2; + const_cols[0]->get(0, a2); + const_cols[1]->get(0, b2); + OpImpl::vectorConstant(col_l1->getData(), col_l2->getData(), a2.get(), a2.get(), vec_res); + } + // non-constant tuple - non-constant tuple + else if (const ColumnTuple * col_right = typeid_cast(arg2.column.get())) + { + auto col_r1 = checkAndGetColumn(&col_right->getColumn(0)); + auto col_r2 = checkAndGetColumn(&col_right->getColumn(1)); + if (col_r1 && col_r2) + OpImpl::vectorVector(col_l1->getData(), col_l2->getData(), col_r1->getData(), col_r2->getData(), vec_res); + else + return false; + } + else + return false; + } + else + return false; + } + else + return false; + result_column = std::move(col_res); + return true; + }); + if (!valid) + throw Exception(getName() + "'s arguments do not match the expected data types", ErrorCodes::ILLEGAL_COLUMN); + + return result_column; + } +}; + +void 
registerFunctionTupleHammingDistance(FunctionFactory & factory) +{ + factory.registerFunction(); +} +} diff --git a/src/Functions/ya.make b/src/Functions/ya.make index 6e5d832db77..7e64deef64d 100644 --- a/src/Functions/ya.make +++ b/src/Functions/ya.make @@ -53,6 +53,7 @@ SRCS( FunctionsRandom.cpp FunctionsRound.cpp FunctionsStringArray.cpp + FunctionsStringHash.cpp FunctionsStringSimilarity.cpp GatherUtils/concat.cpp GatherUtils/createArraySink.cpp @@ -185,6 +186,7 @@ SRCS( bitBoolMaskAnd.cpp bitBoolMaskOr.cpp bitCount.cpp + bitHammingDistance.cpp bitNot.cpp bitOr.cpp bitRotateLeft.cpp @@ -504,6 +506,7 @@ SRCS( tryBase64Decode.cpp tuple.cpp tupleElement.cpp + tupleHammingDistance.cpp upper.cpp upperUTF8.cpp uptime.cpp diff --git a/src/IO/ReadBufferFromPocoSocket.cpp b/src/IO/ReadBufferFromPocoSocket.cpp index 5c66c3209f6..2c13446e693 100644 --- a/src/IO/ReadBufferFromPocoSocket.cpp +++ b/src/IO/ReadBufferFromPocoSocket.cpp @@ -28,10 +28,23 @@ bool ReadBufferFromPocoSocket::nextImpl() ssize_t bytes_read = 0; Stopwatch watch; + int flags = 0; + if (async_callback) + flags |= MSG_DONTWAIT; + /// Add more details to exceptions. try { - bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), internal_buffer.size()); + bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), internal_buffer.size(), flags); + + /// If async_callback is specified, and read is blocking, run async_callback and try again later. + /// It is expected that file descriptor may be polled externally. + /// Note that receive timeout is not checked here. External code should check it while polling. + while (bytes_read < 0 && async_callback && errno == EAGAIN) + { + async_callback(socket); + bytes_read = socket.impl()->receiveBytes(internal_buffer.begin(), internal_buffer.size(), flags); + } } catch (const Poco::Net::NetException & e) { diff --git a/src/IO/ReadBufferFromPocoSocket.h b/src/IO/ReadBufferFromPocoSocket.h index f328b89d99c..8064cd39246 100644 --- a/src/IO/ReadBufferFromPocoSocket.h +++ b/src/IO/ReadBufferFromPocoSocket.h @@ -5,7 +5,6 @@ #include #include - namespace DB { @@ -28,6 +27,11 @@ public: ReadBufferFromPocoSocket(Poco::Net::Socket & socket_, size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); bool poll(size_t timeout_microseconds); + + void setAsyncCallback(std::function async_callback_) { async_callback = std::move(async_callback_); } + +private: + std::function async_callback; }; } diff --git a/src/IO/ReadHelpers.cpp b/src/IO/ReadHelpers.cpp index e290da39535..97a8d937d39 100644 --- a/src/IO/ReadHelpers.cpp +++ b/src/IO/ReadHelpers.cpp @@ -96,7 +96,7 @@ void NO_INLINE throwAtAssertionFailed(const char * s, ReadBuffer & buf) else out << " before: " << quote << String(buf.position(), std::min(SHOW_CHARS_ON_SYNTAX_ERROR, buf.buffer().end() - buf.position())); - throw Exception(out.str(), ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); + throw ParsingException(out.str(), ErrorCodes::CANNOT_PARSE_INPUT_ASSERTION_FAILED); } @@ -503,7 +503,7 @@ static void readAnyQuotedStringInto(Vector & s, ReadBuffer & buf) { if (buf.eof() || *buf.position() != quote) { - throw Exception(ErrorCodes::CANNOT_PARSE_QUOTED_STRING, + throw ParsingException(ErrorCodes::CANNOT_PARSE_QUOTED_STRING, "Cannot parse quoted string: expected opening quote '{}', got '{}'", std::string{quote}, buf.eof() ? 
"EOF" : std::string{*buf.position()}); } @@ -538,7 +538,7 @@ static void readAnyQuotedStringInto(Vector & s, ReadBuffer & buf) parseComplexEscapeSequence(s, buf); } - throw Exception("Cannot parse quoted string: expected closing quote", + throw ParsingException("Cannot parse quoted string: expected closing quote", ErrorCodes::CANNOT_PARSE_QUOTED_STRING); } @@ -716,7 +716,7 @@ ReturnType readJSONStringInto(Vector & s, ReadBuffer & buf) auto error = [](const char * message [[maybe_unused]], int code [[maybe_unused]]) { if constexpr (throw_exception) - throw Exception(message, code); + throw ParsingException(message, code); return ReturnType(false); }; @@ -861,7 +861,7 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D s_pos[size] = 0; if constexpr (throw_exception) - throw Exception(std::string("Cannot parse datetime ") + s, ErrorCodes::CANNOT_PARSE_DATETIME); + throw ParsingException(std::string("Cannot parse datetime ") + s, ErrorCodes::CANNOT_PARSE_DATETIME); else return false; } @@ -899,7 +899,7 @@ ReturnType readDateTimeTextFallback(time_t & datetime, ReadBuffer & buf, const D else { if constexpr (throw_exception) - throw Exception("Cannot parse datetime", ErrorCodes::CANNOT_PARSE_DATETIME); + throw ParsingException("Cannot parse datetime", ErrorCodes::CANNOT_PARSE_DATETIME); else return false; } diff --git a/src/IO/ReadHelpers.h b/src/IO/ReadHelpers.h index dbbaae0816f..fa6b1fc2d8a 100644 --- a/src/IO/ReadHelpers.h +++ b/src/IO/ReadHelpers.h @@ -300,7 +300,7 @@ ReturnType readIntTextImpl(T & x, ReadBuffer & buf) else { if constexpr (throw_exception) - throw Exception("Unsigned type must not contain '-' symbol", ErrorCodes::CANNOT_PARSE_NUMBER); + throw ParsingException("Unsigned type must not contain '-' symbol", ErrorCodes::CANNOT_PARSE_NUMBER); else return ReturnType(false); } @@ -648,7 +648,7 @@ inline ReturnType readUUIDTextImpl(UUID & uuid, ReadBuffer & buf) if constexpr (throw_exception) { - throw Exception(std::string("Cannot parse uuid ") + s, ErrorCodes::CANNOT_PARSE_UUID); + throw ParsingException(std::string("Cannot parse uuid ") + s, ErrorCodes::CANNOT_PARSE_UUID); } else { @@ -669,7 +669,7 @@ inline ReturnType readUUIDTextImpl(UUID & uuid, ReadBuffer & buf) if constexpr (throw_exception) { - throw Exception(std::string("Cannot parse uuid ") + s, ErrorCodes::CANNOT_PARSE_UUID); + throw ParsingException(std::string("Cannot parse uuid ") + s, ErrorCodes::CANNOT_PARSE_UUID); } else { @@ -824,7 +824,7 @@ inline void readDateTimeText(LocalDateTime & datetime, ReadBuffer & buf) if (19 != size) { s[size] = 0; - throw Exception(std::string("Cannot parse datetime ") + s, ErrorCodes::CANNOT_PARSE_DATETIME); + throw ParsingException(std::string("Cannot parse datetime ") + s, ErrorCodes::CANNOT_PARSE_DATETIME); } datetime.year((s[0] - '0') * 1000 + (s[1] - '0') * 100 + (s[2] - '0') * 10 + (s[3] - '0')); @@ -1016,7 +1016,7 @@ void readQuoted(std::vector & x, ReadBuffer & buf) if (*buf.position() == ',') ++buf.position(); else - throw Exception("Cannot read array from text", ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT); + throw ParsingException("Cannot read array from text", ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT); } first = false; @@ -1039,7 +1039,7 @@ void readDoubleQuoted(std::vector & x, ReadBuffer & buf) if (*buf.position() == ',') ++buf.position(); else - throw Exception("Cannot read array from text", ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT); + throw ParsingException("Cannot read array from text", ErrorCodes::CANNOT_READ_ARRAY_FROM_TEXT); } first = 
false; diff --git a/src/IO/ReadWriteBufferFromHTTP.h b/src/IO/ReadWriteBufferFromHTTP.h index 267800d8900..de10f268dc3 100644 --- a/src/IO/ReadWriteBufferFromHTTP.h +++ b/src/IO/ReadWriteBufferFromHTTP.h @@ -105,11 +105,11 @@ namespace detail RemoteHostFilter remote_host_filter; std::function next_callback; - std::istream * call(const Poco::URI uri_, Poco::Net::HTTPResponse & response) + std::istream * call(Poco::URI uri_, Poco::Net::HTTPResponse & response) { // With empty path poco will send "POST HTTP/1.1" its bug. - if (uri.getPath().empty()) - uri.setPath("/"); + if (uri_.getPath().empty()) + uri_.setPath("/"); Poco::Net::HTTPRequest request(method, uri_.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1); request.setHost(uri_.getHost()); // use original, not resolved host name in header @@ -125,7 +125,7 @@ namespace detail if (!credentials.getUsername().empty()) credentials.authenticate(request); - LOG_TRACE((&Poco::Logger::get("ReadWriteBufferFromHTTP")), "Sending request to {}", uri.toString()); + LOG_TRACE((&Poco::Logger::get("ReadWriteBufferFromHTTP")), "Sending request to {}", uri_.toString()); auto sess = session->getSession(); diff --git a/src/IO/parseDateTimeBestEffort.cpp b/src/IO/parseDateTimeBestEffort.cpp index 8a188d22236..063955cdd1e 100644 --- a/src/IO/parseDateTimeBestEffort.cpp +++ b/src/IO/parseDateTimeBestEffort.cpp @@ -99,7 +99,7 @@ ReturnType parseDateTimeBestEffortImpl( auto on_error = [](const std::string & message [[maybe_unused]], int code [[maybe_unused]]) { if constexpr (std::is_same_v) - throw Exception(message, code); + throw ParsingException(message, code); else return false; }; diff --git a/src/IO/readDecimalText.h b/src/IO/readDecimalText.h index 727dd67c389..203d8e3963b 100644 --- a/src/IO/readDecimalText.h +++ b/src/IO/readDecimalText.h @@ -120,7 +120,7 @@ inline bool readDigits(ReadBuffer & buf, T & x, uint32_t & digits, int32_t & exp if (!tryReadIntText(addition_exp, buf)) { if constexpr (_throw_on_error) - throw Exception("Cannot parse exponent while reading decimal", ErrorCodes::CANNOT_PARSE_NUMBER); + throw ParsingException("Cannot parse exponent while reading decimal", ErrorCodes::CANNOT_PARSE_NUMBER); else return false; } @@ -133,7 +133,7 @@ inline bool readDigits(ReadBuffer & buf, T & x, uint32_t & digits, int32_t & exp if (digits_only) { if constexpr (_throw_on_error) - throw Exception("Unexpected symbol while reading decimal", ErrorCodes::CANNOT_PARSE_NUMBER); + throw ParsingException("Unexpected symbol while reading decimal", ErrorCodes::CANNOT_PARSE_NUMBER); return false; } stop = true; diff --git a/src/IO/readFloatText.h b/src/IO/readFloatText.h index 5647d574c62..eac6183e332 100644 --- a/src/IO/readFloatText.h +++ b/src/IO/readFloatText.h @@ -160,7 +160,7 @@ ReturnType readFloatTextPreciseImpl(T & x, ReadBuffer & buf) if (unlikely(res.ec != std::errc())) { if constexpr (throw_exception) - throw Exception("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER); + throw ParsingException("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER); else return ReturnType(false); } @@ -243,7 +243,7 @@ ReturnType readFloatTextPreciseImpl(T & x, ReadBuffer & buf) if (unlikely(res.ec != std::errc())) { if constexpr (throw_exception) - throw Exception("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER); + throw ParsingException("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER); else return ReturnType(false); } @@ -331,7 +331,7 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in) 
if (in.eof()) { if constexpr (throw_exception) - throw Exception("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER); + throw ParsingException("Cannot read floating point value", ErrorCodes::CANNOT_PARSE_NUMBER); else return false; } @@ -387,7 +387,7 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in) if (in.eof()) { if constexpr (throw_exception) - throw Exception("Cannot read floating point value: nothing after exponent", ErrorCodes::CANNOT_PARSE_NUMBER); + throw ParsingException("Cannot read floating point value: nothing after exponent", ErrorCodes::CANNOT_PARSE_NUMBER); else return false; } @@ -425,7 +425,7 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in) if (in.eof()) { if constexpr (throw_exception) - throw Exception("Cannot read floating point value: no digits read", ErrorCodes::CANNOT_PARSE_NUMBER); + throw ParsingException("Cannot read floating point value: no digits read", ErrorCodes::CANNOT_PARSE_NUMBER); else return false; } @@ -436,14 +436,14 @@ ReturnType readFloatTextFastImpl(T & x, ReadBuffer & in) if (in.eof()) { if constexpr (throw_exception) - throw Exception("Cannot read floating point value: nothing after plus sign", ErrorCodes::CANNOT_PARSE_NUMBER); + throw ParsingException("Cannot read floating point value: nothing after plus sign", ErrorCodes::CANNOT_PARSE_NUMBER); else return false; } else if (negative) { if constexpr (throw_exception) - throw Exception("Cannot read floating point value: plus after minus sign", ErrorCodes::CANNOT_PARSE_NUMBER); + throw ParsingException("Cannot read floating point value: plus after minus sign", ErrorCodes::CANNOT_PARSE_NUMBER); else return false; } diff --git a/src/Interpreters/ActionsDAG.cpp b/src/Interpreters/ActionsDAG.cpp index 62cb6047704..2fc78261f17 100644 --- a/src/Interpreters/ActionsDAG.cpp +++ b/src/Interpreters/ActionsDAG.cpp @@ -624,7 +624,7 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( { auto & input = inputs[res_elem.name]; if (input.empty()) - throw Exception("Cannot find column " + backQuoteIfNeed(res_elem.name) + " in source stream", + throw Exception("Cannot find column " + backQuote(res_elem.name) + " in source stream", ErrorCodes::THERE_IS_NO_COLUMN); src_node = actions_dag->inputs[input.front()]; @@ -641,12 +641,12 @@ ActionsDAGPtr ActionsDAG::makeConvertingActions( if (ignore_constant_values) src_node = const_cast(&actions_dag->addColumn(res_elem, true)); else if (res_const->getField() != src_const->getField()) - throw Exception("Cannot convert column " + backQuoteIfNeed(res_elem.name) + " because " + throw Exception("Cannot convert column " + backQuote(res_elem.name) + " because " "it is constant but values of constants are different in source and result", ErrorCodes::ILLEGAL_COLUMN); } else - throw Exception("Cannot convert column " + backQuoteIfNeed(res_elem.name) + " because " + throw Exception("Cannot convert column " + backQuote(res_elem.name) + " because " "it is non constant in source stream but must be constant in result", ErrorCodes::ILLEGAL_COLUMN); } diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 8aaf740b34b..c7bbc019518 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -735,6 +735,28 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & } } + if (node.is_window_function) + { + // Also add columns from PARTITION BY and ORDER BY of window functions. 
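+        // For example, for count() OVER (PARTITION BY a ORDER BY b) the columns a and b must
+        // be available to the expression actions even though they are not arguments of the
+        // function call itself.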
+ // Requiring a constant reference to a shared pointer to non-const AST + // doesn't really look sane, but the visitor does indeed require it. + if (node.window_partition_by) + { + visit(node.window_partition_by->clone(), data); + } + if (node.window_order_by) + { + visit(node.window_order_by->clone(), data); + } + + // Don't need to do anything more for window functions here -- the + // resulting column is added in ExpressionAnalyzer, similar to the + // aggregate functions. + return; + } + + // An aggregate function can also be calculated as a window function, but we + // checked for it above, so no need to do anything more. if (AggregateFunctionFactory::instance().isAggregateFunctionName(node.name)) return; diff --git a/src/Interpreters/AggregateDescription.cpp b/src/Interpreters/AggregateDescription.cpp index e483eb1b7a1..2748a2abe9d 100644 --- a/src/Interpreters/AggregateDescription.cpp +++ b/src/Interpreters/AggregateDescription.cpp @@ -1,6 +1,7 @@ #include #include #include +#include namespace DB { @@ -99,4 +100,31 @@ void AggregateDescription::explain(WriteBuffer & out, size_t indent) const } } +std::string WindowFunctionDescription::dump() const +{ + WriteBufferFromOwnString ss; + + ss << "window function '" << column_name << "\n"; + ss << "function node " << function_node->dumpTree() << "\n"; + ss << "aggregate function '" << aggregate_function->getName() << "'\n"; + if (!function_parameters.empty()) + { + ss << "parameters " << toString(function_parameters) << "\n"; + } + + return ss.str(); +} + +std::string WindowDescription::dump() const +{ + WriteBufferFromOwnString ss; + + ss << "window '" << window_name << "'\n"; + ss << "partition_by " << dumpSortDescription(partition_by) << "\n"; + ss << "order_by " << dumpSortDescription(order_by) << "\n"; + ss << "full_sort_description " << dumpSortDescription(full_sort_description) << "\n"; + + return ss.str(); +} + } diff --git a/src/Interpreters/AggregateDescription.h b/src/Interpreters/AggregateDescription.h index 396a62c446a..f1fc232d04d 100644 --- a/src/Interpreters/AggregateDescription.h +++ b/src/Interpreters/AggregateDescription.h @@ -1,13 +1,18 @@ #pragma once +#include +#include #include #include -#include +#include +#include namespace DB { +class ASTFunction; + struct AggregateDescription { AggregateFunctionPtr function; @@ -21,4 +26,44 @@ struct AggregateDescription using AggregateDescriptions = std::vector; + +struct WindowFunctionDescription +{ + std::string column_name; + const ASTFunction * function_node; + AggregateFunctionPtr aggregate_function; + Array function_parameters; + DataTypes argument_types; + Names argument_names; + + std::string dump() const; +}; + +struct WindowDescription +{ + std::string window_name; + + // We don't care about the particular order of keys for PARTITION BY, only + // that they are sorted. For now we always require ASC, but we could be more + // flexible and match any direction, or even different order of columns. + SortDescription partition_by; + + SortDescription order_by; + + // To calculate the window function, we sort input data first by PARTITION BY, + // then by ORDER BY. This field holds this combined sort order. + SortDescription full_sort_description; + + // No frame info as of yet. + + // The window functions that are calculated for this window. 
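+    // Several functions can share one window: makeWindowDescriptions() groups them by window
+    // name and asserts that their full_sort_description matches, so the input only has to be
+    // sorted once per window.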
+ std::vector window_functions; + + std::string dump() const; +}; + +using WindowFunctionDescriptions = std::vector; + +using WindowDescriptions = std::unordered_map; + } diff --git a/src/Interpreters/Aggregator.cpp b/src/Interpreters/Aggregator.cpp index 87abca4d7cd..de2c86d7c12 100644 --- a/src/Interpreters/Aggregator.cpp +++ b/src/Interpreters/Aggregator.cpp @@ -913,15 +913,15 @@ template Block Aggregator::convertOneBucketToBlock( AggregatedDataVariants & data_variants, Method & method, + Arena * arena, bool final, size_t bucket) const { Block block = prepareBlockAndFill(data_variants, final, method.data.impls[bucket].size(), - [bucket, &method, this] ( + [bucket, &method, arena, this] ( MutableColumns & key_columns, AggregateColumnsData & aggregate_columns, MutableColumns & final_aggregate_columns, - Arena * arena, bool final_) { convertToBlockImpl(method, method.data.impls[bucket], @@ -950,7 +950,7 @@ Block Aggregator::mergeAndConvertOneBucketToBlock( mergeBucketImpl(variants, bucket, arena); \ if (is_cancelled && is_cancelled->load(std::memory_order_seq_cst)) \ return {}; \ - block = convertOneBucketToBlock(merged_data, *merged_data.NAME, final, bucket); \ + block = convertOneBucketToBlock(merged_data, *merged_data.NAME, arena, final, bucket); \ } APPLY_FOR_VARIANTS_TWO_LEVEL(M) @@ -982,7 +982,7 @@ void Aggregator::writeToTemporaryFileImpl( for (size_t bucket = 0; bucket < Method::Data::NUM_BUCKETS; ++bucket) { - Block block = convertOneBucketToBlock(data_variants, method, false, bucket); + Block block = convertOneBucketToBlock(data_variants, method, data_variants.aggregates_pool, false, bucket); out.write(block); update_max_sizes(block); } @@ -1285,7 +1285,7 @@ Block Aggregator::prepareBlockAndFill( } } - filler(key_columns, aggregate_columns_data, final_aggregate_columns, data_variants.aggregates_pool, final); + filler(key_columns, aggregate_columns_data, final_aggregate_columns, final); Block res = header.cloneEmpty(); @@ -1352,7 +1352,6 @@ Block Aggregator::prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_va MutableColumns & key_columns, AggregateColumnsData & aggregate_columns, MutableColumns & final_aggregate_columns, - Arena * arena, bool final_) { if (data_variants.type == AggregatedDataVariants::Type::without_key || params.overflow_row) @@ -1367,7 +1366,8 @@ Block Aggregator::prepareBlockAndFillWithoutKey(AggregatedDataVariants & data_va } else { - insertAggregatesIntoColumns(data, final_aggregate_columns, arena); + /// Always single-thread. It's safe to pass current arena from 'aggregates_pool'. + insertAggregatesIntoColumns(data, final_aggregate_columns, data_variants.aggregates_pool); } if (params.overflow_row) @@ -1395,13 +1395,12 @@ Block Aggregator::prepareBlockAndFillSingleLevel(AggregatedDataVariants & data_v MutableColumns & key_columns, AggregateColumnsData & aggregate_columns, MutableColumns & final_aggregate_columns, - Arena * arena, bool final_) { #define M(NAME) \ else if (data_variants.type == AggregatedDataVariants::Type::NAME) \ convertToBlockImpl(*data_variants.NAME, data_variants.NAME->data, \ - key_columns, aggregate_columns, final_aggregate_columns, arena, final_); + key_columns, aggregate_columns, final_aggregate_columns, data_variants.aggregates_pool, final_); if (false) {} // NOLINT APPLY_FOR_VARIANTS_SINGLE_LEVEL(M) @@ -1435,11 +1434,21 @@ BlocksList Aggregator::prepareBlocksAndFillTwoLevelImpl( bool final, ThreadPool * thread_pool) const { + size_t max_threads = thread_pool ? 
thread_pool->getMaxThreads() : 1; + if (max_threads > data_variants.aggregates_pools.size()) + for (size_t i = data_variants.aggregates_pools.size(); i < max_threads; ++i) + data_variants.aggregates_pools.push_back(std::make_shared()); + auto converter = [&](size_t bucket, ThreadGroupStatusPtr thread_group) { if (thread_group) CurrentThread::attachToIfDetached(thread_group); - return convertOneBucketToBlock(data_variants, method, final, bucket); + + /// Select Arena to avoid race conditions + size_t thread_number = static_cast(bucket) % max_threads; + Arena * arena = data_variants.aggregates_pools.at(thread_number).get(); + + return convertOneBucketToBlock(data_variants, method, arena, final, bucket); }; /// packaged_task is used to ensure that exceptions are automatically thrown into the main stream. @@ -1949,7 +1958,7 @@ private: else if (method == AggregatedDataVariants::Type::NAME) \ { \ aggregator.mergeBucketImpl(data, bucket_num, arena); \ - block = aggregator.convertOneBucketToBlock(merged_data, *merged_data.NAME, final, bucket_num); \ + block = aggregator.convertOneBucketToBlock(merged_data, *merged_data.NAME, arena, final, bucket_num); \ } APPLY_FOR_VARIANTS_TWO_LEVEL(M) diff --git a/src/Interpreters/Aggregator.h b/src/Interpreters/Aggregator.h index c688da9d32d..86806b7fbad 100644 --- a/src/Interpreters/Aggregator.h +++ b/src/Interpreters/Aggregator.h @@ -1212,6 +1212,7 @@ protected: Block convertOneBucketToBlock( AggregatedDataVariants & data_variants, Method & method, + Arena * arena, bool final, size_t bucket) const; diff --git a/src/Interpreters/AsynchronousMetrics.cpp b/src/Interpreters/AsynchronousMetrics.cpp index 40a3aa520fa..4fe922252d5 100644 --- a/src/Interpreters/AsynchronousMetrics.cpp +++ b/src/Interpreters/AsynchronousMetrics.cpp @@ -212,18 +212,18 @@ void AsynchronousMetrics::update() { Int64 amount = total_memory_tracker.get(); Int64 peak = total_memory_tracker.getPeak(); - Int64 new_peak = data.resident; + Int64 new_amount = data.resident; LOG_DEBUG(&Poco::Logger::get("AsynchronousMetrics"), "MemoryTracking: was {}, peak {}, will set to {} (RSS), difference: {}", ReadableSize(amount), ReadableSize(peak), - ReadableSize(new_peak), - ReadableSize(new_peak - peak) + ReadableSize(new_amount), + ReadableSize(new_amount - amount) ); - total_memory_tracker.set(new_peak); - CurrentMetrics::set(CurrentMetrics::MemoryTracking, new_peak); + total_memory_tracker.set(new_amount); + CurrentMetrics::set(CurrentMetrics::MemoryTracking, new_amount); } } #endif @@ -247,6 +247,10 @@ void AsynchronousMetrics::update() size_t number_of_databases = databases.size(); size_t total_number_of_tables = 0; + size_t total_number_of_bytes = 0; + size_t total_number_of_rows = 0; + size_t total_number_of_parts = 0; + for (const auto & db : databases) { /// Check if database can contain MergeTree tables @@ -295,6 +299,17 @@ void AsynchronousMetrics::update() if (table_merge_tree) { calculateMax(max_part_count_for_partition, table_merge_tree->getMaxPartsCountForPartition()); + const auto & settings = global_context.getSettingsRef(); + total_number_of_bytes += table_merge_tree->totalBytes(settings).value(); + total_number_of_rows += table_merge_tree->totalRows(settings).value(); + total_number_of_parts += table_merge_tree->getPartsCount(); + } + if (table_replicated_merge_tree) + { + const auto & settings = global_context.getSettingsRef(); + total_number_of_bytes += table_replicated_merge_tree->totalBytes(settings).value(); + total_number_of_rows += 
table_replicated_merge_tree->totalRows(settings).value(); + total_number_of_parts += table_replicated_merge_tree->getPartsCount(); } } } @@ -315,6 +330,10 @@ void AsynchronousMetrics::update() new_values["NumberOfDatabases"] = number_of_databases; new_values["NumberOfTables"] = total_number_of_tables; + new_values["TotalBytesOfMergeTreeTables"] = total_number_of_bytes; + new_values["TotalRowsOfMergeTreeTables"] = total_number_of_rows; + new_values["TotalPartsOfMergeTreeTables"] = total_number_of_parts; + auto get_metric_name = [](const String & name) -> const char * { static std::map metric_map = { @@ -336,16 +355,22 @@ void AsynchronousMetrics::update() return it->second; }; - for (const auto & server : servers_to_start_before_tables) + if (servers_to_start_before_tables) { - if (const auto * name = get_metric_name(server.getPortName())) - new_values[name] = server.currentThreads(); + for (const auto & server : *servers_to_start_before_tables) + { + if (const auto * name = get_metric_name(server.getPortName())) + new_values[name] = server.currentThreads(); + } } - for (const auto & server : servers) + if (servers) { - if (const auto * name = get_metric_name(server.getPortName())) - new_values[name] = server.currentThreads(); + for (const auto & server : *servers) + { + if (const auto * name = get_metric_name(server.getPortName())) + new_values[name] = server.currentThreads(); + } } } diff --git a/src/Interpreters/AsynchronousMetrics.h b/src/Interpreters/AsynchronousMetrics.h index 610d8843537..88c2221be76 100644 --- a/src/Interpreters/AsynchronousMetrics.h +++ b/src/Interpreters/AsynchronousMetrics.h @@ -26,14 +26,26 @@ using AsynchronousMetricValues = std::unordered_map & servers_to_start_before_tables_, - const std::vector & servers_) + std::shared_ptr> servers_to_start_before_tables_, + std::shared_ptr> servers_) : global_context(global_context_) , update_period(update_period_seconds) , servers_to_start_before_tables(servers_to_start_before_tables_) @@ -55,8 +67,8 @@ public: private: Context & global_context; const std::chrono::seconds update_period; - const std::vector & servers_to_start_before_tables; - const std::vector & servers; + std::shared_ptr> servers_to_start_before_tables{nullptr}; + std::shared_ptr> servers{nullptr}; mutable std::mutex mutex; std::condition_variable wait_cond; diff --git a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp index e2a7c5b55dc..47726e49d50 100644 --- a/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp +++ b/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp @@ -126,6 +126,7 @@ void SelectStreamFactory::createForShard( bool add_agg_info = processed_stage == QueryProcessingStage::WithMergeableState; bool add_totals = false; bool add_extremes = false; + bool async_read = context_ptr->getSettingsRef().async_socket_for_remote; if (processed_stage == QueryProcessingStage::Complete) { add_totals = query_ast->as().group_by_with_totals; @@ -153,7 +154,7 @@ void SelectStreamFactory::createForShard( if (!table_func_ptr) remote_query_executor->setMainTable(main_table); - remote_pipes.emplace_back(createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes)); + remote_pipes.emplace_back(createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read)); remote_pipes.back().addInterpreterContext(context_ptr); }; @@ -249,7 +250,7 @@ void SelectStreamFactory::createForShard( pool = shard_info.pool, shard_num = shard_info.shard_num, 
modified_query, header = header, modified_query_ast, &context, context_ptr, throttler, main_table = main_table, table_func_ptr = table_func_ptr, scalars = scalars, external_tables = external_tables, - stage = processed_stage, local_delay, add_agg_info, add_totals, add_extremes]() + stage = processed_stage, local_delay, add_agg_info, add_totals, add_extremes, async_read]() -> Pipe { auto current_settings = context.getSettingsRef(); @@ -295,7 +296,7 @@ void SelectStreamFactory::createForShard( auto remote_query_executor = std::make_shared( std::move(connections), modified_query, header, context, throttler, scalars, external_tables, stage); - return createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes); + return createRemoteSourcePipe(remote_query_executor, add_agg_info, add_totals, add_extremes, async_read); } }; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 1b93c090842..31c12490408 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -50,6 +50,9 @@ #include #include +#include +#include + namespace DB { @@ -58,12 +61,14 @@ using LogAST = DebugASTLog; /// set to true to enable logs namespace ErrorCodes { - extern const int UNKNOWN_TYPE_OF_AST_NODE; - extern const int UNKNOWN_IDENTIFIER; + extern const int BAD_ARGUMENTS; extern const int ILLEGAL_PREWHERE; - extern const int LOGICAL_ERROR; extern const int ILLEGAL_TYPE_OF_ARGUMENT; extern const int ILLEGAL_TYPE_OF_COLUMN_FOR_FILTER; + extern const int LOGICAL_ERROR; + extern const int NOT_IMPLEMENTED; + extern const int UNKNOWN_IDENTIFIER; + extern const int UNKNOWN_TYPE_OF_AST_NODE; } namespace @@ -283,6 +288,8 @@ void ExpressionAnalyzer::analyzeAggregation() { aggregated_columns = temp_actions->getNamesAndTypesList(); } + + has_window = makeWindowDescriptions(temp_actions); } @@ -444,7 +451,11 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions) auto it = index.find(name); if (it == index.end()) - throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, "Unknown identifier (in aggregate function '{}'): {}", node->name, name); + { + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, + "Unknown identifier '{}' in aggregate function '{}'", + name, node->formatForErrorMessage()); + } types[i] = (*it)->result_type; aggregate.argument_names[i] = name; @@ -461,6 +472,128 @@ bool ExpressionAnalyzer::makeAggregateDescriptions(ActionsDAGPtr & actions) } +bool ExpressionAnalyzer::makeWindowDescriptions(ActionsDAGPtr & actions) +{ + // Convenient to check here because at least we have the Context. 
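+    // The feature is opt-in: a query that uses window functions is rejected unless the user
+    // enables the allow_experimental_window_functions setting checked just below.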
+ if (!syntax->window_function_asts.empty() && + !context.getSettingsRef().allow_experimental_window_functions) + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, + "Window functions are not implemented (while processing '{}')", + syntax->window_function_asts[0]->formatForErrorMessage()); + } + + for (const ASTFunction * function_node : syntax->window_function_asts) + { + assert(function_node->is_window_function); + + WindowDescription window_description; + window_description.window_name = function_node->getWindowDescription(); + + if (function_node->window_partition_by) + { + for (const auto & column_ast + : function_node->window_partition_by->children) + { + const auto * with_alias = dynamic_cast( + column_ast.get()); + if (!with_alias) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Expected a column in PARTITION BY for window '{}'," + " got '{}'", window_description.window_name, + column_ast->formatForErrorMessage()); + } + window_description.partition_by.push_back( + SortColumnDescription( + with_alias->getColumnName(), 1 /* direction */, + 1 /* nulls_direction */)); + } + } + + if (function_node->window_order_by) + { + for (const auto & column_ast + : function_node->window_order_by->children) + { + // Parser should have checked that we have a proper element here. + const auto & order_by_element + = column_ast->as(); + // Ignore collation for now. + window_description.order_by.push_back( + SortColumnDescription( + order_by_element.children.front()->getColumnName(), + order_by_element.direction, + order_by_element.nulls_direction)); + } + } + + window_description.full_sort_description = window_description.partition_by; + window_description.full_sort_description.insert( + window_description.full_sort_description.end(), + window_description.order_by.begin(), + window_description.order_by.end()); + + WindowFunctionDescription window_function; + window_function.function_node = function_node; + window_function.column_name + = window_function.function_node->getColumnName(); + window_function.function_parameters + = window_function.function_node->parameters + ? getAggregateFunctionParametersArray( + window_function.function_node->parameters) + : Array(); + + // Requiring a constant reference to a shared pointer to non-const AST + // doesn't really look sane, but the visitor does indeed require it. + // Hence we clone the node (not very sane either, I know). 
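+        // The bare "true" is no_subqueries, the same as in the analogous call in
+        // appendWindowFunctionsArguments().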
+ getRootActionsNoMakeSet(window_function.function_node->clone(), + true, actions); + + const ASTs & arguments + = window_function.function_node->arguments->children; + window_function.argument_types.resize(arguments.size()); + window_function.argument_names.resize(arguments.size()); + const auto & index = actions->getIndex(); + for (size_t i = 0; i < arguments.size(); ++i) + { + const std::string & name = arguments[i]->getColumnName(); + + auto it = index.find(name); + if (it == index.end()) + { + throw Exception(ErrorCodes::UNKNOWN_IDENTIFIER, + "Unknown identifier '{}' in window function '{}'", + name, window_function.function_node->formatForErrorMessage()); + } + + window_function.argument_types[i] = (*it)->result_type; + window_function.argument_names[i] = name; + } + + AggregateFunctionProperties properties; + window_function.aggregate_function + = AggregateFunctionFactory::instance().get( + window_function.function_node->name, + window_function.argument_types, + window_function.function_parameters, properties); + + auto [it, inserted] = window_descriptions.insert( + {window_description.window_name, window_description}); + + if (!inserted) + { + assert(it->second.full_sort_description + == window_description.full_sort_description); + } + + it->second.window_functions.push_back(window_function); + } + + return !syntax->window_function_asts.empty(); +} + + const ASTSelectQuery * ExpressionAnalyzer::getSelectQuery() const { const auto * select_query = query->as(); @@ -831,6 +964,65 @@ void SelectQueryExpressionAnalyzer::appendAggregateFunctionsArguments(Expression getRootActions(argument, only_types, step.actions()); } +void SelectQueryExpressionAnalyzer::appendWindowFunctionsArguments( + ExpressionActionsChain & chain, bool /* only_types */) +{ + ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns); + + // 1) Add actions for window functions and their arguments; + // 2) Mark the columns that are really required. + for (const auto & [_, w] : window_descriptions) + { + for (const auto & f : w.window_functions) + { + // 1.1) arguments of window functions; + // Requiring a constant reference to a shared pointer to non-const AST + // doesn't really look sane, but the visitor does indeed require it. + getRootActionsNoMakeSet(f.function_node->clone(), + true /* no_subqueries */, step.actions()); + + // 1.2) result of window function: an empty INPUT. + // It is an aggregate function, so it won't be added by getRootActions. + // This is something of a hack. Other options: + // a] do it like aggregate function -- break the chain of actions + // and manually add window functions to the starting list of + // input columns. Logically this is similar to what we're doing + // now, but would require to split the window function processing + // into a full-fledged step after plain functions. This would be + // somewhat cumbersome. With INPUT hack we can avoid a separate + // step and pretend that window functions are almost "normal" + // select functions. The limitation of both these ways is that + // we can't reference window functions in other SELECT + // expressions. + // b] add a WINDOW action type, then sort, then split the chain on + // each WINDOW action and insert the Window pipeline between the + // Expression pipelines. This is a "proper" way that would allow + // us to depend on window functions in other functions. But it's + // complicated so I avoid doing it for now. 
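+            // So for now we go with the INPUT hack: register the result column of the window
+            // function as a fake input with the right name and type; the window step added in
+            // InterpreterSelectQuery::executeWindow() is what actually fills it in.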
+ ColumnWithTypeAndName col; + col.type = f.aggregate_function->getReturnType(); + col.column = col.type->createColumn(); + col.name = f.column_name; + + step.actions()->addInput(col); + + for (const auto & a : f.function_node->arguments->children) + { + // 2.1) function arguments; + step.required_output.push_back(a->getColumnName()); + } + // 2.2) function result; + step.required_output.push_back(f.column_name); + } + + // 2.3) PARTITION BY and ORDER BY columns. + for (const auto & c : w.full_sort_description) + { + step.required_output.push_back(c.column_name); + } + } +} + bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, bool only_types) { const auto * select_query = getAggregatingQuery(); @@ -855,7 +1047,9 @@ void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, getRootActions(select_query->select(), only_types, step.actions()); for (const auto & child : select_query->select()->children) + { step.required_output.push_back(child->getColumnName()); + } } ActionsDAGPtr SelectQueryExpressionAnalyzer::appendOrderBy(ExpressionActionsChain & chain, bool only_types, bool optimize_read_in_order, @@ -1076,6 +1270,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( : first_stage(first_stage_) , second_stage(second_stage_) , need_aggregate(query_analyzer.hasAggregation()) + , has_window(query_analyzer.hasWindow()) { /// first_stage: Do I need to perform the first part of the pipeline - running on remote servers during distributed processing. /// second_stage: Do I need to execute the second part of the pipeline - running on the initiating server during distributed processing. @@ -1225,6 +1420,9 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( /// If there is aggregation, we execute expressions in SELECT and ORDER BY on the initiating server, otherwise on the source servers. query_analyzer.appendSelect(chain, only_types || (need_aggregate ? 
!second_stage : !first_stage)); + + query_analyzer.appendWindowFunctionsArguments(chain, only_types || !first_stage); + selected_columns = chain.getLastStep().required_output; has_order_by = query.orderBy() != nullptr; before_order_and_select = query_analyzer.appendOrderBy( @@ -1321,4 +1519,75 @@ void ExpressionAnalysisResult::checkActions() const } } +std::string ExpressionAnalysisResult::dump() const +{ + WriteBufferFromOwnString ss; + + ss << "need_aggregate " << need_aggregate << "\n"; + ss << "has_order_by " << has_order_by << "\n"; + ss << "has_window " << has_window << "\n"; + + if (before_array_join) + { + ss << "before_array_join " << before_array_join->dumpDAG() << "\n"; + } + + if (array_join) + { + ss << "array_join " << "FIXME doesn't have dump" << "\n"; + } + + if (before_join) + { + ss << "before_join " << before_join->dumpDAG() << "\n"; + } + + if (before_where) + { + ss << "before_where " << before_where->dumpDAG() << "\n"; + } + + if (prewhere_info) + { + ss << "prewhere_info " << prewhere_info->dump() << "\n"; + } + + if (filter_info) + { + ss << "filter_info " << filter_info->dump() << "\n"; + } + + if (before_aggregation) + { + ss << "before_aggregation " << before_aggregation->dumpDAG() << "\n"; + } + + if (before_having) + { + ss << "before_having " << before_having->dumpDAG() << "\n"; + } + + if (before_window) + { + ss << "before_window " << before_window->dumpDAG() << "\n"; + } + + if (before_order_and_select) + { + ss << "before_order_and_select " << before_order_and_select->dumpDAG() << "\n"; + } + + if (before_limit_by) + { + ss << "before_limit_by " << before_limit_by->dumpDAG() << "\n"; + } + + if (final_projection) + { + ss << "final_projection " << final_projection->dumpDAG() << "\n"; + } + + return ss.str(); +} + } diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 2567b32e37e..fb0cb4ea4c3 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -60,6 +60,10 @@ struct ExpressionAnalyzerData NamesAndTypesList aggregation_keys; AggregateDescriptions aggregate_descriptions; + bool has_window = false; + WindowDescriptions window_descriptions; + NamesAndTypesList window_columns; + bool has_global_subqueries = false; /// All new temporary tables obtained by performing the GLOBAL IN/JOIN subqueries. @@ -116,6 +120,9 @@ public: /// Get intermediates for tests const ExpressionAnalyzerData & getAnalyzedData() const { return *this; } + /// A list of windows for window functions. + const WindowDescriptions & windowDescriptions() const { return window_descriptions; } + protected: ExpressionAnalyzer( const ASTPtr & query_, @@ -159,6 +166,8 @@ protected: void analyzeAggregation(); bool makeAggregateDescriptions(ActionsDAGPtr & actions); + bool makeWindowDescriptions(ActionsDAGPtr & actions); + const ASTSelectQuery * getSelectQuery() const; bool isRemoteStorage() const { return syntax->is_remote_storage; } @@ -169,6 +178,8 @@ class SelectQueryExpressionAnalyzer; /// Result of SelectQueryExpressionAnalyzer: expressions for InterpreterSelectQuery struct ExpressionAnalysisResult { + std::string dump() const; + /// Do I need to perform the first part of the pipeline - running on remote servers during distributed processing. bool first_stage = false; /// Do I need to execute the second part of the pipeline - running on the initiating server during distributed processing. 
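A minimal sketch of the kind of query the window-function support above is aimed at, assuming the usual OVER (PARTITION BY ... ORDER BY ...) surface syntax that the window_partition_by/window_order_by fields correspond to; the setting name is the one checked in makeWindowDescriptions():

SET allow_experimental_window_functions = 1;
SELECT number, count() OVER (PARTITION BY number % 3 ORDER BY number) FROM numbers(10);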
@@ -176,6 +187,7 @@ struct ExpressionAnalysisResult bool need_aggregate = false; bool has_order_by = false; + bool has_window = false; bool remove_where_filter = false; bool optimize_read_in_order = false; @@ -189,6 +201,7 @@ struct ExpressionAnalysisResult ActionsDAGPtr before_where; ActionsDAGPtr before_aggregation; ActionsDAGPtr before_having; + ActionsDAGPtr before_window; ActionsDAGPtr before_order_and_select; ActionsDAGPtr before_limit_by; ActionsDAGPtr final_projection; @@ -256,6 +269,7 @@ public: /// Does the expression have aggregate functions or a GROUP BY or HAVING section. bool hasAggregation() const { return has_aggregation; } + bool hasWindow() const { return has_window; } bool hasGlobalSubqueries() { return has_global_subqueries; } bool hasTableJoin() const { return syntax->ast_join; } @@ -326,6 +340,7 @@ private: bool appendWhere(ExpressionActionsChain & chain, bool only_types); bool appendGroupBy(ExpressionActionsChain & chain, bool only_types, bool optimize_aggregation_in_order, ManyExpressionActions &); void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types); + void appendWindowFunctionsArguments(ExpressionActionsChain & chain, bool only_types); /// After aggregation: bool appendHaving(ExpressionActionsChain & chain, bool only_types); diff --git a/src/Interpreters/ExtractExpressionInfoVisitor.cpp b/src/Interpreters/ExtractExpressionInfoVisitor.cpp index 75e94de0db5..2d9339447b1 100644 --- a/src/Interpreters/ExtractExpressionInfoVisitor.cpp +++ b/src/Interpreters/ExtractExpressionInfoVisitor.cpp @@ -19,9 +19,18 @@ void ExpressionInfoMatcher::visit(const ASTPtr & ast, Data & data) void ExpressionInfoMatcher::visit(const ASTFunction & ast_function, const ASTPtr &, Data & data) { if (ast_function.name == "arrayJoin") + { data.is_array_join = true; - else if (AggregateFunctionFactory::instance().isAggregateFunctionName(ast_function.name)) + } + // "is_aggregate_function" doesn't mean much by itself. Apparently here it is + // used to move filters from HAVING to WHERE, and probably for this purpose + // an aggregate function calculated as a window function is not relevant. + else if (!ast_function.is_window_function + && AggregateFunctionFactory::instance().isAggregateFunctionName( + ast_function.name)) + { data.is_aggregate_function = true; + } else { const auto & function = FunctionFactory::instance().tryGet(ast_function.name, data.context); diff --git a/src/Interpreters/GetAggregatesVisitor.h b/src/Interpreters/GetAggregatesVisitor.h index ba1501fc624..d416a5f240e 100644 --- a/src/Interpreters/GetAggregatesVisitor.h +++ b/src/Interpreters/GetAggregatesVisitor.h @@ -19,8 +19,12 @@ public: struct Data { const char * assert_no_aggregates = nullptr; - std::unordered_set uniq_names; - std::vector aggregates; + const char * assert_no_windows = nullptr; + // Explicit empty initializers are needed to make designated initializers + // work on GCC 10. 
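+        // (See assertNoWindows()/assertNoAggregates() below, which construct Data with
+        // designated initializers.)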
+ std::unordered_set uniq_names {}; + std::vector aggregates {}; + std::vector window_functions {}; }; static bool needChildVisit(const ASTPtr & node, const ASTPtr & child) @@ -28,8 +32,13 @@ public: if (child->as() || child->as()) return false; if (auto * func = node->as()) - if (isAggregateFunction(func->name)) + { + if (isAggregateFunction(*func) + || func->is_window_function) + { return false; + } + } return true; } @@ -42,33 +51,56 @@ public: private: static void visit(const ASTFunction & node, const ASTPtr &, Data & data) { - if (!isAggregateFunction(node.name)) - return; + if (isAggregateFunction(node)) + { + if (data.assert_no_aggregates) + throw Exception("Aggregate function " + node.getColumnName() + " is found " + String(data.assert_no_aggregates) + " in query", + ErrorCodes::ILLEGAL_AGGREGATION); - if (data.assert_no_aggregates) - throw Exception("Aggregate function " + node.getColumnName() + " is found " + String(data.assert_no_aggregates) + " in query", - ErrorCodes::ILLEGAL_AGGREGATION); + String column_name = node.getColumnName(); + if (data.uniq_names.count(column_name)) + return; - String column_name = node.getColumnName(); - if (data.uniq_names.count(column_name)) - return; + data.uniq_names.insert(column_name); + data.aggregates.push_back(&node); + } + else if (node.is_window_function) + { + if (data.assert_no_windows) + throw Exception("Window function " + node.getColumnName() + " is found " + String(data.assert_no_windows) + " in query", + ErrorCodes::ILLEGAL_AGGREGATION); - data.uniq_names.insert(column_name); - data.aggregates.push_back(&node); + String column_name = node.getColumnName(); + if (data.uniq_names.count(column_name)) + return; + + data.uniq_names.insert(column_name); + data.window_functions.push_back(&node); + } } - static bool isAggregateFunction(const String & name) + static bool isAggregateFunction(const ASTFunction & node) { - return AggregateFunctionFactory::instance().isAggregateFunctionName(name); + // Aggregate functions can also be calculated as window functions, but + // here we are interested in aggregate functions calculated in GROUP BY. 
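+        // E.g. sum(x) is collected into aggregates, while sum(x) OVER (...) has
+        // is_window_function set and goes into window_functions instead.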
+ return !node.is_window_function + && AggregateFunctionFactory::instance().isAggregateFunctionName( + node.name); } }; using GetAggregatesVisitor = GetAggregatesMatcher::Visitor; +inline void assertNoWindows(const ASTPtr & ast, const char * description) +{ + GetAggregatesVisitor::Data data{.assert_no_windows = description}; + GetAggregatesVisitor(data).visit(ast); +} + inline void assertNoAggregates(const ASTPtr & ast, const char * description) { - GetAggregatesVisitor::Data data{description, {}, {}}; + GetAggregatesVisitor::Data data{.assert_no_aggregates = description}; GetAggregatesVisitor(data).visit(ast); } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 1a487f0a861..38cc19a00d6 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -35,36 +35,37 @@ #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include +#include #include -#include -#include #include -#include +#include +#include #include -#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -958,6 +959,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu else { executeExpression(query_plan, expressions.before_order_and_select, "Before ORDER BY and SELECT"); + executeWindow(query_plan); executeDistinct(query_plan, true, expressions.selected_columns, true); } @@ -1004,6 +1006,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu executeHaving(query_plan, expressions.before_having); executeExpression(query_plan, expressions.before_order_and_select, "Before ORDER BY and SELECT"); + executeWindow(query_plan); executeDistinct(query_plan, true, expressions.selected_columns, true); } @@ -1749,6 +1752,58 @@ void InterpreterSelectQuery::executeExpression(QueryPlan & query_plan, const Act } +void InterpreterSelectQuery::executeWindow(QueryPlan & query_plan) +{ + for (const auto & [_, w] : query_analyzer->windowDescriptions()) + { + const Settings & settings = context->getSettingsRef(); + + auto partial_sorting = std::make_unique( + query_plan.getCurrentDataStream(), + w.full_sort_description, + 0 /* LIMIT */, + SizeLimits(settings.max_rows_to_sort, settings.max_bytes_to_sort, + settings.sort_overflow_mode)); + partial_sorting->setStepDescription("Sort each block for window '" + + w.window_name + "'"); + query_plan.addStep(std::move(partial_sorting)); + + auto merge_sorting_step = std::make_unique( + query_plan.getCurrentDataStream(), + w.full_sort_description, + settings.max_block_size, + 0 /* LIMIT */, + settings.max_bytes_before_remerge_sort, + settings.remerge_sort_lowered_memory_bytes_ratio, + settings.max_bytes_before_external_sort, + context->getTemporaryVolume(), + settings.min_free_disk_space_for_temporary_data); + merge_sorting_step->setStepDescription("Merge sorted blocks for window '" + + w.window_name + "'"); + query_plan.addStep(std::move(merge_sorting_step)); + + // First MergeSorted, now MergingSorted. 
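+        // The previous step merges the sorted blocks within each stream; this one merges the
+        // sorted streams into a single fully sorted stream, which the window step below consumes.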
+ auto merging_sorted = std::make_unique( + query_plan.getCurrentDataStream(), + w.full_sort_description, + settings.max_block_size, + 0 /* LIMIT */); + merging_sorted->setStepDescription("Merge sorted streams for window '" + + w.window_name + "'"); + query_plan.addStep(std::move(merging_sorted)); + + auto window_step = std::make_unique( + query_plan.getCurrentDataStream(), + w, + w.window_functions); + window_step->setStepDescription("Window step for window '" + + w.window_name + "'"); + + query_plan.addStep(std::move(window_step)); + } +} + + void InterpreterSelectQuery::executeOrderOptimized(QueryPlan & query_plan, InputOrderInfoPtr input_sorting_info, UInt64 limit, SortDescription & output_order_descr) { const Settings & settings = context->getSettingsRef(); @@ -1795,9 +1850,13 @@ void InterpreterSelectQuery::executeOrder(QueryPlan & query_plan, InputOrderInfo /// Merge the sorted blocks. auto merge_sorting_step = std::make_unique( query_plan.getCurrentDataStream(), - output_order_descr, settings.max_block_size, limit, - settings.max_bytes_before_remerge_sort, settings.remerge_sort_lowered_memory_bytes_ratio, - settings.max_bytes_before_external_sort, context->getTemporaryVolume(), + output_order_descr, + settings.max_block_size, + limit, + settings.max_bytes_before_remerge_sort, + settings.remerge_sort_lowered_memory_bytes_ratio, + settings.max_bytes_before_external_sort, + context->getTemporaryVolume(), settings.min_free_disk_space_for_temporary_data); merge_sorting_step->setStepDescription("Merge sorted blocks for ORDER BY"); diff --git a/src/Interpreters/InterpreterSelectQuery.h b/src/Interpreters/InterpreterSelectQuery.h index d82ea4e1187..1fff316e1d4 100644 --- a/src/Interpreters/InterpreterSelectQuery.h +++ b/src/Interpreters/InterpreterSelectQuery.h @@ -120,6 +120,8 @@ private: void executeTotalsAndHaving(QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool overflow_row, bool final); void executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression); static void executeExpression(QueryPlan & query_plan, const ActionsDAGPtr & expression, const std::string & description); + /// FIXME should go through ActionsDAG to behave as a proper function + void executeWindow(QueryPlan & query_plan); void executeOrder(QueryPlan & query_plan, InputOrderInfoPtr sorting_info); void executeOrderOptimized(QueryPlan & query_plan, InputOrderInfoPtr sorting_info, UInt64 limit, SortDescription & output_order_descr); void executeWithFill(QueryPlan & query_plan); diff --git a/src/Interpreters/MonotonicityCheckVisitor.h b/src/Interpreters/MonotonicityCheckVisitor.h index 137f8d25b4a..87571a44eb0 100644 --- a/src/Interpreters/MonotonicityCheckVisitor.h +++ b/src/Interpreters/MonotonicityCheckVisitor.h @@ -43,9 +43,14 @@ public: if (group_by_function_hashes.count(key)) return false; - /// if ORDER BY contains aggregate function it shouldn't be optimized - if (AggregateFunctionFactory::instance().isAggregateFunctionName(ast_function.name)) + /// if ORDER BY contains aggregate function or window functions, it + /// shouldn't be optimized + if (ast_function.is_window_function + || AggregateFunctionFactory::instance().isAggregateFunctionName( + ast_function.name)) + { return false; + } return true; } diff --git a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp index e5263f54696..7f4da0638d4 100644 --- a/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp +++ b/src/Interpreters/MySQL/InterpretersMySQLDDLQuery.cpp 
@@ -202,10 +202,10 @@ static inline std::tuplearguments->children.insert(keys->arguments->children.end(), index_columns->children.begin(), index_columns->children.end()); else if (startsWith(declare_index->index_type, "UNIQUE_")) - unique_keys->arguments->children.insert(keys->arguments->children.end(), + unique_keys->arguments->children.insert(unique_keys->arguments->children.end(), index_columns->children.begin(), index_columns->children.end()); if (startsWith(declare_index->index_type, "PRIMARY_KEY_")) - primary_keys->arguments->children.insert(keys->arguments->children.end(), + primary_keys->arguments->children.insert(primary_keys->arguments->children.end(), index_columns->children.begin(), index_columns->children.end()); } } diff --git a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp index d7209330ec6..0d8e57aafc5 100644 --- a/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp +++ b/src/Interpreters/MySQL/tests/gtest_create_rewritten.cpp @@ -195,3 +195,15 @@ TEST(MySQLCreateRewritten, RewrittenQueryWithPrefixKey) "ReplacingMergeTree(_version) PARTITION BY intDiv(key, 4294967) ORDER BY (key, prefix_key)"); } +TEST(MySQLCreateRewritten, UniqueKeysConvert) +{ + tryRegisterFunctions(); + const auto & context_holder = getContext(); + + EXPECT_EQ(queryToString(tryRewrittenCreateQuery( + "CREATE TABLE `test_database`.`test_table_1` (code varchar(255) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL,name varchar(255) CHARACTER SET utf8 COLLATE utf8_bin NOT NULL," + " id bigint NOT NULL AUTO_INCREMENT, tenant_id bigint NOT NULL, PRIMARY KEY (id), UNIQUE KEY code_id (code, tenant_id), UNIQUE KEY name_id (name, tenant_id))" + " ENGINE=InnoDB AUTO_INCREMENT=100 DEFAULT CHARSET=utf8 COLLATE=utf8_bin;", context_holder.context)), + "CREATE TABLE test_database.test_table_1 (`code` String, `name` String, `id` Int64, `tenant_id` Int64, `_sign` Int8() MATERIALIZED 1, `_version` UInt64() MATERIALIZED 1)" + " ENGINE = ReplacingMergeTree(_version) PARTITION BY intDiv(id, 18446744073709551) ORDER BY (code, name, tenant_id, id)"); +} diff --git a/src/Interpreters/RewriteAnyFunctionVisitor.cpp b/src/Interpreters/RewriteAnyFunctionVisitor.cpp index 7c3f1bf31b2..e8f05962862 100644 --- a/src/Interpreters/RewriteAnyFunctionVisitor.cpp +++ b/src/Interpreters/RewriteAnyFunctionVisitor.cpp @@ -38,8 +38,16 @@ bool extractIdentifiers(const ASTFunction & func, std::unordered_set & if (arg_func->name == "lambda") return false; - if (AggregateFunctionFactory::instance().isAggregateFunctionName(arg_func->name)) + // We are looking for identifiers inside a function calculated inside + // the aggregate function `any()`. Window or aggregate function can't + // be inside `any`, but this check in GetAggregatesMatcher happens + // later, so we have to explicitly skip these nested functions here. 
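+    // E.g. for `any(x + sum(y))` or `any(f(x) OVER ())` we simply stop the
+    // rewrite here; the error about aggregate or window functions nested
+    // inside an aggregate is reported later by GetAggregatesMatcher.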
+ if (arg_func->is_window_function + || AggregateFunctionFactory::instance().isAggregateFunctionName( + arg_func->name)) + { return false; + } if (!extractIdentifiers(*arg_func, identifiers)) return false; diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 1ad22efc3fc..9816d1a3940 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -286,6 +286,17 @@ void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, { new_elements.push_back(elem); } + else + { + ASTFunction * func = elem->as(); + if (func && func->name == "untuple") + for (const auto & col : required_result_columns) + if (col.rfind("_ut_", 0) == 0) + { + new_elements.push_back(elem); + break; + } + } } elements = std::move(new_elements); @@ -428,12 +439,46 @@ std::vector getAggregates(ASTPtr & query, const ASTSelectQu /// There can not be other aggregate functions within the aggregate functions. for (const ASTFunction * node : data.aggregates) + { if (node->arguments) + { for (auto & arg : node->arguments->children) + { assertNoAggregates(arg, "inside another aggregate function"); + assertNoWindows(arg, "inside an aggregate function"); + } + } + } return data.aggregates; } +std::vector getWindowFunctions(ASTPtr & query, const ASTSelectQuery & select_query) +{ + /// There can not be window functions inside the WHERE and PREWHERE. + if (select_query.where()) + assertNoWindows(select_query.where(), "in WHERE"); + if (select_query.prewhere()) + assertNoWindows(select_query.prewhere(), "in PREWHERE"); + + GetAggregatesVisitor::Data data; + GetAggregatesVisitor(data).visit(query); + + /// There can not be other window functions within the aggregate functions. + for (const ASTFunction * node : data.window_functions) + { + if (node->arguments) + { + for (auto & arg : node->arguments->children) + { + assertNoAggregates(arg, "inside a window function"); + assertNoWindows(arg, "inside another window function"); + } + } + } + + return data.window_functions; +} + } TreeRewriterResult::TreeRewriterResult( @@ -624,14 +669,24 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select for (const auto & name : columns_context.requiredColumns()) ss << " '" << name << "'"; - if (!source_column_names.empty()) + if (storage) { - ss << ", source columns:"; - for (const auto & name : source_column_names) - ss << " '" << name << "'"; + ss << ", maybe you meant: "; + for (const auto & name : columns_context.requiredColumns()) + { + auto hints = storage->getHints(name); + if (!hints.empty()) + ss << " '" << toString(hints) << "'"; + } } else - ss << ", no source columns"; + { + if (!source_column_names.empty()) + for (const auto & name : columns_context.requiredColumns()) + ss << " '" << name << "'"; + else + ss << ", no source columns"; + } if (columns_context.has_table_join) { @@ -717,6 +772,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases); result.aggregates = getAggregates(query, *select_query); + result.window_function_asts = getWindowFunctions(query, *select_query); result.collectUsedColumns(query, true); result.ast_join = select_query->join(); diff --git a/src/Interpreters/TreeRewriter.h b/src/Interpreters/TreeRewriter.h index 4075be4326d..d9f98ee40bd 100644 --- a/src/Interpreters/TreeRewriter.h +++ b/src/Interpreters/TreeRewriter.h @@ -35,6 +35,8 @@ struct TreeRewriterResult Aliases aliases; std::vector aggregates; + std::vector 
window_function_asts; + /// Which column is needed to be ARRAY-JOIN'ed to get the specified. /// For example, for `SELECT s.v ... ARRAY JOIN a AS s` will get "s.v" -> "a.v". NameToNameMap array_join_result_to_source; diff --git a/src/Interpreters/tests/CMakeLists.txt b/src/Interpreters/tests/CMakeLists.txt index 1bc9d7fbacb..2c8440299fb 100644 --- a/src/Interpreters/tests/CMakeLists.txt +++ b/src/Interpreters/tests/CMakeLists.txt @@ -32,6 +32,9 @@ target_link_libraries (string_hash_map_aggregation PRIVATE dbms) add_executable (string_hash_set string_hash_set.cpp) target_link_libraries (string_hash_set PRIVATE dbms) +add_executable (context context.cpp) +target_link_libraries (context PRIVATE dbms) + add_executable (two_level_hash_map two_level_hash_map.cpp) target_include_directories (two_level_hash_map SYSTEM BEFORE PRIVATE ${SPARSEHASH_INCLUDE_DIR}) target_link_libraries (two_level_hash_map PRIVATE dbms) diff --git a/src/Interpreters/tests/context.cpp b/src/Interpreters/tests/context.cpp new file mode 100644 index 00000000000..9b908e26248 --- /dev/null +++ b/src/Interpreters/tests/context.cpp @@ -0,0 +1,90 @@ +#include +/// #define BOOST_USE_UCONTEXT +#include +// #include +// #include +#include +#include + +void __attribute__((__noinline__)) foo(std::exception_ptr exception) +{ + if (exception) + std::rethrow_exception(exception); +} + +void __attribute__((__noinline__)) bar(int a) +{ + std::cout << StackTrace().toString() << std::endl; + + if (a > 0) + throw DB::Exception(0, "hello"); +} + +void __attribute__((__noinline__)) gar(int a) +{ + char buf[1024]; + buf[1023] = a & 255; + if (a > 2) + return gar(a - 1); + else + bar(a); +} + +int main(int, char **) +try { + namespace ctx=boost::context; + int a; + std::exception_ptr exception; + // ctx::protected_fixedsize allocator + // ctx::pooled_fixedsize_stack(1024 * 64 + 2 * 2 * 1024 * 1024 * 16, 1) + ctx::fiber source{std::allocator_arg_t(), FiberStack(), [&](ctx::fiber&& sink) + { + a=0; + int b=1; + for (size_t i = 0; i < 9; ++i) + { + sink=std::move(sink).resume(); + int next=a+b; + a=b; + b=next; + } + try + { + gar(1024); + } + catch (...) + { + std::cout << "Saving exception\n"; + exception = std::current_exception(); + } + return std::move(sink); + }}; + + for (int j=0;j<10;++j) + { + try + { + source=std::move(source).resume(); + } + catch (DB::Exception & e) + { + std::cout << "Caught exception in resume " << e.getStackTraceString() << std::endl; + } + std::cout << a << " "; + } + + std::cout << std::endl; + + try + { + foo(exception); + } + catch (const DB::Exception & e) + { + std::cout << e.getStackTraceString() << std::endl; + } +} +catch (...) 
+{ + std::cerr << "Uncaught exception\n"; +} diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index c86a8e5c139..d5d03a540c9 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -1,14 +1,15 @@ -#include -#include #include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include namespace DB { @@ -54,6 +55,21 @@ ASTPtr ASTFunction::clone() const if (arguments) { res->arguments = arguments->clone(); res->children.push_back(res->arguments); } if (parameters) { res->parameters = parameters->clone(); res->children.push_back(res->parameters); } + if (window_name) + { + res->set(res->window_name, window_name->clone()); + } + + if (window_partition_by) + { + res->set(res->window_partition_by, window_partition_by->clone()); + } + + if (window_order_by) + { + res->set(res->window_order_by, window_order_by->clone()); + } + return res; } @@ -411,44 +427,91 @@ void ASTFunction::formatImplWithoutAlias(const FormatSettings & settings, Format } } - if (!written) + if (written) { - settings.ostr << (settings.hilite ? hilite_function : "") << name; + return; + } - if (parameters) + settings.ostr << (settings.hilite ? hilite_function : "") << name; + + if (parameters) + { + settings.ostr << '(' << (settings.hilite ? hilite_none : ""); + parameters->formatImpl(settings, state, nested_dont_need_parens); + settings.ostr << (settings.hilite ? hilite_function : "") << ')'; + } + + if ((arguments && !arguments->children.empty()) || !no_empty_args) + settings.ostr << '(' << (settings.hilite ? hilite_none : ""); + + if (arguments) + { + bool special_hilite_regexp = settings.hilite + && (name == "match" || name == "extract" || name == "extractAll" || name == "replaceRegexpOne" + || name == "replaceRegexpAll"); + + for (size_t i = 0, size = arguments->children.size(); i < size; ++i) { - settings.ostr << '(' << (settings.hilite ? hilite_none : ""); - parameters->formatImpl(settings, state, nested_dont_need_parens); - settings.ostr << (settings.hilite ? hilite_function : "") << ')'; + if (i != 0) + settings.ostr << ", "; + + bool special_hilite = false; + if (i == 1 && special_hilite_regexp) + special_hilite = highlightStringLiteralWithMetacharacters(arguments->children[i], settings, "|()^$.[]?*+{:-"); + + if (!special_hilite) + arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); } + } - if ((arguments && !arguments->children.empty()) || !no_empty_args) - settings.ostr << '(' << (settings.hilite ? hilite_none : ""); + if ((arguments && !arguments->children.empty()) || !no_empty_args) + settings.ostr << (settings.hilite ? hilite_function : "") << ')'; - if (arguments) - { - bool special_hilite_regexp = settings.hilite - && (name == "match" || name == "extract" || name == "extractAll" || name == "replaceRegexpOne" - || name == "replaceRegexpAll"); + settings.ostr << (settings.hilite ? 
hilite_none : ""); - for (size_t i = 0, size = arguments->children.size(); i < size; ++i) - { - if (i != 0) - settings.ostr << ", "; + if (!is_window_function) + { + return; + } - bool special_hilite = false; - if (i == 1 && special_hilite_regexp) - special_hilite = highlightStringLiteralWithMetacharacters(arguments->children[i], settings, "|()^$.[]?*+{:-"); + settings.ostr << " OVER ("; + appendWindowDescription(settings, state, nested_dont_need_parens); + settings.ostr << ")"; +} - if (!special_hilite) - arguments->children[i]->formatImpl(settings, state, nested_dont_need_parens); - } - } +std::string ASTFunction::getWindowDescription() const +{ + WriteBufferFromOwnString ostr; + FormatSettings settings{ostr, true /* one_line */}; + FormatState state; + FormatStateStacked frame; + appendWindowDescription(settings, state, frame); + return ostr.str(); +} - if ((arguments && !arguments->children.empty()) || !no_empty_args) - settings.ostr << (settings.hilite ? hilite_function : "") << ')'; +void ASTFunction::appendWindowDescription(const FormatSettings & settings, + FormatState & state, FormatStateStacked frame) const +{ + if (!is_window_function) + { + return; + } - settings.ostr << (settings.hilite ? hilite_none : ""); + if (window_partition_by) + { + settings.ostr << "PARTITION BY "; + window_partition_by->formatImpl(settings, state, frame); + } + + if (window_partition_by && window_order_by) + { + settings.ostr << " "; + } + + if (window_order_by) + { + settings.ostr << "ORDER BY "; + window_order_by->formatImpl(settings, state, frame); } } diff --git a/src/Parsers/ASTFunction.h b/src/Parsers/ASTFunction.h index a6e3834ac1a..38e5f3f095c 100644 --- a/src/Parsers/ASTFunction.h +++ b/src/Parsers/ASTFunction.h @@ -8,6 +8,8 @@ namespace DB { +class ASTIdentifier; + /** AST for function application or operator. */ class ASTFunction : public ASTWithAlias @@ -18,6 +20,11 @@ public: /// parameters - for parametric aggregate function. Example: quantile(0.9)(x) - what in first parens are 'parameters'. ASTPtr parameters; + bool is_window_function = false; + ASTIdentifier * window_name; + ASTExpressionList * window_partition_by; + ASTExpressionList * window_order_by; + /// do not print empty parentheses if there are no args - compatibility with new AST for data types and engine names. bool no_empty_args = false; @@ -32,6 +39,11 @@ public: ASTPtr toLiteral() const; // Try to convert functions like Array or Tuple to a literal form. 
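+    // Prints the contents of the OVER (...) clause, i.e. the PARTITION BY and
+    // ORDER BY parts of the window definition, if any.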
+ void appendWindowDescription(const FormatSettings & settings, + FormatState & state, FormatStateStacked frame) const; + + std::string getWindowDescription() const; + protected: void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; void appendColumnNameImpl(WriteBuffer & ostr) const override; diff --git a/src/Parsers/ASTIndexDeclaration.cpp b/src/Parsers/ASTIndexDeclaration.cpp index 0e8f0d0f7e8..d8ebf825674 100644 --- a/src/Parsers/ASTIndexDeclaration.cpp +++ b/src/Parsers/ASTIndexDeclaration.cpp @@ -1,6 +1,8 @@ #include + #include #include +#include namespace DB diff --git a/src/Parsers/ASTIndexDeclaration.h b/src/Parsers/ASTIndexDeclaration.h index 64ef6eb2db1..8416ec6b0a6 100644 --- a/src/Parsers/ASTIndexDeclaration.h +++ b/src/Parsers/ASTIndexDeclaration.h @@ -1,12 +1,12 @@ #pragma once -#include #include - namespace DB { +class ASTFunction; + /** name BY expr TYPE typename(args) GRANULARITY int in create query */ class ASTIndexDeclaration : public IAST diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 21dd6274739..726e28005e3 100644 --- a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -263,6 +263,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword distinct("DISTINCT"); ParserExpressionList contents(false); ParserSelectWithUnionQuery select; + ParserKeyword over("OVER"); bool has_distinct_modifier = false; @@ -382,10 +383,96 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) function_node->children.push_back(function_node->parameters); } + if (over.ignore(pos, expected)) + { + function_node->is_window_function = true; + + // We are slightly breaking the parser interface by parsing the window + // definition into an existing ASTFunction. Normally it would take a + // reference to ASTPtr and assign it the new node. We only have a pointer + // of a different type, hence this workaround with a temporary pointer. + ASTPtr function_node_as_iast = function_node; + + ParserWindowDefinition window_definition; + if (!window_definition.parse(pos, function_node_as_iast, expected)) + { + return false; + } + } + node = function_node; return true; } +bool ParserWindowDefinition::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) +{ + ASTFunction * function = dynamic_cast(node.get()); + + // Variant 1: + // function_name ( * ) OVER window_name + // FIXME doesn't work anyway for now -- never used anywhere, window names + // can't be defined, and TreeRewriter thinks the window name is a column so + // the query fails. 
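+    // I.e. something like `count(*) OVER w` is parsed here, but such queries
+    // do not actually work yet for the reasons above.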
+ if (pos->type != TokenType::OpeningRoundBracket) + { + ASTPtr window_name_ast; + ParserIdentifier window_name_parser; + if (window_name_parser.parse(pos, window_name_ast, expected)) + { + function->set(function->window_name, window_name_ast); + return true; + } + else + { + return false; + } + } + ++pos; + + // Variant 2: + // function_name ( * ) OVER ( window_definition ) + ParserKeyword keyword_partition_by("PARTITION BY"); + ParserNotEmptyExpressionList columns_partition_by( + false /* we don't allow declaring aliases here*/); + ParserKeyword keyword_order_by("ORDER BY"); + ParserOrderByExpressionList columns_order_by; + + if (keyword_partition_by.ignore(pos, expected)) + { + ASTPtr partition_by_ast; + if (columns_partition_by.parse(pos, partition_by_ast, expected)) + { + function->set(function->window_partition_by, partition_by_ast); + } + else + { + return false; + } + } + + if (keyword_order_by.ignore(pos, expected)) + { + ASTPtr order_by_ast; + if (columns_order_by.parse(pos, order_by_ast, expected)) + { + function->set(function->window_order_by, order_by_ast); + } + else + { + return false; + } + } + + if (pos->type != TokenType::ClosingRoundBracket) + { + expected.add(pos, "')'"); + return false; + } + ++pos; + + return true; +} + bool ParserCodecDeclarationList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { return ParserList(std::make_unique(), diff --git a/src/Parsers/ExpressionElementParsers.h b/src/Parsers/ExpressionElementParsers.h index 6de32ab731d..917f084a700 100644 --- a/src/Parsers/ExpressionElementParsers.h +++ b/src/Parsers/ExpressionElementParsers.h @@ -156,6 +156,13 @@ protected: bool allow_function_parameters; }; +// Window definition (the thing that goes after OVER) for window function. +class ParserWindowDefinition : public IParserBase +{ + const char * getName() const override { return "window definition"; } + bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; +}; + class ParserCodecDeclarationList : public IParserBase { protected: diff --git a/src/Parsers/IAST.cpp b/src/Parsers/IAST.cpp index 56fffe3891a..e223235b8e4 100644 --- a/src/Parsers/IAST.cpp +++ b/src/Parsers/IAST.cpp @@ -161,4 +161,11 @@ void IAST::dumpTree(WriteBuffer & ostr, size_t indent) const } } +std::string IAST::dumpTree(size_t indent) const +{ + WriteBufferFromOwnString wb; + dumpTree(wb, indent); + return wb.str(); +} + } diff --git a/src/Parsers/IAST.h b/src/Parsers/IAST.h index 01ce4971c45..bed6c5bcdf9 100644 --- a/src/Parsers/IAST.h +++ b/src/Parsers/IAST.h @@ -19,9 +19,6 @@ namespace DB namespace ErrorCodes { - extern const int NOT_A_COLUMN; - extern const int UNKNOWN_TYPE_OF_AST_NODE; - extern const int UNKNOWN_ELEMENT_IN_AST; extern const int LOGICAL_ERROR; } @@ -46,7 +43,7 @@ public: String getColumnName() const; virtual void appendColumnName(WriteBuffer &) const { - throw Exception("Trying to get name of not a column: " + getID(), ErrorCodes::NOT_A_COLUMN); + throw Exception("Trying to get name of not a column: " + getID(), ErrorCodes::LOGICAL_ERROR); } /** Get the alias, if any, or the canonical name of the column, if it is not. */ @@ -58,7 +55,7 @@ public: /** Set the alias. */ virtual void setAlias(const String & /*to*/) { - throw Exception("Can't set alias of " + getColumnName(), ErrorCodes::UNKNOWN_TYPE_OF_AST_NODE); + throw Exception("Can't set alias of " + getColumnName(), ErrorCodes::LOGICAL_ERROR); } /** Get the text that identifies this element. 
*/ @@ -77,6 +74,7 @@ public: virtual void updateTreeHashImpl(SipHash & hash_state) const; void dumpTree(WriteBuffer & ostr, size_t indent = 0) const; + std::string dumpTree(size_t indent = 0) const; /** Check the depth of the tree. * If max_depth is specified and the depth is greater - throw an exception. @@ -160,6 +158,7 @@ public: bool always_quote_identifiers = false; IdentifierQuotingStyle identifier_quoting_style = IdentifierQuotingStyle::Backticks; + // Newline or whitespace. char nl_or_ws; FormatSettings(WriteBuffer & ostr_, bool one_line_) @@ -208,7 +207,7 @@ public: virtual void formatImpl(const FormatSettings & /*settings*/, FormatState & /*state*/, FormatStateStacked /*frame*/) const { - throw Exception("Unknown element in AST: " + getID(), ErrorCodes::UNKNOWN_ELEMENT_IN_AST); + throw Exception("Unknown element in AST: " + getID(), ErrorCodes::LOGICAL_ERROR); } // A simple way to add some user-readable context to an error message. diff --git a/src/Parsers/New/AST/TableElementExpr.cpp b/src/Parsers/New/AST/TableElementExpr.cpp index 1336ea06b27..d994ea7eef6 100644 --- a/src/Parsers/New/AST/TableElementExpr.cpp +++ b/src/Parsers/New/AST/TableElementExpr.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/src/Processors/Executors/PipelineExecutor.cpp b/src/Processors/Executors/PipelineExecutor.cpp index 517e07a3ba4..1646a1a01fe 100644 --- a/src/Processors/Executors/PipelineExecutor.cpp +++ b/src/Processors/Executors/PipelineExecutor.cpp @@ -164,7 +164,7 @@ bool PipelineExecutor::expandPipeline(Stack & stack, UInt64 pid) return true; } -bool PipelineExecutor::tryAddProcessorToStackIfUpdated(ExecutingGraph::Edge & edge, Queue & queue, size_t thread_number) +bool PipelineExecutor::tryAddProcessorToStackIfUpdated(ExecutingGraph::Edge & edge, Queue & queue, Queue & async_queue, size_t thread_number) { /// In this method we have ownership on edge, but node can be concurrently accessed. @@ -185,7 +185,7 @@ bool PipelineExecutor::tryAddProcessorToStackIfUpdated(ExecutingGraph::Edge & ed if (status == ExecutingGraph::ExecStatus::Idle) { node.status = ExecutingGraph::ExecStatus::Preparing; - return prepareProcessor(edge.to, thread_number, queue, std::move(lock)); + return prepareProcessor(edge.to, thread_number, queue, async_queue, std::move(lock)); } else graph->nodes[edge.to]->processor->onUpdatePorts(); @@ -193,7 +193,7 @@ bool PipelineExecutor::tryAddProcessorToStackIfUpdated(ExecutingGraph::Edge & ed return true; } -bool PipelineExecutor::prepareProcessor(UInt64 pid, size_t thread_number, Queue & queue, std::unique_lock node_lock) +bool PipelineExecutor::prepareProcessor(UInt64 pid, size_t thread_number, Queue & queue, Queue & async_queue, std::unique_lock node_lock) { /// In this method we have ownership on node. 
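+    /// Processors that return Async from prepare() end up in async_queue (to be
+    /// polled through their descriptors); everything that is ready to run goes
+    /// into the regular queue.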
auto & node = *graph->nodes[pid]; @@ -248,15 +248,9 @@ bool PipelineExecutor::prepareProcessor(UInt64 pid, size_t thread_number, Queue } case IProcessor::Status::Async: { - throw Exception("Async is temporary not supported.", ErrorCodes::LOGICAL_ERROR); - -// node.status = ExecStatus::Executing; -// addAsyncJob(pid); -// break; - } - case IProcessor::Status::Wait: - { - throw Exception("Wait is temporary not supported.", ErrorCodes::LOGICAL_ERROR); + node.status = ExecutingGraph::ExecStatus::Executing; + async_queue.push(&node); + break; } case IProcessor::Status::ExpandPipeline: { @@ -288,13 +282,13 @@ bool PipelineExecutor::prepareProcessor(UInt64 pid, size_t thread_number, Queue { for (auto & edge : updated_direct_edges) { - if (!tryAddProcessorToStackIfUpdated(*edge, queue, thread_number)) + if (!tryAddProcessorToStackIfUpdated(*edge, queue, async_queue, thread_number)) return false; } for (auto & edge : updated_back_edges) { - if (!tryAddProcessorToStackIfUpdated(*edge, queue, thread_number)) + if (!tryAddProcessorToStackIfUpdated(*edge, queue, async_queue, thread_number)) return false; } } @@ -325,7 +319,7 @@ bool PipelineExecutor::prepareProcessor(UInt64 pid, size_t thread_number, Queue while (!stack.empty()) { auto item = stack.top(); - if (!prepareProcessor(item, thread_number, queue, std::unique_lock(graph->nodes[item]->status_mutex))) + if (!prepareProcessor(item, thread_number, queue, async_queue, std::unique_lock(graph->nodes[item]->status_mutex))) return false; stack.pop(); @@ -378,6 +372,7 @@ void PipelineExecutor::finish() { std::lock_guard lock(task_queue_mutex); finished = true; + async_task_queue.finish(); } std::lock_guard guard(executor_contexts_mutex); @@ -502,11 +497,21 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, size_t num_threads, st { std::unique_lock lock(task_queue_mutex); - if (!task_queue.empty()) + if (!context->async_tasks.empty()) { + node = context->async_tasks.front(); + context->async_tasks.pop(); + --num_waiting_async_tasks; + + if (context->async_tasks.empty()) + context->has_async_tasks = false; + } + else if (!task_queue.empty()) node = task_queue.pop(thread_num); - if (!task_queue.empty() && !threads_queue.empty() /*&& task_queue.quota() > threads_queue.size()*/) + if (node) + { + if (!task_queue.empty() && !threads_queue.empty()) { auto thread_to_wake = task_queue.getAnyThreadWithTasks(thread_num + 1 == num_threads ? 0 : (thread_num + 1)); @@ -522,13 +527,26 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, size_t num_threads, st break; } - if (threads_queue.size() + 1 == num_threads) + if (threads_queue.size() + 1 == num_threads && async_task_queue.empty() && num_waiting_async_tasks == 0) { lock.unlock(); finish(); break; } +#if defined(OS_LINUX) + if (num_threads == 1) + { + /// If we execute in single thread, wait for async tasks here. + auto res = async_task_queue.wait(lock); + if (!res) + throw Exception("Empty task was returned from async task queue", ErrorCodes::LOGICAL_ERROR); + + node = static_cast(res.data); + break; + } +#endif + threads_queue.push(thread_num); } @@ -579,6 +597,7 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, size_t num_threads, st /// Try to execute neighbour processor. { Queue queue; + Queue async_queue; ++num_processing_executors; while (auto * task = expand_pipeline_task.load()) @@ -587,31 +606,39 @@ void PipelineExecutor::executeStepImpl(size_t thread_num, size_t num_threads, st /// Prepare processor after execution. 
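+        /// prepareProcessor() fills `queue` with tasks that can run right away
+        /// and `async_queue` with tasks that must wait on a descriptor; the
+        /// latter are later registered in async_task_queue together with the
+        /// descriptor returned by processor->schedule().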
{ auto lock = std::unique_lock(node->status_mutex); - if (!prepareProcessor(node->processors_id, thread_num, queue, std::move(lock))) + if (!prepareProcessor(node->processors_id, thread_num, queue, async_queue, std::move(lock))) finish(); } node = nullptr; /// Take local task from queue if has one. - if (!queue.empty()) + if (!queue.empty() && !context->has_async_tasks) { node = queue.front(); queue.pop(); } /// Push other tasks to global queue. - if (!queue.empty()) + if (!queue.empty() || !async_queue.empty()) { std::unique_lock lock(task_queue_mutex); +#if defined(OS_LINUX) + while (!async_queue.empty() && !finished) + { + async_task_queue.addTask(thread_num, async_queue.front(), async_queue.front()->processor->schedule()); + async_queue.pop(); + } +#endif + while (!queue.empty() && !finished) { task_queue.push(queue.front(), thread_num); queue.pop(); } - if (!threads_queue.empty() && !finished /* && task_queue.quota() > threads_queue.size()*/) + if (!threads_queue.empty() && !task_queue.empty() && !finished) { auto thread_to_wake = task_queue.getAnyThreadWithTasks(thread_num + 1 == num_threads ? 0 : (thread_num + 1)); @@ -669,6 +696,7 @@ void PipelineExecutor::initializeExecution(size_t num_threads) std::lock_guard lock(task_queue_mutex); Queue queue; + Queue async_queue; size_t next_thread = 0; while (!stack.empty()) @@ -676,7 +704,7 @@ void PipelineExecutor::initializeExecution(size_t num_threads) UInt64 proc = stack.top(); stack.pop(); - prepareProcessor(proc, 0, queue, std::unique_lock(graph->nodes[proc]->status_mutex)); + prepareProcessor(proc, 0, queue, async_queue, std::unique_lock(graph->nodes[proc]->status_mutex)); while (!queue.empty()) { @@ -687,6 +715,10 @@ void PipelineExecutor::initializeExecution(size_t num_threads) if (next_thread >= num_threads) next_thread = 0; } + + while (!async_queue.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Async is only possible after work() call. Processor {}", + async_queue.front()->processor->getName()); } } } @@ -747,6 +779,26 @@ void PipelineExecutor::executeImpl(size_t num_threads) }); } +#if defined(OS_LINUX) + { + /// Wait for async tasks. + std::unique_lock lock(task_queue_mutex); + while (auto task = async_task_queue.wait(lock)) + { + auto * node = static_cast(task.data); + executor_contexts[task.thread_num]->async_tasks.push(node); + executor_contexts[task.thread_num]->has_async_tasks = true; + ++num_waiting_async_tasks; + + if (threads_queue.has(task.thread_num)) + { + threads_queue.pop(task.thread_num); + wakeUpExecutor(task.thread_num); + } + } + } +#endif + for (auto & thread : threads) if (thread.joinable()) thread.join(); diff --git a/src/Processors/Executors/PipelineExecutor.h b/src/Processors/Executors/PipelineExecutor.h index b457cca34b1..213446ad43f 100644 --- a/src/Processors/Executors/PipelineExecutor.h +++ b/src/Processors/Executors/PipelineExecutor.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -57,6 +58,12 @@ private: /// Stores processors need to be prepared. Preparing status is already set for them. TaskQueue task_queue; + /// Queue which stores tasks where processors returned Async status after prepare. + /// If multiple threads are using, main thread will wait for async tasks. + /// For single thread, will wait for async tasks only when task_queue is empty. + PollingQueue async_task_queue; + size_t num_waiting_async_tasks = 0; + ThreadsQueue threads_queue; std::mutex task_queue_mutex; @@ -90,6 +97,9 @@ private: /// This can be solved by using atomic shard ptr. 
std::list task_list; + std::queue async_tasks; + std::atomic_bool has_async_tasks = false; + std::condition_variable condvar; std::mutex mutex; bool wake_flag = false; @@ -126,14 +136,14 @@ private: /// Pipeline execution related methods. void addChildlessProcessorsToStack(Stack & stack); - bool tryAddProcessorToStackIfUpdated(ExecutingGraph::Edge & edge, Queue & queue, size_t thread_number); + bool tryAddProcessorToStackIfUpdated(ExecutingGraph::Edge & edge, Queue & queue, Queue & async_queue, size_t thread_number); static void addJob(ExecutingGraph::Node * execution_state); // TODO: void addAsyncJob(UInt64 pid); /// Prepare processor with pid number. /// Check parents and children of current processor and push them to stacks if they also need to be prepared. /// If processor wants to be expanded, ExpandPipelineTask from thread_number's execution context will be used. - bool prepareProcessor(UInt64 pid, size_t thread_number, Queue & queue, std::unique_lock node_lock); + bool prepareProcessor(UInt64 pid, size_t thread_number, Queue & queue, Queue & async_queue, std::unique_lock node_lock); bool doExpandPipeline(ExpandPipelineTask * task, bool processing); /// Continue executor (in case there are tasks in queue). diff --git a/src/Processors/Executors/PollingQueue.cpp b/src/Processors/Executors/PollingQueue.cpp new file mode 100644 index 00000000000..7383824a592 --- /dev/null +++ b/src/Processors/Executors/PollingQueue.cpp @@ -0,0 +1,115 @@ +#include + +#if defined(OS_LINUX) + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int CANNOT_OPEN_FILE; + extern const int CANNOT_READ_FROM_SOCKET; + extern const int LOGICAL_ERROR; +} + + +PollingQueue::PollingQueue() +{ + epoll_fd = epoll_create(1); + if (-1 == epoll_fd) + throwFromErrno("Cannot create epoll descriptor", ErrorCodes::CANNOT_OPEN_FILE); + + if (-1 == pipe2(pipe_fd, O_NONBLOCK)) + throwFromErrno("Cannot create pipe", ErrorCodes::CANNOT_OPEN_FILE); + + epoll_event socket_event; + socket_event.events = EPOLLIN | EPOLLPRI; + socket_event.data.ptr = pipe_fd; + + if (-1 == epoll_ctl(epoll_fd, EPOLL_CTL_ADD, pipe_fd[0], &socket_event)) + throwFromErrno("Cannot add pipe descriptor to epoll", ErrorCodes::CANNOT_OPEN_FILE); +} + +PollingQueue::~PollingQueue() +{ + close(epoll_fd); + close(pipe_fd[0]); + close(pipe_fd[1]); +} + +void PollingQueue::addTask(size_t thread_number, void * data, int fd) +{ + std::uintptr_t key = reinterpret_cast(data); + if (tasks.count(key)) + throw Exception("Task was already added to task queue", ErrorCodes::LOGICAL_ERROR); + + tasks[key] = TaskData{thread_number, data, fd}; + + epoll_event socket_event; + socket_event.events = EPOLLIN | EPOLLPRI; + socket_event.data.ptr = data; + + if (-1 == epoll_ctl(epoll_fd, EPOLL_CTL_ADD, fd, &socket_event)) + throwFromErrno("Cannot add socket descriptor to epoll", ErrorCodes::CANNOT_OPEN_FILE); +} + +PollingQueue::TaskData PollingQueue::wait(std::unique_lock & lock) +{ + if (is_finished) + return {}; + + lock.unlock(); + + epoll_event event; + event.data.ptr = nullptr; + int num_events = 0; + + while (num_events == 0) + { + num_events = epoll_wait(epoll_fd, &event, 1, 0); + if (num_events == -1) + throwFromErrno("Failed to epoll_wait", ErrorCodes::CANNOT_READ_FROM_SOCKET); + } + + lock.lock(); + + if (event.data.ptr == pipe_fd) + return {}; + + std::uintptr_t key = reinterpret_cast(event.data.ptr); + auto it = tasks.find(key); + if (it == tasks.end()) + throw Exception("Task was not found in task queue", 
ErrorCodes::LOGICAL_ERROR); + + auto res = it->second; + tasks.erase(it); + + if (-1 == epoll_ctl(epoll_fd, EPOLL_CTL_DEL, res.fd, &event)) + throwFromErrno("Cannot remove socket descriptor to epoll", ErrorCodes::CANNOT_OPEN_FILE); + + return res; +} + +void PollingQueue::finish() +{ + is_finished = true; + tasks.clear(); + + uint64_t buf = 0; + while (-1 == write(pipe_fd[1], &buf, sizeof(buf))) + { + if (errno == EAGAIN) + break; + + if (errno != EINTR) + throwFromErrno("Cannot write to pipe", ErrorCodes::CANNOT_READ_FROM_SOCKET); + } +} + +} +#endif diff --git a/src/Processors/Executors/PollingQueue.h b/src/Processors/Executors/PollingQueue.h new file mode 100644 index 00000000000..9d37bf0a2cc --- /dev/null +++ b/src/Processors/Executors/PollingQueue.h @@ -0,0 +1,60 @@ +#pragma once +#include +#include +#include +#include +#include + +namespace DB +{ + +#if defined(OS_LINUX) + +/// This queue is used to poll descriptors. Generally, just a wrapper over epoll. +class PollingQueue +{ +public: + struct TaskData + { + size_t thread_num; + + void * data = nullptr; + int fd = -1; + + explicit operator bool() const { return data; } + }; + +private: + int epoll_fd; + int pipe_fd[2]; + std::atomic_bool is_finished = false; + std::unordered_map tasks; + +public: + PollingQueue(); + ~PollingQueue(); + + size_t size() const { return tasks.size(); } + bool empty() const { return tasks.empty(); } + + /// Add new task to queue. + void addTask(size_t thread_number, void * data, int fd); + + /// Wait for any descriptor. If no descriptors in queue, blocks. + /// Returns ptr which was inserted into queue or nullptr if finished was called. + /// Lock is unlocked during waiting. + TaskData wait(std::unique_lock & lock); + + /// Interrupt waiting. + void finish(); +}; +#else +class PollingQueue +{ +public: + bool empty() { return true; } + void finish() {} +}; +#endif + +} diff --git a/src/Processors/Formats/IRowInputFormat.cpp b/src/Processors/Formats/IRowInputFormat.cpp index f8a14722d19..79090ae2b89 100644 --- a/src/Processors/Formats/IRowInputFormat.cpp +++ b/src/Processors/Formats/IRowInputFormat.cpp @@ -53,18 +53,20 @@ Chunk IRowInputFormat::generate() ///auto chunk_missing_values = std::make_unique(); block_missing_values.clear(); + size_t num_rows = 0; + try { RowReadExtension info; - for (size_t rows = 0; rows < params.max_block_size; ++rows) + bool continue_reading = true; + for (size_t rows = 0; rows < params.max_block_size && continue_reading; ++rows) { try { ++total_rows; info.read_columns.clear(); - if (!readRow(columns, info)) - break; + continue_reading = readRow(columns, info); for (size_t column_idx = 0; column_idx < info.read_columns.size(); ++column_idx) { @@ -76,6 +78,18 @@ Chunk IRowInputFormat::generate() block_missing_values.setBit(column_idx, column_size - 1); } } + + /// Some formats may read row AND say the read is finished. + /// For such a case, get the number or rows from first column. + if (!columns.empty()) + num_rows = columns.front()->size(); + + if (!continue_reading) + break; + + /// The case when there is no columns. Just count rows. + if (columns.empty()) + ++num_rows; } catch (Exception & e) { @@ -107,21 +121,37 @@ Chunk IRowInputFormat::generate() syncAfterError(); - /// Truncate all columns in block to minimal size (remove values, that was appended to only part of columns). 
- - size_t min_size = std::numeric_limits::max(); - for (size_t column_idx = 0; column_idx < num_columns; ++column_idx) - min_size = std::min(min_size, columns[column_idx]->size()); + /// Truncate all columns in block to initial size (remove values, that was appended to only part of columns). for (size_t column_idx = 0; column_idx < num_columns; ++column_idx) { auto & column = columns[column_idx]; - if (column->size() > min_size) - column->popBack(column->size() - min_size); + if (column->size() > num_rows) + column->popBack(column->size() - num_rows); } } } } + catch (ParsingException & e) + { + String verbose_diagnostic; + try + { + verbose_diagnostic = getDiagnosticInfo(); + } + catch (const Exception & exception) + { + verbose_diagnostic = "Cannot get verbose diagnostic: " + exception.message(); + } + catch (...) + { + /// Error while trying to obtain verbose diagnostic. Ok to ignore. + } + + e.setLineNumber(total_rows); + e.addMessage(verbose_diagnostic); + throw; + } catch (Exception & e) { if (!isParseError(e.code())) @@ -157,7 +187,6 @@ Chunk IRowInputFormat::generate() return {}; } - auto num_rows = columns.front()->size(); Chunk chunk(std::move(columns), num_rows); //chunk.setChunkInfo(std::move(chunk_missing_values)); return chunk; diff --git a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp index 64a9ce68ef7..4edef1f1365 100644 --- a/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockInputFormat.cpp @@ -48,12 +48,12 @@ Chunk ArrowBlockInputFormat::generate() } if (!batch_result.ok()) - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, + throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading batch of Arrow data: {}", batch_result.status().ToString()); auto table_result = arrow::Table::FromRecordBatches({*batch_result}); if (!table_result.ok()) - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, + throw ParsingException(ErrorCodes::CANNOT_READ_ALL_DATA, "Error while reading batch of Arrow data: {}", table_result.status().ToString()); ++record_batch_current; diff --git a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp index 756172a5a68..c1abdd1a759 100644 --- a/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp +++ b/src/Processors/Formats/Impl/ArrowBlockOutputFormat.cpp @@ -62,9 +62,9 @@ void ArrowBlockOutputFormat::prepareWriter(const std::shared_ptr // TODO: should we use arrow::ipc::IpcOptions::alignment? 
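+    // Presumably just following the Arrow rename of the IPC writer factories
+    // (NewStreamWriter/NewFileWriter -> MakeStreamWriter/MakeFileWriter).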
if (stream) - writer_status = arrow::ipc::NewStreamWriter(arrow_ostream.get(), schema); + writer_status = arrow::ipc::MakeStreamWriter(arrow_ostream.get(), schema); else - writer_status = arrow::ipc::NewFileWriter(arrow_ostream.get(), schema); + writer_status = arrow::ipc::MakeFileWriter(arrow_ostream.get(), schema); if (!writer_status.ok()) throw Exception(ErrorCodes::UNKNOWN_EXCEPTION, diff --git a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp index ccfe65bbba3..a8d71790f41 100644 --- a/src/Processors/Formats/Impl/AvroRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/AvroRowInputFormat.cpp @@ -190,7 +190,7 @@ AvroDeserializer::DeserializeFn AvroDeserializer::createDeserializeFn(avro::Node { decoder.decodeString(tmp); if (tmp.length() != 36) - throw Exception(std::string("Cannot parse uuid ") + tmp, ErrorCodes::CANNOT_PARSE_UUID); + throw ParsingException(std::string("Cannot parse uuid ") + tmp, ErrorCodes::CANNOT_PARSE_UUID); UUID uuid; parseUUID(reinterpret_cast(tmp.data()), std::reverse_iterator(reinterpret_cast(&uuid) + 16)); diff --git a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp index 7bca5f2c5d9..8422f09e364 100644 --- a/src/Processors/Formats/Impl/CSVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/CSVRowInputFormat.cpp @@ -424,11 +424,12 @@ void registerInputFormatProcessorCSV(FormatFactory & factory) } } -static bool fileSegmentationEngineCSVImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) +static std::pair fileSegmentationEngineCSVImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) { char * pos = in.position(); bool quotes = false; bool need_more_data = true; + size_t number_of_rows = 0; while (loadAtPosition(in, memory, pos) && need_more_data) { @@ -458,6 +459,7 @@ static bool fileSegmentationEngineCSVImpl(ReadBuffer & in, DB::Memory<> & memory } else if (*pos == '\n') { + ++number_of_rows; if (memory.size() + static_cast(pos - in.position()) >= min_chunk_size) need_more_data = false; ++pos; @@ -470,13 +472,16 @@ static bool fileSegmentationEngineCSVImpl(ReadBuffer & in, DB::Memory<> & memory need_more_data = false; ++pos; if (loadAtPosition(in, memory, pos) && *pos == '\n') + { ++pos; + ++number_of_rows; + } } } } saveUpToPosition(in, memory, pos); - return loadAtPosition(in, memory, pos); + return {loadAtPosition(in, memory, pos), number_of_rows}; } void registerFileSegmentationEngineCSV(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp index f20e764dcfd..1fc5041b1f3 100644 --- a/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONCompactEachRowRowInputFormat.cpp @@ -171,7 +171,7 @@ bool JSONCompactEachRowRowInputFormat::readRow(DB::MutableColumns &columns, DB:: skipWhitespaceIfAny(in); if (in.eof()) - throw Exception("Unexpected end of stream while parsing JSONCompactEachRow format", ErrorCodes::CANNOT_READ_ALL_DATA); + throw ParsingException("Unexpected end of stream while parsing JSONCompactEachRow format", ErrorCodes::CANNOT_READ_ALL_DATA); if (file_column + 1 != column_indexes_for_input_fields.size()) { assertChar(',', in); diff --git a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp index 234839b41f5..8a707ae6554 100644 --- 
a/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/JSONEachRowRowInputFormat.cpp @@ -173,7 +173,7 @@ inline bool JSONEachRowRowInputFormat::advanceToNextKey(size_t key_index) skipWhitespaceIfAny(in); if (in.eof()) - throw Exception("Unexpected end of stream while parsing JSONEachRow format", ErrorCodes::CANNOT_READ_ALL_DATA); + throw ParsingException("Unexpected end of stream while parsing JSONEachRow format", ErrorCodes::CANNOT_READ_ALL_DATA); else if (*in.position() == '}') { ++in.position(); diff --git a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp index a12ca09eec0..8bbf0fc089b 100644 --- a/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ORCBlockInputFormat.cpp @@ -38,7 +38,7 @@ Chunk ORCBlockInputFormat::generate() std::shared_ptr table; arrow::Status read_status = file_reader->Read(&table); if (!read_status.ok()) - throw Exception{"Error while reading ORC data: " + read_status.ToString(), + throw ParsingException{"Error while reading ORC data: " + read_status.ToString(), ErrorCodes::CANNOT_READ_ALL_DATA}; ArrowColumnToCHColumn::arrowTableToCHChunk(res, table, header, "ORC"); diff --git a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp index 031974dc357..bb55c71b7ca 100644 --- a/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp +++ b/src/Processors/Formats/Impl/ParquetBlockInputFormat.cpp @@ -47,7 +47,7 @@ Chunk ParquetBlockInputFormat::generate() std::shared_ptr table; arrow::Status read_status = file_reader->ReadRowGroup(row_group_current, column_indices, &table); if (!read_status.ok()) - throw Exception{"Error while reading Parquet data: " + read_status.ToString(), + throw ParsingException{"Error while reading Parquet data: " + read_status.ToString(), ErrorCodes::CANNOT_READ_ALL_DATA}; ++row_group_current; diff --git a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp index c3f9d07b893..6e14a1dc3c8 100644 --- a/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/RegexpRowInputFormat.cpp @@ -173,10 +173,11 @@ void registerInputFormatProcessorRegexp(FormatFactory & factory) }); } -static bool fileSegmentationEngineRegexpImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) +static std::pair fileSegmentationEngineRegexpImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) { char * pos = in.position(); bool need_more_data = true; + size_t number_of_rows = 0; while (loadAtPosition(in, memory, pos) && need_more_data) { @@ -196,12 +197,12 @@ static bool fileSegmentationEngineRegexpImpl(ReadBuffer & in, DB::Memory<> & mem need_more_data = false; ++pos; - + ++number_of_rows; } saveUpToPosition(in, memory, pos); - return loadAtPosition(in, memory, pos); + return {loadAtPosition(in, memory, pos), number_of_rows}; } void registerFileSegmentationEngineRegexp(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp index abb468741c5..8d769cab346 100644 --- a/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TSKVRowInputFormat.cpp @@ -89,7 +89,7 @@ static bool readName(ReadBuffer & buf, StringRef & ref, String & tmp) } } - throw Exception("Unexpected end of stream while reading key name from TSKV format", ErrorCodes::CANNOT_READ_ALL_DATA); + throw 
ParsingException("Unexpected end of stream while reading key name from TSKV format", ErrorCodes::CANNOT_READ_ALL_DATA); } @@ -157,7 +157,7 @@ bool TSKVRowInputFormat::readRow(MutableColumns & columns, RowReadExtension & ex if (in.eof()) { - throw Exception("Unexpected end of stream after field in TSKV format: " + name_ref.toString(), ErrorCodes::CANNOT_READ_ALL_DATA); + throw ParsingException("Unexpected end of stream after field in TSKV format: " + name_ref.toString(), ErrorCodes::CANNOT_READ_ALL_DATA); } else if (*in.position() == '\t') { diff --git a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp index 529b70e4e09..69a5e61caf2 100644 --- a/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TabSeparatedRowInputFormat.cpp @@ -423,10 +423,11 @@ void registerInputFormatProcessorTabSeparated(FormatFactory & factory) } } -static bool fileSegmentationEngineTabSeparatedImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) +static std::pair fileSegmentationEngineTabSeparatedImpl(ReadBuffer & in, DB::Memory<> & memory, size_t min_chunk_size) { bool need_more_data = true; char * pos = in.position(); + size_t number_of_rows = 0; while (loadAtPosition(in, memory, pos) && need_more_data) { @@ -443,6 +444,9 @@ static bool fileSegmentationEngineTabSeparatedImpl(ReadBuffer & in, DB::Memory<> } else if (*pos == '\n' || *pos == '\r') { + if (*pos == '\n') + ++number_of_rows; + if (memory.size() + static_cast(pos - in.position()) >= min_chunk_size) need_more_data = false; ++pos; @@ -451,7 +455,7 @@ static bool fileSegmentationEngineTabSeparatedImpl(ReadBuffer & in, DB::Memory<> saveUpToPosition(in, memory, pos); - return loadAtPosition(in, memory, pos); + return {loadAtPosition(in, memory, pos), number_of_rows}; } void registerFileSegmentationEngineTabSeparated(FormatFactory & factory) diff --git a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp index ecb529a99af..6023b38e4de 100644 --- a/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp +++ b/src/Processors/Formats/Impl/TemplateRowInputFormat.cpp @@ -489,7 +489,7 @@ void TemplateRowInputFormat::skipToNextDelimiterOrEof(const String & delimiter) void TemplateRowInputFormat::throwUnexpectedEof() { - throw Exception("Unexpected EOF while parsing row " + std::to_string(row_num) + ". " + throw ParsingException("Unexpected EOF while parsing row " + std::to_string(row_num) + ". 
" "Maybe last row has wrong format or input doesn't contain specified suffix before EOF.", ErrorCodes::CANNOT_READ_ALL_DATA); } diff --git a/src/Processors/Formats/InputStreamFromInputFormat.h b/src/Processors/Formats/InputStreamFromInputFormat.h index 792d2e45f7f..4369287d39e 100644 --- a/src/Processors/Formats/InputStreamFromInputFormat.h +++ b/src/Processors/Formats/InputStreamFromInputFormat.h @@ -56,7 +56,6 @@ protected: case IProcessor::Status::NeedData: case IProcessor::Status::Async: - case IProcessor::Status::Wait: case IProcessor::Status::ExpandPipeline: throw Exception("Source processor returned status " + IProcessor::statusToName(status), ErrorCodes::LOGICAL_ERROR); } diff --git a/src/Processors/IProcessor.cpp b/src/Processors/IProcessor.cpp index 36beeea8476..a2533ee4c8c 100644 --- a/src/Processors/IProcessor.cpp +++ b/src/Processors/IProcessor.cpp @@ -33,8 +33,6 @@ std::string IProcessor::statusToName(Status status) return "Ready"; case Status::Async: return "Async"; - case Status::Wait: - return "Wait"; case Status::ExpandPipeline: return "ExpandPipeline"; } diff --git a/src/Processors/IProcessor.h b/src/Processors/IProcessor.h index c774b43a9b2..c3abe40c3b7 100644 --- a/src/Processors/IProcessor.h +++ b/src/Processors/IProcessor.h @@ -146,13 +146,10 @@ public: /// You may call 'work' method and processor will do some work synchronously. Ready, - /// You may call 'schedule' method and processor will initiate some background work. + /// You may call 'schedule' method and processor will return descriptor. + /// You need to poll this descriptor and call work() afterwards. Async, - /// Processor is doing some work in background. - /// You may wait for next event or do something else and then you should call 'prepare' again. - Wait, - /// Processor wants to add other processors to pipeline. /// New processors must be obtained by expandPipeline() call. ExpandPipeline, @@ -198,16 +195,21 @@ public: throw Exception("Method 'work' is not implemented for " + getName() + " processor", ErrorCodes::NOT_IMPLEMENTED); } - /** You may call this method if 'prepare' returned Async. + /** Executor must call this method when 'prepare' returned Async. * This method cannot access any ports. It should use only data that was prepared by 'prepare' method. * - * This method should return instantly and fire an event (or many events) when asynchronous job will be done. - * When the job is not done, method 'prepare' will return Wait and the user may block and wait for next event before checking again. + * This method should instantly return epollable file descriptor which will be readable when asynchronous job is done. + * When descriptor is readable, method `work` is called to continue data processing. * - * Note that it can fire many events in EventCounter while doing its job, - * and you have to wait for next event (or do something else) every time when 'prepare' returned Wait. + * NOTE: it would be more logical to let `work()` return ASYNC status instead of prepare. This will get + * prepare() -> work() -> schedule() -> work() -> schedule() -> .. -> work() -> prepare() + * chain instead of + * prepare() -> work() -> prepare() -> schedule() -> work() -> prepare() -> schedule() -> .. -> work() -> prepare() + * + * It is expected that executor epoll using level-triggered notifications. + * Read all available data from descriptor before returning ASYNC. 
*/ - virtual void schedule(EventCounter & /*watch*/) + virtual int schedule() { throw Exception("Method 'schedule' is not implemented for " + getName() + " processor", ErrorCodes::NOT_IMPLEMENTED); } diff --git a/src/Processors/ISource.cpp b/src/Processors/ISource.cpp index 90f3962b83e..7ae988f7cdb 100644 --- a/src/Processors/ISource.cpp +++ b/src/Processors/ISource.cpp @@ -4,6 +4,11 @@ namespace DB { +namespace ErrorCodes +{ + extern const int NOT_IMPLEMENTED; +} + ISource::ISource(Block header) : IProcessor({}, {std::move(header)}), output(outputs.front()) { @@ -45,11 +50,17 @@ void ISource::work() { try { - current_chunk.chunk = generate(); - if (!current_chunk.chunk || isCancelled()) - finished = true; + if (auto chunk = tryGenerate()) + { + current_chunk.chunk = std::move(*chunk); + if (current_chunk.chunk) + has_input = true; + } else - has_input = true; + finished = true; + + if (isCancelled()) + finished = true; } catch (...) { @@ -58,5 +69,19 @@ void ISource::work() } } +Chunk ISource::generate() +{ + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "generate is not implemented for {}", getName()); +} + +std::optional ISource::tryGenerate() +{ + auto chunk = generate(); + if (!chunk) + return {}; + + return chunk; +} + } diff --git a/src/Processors/ISource.h b/src/Processors/ISource.h index 9be21c3a398..b7e2b5dce8e 100644 --- a/src/Processors/ISource.h +++ b/src/Processors/ISource.h @@ -15,7 +15,8 @@ protected: bool got_exception = false; Port::Data current_chunk; - virtual Chunk generate() = 0; + virtual Chunk generate(); + virtual std::optional tryGenerate(); public: ISource(Block header); diff --git a/src/Processors/Pipe.cpp b/src/Processors/Pipe.cpp index 12c1f628e8d..e8943790e68 100644 --- a/src/Processors/Pipe.cpp +++ b/src/Processors/Pipe.cpp @@ -28,7 +28,7 @@ static void checkSource(const IProcessor & source) ErrorCodes::LOGICAL_ERROR); if (source.getOutputs().size() > 1) - throw Exception("Source for pipe should have single or two outputs, but " + source.getName() + " has " + + throw Exception("Source for pipe should have single output, but " + source.getName() + " has " + toString(source.getOutputs().size()) + " outputs.", ErrorCodes::LOGICAL_ERROR); } diff --git a/src/Processors/QueryPlan/WindowStep.cpp b/src/Processors/QueryPlan/WindowStep.cpp new file mode 100644 index 00000000000..82c589b8b20 --- /dev/null +++ b/src/Processors/QueryPlan/WindowStep.cpp @@ -0,0 +1,111 @@ +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +static ITransformingStep::Traits getTraits() +{ + return ITransformingStep::Traits + { + { + .preserves_distinct_columns = true, + .returns_single_stream = false, + .preserves_number_of_streams = true, + .preserves_sorting = true, + }, + { + .preserves_number_of_rows = true + } + }; +} + +static Block addWindowFunctionResultColumns(const Block & block, + std::vector window_functions) +{ + auto result = block; + + for (const auto & f : window_functions) + { + ColumnWithTypeAndName column_with_type; + column_with_type.name = f.column_name; + column_with_type.type = f.aggregate_function->getReturnType(); + column_with_type.column = column_with_type.type->createColumn(); + + result.insert(column_with_type); + } + + return result; +} + +WindowStep::WindowStep(const DataStream & input_stream_, + const WindowDescription & window_description_, + const std::vector & window_functions_) + : ITransformingStep( + input_stream_, + addWindowFunctionResultColumns(input_stream_.header, + window_functions_), + getTraits()) + , 
window_description(window_description_) + , window_functions(window_functions_) + , input_header(input_stream_.header) +{ + // We don't remove any columns, only add, so probably we don't have to update + // the output DataStream::distinct_columns. +} + +void WindowStep::transformPipeline(QueryPipeline & pipeline) +{ + pipeline.addSimpleTransform([&](const Block & /*header*/) + { + return std::make_shared(input_header, + output_stream->header, window_description, window_functions); + }); + + assertBlocksHaveEqualStructure(pipeline.getHeader(), output_stream->header, + "WindowStep transform for '" + window_description.window_name + "'"); +} + +void WindowStep::describeActions(FormatSettings & settings) const +{ + String prefix(settings.offset, ' '); + settings.out << prefix << "Window: ("; + if (!window_description.partition_by.empty()) + { + settings.out << "PARTITION BY "; + for (size_t i = 0; i < window_description.partition_by.size(); ++i) + { + if (i > 0) + { + settings.out << ", "; + } + + settings.out << window_description.partition_by[i].column_name; + } + } + if (!window_description.partition_by.empty() + && !window_description.order_by.empty()) + { + settings.out << " "; + } + if (!window_description.order_by.empty()) + { + settings.out << "ORDER BY " + << dumpSortDescription(window_description.order_by); + } + settings.out << ")\n"; + + for (size_t i = 0; i < window_functions.size(); ++i) + { + settings.out << prefix << (i == 0 ? "Functions: " + : " "); + settings.out << window_functions[i].column_name << "\n"; + } +} + +} diff --git a/src/Processors/QueryPlan/WindowStep.h b/src/Processors/QueryPlan/WindowStep.h new file mode 100644 index 00000000000..069d02c655c --- /dev/null +++ b/src/Processors/QueryPlan/WindowStep.h @@ -0,0 +1,33 @@ +#pragma once +#include + +#include + +namespace DB +{ + +class ActionsDAG; +using ActionsDAGPtr = std::shared_ptr; + +class WindowTransform; + +class WindowStep : public ITransformingStep +{ +public: + explicit WindowStep(const DataStream & input_stream_, + const WindowDescription & window_description_, + const std::vector & window_functions_); + + String getName() const override { return "Window"; } + + void transformPipeline(QueryPipeline & pipeline) override; + + void describeActions(FormatSettings & settings) const override; + +private: + WindowDescription window_description; + std::vector window_functions; + Block input_header; +}; + +} diff --git a/src/Processors/Sources/RemoteSource.cpp b/src/Processors/Sources/RemoteSource.cpp index 2efbf3ed7c2..bf3ef32214d 100644 --- a/src/Processors/Sources/RemoteSource.cpp +++ b/src/Processors/Sources/RemoteSource.cpp @@ -1,14 +1,16 @@ #include #include +#include #include #include namespace DB { -RemoteSource::RemoteSource(RemoteQueryExecutorPtr executor, bool add_aggregation_info_) +RemoteSource::RemoteSource(RemoteQueryExecutorPtr executor, bool add_aggregation_info_, bool async_read_) : SourceWithProgress(executor->getHeader(), false) , add_aggregation_info(add_aggregation_info_), query_executor(std::move(executor)) + , async_read(async_read_) { /// Add AggregatedChunkInfo if we expect DataTypeAggregateFunction as a result. const auto & sample = getPort().getHeader(); @@ -21,15 +23,28 @@ RemoteSource::~RemoteSource() = default; ISource::Status RemoteSource::prepare() { + /// Check if query was cancelled before returning Async status. Otherwise it may lead to infinite loop. 
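+    /// (A cancelled query could otherwise keep reporting Async and the executor
+    /// would keep waiting on a descriptor that never completes.)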
+ if (was_query_canceled) + { + getPort().finish(); + return Status::Finished; + } + + if (is_async_state) + return Status::Async; + Status status = SourceWithProgress::prepare(); /// To avoid resetting the connection (because of "unfinished" query) in the /// RemoteQueryExecutor it should be finished explicitly. if (status == Status::Finished) - query_executor->finish(); + { + query_executor->finish(&read_context); + is_async_state = false; + } return status; } -Chunk RemoteSource::generate() +std::optional RemoteSource::tryGenerate() { /// onCancel() will do the cancel if the query was sent. if (was_query_canceled) @@ -52,11 +67,28 @@ Chunk RemoteSource::generate() was_query_sent = true; } - auto block = query_executor->read(); + Block block; + + if (async_read) + { + auto res = query_executor->read(read_context); + if (std::holds_alternative(res)) + { + fd = std::get(res); + is_async_state = true; + return Chunk(); + } + + is_async_state = false; + + block = std::get(std::move(res)); + } + else + block = query_executor->read(); if (!block) { - query_executor->finish(); + query_executor->finish(&read_context); return {}; } @@ -77,7 +109,18 @@ Chunk RemoteSource::generate() void RemoteSource::onCancel() { was_query_canceled = true; - query_executor->cancel(); + query_executor->cancel(&read_context); + // is_async_state = false; +} + +void RemoteSource::onUpdatePorts() +{ + if (getPort().isFinished()) + { + was_query_canceled = true; + query_executor->finish(&read_context); + // is_async_state = false; + } } @@ -123,9 +166,9 @@ Chunk RemoteExtremesSource::generate() Pipe createRemoteSourcePipe( RemoteQueryExecutorPtr query_executor, - bool add_aggregation_info, bool add_totals, bool add_extremes) + bool add_aggregation_info, bool add_totals, bool add_extremes, bool async_read) { - Pipe pipe(std::make_shared(query_executor, add_aggregation_info)); + Pipe pipe(std::make_shared(query_executor, add_aggregation_info, async_read)); if (add_totals) pipe.addTotalsSource(std::make_shared(query_executor)); diff --git a/src/Processors/Sources/RemoteSource.h b/src/Processors/Sources/RemoteSource.h index 7b537023306..2109cb5eba4 100644 --- a/src/Processors/Sources/RemoteSource.h +++ b/src/Processors/Sources/RemoteSource.h @@ -11,6 +11,8 @@ namespace DB class RemoteQueryExecutor; using RemoteQueryExecutorPtr = std::shared_ptr; +class RemoteQueryExecutorReadContext; + /// Source from RemoteQueryExecutor. Executes remote query and returns query result chunks. class RemoteSource : public SourceWithProgress { @@ -18,7 +20,7 @@ public: /// Flag add_aggregation_info tells if AggregatedChunkInfo should be added to result chunk. /// AggregatedChunkInfo stores the bucket number used for two-level aggregation. /// This flag should be typically enabled for queries with GROUP BY which are executed till WithMergeableState. - RemoteSource(RemoteQueryExecutorPtr executor, bool add_aggregation_info_); + RemoteSource(RemoteQueryExecutorPtr executor, bool add_aggregation_info_, bool async_read_); ~RemoteSource() override; Status prepare() override; @@ -27,14 +29,12 @@ public: void setRowsBeforeLimitCounter(RowsBeforeLimitCounterPtr counter) { rows_before_limit.swap(counter); } /// Stop reading from stream if output port is finished. 
- void onUpdatePorts() override - { - if (getPort().isFinished()) - cancel(); - } + void onUpdatePorts() override; + + int schedule() override { return fd; } protected: - Chunk generate() override; + std::optional tryGenerate() override; void onCancel() override; private: @@ -43,6 +43,11 @@ private: bool add_aggregation_info = false; RemoteQueryExecutorPtr query_executor; RowsBeforeLimitCounterPtr rows_before_limit; + + const bool async_read; + bool is_async_state = false; + std::unique_ptr read_context; + int fd = -1; }; /// Totals source from RemoteQueryExecutor. @@ -80,6 +85,6 @@ private: /// Create pipe with remote sources. Pipe createRemoteSourcePipe( RemoteQueryExecutorPtr query_executor, - bool add_aggregation_info, bool add_totals, bool add_extremes); + bool add_aggregation_info, bool add_totals, bool add_extremes, bool async_read); } diff --git a/src/Processors/Transforms/WindowTransform.cpp b/src/Processors/Transforms/WindowTransform.cpp new file mode 100644 index 00000000000..6e8b0ea8e39 --- /dev/null +++ b/src/Processors/Transforms/WindowTransform.cpp @@ -0,0 +1,184 @@ +#include + +#include + +#include + +namespace DB +{ + +WindowTransform::WindowTransform(const Block & input_header_, + const Block & output_header_, + const WindowDescription & window_description_, + const std::vector & window_function_descriptions + ) + : ISimpleTransform(input_header_, output_header_, + false /* skip_empty_chunks */) + , input_header(input_header_) + , window_description(window_description_) +{ + workspaces.reserve(window_function_descriptions.size()); + for (const auto & f : window_function_descriptions) + { + WindowFunctionWorkspace workspace; + workspace.window_function = f; + + const auto & aggregate_function + = workspace.window_function.aggregate_function; + if (!arena && aggregate_function->allocatesMemoryInArena()) + { + arena = std::make_unique(); + } + + workspace.argument_column_indices.reserve( + workspace.window_function.argument_names.size()); + workspace.argument_columns.reserve( + workspace.window_function.argument_names.size()); + for (const auto & argument_name : workspace.window_function.argument_names) + { + workspace.argument_column_indices.push_back( + input_header.getPositionByName(argument_name)); + } + + workspace.aggregate_function_state.reset(aggregate_function->sizeOfData(), + aggregate_function->alignOfData()); + aggregate_function->create(workspace.aggregate_function_state.data()); + + workspaces.push_back(std::move(workspace)); + } + + partition_by_indices.reserve(window_description.partition_by.size()); + for (const auto & column : window_description.partition_by) + { + partition_by_indices.push_back( + input_header.getPositionByName(column.column_name)); + } + partition_start_columns.resize(partition_by_indices.size(), nullptr); + partition_start_row = 0; +} + +WindowTransform::~WindowTransform() +{ + // Some states may be not created yet if the creation failed. 
+ for (auto & ws : workspaces) + { + ws.window_function.aggregate_function->destroy( + ws.aggregate_function_state.data()); + } +} + +void WindowTransform::transform(Chunk & chunk) +{ + const size_t num_rows = chunk.getNumRows(); + auto columns = chunk.detachColumns(); + + for (auto & ws : workspaces) + { + ws.argument_columns.clear(); + for (const auto column_index : ws.argument_column_indices) + { + ws.argument_columns.push_back(columns[column_index].get()); + } + + ws.result_column = ws.window_function.aggregate_function->getReturnType() + ->createColumn(); + } + + // We loop for all window functions for each row. Switching the loops might + // be more efficient, because we would run less code and access less data in + // the inner loop. If you change this, don't forget to fix the calculation of + // partition boundaries. Probably it has to be precalculated and stored as + // an array of offsets. An interesting optimization would be to pass it as + // an extra column from the previous sorting step -- that step might need to + // make similar comparison anyway, if it's sorting only by the PARTITION BY + // columns. + for (size_t row = 0; row < num_rows; row++) + { + // Check whether the new partition has started. We have to reset the + // aggregate functions when the new partition starts. + assert(partition_start_columns.size() == partition_by_indices.size()); + bool new_partition = false; + if (partition_start_columns.empty()) + { + // No PARTITION BY at all, do nothing. + } + else if (partition_start_columns[0] == nullptr) + { + // This is the first partition. + new_partition = true; + partition_start_columns.clear(); + for (const auto i : partition_by_indices) + { + partition_start_columns.push_back(columns[i]); + } + partition_start_row = row; + } + else + { + // Check whether the new partition started, by comparing all the + // PARTITION BY columns. + size_t first_inequal_column = 0; + for (; first_inequal_column < partition_start_columns.size(); + ++first_inequal_column) + { + const auto * current_column = columns[ + partition_by_indices[first_inequal_column]].get(); + + if (current_column->compareAt(row, partition_start_row, + *partition_start_columns[first_inequal_column], + 1 /* nan_direction_hint */) != 0) + { + break; + } + } + + if (first_inequal_column < partition_start_columns.size()) + { + // The new partition has started. Remember where. + new_partition = true; + partition_start_columns.clear(); + for (const auto i : partition_by_indices) + { + partition_start_columns.push_back(columns[i]); + } + partition_start_row = row; + } + } + + for (auto & ws : workspaces) + { + const auto & f = ws.window_function; + const auto * a = f.aggregate_function.get(); + auto * buf = ws.aggregate_function_state.data(); + + if (new_partition) + { + // Reset the aggregate function states. + a->destroy(buf); + a->create(buf); + } + + // Update the aggregate function state and save the result. + a->add(buf, + ws.argument_columns.data(), + row, + arena.get()); + + a->insertResultInto(buf, + *ws.result_column, + arena.get()); + } + } + + // We have to release the mutable reference to the result column before we + // return this block, or else extra copying may occur when the subsequent + // processors modify the block. Workspaces live longer than individual blocks. 
+ for (auto & ws : workspaces) + { + columns.push_back(std::move(ws.result_column)); + } + + chunk.setColumns(std::move(columns), num_rows); +} + +} diff --git a/src/Processors/Transforms/WindowTransform.h b/src/Processors/Transforms/WindowTransform.h new file mode 100644 index 00000000000..3d284263171 --- /dev/null +++ b/src/Processors/Transforms/WindowTransform.h @@ -0,0 +1,77 @@ +#pragma once +#include + +#include + +#include + +namespace DB +{ + +class ExpressionActions; +using ExpressionActionsPtr = std::shared_ptr; + +class Arena; + +// Runtime data for computing one window function +struct WindowFunctionWorkspace +{ + WindowFunctionDescription window_function; + AlignedBuffer aggregate_function_state; + std::vector argument_column_indices; + + // Argument and result columns. Be careful, they are per-chunk. + std::vector argument_columns; + MutableColumnPtr result_column; +}; + +/* + * Computes several window functions that share the same window. The input must + * be sorted correctly for this window (PARTITION BY, then ORDER BY). + */ +class WindowTransform : public ISimpleTransform +{ +public: + WindowTransform( + const Block & input_header_, + const Block & output_header_, + const WindowDescription & window_description_, + const std::vector & + window_function_descriptions); + + ~WindowTransform() override; + + String getName() const override + { + return "WindowTransform"; + } + + static Block transformHeader(Block header, const ExpressionActionsPtr & expression); + + void transform(Chunk & chunk) override; + +public: + Block input_header; + + WindowDescription window_description; + + // Indices of the PARTITION BY columns in block. + std::vector partition_by_indices; + + // The columns for PARTITION BY and the row in these columns where the + // current partition started. They might be in some of the previous blocks, + // so we have to keep the shared ownership of the columns. We don't keep the + // entire block to save memory, only the needed columns, in the same order + // as the partition_by_indices array. + // Can be empty if there is no PARTITION BY. + // Columns are nullptr when it is the first partition. + std::vector partition_start_columns; + size_t partition_start_row = 0; + + // Data for computing the window functions. 
+ std::vector workspaces; + + std::unique_ptr arena; +}; + +} diff --git a/src/Processors/ya.make b/src/Processors/ya.make index eacc0c44d24..263c24ff35c 100644 --- a/src/Processors/ya.make +++ b/src/Processors/ya.make @@ -17,6 +17,7 @@ SRCS( Executors/ExecutingGraph.cpp Executors/PipelineExecutingBlockInputStream.cpp Executors/PipelineExecutor.cpp + Executors/PollingQueue.cpp Executors/PullingAsyncPipelineExecutor.cpp Executors/PullingPipelineExecutor.cpp ForkProcessor.cpp @@ -123,6 +124,7 @@ SRCS( QueryPlan/SettingQuotaAndLimitsStep.cpp QueryPlan/TotalsHavingStep.cpp QueryPlan/UnionStep.cpp + QueryPlan/WindowStep.cpp ResizeProcessor.cpp Sources/DelayedSource.cpp Sources/RemoteSource.cpp @@ -155,6 +157,7 @@ SRCS( Transforms/RollupTransform.cpp Transforms/SortingTransform.cpp Transforms/TotalsHavingTransform.cpp + Transforms/WindowTransform.cpp printPipeline.cpp ) diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.cpp b/src/Storages/Distributed/DistributedBlockOutputStream.cpp index d24967256a0..040f33ea02e 100644 --- a/src/Storages/Distributed/DistributedBlockOutputStream.cpp +++ b/src/Storages/Distributed/DistributedBlockOutputStream.cpp @@ -138,11 +138,22 @@ void DistributedBlockOutputStream::write(const Block & block) void DistributedBlockOutputStream::writeAsync(const Block & block) { - if (storage.getShardingKeyExpr() && (cluster->getShardsInfo().size() > 1)) - return writeSplitAsync(block); + const Settings & settings = context.getSettingsRef(); + bool random_shard_insert = settings.insert_distributed_one_random_shard && !storage.has_sharding_key; - writeAsyncImpl(block); - ++inserted_blocks; + if (random_shard_insert) + { + writeAsyncImpl(block, storage.getRandomShardIndex(cluster->getShardsInfo())); + } + else + { + + if (storage.getShardingKeyExpr() && (cluster->getShardsInfo().size() > 1)) + return writeSplitAsync(block); + + writeAsyncImpl(block); + ++inserted_blocks; + } } @@ -175,18 +186,18 @@ std::string DistributedBlockOutputStream::getCurrentStateDescription() } -void DistributedBlockOutputStream::initWritingJobs(const Block & first_block) +void DistributedBlockOutputStream::initWritingJobs(const Block & first_block, size_t start, size_t end) { const Settings & settings = context.getSettingsRef(); const auto & addresses_with_failovers = cluster->getShardsAddresses(); const auto & shards_info = cluster->getShardsInfo(); - size_t num_shards = shards_info.size(); + size_t num_shards = end - start; remote_jobs_count = 0; local_jobs_count = 0; per_shard_jobs.resize(shards_info.size()); - for (size_t shard_index : ext::range(0, shards_info.size())) + for (size_t shard_index : ext::range(start, end)) { const auto & shard_info = shards_info[shard_index]; auto & shard_jobs = per_shard_jobs[shard_index]; @@ -242,10 +253,11 @@ void DistributedBlockOutputStream::waitForJobs() } -ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutputStream::JobReplica & job, const Block & current_block) +ThreadPool::Job +DistributedBlockOutputStream::runWritingJob(DistributedBlockOutputStream::JobReplica & job, const Block & current_block, size_t num_shards) { auto thread_group = CurrentThread::getGroup(); - return [this, thread_group, &job, ¤t_block]() + return [this, thread_group, &job, ¤t_block, num_shards]() { if (thread_group) CurrentThread::attachToIfDetached(thread_group); @@ -262,7 +274,6 @@ ThreadPool::Job DistributedBlockOutputStream::runWritingJob(DistributedBlockOutp }); const auto & shard_info = cluster->getShardsInfo()[job.shard_index]; - size_t 
num_shards = cluster->getShardsInfo().size(); auto & shard_job = per_shard_jobs[job.shard_index]; const auto & addresses = cluster->getShardsAddresses(); @@ -356,12 +367,19 @@ void DistributedBlockOutputStream::writeSync(const Block & block) { const Settings & settings = context.getSettingsRef(); const auto & shards_info = cluster->getShardsInfo(); - size_t num_shards = shards_info.size(); + bool random_shard_insert = settings.insert_distributed_one_random_shard && !storage.has_sharding_key; + size_t start = 0, end = shards_info.size(); + if (random_shard_insert) + { + start = storage.getRandomShardIndex(shards_info); + end = start + 1; + } + size_t num_shards = end - start; if (!pool) { /// Deferred initialization. Only for sync insertion. - initWritingJobs(block); + initWritingJobs(block, start, end); pool.emplace(remote_jobs_count + local_jobs_count); @@ -394,7 +412,7 @@ void DistributedBlockOutputStream::writeSync(const Block & block) finished_jobs_count = 0; for (size_t shard_index : ext::range(0, shards_info.size())) for (JobReplica & job : per_shard_jobs[shard_index].replicas_jobs) - pool->scheduleOrThrowOnError(runWritingJob(job, block)); + pool->scheduleOrThrowOnError(runWritingJob(job, block, num_shards)); } catch (...) { diff --git a/src/Storages/Distributed/DistributedBlockOutputStream.h b/src/Storages/Distributed/DistributedBlockOutputStream.h index 872776f0867..ef37776893a 100644 --- a/src/Storages/Distributed/DistributedBlockOutputStream.h +++ b/src/Storages/Distributed/DistributedBlockOutputStream.h @@ -73,10 +73,10 @@ private: /// Performs synchronous insertion to remote nodes. If timeout_exceeded flag was set, throws. void writeSync(const Block & block); - void initWritingJobs(const Block & first_block); + void initWritingJobs(const Block & first_block, size_t start, size_t end); struct JobReplica; - ThreadPool::Job runWritingJob(JobReplica & job, const Block & current_block); + ThreadPool::Job runWritingJob(DistributedBlockOutputStream::JobReplica & job, const Block & current_block, size_t num_shards); void waitForJobs(); diff --git a/src/Storages/IStorage.cpp b/src/Storages/IStorage.cpp index 94f37254b91..5f500518516 100644 --- a/src/Storages/IStorage.cpp +++ b/src/Storages/IStorage.cpp @@ -3,21 +3,22 @@ #include #include -#include +#include +#include +#include +#include +#include +#include +#include #include #include #include #include -#include -#include -#include -#include -#include +#include namespace DB { - namespace ErrorCodes { extern const int TABLE_IS_DROPPED; @@ -32,17 +33,18 @@ bool IStorage::isVirtualColumn(const String & column_name, const StorageMetadata } RWLockImpl::LockHolder IStorage::tryLockTimed( - const RWLock & rwlock, RWLockImpl::Type type, const String & query_id, const std::chrono::milliseconds & acquire_timeout) const + const RWLock & rwlock, RWLockImpl::Type type, const String & query_id, const std::chrono::milliseconds & acquire_timeout) const { auto lock_holder = rwlock->getLock(type, query_id, acquire_timeout); if (!lock_holder) { const String type_str = type == RWLockImpl::Type::Read ? "READ" : "WRITE"; throw Exception( - type_str + " locking attempt on \"" + getStorageID().getFullTableName() + - "\" has timed out! (" + std::to_string(acquire_timeout.count()) + "ms) " - "Possible deadlock avoided. Client should retry.", - ErrorCodes::DEADLOCK_AVOIDED); + type_str + " locking attempt on \"" + getStorageID().getFullTableName() + "\" has timed out! (" + + std::to_string(acquire_timeout.count()) + + "ms) " + "Possible deadlock avoided. 
Client should retry.", + ErrorCodes::DEADLOCK_AVOIDED); } return lock_holder; } @@ -82,26 +84,26 @@ TableExclusiveLockHolder IStorage::lockExclusively(const String & query_id, cons } Pipe IStorage::read( - const Names & /*column_names*/, - const StorageMetadataPtr & /*metadata_snapshot*/, - SelectQueryInfo & /*query_info*/, - const Context & /*context*/, - QueryProcessingStage::Enum /*processed_stage*/, - size_t /*max_block_size*/, - unsigned /*num_streams*/) + const Names & /*column_names*/, + const StorageMetadataPtr & /*metadata_snapshot*/, + SelectQueryInfo & /*query_info*/, + const Context & /*context*/, + QueryProcessingStage::Enum /*processed_stage*/, + size_t /*max_block_size*/, + unsigned /*num_streams*/) { throw Exception("Method read is not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } void IStorage::read( - QueryPlan & query_plan, - const Names & column_names, - const StorageMetadataPtr & metadata_snapshot, - SelectQueryInfo & query_info, - const Context & context, - QueryProcessingStage::Enum processed_stage, - size_t max_block_size, - unsigned num_streams) + QueryPlan & query_plan, + const Names & column_names, + const StorageMetadataPtr & metadata_snapshot, + SelectQueryInfo & query_info, + const Context & context, + QueryProcessingStage::Enum processed_stage, + size_t max_block_size, + unsigned num_streams) { auto pipe = read(column_names, metadata_snapshot, query_info, context, processed_stage, max_block_size, num_streams); if (pipe.empty()) @@ -117,15 +119,12 @@ void IStorage::read( } Pipe IStorage::alterPartition( - const StorageMetadataPtr & /* metadata_snapshot */, - const PartitionCommands & /* commands */, - const Context & /* context */) + const StorageMetadataPtr & /* metadata_snapshot */, const PartitionCommands & /* commands */, const Context & /* context */) { throw Exception("Partition operations are not supported by storage " + getName(), ErrorCodes::NOT_IMPLEMENTED); } -void IStorage::alter( - const AlterCommands & params, const Context & context, TableLockHolder &) +void IStorage::alter(const AlterCommands & params, const Context & context, TableLockHolder &) { auto table_id = getStorageID(); StorageInMemoryMetadata new_metadata = getInMemoryMetadata(); @@ -146,7 +145,8 @@ void IStorage::checkAlterIsPossible(const AlterCommands & commands, const Settin } } -void IStorage::checkAlterPartitionIsPossible(const PartitionCommands & /*commands*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Settings & /*settings*/) const +void IStorage::checkAlterPartitionIsPossible( + const PartitionCommands & /*commands*/, const StorageMetadataPtr & /*metadata_snapshot*/, const Settings & /*settings*/) const { throw Exception("Table engine " + getName() + " doesn't support partitioning", ErrorCodes::NOT_IMPLEMENTED); } @@ -168,4 +168,52 @@ NamesAndTypesList IStorage::getVirtuals() const return {}; } +Names IStorage::getAllRegisteredNames() const +{ + Names result; + auto getter = [](const auto & column) { return column.name; }; + const NamesAndTypesList & available_columns = getInMemoryMetadata().getColumns().getAllPhysical(); + std::transform(available_columns.begin(), available_columns.end(), std::back_inserter(result), getter); + return result; +} + +std::string PrewhereDAGInfo::dump() const +{ + WriteBufferFromOwnString ss; + ss << "PrewhereDagInfo\n"; + + if (alias_actions) + { + ss << "alias_actions " << alias_actions->dumpDAG() << "\n"; + } + + if (prewhere_actions) + { + ss << "prewhere_actions " << prewhere_actions->dumpDAG() << "\n"; + 
} + + if (remove_columns_actions) + { + ss << "remove_columns_actions " << remove_columns_actions->dumpDAG() << "\n"; + } + + ss << "remove_prewhere_column " << remove_prewhere_column + << ", need_filter " << need_filter << "\n"; + + return ss.str(); +} + +std::string FilterInfo::dump() const +{ + WriteBufferFromOwnString ss; + ss << "FilterInfo for column '" << column_name <<"', do_remove_column " + << do_remove_column << "\n"; + if (actions_dag) + { + ss << "actions_dag " << actions_dag->dumpDAG() << "\n"; + } + + return ss.str(); +} + } diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index 89241f461f6..42ece547e1c 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -78,7 +78,7 @@ struct ColumnSize * - data storage structure (compression, etc.) * - concurrent access to data (locks, etc.) */ -class IStorage : public std::enable_shared_from_this, public TypePromotion +class IStorage : public std::enable_shared_from_this, public TypePromotion, public IHints<1, IStorage> { public: IStorage() = delete; @@ -87,7 +87,6 @@ public: : storage_id(std::move(storage_id_)) , metadata(std::make_unique()) {} //-V730 - virtual ~IStorage() = default; IStorage(const IStorage &) = delete; IStorage & operator=(const IStorage &) = delete; @@ -169,6 +168,7 @@ public: /// By default return empty list of columns. virtual NamesAndTypesList getVirtuals() const; + Names getAllRegisteredNames() const override; protected: /// Returns whether the column is virtual - by default all columns are real. diff --git a/src/Storages/IndicesDescription.cpp b/src/Storages/IndicesDescription.cpp index 8adf2be1bd4..dbc95615383 100644 --- a/src/Storages/IndicesDescription.cpp +++ b/src/Storages/IndicesDescription.cpp @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/src/Storages/Kafka/KafkaBlockInputStream.cpp b/src/Storages/Kafka/KafkaBlockInputStream.cpp index 6a137bd4b8b..bf985902b4d 100644 --- a/src/Storages/Kafka/KafkaBlockInputStream.cpp +++ b/src/Storages/Kafka/KafkaBlockInputStream.cpp @@ -125,7 +125,6 @@ Block KafkaBlockInputStream::readImpl() } case IProcessor::Status::NeedData: case IProcessor::Status::Async: - case IProcessor::Status::Wait: case IProcessor::Status::ExpandPipeline: throw Exception("Source processor returned status " + IProcessor::statusToName(status), ErrorCodes::LOGICAL_ERROR); } diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 6a98529966c..104eedf060e 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -549,6 +549,13 @@ CompressionCodecPtr IMergeTreeDataPart::detectDefaultCompressionCodec() const auto column_size = getColumnSize(part_column.name, *part_column.type); if (column_size.data_compressed != 0 && !storage_columns.hasCompressionCodec(part_column.name)) { + String path_to_data_file = getFullRelativePath() + getFileNameForColumn(part_column) + ".bin"; + if (!volume->getDisk()->exists(path_to_data_file)) + { + LOG_WARNING(storage.log, "Part's {} column {} has non zero data compressed size, but data file {} doesn't exists", name, backQuoteIfNeed(part_column.name), path_to_data_file); + continue; + } + result = getCompressionCodecForFile(volume->getDisk(), getFullRelativePath() + getFileNameForColumn(part_column) + ".bin"); break; } diff --git a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp index df905215df1..83fd9692e49 100644 --- 
a/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPartWriter.cpp @@ -49,7 +49,9 @@ IMergeTreeDataPartWriter::IMergeTreeDataPartWriter( , columns_list(columns_list_) , settings(settings_) , index_granularity(index_granularity_) - , with_final_mark(storage.getSettings()->write_final_mark && settings.can_use_adaptive_granularity){} + , with_final_mark(storage.getSettings()->write_final_mark && settings.can_use_adaptive_granularity) +{ +} Columns IMergeTreeDataPartWriter::releaseIndexColumns() { diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 9fa19859c7f..240759b29c7 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -877,6 +877,8 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) std::lock_guard loading_lock(mutex); if (!data_parts_indexes.insert(part).second) throw Exception("Part " + part->name + " already exists", ErrorCodes::DUPLICATE_DATA_PART); + + addPartContributionToDataVolume(part); }); } @@ -893,6 +895,8 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) if (!data_parts_indexes.insert(part).second) throw Exception("Part " + part->name + " already exists", ErrorCodes::DUPLICATE_DATA_PART); + + addPartContributionToDataVolume(part); } if (has_non_adaptive_parts && has_adaptive_parts && !settings->enable_mixed_granularity_parts) @@ -924,6 +928,7 @@ void MergeTreeData::loadDataParts(bool skip_sanity_checks) { (*it)->remove_time.store((*it)->modification_time, std::memory_order_relaxed); modifyPartState(it, DataPartState::Outdated); + removePartContributionToDataVolume(*it); }; (*prev_jt)->assertState({DataPartState::Committed}); @@ -1292,6 +1297,8 @@ void MergeTreeData::dropAllData() } } + setDataVolume(0, 0, 0); + LOG_TRACE(log, "dropAllData: done."); } @@ -1987,16 +1994,25 @@ bool MergeTreeData::renameTempPartAndReplace( } else { + size_t reduce_bytes = 0; + size_t reduce_rows = 0; + size_t reduce_parts = 0; auto current_time = time(nullptr); for (const DataPartPtr & covered_part : covered_parts) { covered_part->remove_time.store(current_time, std::memory_order_relaxed); modifyPartState(covered_part, DataPartState::Outdated); removePartContributionToColumnSizes(covered_part); + reduce_bytes += covered_part->getBytesOnDisk(); + reduce_rows += covered_part->rows_count; + ++reduce_parts; } + decreaseDataVolume(reduce_bytes, reduce_rows, reduce_parts); + modifyPartState(part_it, DataPartState::Committed); addPartContributionToColumnSizes(part); + addPartContributionToDataVolume(part); } auto part_in_memory = asInMemoryPart(part); @@ -2037,7 +2053,10 @@ void MergeTreeData::removePartsFromWorkingSet(const MergeTreeData::DataPartsVect for (const DataPartPtr & part : remove) { if (part->state == IMergeTreeDataPart::State::Committed) + { removePartContributionToColumnSizes(part); + removePartContributionToDataVolume(part); + } if (part->state == IMergeTreeDataPart::State::Committed || clear_without_timeout) part->remove_time.store(remove_time, std::memory_order_relaxed); @@ -2150,7 +2169,10 @@ restore_covered) DataPartPtr part = *it_part; if (part->state == DataPartState::Committed) + { + removePartContributionToDataVolume(part); removePartContributionToColumnSizes(part); + } modifyPartState(it_part, DataPartState::Deleting); part->renameToDetached(prefix); @@ -2198,6 +2220,7 @@ restore_covered) if ((*it)->state != DataPartState::Committed) { addPartContributionToColumnSizes(*it); + 
addPartContributionToDataVolume(*it); modifyPartState(it, DataPartState::Committed); // iterator is not invalidated here } @@ -2228,6 +2251,7 @@ restore_covered) if ((*it)->state != DataPartState::Committed) { addPartContributionToColumnSizes(*it); + addPartContributionToDataVolume(*it); modifyPartState(it, DataPartState::Committed); } @@ -2289,41 +2313,19 @@ void MergeTreeData::tryRemovePartImmediately(DataPartPtr && part) size_t MergeTreeData::getTotalActiveSizeInBytes() const { - size_t res = 0; - { - auto lock = lockParts(); - - for (const auto & part : getDataPartsStateRange(DataPartState::Committed)) - res += part->getBytesOnDisk(); - } - - return res; + return total_active_size_bytes.load(std::memory_order_acquire); } size_t MergeTreeData::getTotalActiveSizeInRows() const { - size_t res = 0; - { - auto lock = lockParts(); - - for (const auto & part : getDataPartsStateRange(DataPartState::Committed)) - res += part->rows_count; - } - - return res; + return total_active_size_rows.load(std::memory_order_acquire); } size_t MergeTreeData::getPartsCount() const { - auto lock = lockParts(); - - size_t res = 0; - for (const auto & part [[maybe_unused]] : getDataPartsStateRange(DataPartState::Committed)) - ++res; - - return res; + return total_active_size_parts.load(std::memory_order_acquire); } @@ -2452,6 +2454,9 @@ void MergeTreeData::swapActivePart(MergeTreeData::DataPartPtr part_copy) auto part_it = data_parts_indexes.insert(part_copy).first; modifyPartState(part_it, DataPartState::Committed); + removePartContributionToDataVolume(original_active_part); + addPartContributionToDataVolume(part_copy); + auto disk = original_active_part->volume->getDisk(); String marker_path = original_active_part->getFullRelativePath() + IMergeTreeDataPart::DELETE_ON_DESTROY_MARKER_FILE_NAME; try @@ -3349,6 +3354,15 @@ MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit(MergeTreeData: auto * owing_parts_lock = acquired_parts_lock ? 
acquired_parts_lock : &parts_lock; auto current_time = time(nullptr); + + size_t add_bytes = 0; + size_t add_rows = 0; + size_t add_parts = 0; + + size_t reduce_bytes = 0; + size_t reduce_rows = 0; + size_t reduce_parts = 0; + for (const DataPartPtr & part : precommitted_parts) { DataPartPtr covering_part; @@ -3366,14 +3380,25 @@ MergeTreeData::DataPartsVector MergeTreeData::Transaction::commit(MergeTreeData: for (const DataPartPtr & covered_part : covered_parts) { covered_part->remove_time.store(current_time, std::memory_order_relaxed); + + reduce_bytes += covered_part->getBytesOnDisk(); + reduce_rows += covered_part->rows_count; + data.modifyPartState(covered_part, DataPartState::Outdated); data.removePartContributionToColumnSizes(covered_part); } + reduce_parts += covered_parts.size(); + + add_bytes += part->getBytesOnDisk(); + add_rows += part->rows_count; + ++add_parts; data.modifyPartState(part, DataPartState::Committed); data.addPartContributionToColumnSizes(part); } } + data.decreaseDataVolume(reduce_bytes, reduce_rows, reduce_parts); + data.increaseDataVolume(add_bytes, add_rows, add_parts); } clear(); @@ -3918,4 +3943,34 @@ size_t MergeTreeData::getTotalMergesWithTTLInMergeList() const return global_context.getMergeList().getExecutingMergesWithTTLCount(); } +void MergeTreeData::addPartContributionToDataVolume(const DataPartPtr & part) +{ + increaseDataVolume(part->getBytesOnDisk(), part->rows_count, 1); +} + +void MergeTreeData::removePartContributionToDataVolume(const DataPartPtr & part) +{ + decreaseDataVolume(part->getBytesOnDisk(), part->rows_count, 1); +} + +void MergeTreeData::increaseDataVolume(size_t bytes, size_t rows, size_t parts) +{ + total_active_size_bytes.fetch_add(bytes, std::memory_order_acq_rel); + total_active_size_rows.fetch_add(rows, std::memory_order_acq_rel); + total_active_size_parts.fetch_add(parts, std::memory_order_acq_rel); +} + +void MergeTreeData::decreaseDataVolume(size_t bytes, size_t rows, size_t parts) +{ + total_active_size_bytes.fetch_sub(bytes, std::memory_order_acq_rel); + total_active_size_rows.fetch_sub(rows, std::memory_order_acq_rel); + total_active_size_parts.fetch_sub(parts, std::memory_order_acq_rel); +} + +void MergeTreeData::setDataVolume(size_t bytes, size_t rows, size_t parts) +{ + total_active_size_bytes.store(bytes, std::memory_order_release); + total_active_size_rows.store(rows, std::memory_order_release); + total_active_size_parts.store(parts, std::memory_order_release); +} } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 382f53f40a4..e5ffe8c025b 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -945,6 +945,18 @@ private: virtual void startBackgroundMovesIfNeeded() = 0; bool allow_nullable_key{}; + + void addPartContributionToDataVolume(const DataPartPtr & part); + void removePartContributionToDataVolume(const DataPartPtr & part); + + void increaseDataVolume(size_t bytes, size_t rows, size_t parts); + void decreaseDataVolume(size_t bytes, size_t rows, size_t parts); + + void setDataVolume(size_t bytes, size_t rows, size_t parts); + + std::atomic total_active_size_bytes = 0; + std::atomic total_active_size_rows = 0; + std::atomic total_active_size_parts = 0; }; } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp index 389d2950f65..d5e7009efd6 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp +++ 
b/src/Storages/MergeTree/MergeTreeDataPartWriterCompact.cpp @@ -93,12 +93,12 @@ Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity, result.emplace_back(Granule{ .start_row = current_row, - .granularity_rows = expected_rows_in_mark, - .block_rows = std::min(rows_left_in_block, expected_rows_in_mark), + .rows_to_write = std::min(rows_left_in_block, expected_rows_in_mark), .mark_number = current_mark, - .mark_on_start = true + .mark_on_start = true, + .is_complete = (rows_left_in_block >= expected_rows_in_mark) }); - current_row += expected_rows_in_mark; + current_row += result.back().rows_to_write; current_mark++; } @@ -173,8 +173,7 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G { for (const auto & granule : granules) { - if (granule.granularity_rows) - data_written = true; + data_written = true; auto name_and_type = columns_list.begin(); for (size_t i = 0; i < columns_list.size(); ++i, ++name_and_type) @@ -206,13 +205,13 @@ void MergeTreeDataPartWriterCompact::writeDataBlock(const Block & block, const G writeIntBinary(plain_hashing.count(), marks); writeIntBinary(UInt64(0), marks); - writeColumnSingleGranule(block.getByName(name_and_type->name), stream_getter, granule.start_row, granule.granularity_rows); + writeColumnSingleGranule(block.getByName(name_and_type->name), stream_getter, granule.start_row, granule.rows_to_write); /// Each type always have at least one substream prev_stream->hashing_buf.next(); //-V522 } - writeIntBinary(granule.block_rows, marks); + writeIntBinary(granule.rows_to_write, marks); } } @@ -222,11 +221,11 @@ void MergeTreeDataPartWriterCompact::finishDataSerialization(IMergeTreeDataPart: { auto block = header.cloneWithColumns(columns_buffer.releaseColumns()); auto granules_to_write = getGranulesToWrite(index_granularity, block.rows(), getCurrentMark(), /* last_block = */ true); - if (!granules_to_write.back().isCompleted()) + if (!granules_to_write.back().is_complete) { /// Correct last mark as it should contain exact amount of rows. 
index_granularity.popMark(); - index_granularity.appendMark(granules_to_write.back().block_rows); + index_granularity.appendMark(granules_to_write.back().rows_to_write); } writeDataBlockPrimaryIndexAndSkipIndices(block, granules_to_write); } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp index 5190b439809..8e6ffe9ee68 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.cpp @@ -218,6 +218,12 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializeSkipIndices(const Block auto & stream = *skip_indices_streams[i]; for (const auto & granule : granules_to_write) { + if (skip_index_accumulated_marks[i] == index_helper->index.granularity) + { + skip_indices_aggregators[i]->getGranuleAndReset()->serializeBinary(stream.compressed); + skip_index_accumulated_marks[i] = 0; + } + if (skip_indices_aggregators[i]->empty() && granule.mark_on_start) { skip_indices_aggregators[i] = index_helper->createIndexAggregator(); @@ -234,18 +240,9 @@ void MergeTreeDataPartWriterOnDisk::calculateAndSerializeSkipIndices(const Block } size_t pos = granule.start_row; - skip_indices_aggregators[i]->update(skip_indexes_block, &pos, granule.granularity_rows); - if (granule.isCompleted()) - { + skip_indices_aggregators[i]->update(skip_indexes_block, &pos, granule.rows_to_write); + if (granule.is_complete) ++skip_index_accumulated_marks[i]; - - /// write index if it is filled - if (skip_index_accumulated_marks[i] == index_helper->index.granularity) - { - skip_indices_aggregators[i]->getGranuleAndReset()->serializeBinary(stream.compressed); - skip_index_accumulated_marks[i] = 0; - } - } } } } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h index bc09e0e61e1..a7b84c95e0a 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterOnDisk.h @@ -20,25 +20,18 @@ struct Granule { /// Start row in block for granule size_t start_row; - /// Amount of rows which granule have to contain according to index - /// granularity. - /// NOTE: Sometimes it's not equal to actually written rows, for example - /// for the last granule if it's smaller than computed granularity. - size_t granularity_rows; /// Amount of rows from block which have to be written to disk from start_row - size_t block_rows; + size_t rows_to_write; /// Global mark number in the list of all marks (index_granularity) for this part size_t mark_number; /// Should writer write mark for the first of this granule to disk. /// NOTE: Sometimes we don't write mark for the start row, because /// this granule can be continuation of the previous one. bool mark_on_start; - - /// Is this granule contain amout of rows equal to the value in index granularity - bool isCompleted() const - { - return granularity_rows == block_rows; - } + /// If true: when this granule is written to disk, all rows for the corresponding mark will + /// be written. This does not mean that rows_to_write == index_granularity.getMarkRows(mark_number): + /// we may have a lot of small blocks between two marks, and this may be the last one. + bool is_complete; }; /// Multiple granules to write for concrete block.
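The compact and wide writers above now slice every incoming block into Granule entries, and the new rows_to_write / is_complete fields replace the old granularity_rows / isCompleted() pair. The following standalone C++ sketch mirrors that slicing loop under a simplifying assumption: a single fixed expected_rows_in_mark instead of per-mark values from MergeTreeIndexGranularity, and a renamed GranuleSketch struct, so it illustrates the bookkeeping rather than reproducing the actual ClickHouse code.

#include <algorithm>
#include <cstddef>
#include <vector>

// Simplified stand-in for the Granule struct declared above.
struct GranuleSketch
{
    size_t start_row;      // first row of the block that belongs to this granule
    size_t rows_to_write;  // rows from this block that will be written for the mark
    size_t mark_number;    // global mark number within the part
    bool mark_on_start;    // whether a mark has to be written when this granule starts
    bool is_complete;      // true if the mark receives all the rows it expects
};

// Split block_rows rows into granules of at most expected_rows_in_mark rows each,
// starting from current_mark.
std::vector<GranuleSketch> splitBlockIntoGranules(size_t block_rows, size_t current_mark, size_t expected_rows_in_mark)
{
    std::vector<GranuleSketch> result;
    size_t current_row = 0;
    while (current_row < block_rows)
    {
        const size_t rows_left_in_block = block_rows - current_row;
        result.push_back(GranuleSketch{
            .start_row = current_row,
            .rows_to_write = std::min(rows_left_in_block, expected_rows_in_mark),
            .mark_number = current_mark,
            .mark_on_start = true,
            .is_complete = (rows_left_in_block >= expected_rows_in_mark),
        });
        current_row += result.back().rows_to_write;
        ++current_mark;
    }
    return result;
}

If expected_rows_in_mark does not divide the block evenly, the final granule comes back with is_complete == false, which is exactly the case the last-mark adjustment in the wide writer below has to handle.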
diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp index 1127c3a7dd7..be735104e99 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.cpp @@ -33,12 +33,12 @@ Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity, size_t rows_left_in_block = block_rows - current_row; result.emplace_back(Granule{ .start_row = current_row, - .granularity_rows = rows_left_in_last_mark, - .block_rows = std::min(rows_left_in_block, rows_left_in_last_mark), + .rows_to_write = std::min(rows_left_in_block, rows_left_in_last_mark), .mark_number = current_mark, .mark_on_start = false, /// Don't mark this granule because we have already marked it + .is_complete = (rows_left_in_block >= rows_left_in_last_mark), }); - current_row += rows_left_in_last_mark; + current_row += result.back().rows_to_write; current_mark++; } @@ -51,12 +51,12 @@ Granules getGranulesToWrite(const MergeTreeIndexGranularity & index_granularity, /// save incomplete granule result.emplace_back(Granule{ .start_row = current_row, - .granularity_rows = expected_rows_in_mark, - .block_rows = std::min(rows_left_in_block, expected_rows_in_mark), + .rows_to_write = std::min(rows_left_in_block, expected_rows_in_mark), .mark_number = current_mark, .mark_on_start = true, + .is_complete = (rows_left_in_block >= expected_rows_in_mark), }); - current_row += expected_rows_in_mark; + current_row += result.back().rows_to_write; current_mark++; } @@ -136,11 +136,12 @@ IDataType::OutputStreamGetter MergeTreeDataPartWriterWide::createStreamGetter( }; } + void MergeTreeDataPartWriterWide::shiftCurrentMark(const Granules & granules_written) { auto last_granule = granules_written.back(); /// If we didn't finished last granule than we will continue to write it from new block - if (!last_granule.isCompleted()) + if (!last_granule.is_complete) { /// Shift forward except last granule setCurrentMark(getCurrentMark() + granules_written.size() - 1); @@ -148,9 +149,9 @@ void MergeTreeDataPartWriterWide::shiftCurrentMark(const Granules & granules_wri /// We wrote whole block in the same granule, but didn't finished it. /// So add written rows to rows written in last_mark if (still_in_the_same_granule) - rows_written_in_last_mark += last_granule.block_rows; + rows_written_in_last_mark += last_granule.rows_to_write; else - rows_written_in_last_mark = last_granule.block_rows; + rows_written_in_last_mark = last_granule.rows_to_write; } else { @@ -167,6 +168,23 @@ void MergeTreeDataPartWriterWide::write(const Block & block, const IColumn::Perm if (compute_granularity) { size_t index_granularity_for_block = computeIndexGranularity(block); + if (rows_written_in_last_mark > 0) + { + size_t rows_left_in_last_mark = index_granularity.getMarkRows(getCurrentMark()) - rows_written_in_last_mark; + /// Previous granularity was much bigger than our new block's + /// granularity let's adjust it, because we want add new + /// heavy-weight blocks into small old granule. + if (rows_left_in_last_mark > index_granularity_for_block) + { + /// We have already written more rows than granularity of our block. + /// adjust last mark rows and flush to disk. + if (rows_written_in_last_mark >= index_granularity_for_block) + adjustLastMarkIfNeedAndFlushToDisk(rows_written_in_last_mark); + else /// We still can write some rows from new block into previous granule. 
+ adjustLastMarkIfNeedAndFlushToDisk(index_granularity_for_block - rows_written_in_last_mark); + } + } + fillIndexGranularity(index_granularity_for_block, block.rows()); } @@ -281,10 +299,7 @@ void MergeTreeDataPartWriterWide::writeSingleGranule( IDataType::SerializeBinaryBulkSettings & serialize_settings, const Granule & granule) { - if (granule.mark_on_start) - writeSingleMark(name, type, offset_columns, granule.granularity_rows, serialize_settings.path); - - type.serializeBinaryBulkWithMultipleStreams(column, granule.start_row, granule.granularity_rows, serialize_settings, serialization_state); + type.serializeBinaryBulkWithMultipleStreams(column, granule.start_row, granule.rows_to_write, serialize_settings, serialization_state); /// So that instead of the marks pointing to the end of the compressed block, there were marks pointing to the beginning of the next one. type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */) @@ -309,6 +324,9 @@ void MergeTreeDataPartWriterWide::writeColumn( WrittenOffsetColumns & offset_columns, const Granules & granules) { + if (granules.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Empty granules for column {}, current mark {}", backQuoteIfNeed(name), getCurrentMark()); + auto [it, inserted] = serialization_states.emplace(name, nullptr); if (inserted) @@ -326,8 +344,14 @@ void MergeTreeDataPartWriterWide::writeColumn( for (const auto & granule : granules) { - if (granule.granularity_rows > 0) - data_written = true; + data_written = true; + + if (granule.mark_on_start) + { + if (last_non_written_marks.count(name)) + throw Exception(ErrorCodes::LOGICAL_ERROR, "We have to add new mark for column, but already have non written mark. Current mark {}, total marks {}, offset {}", getCurrentMark(), index_granularity.getMarksCount(), rows_written_in_last_mark); + last_non_written_marks[name] = getCurrentMarksForColumn(name, type, offset_columns, serialize_settings.path); + } writeSingleGranule( name, @@ -338,6 +362,17 @@ void MergeTreeDataPartWriterWide::writeColumn( serialize_settings, granule ); + + if (granule.is_complete) + { + auto marks_it = last_non_written_marks.find(name); + if (marks_it == last_non_written_marks.end()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No mark was saved for incomplete granule for column {}", backQuoteIfNeed(name)); + + for (const auto & mark : marks_it->second) + flushMarkToFile(mark, index_granularity.getMarkRows(granule.mark_number)); + last_non_written_marks.erase(marks_it); + } } type.enumerateStreams([&] (const IDataType::SubstreamPath & substream_path, const IDataType & /* substream_type */) @@ -365,7 +400,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const String & name, bool must_be_last = false; UInt64 offset_in_compressed_file = 0; UInt64 offset_in_decompressed_block = 0; - UInt64 index_granularity_rows = 0; + UInt64 index_granularity_rows = data_part->index_granularity_info.fixed_index_granularity; size_t mark_num; @@ -379,7 +414,7 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const String & name, if (settings.can_use_adaptive_granularity) DB::readBinary(index_granularity_rows, mrk_in); else - index_granularity_rows = storage.getSettings()->index_granularity; + index_granularity_rows = data_part->index_granularity_info.fixed_index_granularity; if (must_be_last) { @@ -404,8 +439,8 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const String & name, if (index_granularity_rows != 
index_granularity.getMarkRows(mark_num)) throw Exception( - ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for mark #{} (compressed offset {}, decompressed offset {}), in-memory {}, on disk {}", - mark_num, offset_in_compressed_file, offset_in_decompressed_block, index_granularity.getMarkRows(mark_num), index_granularity_rows); + ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for part {} for mark #{} (compressed offset {}, decompressed offset {}), in-memory {}, on disk {}, total marks {}", + data_part->getFullPath(), mark_num, offset_in_compressed_file, offset_in_decompressed_block, index_granularity.getMarkRows(mark_num), index_granularity_rows, index_granularity.getMarksCount()); auto column = type.createColumn(); @@ -415,8 +450,13 @@ void MergeTreeDataPartWriterWide::validateColumnOfFixedSize(const String & name, { must_be_last = true; } - else if (column->size() != index_granularity_rows) + + /// Now they must be equal + if (column->size() != index_granularity_rows) { + if (must_be_last && !settings.can_use_adaptive_granularity) + break; + throw Exception( ErrorCodes::LOGICAL_ERROR, "Incorrect mark rows for mark #{} (compressed offset {}, decompressed offset {}), actually in bin file {}, in mrk file {}", mark_num, offset_in_compressed_file, offset_in_decompressed_block, column->size(), index_granularity.getMarkRows(mark_num)); @@ -445,6 +485,8 @@ void MergeTreeDataPartWriterWide::finishDataSerialization(IMergeTreeDataPart::Ch serialize_settings.low_cardinality_max_dictionary_size = global_settings.low_cardinality_max_dictionary_size; serialize_settings.low_cardinality_use_single_dictionary_for_part = global_settings.low_cardinality_use_single_dictionary_for_part != 0; WrittenOffsetColumns offset_columns; + if (rows_written_in_last_mark > 0) + adjustLastMarkIfNeedAndFlushToDisk(rows_written_in_last_mark); bool write_final_mark = (with_final_mark && data_written); @@ -474,6 +516,8 @@ void MergeTreeDataPartWriterWide::finishDataSerialization(IMergeTreeDataPart::Ch serialization_states.clear(); #ifndef NDEBUG + /// Heavy weight validation of written data. Checks that we are able to read + /// data according to marks. Otherwise throws LOGICAL_ERROR (equal to about in debug mode) for (const auto & column : columns_list) { if (column.type->isValueRepresentedByNumber() && !column.type->haveSubtypes()) @@ -537,4 +581,50 @@ void MergeTreeDataPartWriterWide::fillIndexGranularity(size_t index_granularity_ rows_in_block); } + +void MergeTreeDataPartWriterWide::adjustLastMarkIfNeedAndFlushToDisk(size_t new_rows_in_last_mark) +{ + /// We can adjust marks only if we computed granularity for blocks. 
+ /// Otherwise we cannot change granularity because it will differ from + /// other columns + if (compute_granularity && settings.can_use_adaptive_granularity) + { + if (getCurrentMark() != index_granularity.getMarksCount() - 1) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Non last mark {} (with {} rows) having rows offset {}, total marks {}", + getCurrentMark(), index_granularity.getMarkRows(getCurrentMark()), rows_written_in_last_mark, index_granularity.getMarksCount()); + + index_granularity.popMark(); + index_granularity.appendMark(new_rows_in_last_mark); + } + + /// Last mark should be filled, otherwise it's a bug + if (last_non_written_marks.empty()) + throw Exception(ErrorCodes::LOGICAL_ERROR, "No saved marks for last mark {} having rows offset {}, total marks {}", + getCurrentMark(), rows_written_in_last_mark, index_granularity.getMarksCount()); + + if (rows_written_in_last_mark == new_rows_in_last_mark) + { + for (const auto & [name, marks] : last_non_written_marks) + { + for (const auto & mark : marks) + flushMarkToFile(mark, index_granularity.getMarkRows(getCurrentMark())); + } + + last_non_written_marks.clear(); + + if (compute_granularity && settings.can_use_adaptive_granularity) + { + /// Also add a mark to each skip index, because all of them + /// have already accumulated all rows from the mark being adjusted + for (size_t i = 0; i < skip_indices.size(); ++i) + ++skip_index_accumulated_marks[i]; + + /// This mark is completed, go further + setCurrentMark(getCurrentMark() + 1); + /// Without offset + rows_written_in_last_mark = 0; + } + } +} + } diff --git a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h index cffedd66a32..8c76c10abef 100644 --- a/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h +++ b/src/Storages/MergeTree/MergeTreeDataPartWriterWide.h @@ -99,6 +99,14 @@ private: /// in our index_granularity array. void shiftCurrentMark(const Granules & granules_written); + /// Change rows in the last mark in index_granularity to new_rows_in_last_mark. + /// Flush all marks from last_non_written_marks to disk and increment current mark if already written rows + /// (rows_written_in_last_mark) equals new_rows_in_last_mark. + /// + /// This function is used when blocks change granularity drastically and we have an unfinished mark. + /// It is also useful for keeping an exact amount of rows in the last (non-final) mark. + void adjustLastMarkIfNeedAndFlushToDisk(size_t new_rows_in_last_mark); + IDataType::OutputStreamGetter createStreamGetter(const String & name, WrittenOffsetColumns & offset_columns) const; using SerializationState = IDataType::SerializeBinaryBulkStatePtr; @@ -108,6 +116,10 @@ private: using ColumnStreams = std::map; ColumnStreams column_streams; + /// Marks not yet written to disk (per column). They wait until all rows for + /// these marks have been written to disk. + using MarksForColumns = std::unordered_map; + MarksForColumns last_non_written_marks; /// How many rows we have already written in the current mark. /// More than zero when incoming blocks are smaller than their granularity.
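The last_non_written_marks map added above defers writing a column's marks until the corresponding granule is complete, so each mark ends up carrying the exact row count even when many small blocks arrive between two marks. The sketch below is a hypothetical, heavily simplified illustration of that deferral pattern: MarkPosition, DeferredMarkWriter, and the write_mark callback are invented names, whereas the real writer stores the per-stream marks returned by getCurrentMarksForColumn and flushes them via flushMarkToFile.

#include <cstddef>
#include <functional>
#include <string>
#include <unordered_map>
#include <vector>

// Simplified stand-in for a mark position inside the compressed data file.
struct MarkPosition
{
    size_t offset_in_compressed_file = 0;
    size_t offset_in_decompressed_block = 0;
};

class DeferredMarkWriter
{
public:
    // Remember the marks when a granule with mark_on_start == true begins for a column.
    void rememberMarks(const std::string & column, std::vector<MarkPosition> marks)
    {
        pending_marks[column] = std::move(marks);
    }

    // Once the granule is complete, write the remembered marks together with the
    // exact number of rows covered by the mark, then forget them.
    void flushMarks(const std::string & column, size_t rows_in_mark,
                    const std::function<void(const MarkPosition &, size_t)> & write_mark)
    {
        auto it = pending_marks.find(column);
        if (it == pending_marks.end())
            return; // nothing deferred for this column
        for (const auto & mark : it->second)
            write_mark(mark, rows_in_mark);
        pending_marks.erase(it);
    }

    bool hasPending() const { return !pending_marks.empty(); }

private:
    std::unordered_map<std::string, std::vector<MarkPosition>> pending_marks;
};

adjustLastMarkIfNeedAndFlushToDisk plays the same role at block boundaries: it first shrinks the unfinished last mark to the rows actually written, and only then flushes whatever is still pending.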
diff --git a/src/Storages/MergeTree/MergeTreeIndices.h b/src/Storages/MergeTree/MergeTreeIndices.h index 28795ae46b5..c7b9dfb123e 100644 --- a/src/Storages/MergeTree/MergeTreeIndices.h +++ b/src/Storages/MergeTree/MergeTreeIndices.h @@ -10,7 +10,6 @@ #include #include #include -#include #include constexpr auto INDEX_FILE_PREFIX = "skp_idx_"; diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 2f3931786a6..9b344d19f8b 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -111,9 +111,6 @@ struct Settings; M(Bool, remove_empty_parts, true, "Remove empty parts after they were pruned by TTL, mutation, or collapsing merge algorithm", 0) \ M(Bool, assign_part_uuids, false, "Generate UUIDs for parts. Before enabling check that all replicas support new format.", 0) \ \ - /** Settings for testing purposes */ \ - M(Bool, randomize_part_type, false, "For testing purposes only. Randomizes part type between wide and compact", 0) \ - \ /** Obsolete settings. Kept for backward compatibility only. */ \ M(UInt64, min_relative_delay_to_yield_leadership, 120, "Obsolete setting, does nothing.", 0) \ M(UInt64, check_delay_period, 60, "Obsolete setting, does nothing.", 0) \ @@ -137,7 +134,8 @@ struct MergeTreeSettings : public BaseSettings /// We check settings after storage creation static bool isReadonlySetting(const String & name) { - return name == "index_granularity" || name == "index_granularity_bytes"; + return name == "index_granularity" || name == "index_granularity_bytes" || name == "write_final_mark" + || name == "enable_mixed_granularity_parts"; } static bool isPartFormatSetting(const String & name) diff --git a/src/Storages/MergeTree/registerStorageMergeTree.cpp b/src/Storages/MergeTree/registerStorageMergeTree.cpp index 91bf105af74..a2429cead3d 100644 --- a/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -234,25 +234,6 @@ If you use the Replicated version of engines, see https://clickhouse.tech/docs/e } -static void randomizePartTypeSettings(const std::unique_ptr & storage_settings) -{ - static constexpr auto MAX_THRESHOLD_FOR_ROWS = 100000; - static constexpr auto MAX_THRESHOLD_FOR_BYTES = 1024 * 1024 * 10; - - /// Create all parts in wide format with probability 1/3. - if (thread_local_rng() % 3 == 0) - { - storage_settings->min_rows_for_wide_part = 0; - storage_settings->min_bytes_for_wide_part = 0; - } - else - { - storage_settings->min_rows_for_wide_part = std::uniform_int_distribution{0, MAX_THRESHOLD_FOR_ROWS}(thread_local_rng); - storage_settings->min_bytes_for_wide_part = std::uniform_int_distribution{0, MAX_THRESHOLD_FOR_BYTES}(thread_local_rng); - } -} - - static StoragePtr create(const StorageFactory::Arguments & args) { /** [Replicated][|Summing|Collapsing|Aggregating|Replacing|Graphite]MergeTree (2 * 7 combinations) engines @@ -737,20 +718,6 @@ static StoragePtr create(const StorageFactory::Arguments & args) ++arg_num; } - /// Allow to randomize part type for tests to cover more cases. - /// But if settings were set explicitly restrict it. - if (storage_settings->randomize_part_type - && !storage_settings->min_rows_for_wide_part.changed - && !storage_settings->min_bytes_for_wide_part.changed) - { - randomizePartTypeSettings(storage_settings); - LOG_INFO(&Poco::Logger::get(args.table_id.getNameForLogs() + " (registerStorageMergeTree)"), - "Applied setting 'randomize_part_type'. 
" - "Setting 'min_rows_for_wide_part' changed to {}. " - "Setting 'min_bytes_for_wide_part' changed to {}.", - storage_settings->min_rows_for_wide_part, storage_settings->min_bytes_for_wide_part); - } - if (arg_num != arg_cnt) throw Exception("Wrong number of engine arguments.", ErrorCodes::BAD_ARGUMENTS); diff --git a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp index bb8500949fb..c5c43440228 100644 --- a/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp +++ b/src/Storages/RabbitMQ/RabbitMQBlockInputStream.cpp @@ -132,7 +132,6 @@ Block RabbitMQBlockInputStream::readImpl() } case IProcessor::Status::NeedData: case IProcessor::Status::Async: - case IProcessor::Status::Wait: case IProcessor::Status::ExpandPipeline: throw Exception("Source processor returned status " + IProcessor::statusToName(status), ErrorCodes::LOGICAL_ERROR); } diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index 683b2463d1f..5a3ada6288b 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -45,6 +45,8 @@ struct PrewhereDAGInfo PrewhereDAGInfo() = default; explicit PrewhereDAGInfo(ActionsDAGPtr prewhere_actions_, String prewhere_column_name_) : prewhere_actions(std::move(prewhere_actions_)), prewhere_column_name(std::move(prewhere_column_name_)) {} + + std::string dump() const; }; /// Helper struct to store all the information about the filter expression. @@ -53,6 +55,8 @@ struct FilterInfo ActionsDAGPtr actions_dag; String column_name; bool do_remove_column = false; + + std::string dump() const; }; struct InputOrderInfo diff --git a/src/Storages/StorageBuffer.cpp b/src/Storages/StorageBuffer.cpp index af595d10e28..7656da38a6c 100644 --- a/src/Storages/StorageBuffer.cpp +++ b/src/Storages/StorageBuffer.cpp @@ -386,16 +386,17 @@ static void appendBlock(const Block & from, Block & to) MemoryTracker::BlockerInThread temporarily_disable_memory_tracker; + MutableColumnPtr last_col; try { for (size_t column_no = 0, columns = to.columns(); column_no < columns; ++column_no) { const IColumn & col_from = *from.getByPosition(column_no).column.get(); - MutableColumnPtr col_to = IColumn::mutate(std::move(to.getByPosition(column_no).column)); + last_col = IColumn::mutate(std::move(to.getByPosition(column_no).column)); - col_to->insertRangeFrom(col_from, 0, rows); + last_col->insertRangeFrom(col_from, 0, rows); - to.getByPosition(column_no).column = std::move(col_to); + to.getByPosition(column_no).column = std::move(last_col); } } catch (...) 
@@ -406,6 +407,16 @@ static void appendBlock(const Block & from, Block & to) for (size_t column_no = 0, columns = to.columns(); column_no < columns; ++column_no) { ColumnPtr & col_to = to.getByPosition(column_no).column; + /// If there is no column, then the exception was thrown in the middle of append, in the insertRangeFrom() + if (!col_to) + { + col_to = std::move(last_col); + /// Suppress clang-tidy [bugprone-use-after-move] + last_col = {}; + } + /// But if there is still nothing, abort + if (!col_to) + throw Exception("No column to rollback", ErrorCodes::LOGICAL_ERROR); if (col_to->size() != old_rows) col_to = col_to->cut(0, old_rows); } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index a991103d33b..4ce7efb60b4 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -373,6 +374,7 @@ StorageDistributed::StorageDistributed( , cluster_name(global_context.getMacros()->expand(cluster_name_)) , has_sharding_key(sharding_key_) , relative_data_path(relative_data_path_) + , rng(randomSeed()) { StorageInMemoryMetadata storage_metadata; storage_metadata.setColumns(columns_); @@ -543,7 +545,8 @@ BlockOutputStreamPtr StorageDistributed::write(const ASTPtr &, const StorageMeta } /// If sharding key is not specified, then you can only write to a shard containing only one shard - if (!has_sharding_key && ((cluster->getLocalShardCount() + cluster->getRemoteShardCount()) >= 2)) + if (!settings.insert_distributed_one_random_shard && !has_sharding_key + && ((cluster->getLocalShardCount() + cluster->getRemoteShardCount()) >= 2)) { throw Exception("Method write is not supported by storage " + getName() + " with more than one shard and no sharding key provided", ErrorCodes::STORAGE_REQUIRES_PARAMETER); @@ -890,6 +893,32 @@ void StorageDistributed::rename(const String & new_path_to_table_data, const Sto } +size_t StorageDistributed::getRandomShardIndex(const Cluster::ShardsInfo & shards) +{ + + UInt32 total_weight = 0; + for (const auto & shard : shards) + total_weight += shard.weight; + + assert(total_weight > 0); + + size_t res; + { + std::lock_guard lock(rng_mutex); + res = std::uniform_int_distribution(0, total_weight - 1)(rng); + } + + for (auto i = 0ul, s = shards.size(); i < s; ++i) + { + if (shards[i].weight > res) + return i; + res -= shards[i].weight; + } + + __builtin_unreachable(); +} + + void StorageDistributed::renameOnDisk(const String & new_path_to_table_data) { for (const DiskPtr & disk : data_volume->getDisks()) diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index 58ade73b4cf..ce7e48c85a9 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -10,7 +10,9 @@ #include #include #include +#include +#include namespace DB { @@ -24,9 +26,6 @@ using VolumePtr = std::shared_ptr; class ExpressionActions; using ExpressionActionsPtr = std::shared_ptr; -class Cluster; -using ClusterPtr = std::shared_ptr; - /** A distributed table that resides on multiple servers. * Uses data from the specified database and tables on each server. 
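The reworked appendBlock above keeps the column currently being mutated in last_col, so that if insertRangeFrom throws part-way through, the rollback loop can put the moved-out column back and then cut every column down to old_rows. A standalone sketch of that rollback pattern (not ClickHouse code; std::unique_ptr<std::vector<int>> stands in for a column whose ownership is temporarily taken, the way IColumn::mutate does, and the failure is simulated):

// Standalone sketch of the appendBlock rollback technique shown above.
#include <iostream>
#include <memory>
#include <stdexcept>
#include <vector>

using ColumnPtr = std::unique_ptr<std::vector<int>>;

void appendRows(std::vector<ColumnPtr> & to, const std::vector<std::vector<int>> & from, size_t rows)
{
    const size_t old_rows = to[0]->size();
    ColumnPtr last_col;   /// Holds the column we are currently working on, like last_col in appendBlock.
    try
    {
        for (size_t i = 0; i < to.size(); ++i)
        {
            last_col = std::move(to[i]);                       /// "Mutate": take ownership, to[i] is now empty.
            if (from[i].size() < rows)
                throw std::runtime_error("not enough rows");   /// Simulated failure in the middle of the append.
            last_col->insert(last_col->end(), from[i].begin(), from[i].begin() + rows);
            to[i] = std::move(last_col);                       /// Put the column back on success.
        }
    }
    catch (...)
    {
        /// Rollback: restore the column that was moved out, then cut everything back to old_rows.
        for (auto & col : to)
        {
            if (!col)
                col = std::move(last_col);
            col->resize(old_rows);
        }
        throw;
    }
}

int main()
{
    std::vector<ColumnPtr> block;
    block.push_back(std::make_unique<std::vector<int>>(std::vector<int>{1, 2}));
    block.push_back(std::make_unique<std::vector<int>>(std::vector<int>{10, 20}));

    try
    {
        appendRows(block, {{3, 4}, {30}}, 2);   /// Second source column is too short -> throws mid-append.
    }
    catch (const std::exception & e)
    {
        std::cout << "rolled back after: " << e.what() << '\n';
    }
    std::cout << block[0]->size() << " " << block[1]->size() << '\n';   /// Both columns are back to 2 rows.
}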
* @@ -126,6 +125,8 @@ public: NamesAndTypesList getVirtuals() const override; + size_t getRandomShardIndex(const Cluster::ShardsInfo & shards); + String remote_database; String remote_table; ASTPtr remote_table_function_ptr; @@ -198,6 +199,9 @@ protected: std::unordered_map cluster_nodes_data; mutable std::mutex cluster_nodes_mutex; + // For random shard index generation + mutable std::mutex rng_mutex; + pcg64 rng; }; } diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index c81b3538042..8d1bee3e889 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -1124,41 +1124,71 @@ ActionLock StorageMergeTree::stopMergesAndWait() } +MergeTreeDataPartPtr StorageMergeTree::outdatePart(const String & part_name, bool force) +{ + + if (force) + { + /// Forcefully stop merges and make part outdated + auto merge_blocker = stopMergesAndWait(); + auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Committed}); + if (!part) + throw Exception("Part " + part_name + " not found, won't try to drop it.", ErrorCodes::NO_SUCH_DATA_PART); + removePartsFromWorkingSet({part}, true); + return part; + } + else + { + + /// Wait merges selector + std::unique_lock lock(currently_processing_in_background_mutex); + + auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Committed}); + /// It's okay, part was already removed + if (!part) + return nullptr; + + /// Part will be "removed" by merge or mutation, it's OK in case of some + /// background cleanup processes like removing of empty parts. + if (currently_merging_mutating_parts.count(part)) + return nullptr; + + removePartsFromWorkingSet({part}, true); + return part; + } +} + void StorageMergeTree::dropPartition(const ASTPtr & partition, bool detach, bool drop_part, const Context & context, bool throw_if_noop) { { - /// Asks to complete merges and does not allow them to start. - /// This protects against "revival" of data for a removed partition after completion of merge. - auto merge_blocker = stopMergesAndWait(); - - auto metadata_snapshot = getInMemoryMetadataPtr(); - MergeTreeData::DataPartsVector parts_to_remove; + auto metadata_snapshot = getInMemoryMetadataPtr(); if (drop_part) { - String part_name = partition->as().value.safeGet(); - auto part = getPartIfExists(part_name, {MergeTreeDataPartState::Committed}); - - if (part) - parts_to_remove.push_back(part); - else if (throw_if_noop) - throw Exception("Part " + part_name + " not found, won't try to drop it.", ErrorCodes::NO_SUCH_DATA_PART); - else + auto part = outdatePart(partition->as().value.safeGet(), throw_if_noop); + /// Nothing to do, part was removed in some different way + if (!part) return; + + parts_to_remove.push_back(part); } else { + /// Asks to complete merges and does not allow them to start. + /// This protects against "revival" of data for a removed partition after completion of merge. + auto merge_blocker = stopMergesAndWait(); String partition_id = getPartitionIDFromQuery(partition, context); parts_to_remove = getDataPartsVectorInPartition(MergeTreeDataPartState::Committed, partition_id); - } - // TODO should we throw an exception if parts_to_remove is empty? - removePartsFromWorkingSet(parts_to_remove, true); + /// TODO should we throw an exception if parts_to_remove is empty? 
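The new StorageDistributed::getRandomShardIndex above draws one value uniformly from [0, total_weight) and walks the shard weights until it reaches the owning bucket, so each shard is picked with probability proportional to its weight. A standalone sketch of the same technique (not ClickHouse code; std::mt19937_64 stands in for pcg64 and a plain vector of weights for Cluster::ShardsInfo):

// Standalone sketch of weighted random index selection, as used for insert_distributed_one_random_shard.
#include <cassert>
#include <cstdint>
#include <iostream>
#include <random>
#include <vector>

size_t pickWeightedIndex(const std::vector<uint32_t> & weights, std::mt19937_64 & rng)
{
    uint32_t total_weight = 0;
    for (uint32_t w : weights)
        total_weight += w;
    assert(total_weight > 0);

    /// Uniform value in [0, total_weight - 1], then walk the prefix sums.
    uint32_t res = std::uniform_int_distribution<uint32_t>(0, total_weight - 1)(rng);
    for (size_t i = 0; i < weights.size(); ++i)
    {
        if (weights[i] > res)
            return i;
        res -= weights[i];
    }
    return weights.size() - 1;   /// Not reached when total_weight > 0.
}

int main()
{
    std::mt19937_64 rng(std::random_device{}());
    std::vector<uint32_t> weights{1, 3, 2};   /// Shard 1 should be chosen roughly 3x as often as shard 0.
    std::vector<size_t> hits(weights.size(), 0);
    for (int i = 0; i < 60000; ++i)
        ++hits[pickWeightedIndex(weights, rng)];
    for (size_t i = 0; i < hits.size(); ++i)
        std::cout << "shard " << i << ": " << hits[i] << '\n';
}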
+ removePartsFromWorkingSet(parts_to_remove, true); + } if (detach) { /// If DETACH clone parts to detached/ directory + /// NOTE: no race with background cleanup until we hold pointers to parts for (const auto & part : parts_to_remove) { LOG_INFO(log, "Detaching {}", part->relative_path); diff --git a/src/Storages/StorageMergeTree.h b/src/Storages/StorageMergeTree.h index 5ee47832b1e..3263f124afa 100644 --- a/src/Storages/StorageMergeTree.h +++ b/src/Storages/StorageMergeTree.h @@ -107,7 +107,7 @@ private: BackgroundMovesExecutor background_moves_executor; /// For block numbers. - SimpleIncrement increment{0}; + SimpleIncrement increment; /// For clearOldParts, clearOldTemporaryDirectories. AtomicStopwatch time_after_previous_cleanup; @@ -135,6 +135,10 @@ private: */ bool merge(bool aggressive, const String & partition_id, bool final, bool deduplicate, const Names & deduplicate_by_columns, String * out_disable_reason = nullptr, bool optimize_skip_merged_partitions = false); + /// Make part state outdated and queue it to remove without timeout + /// If force, then stop merges and block them until part state became outdated. Throw exception if part doesn't exists + /// If not force, then take merges selector and check that part is not participating in background operations. + MergeTreeDataPartPtr outdatePart(const String & part_name, bool force); ActionLock stopMergesAndWait(); /// Allocate block number for new mutation, write mutation to disk diff --git a/src/Storages/System/StorageSystemSettingsProfileElements.cpp b/src/Storages/System/StorageSystemSettingsProfileElements.cpp index 6d6df4fe114..cf47416e188 100644 --- a/src/Storages/System/StorageSystemSettingsProfileElements.cpp +++ b/src/Storages/System/StorageSystemSettingsProfileElements.cpp @@ -68,121 +68,97 @@ void StorageSystemSettingsProfileElements::fillData(MutableColumns & res_columns auto add_rows_for_single_element = [&](const String & owner_name, EntityType owner_type, const SettingsProfileElement & element, size_t & index) { - switch (owner_type) + size_t old_num_rows = column_profile_name.size(); + size_t new_num_rows = old_num_rows + 1; + size_t current_index = index++; + + bool inserted_value = false; + if (!element.value.isNull() && !element.setting_name.empty()) { - case EntityType::SETTINGS_PROFILE: - { - column_user_name.insertDefault(); - column_user_name_null_map.push_back(true); - column_role_name.insertDefault(); - column_role_name_null_map.push_back(true); - column_profile_name.insertData(owner_name.data(), owner_name.length()); - column_profile_name_null_map.push_back(false); - break; - } - case EntityType::USER: - { - column_user_name.insertData(owner_name.data(), owner_name.length()); - column_user_name_null_map.push_back(false); - column_profile_name.insertDefault(); - column_profile_name_null_map.push_back(true); - column_role_name.insertDefault(); - column_role_name_null_map.push_back(true); - break; - } - case EntityType::ROLE: - { - column_user_name.insertDefault(); - column_user_name_null_map.push_back(true); - column_role_name.insertData(owner_name.data(), owner_name.length()); - column_role_name_null_map.push_back(false); - column_profile_name.insertDefault(); - column_profile_name_null_map.push_back(true); - break; - } - default: - assert(false); + String str = Settings::valueToStringUtil(element.setting_name, element.value); + column_value.insertData(str.data(), str.length()); + column_value_null_map.push_back(false); + inserted_value = true; } + bool inserted_min = false; + if 
(!element.min_value.isNull() && !element.setting_name.empty()) + { + String str = Settings::valueToStringUtil(element.setting_name, element.min_value); + column_min.insertData(str.data(), str.length()); + column_min_null_map.push_back(false); + inserted_min = true; + } + + bool inserted_max = false; + if (!element.max_value.isNull() && !element.setting_name.empty()) + { + String str = Settings::valueToStringUtil(element.setting_name, element.max_value); + column_max.insertData(str.data(), str.length()); + column_max_null_map.push_back(false); + inserted_max = true; + } + + bool inserted_readonly = false; + if (element.readonly && !element.setting_name.empty()) + { + column_readonly.push_back(*element.readonly); + column_readonly_null_map.push_back(false); + inserted_readonly = true; + } + + bool inserted_setting_name = false; + if (inserted_value || inserted_min || inserted_max || inserted_readonly) + { + const auto & setting_name = element.setting_name; + column_setting_name.insertData(setting_name.data(), setting_name.size()); + column_setting_name_null_map.push_back(false); + inserted_setting_name = true; + } + + bool inserted_inherit_profile = false; if (element.parent_profile) { auto parent_profile = access_control.tryReadName(*element.parent_profile); if (parent_profile) { - column_index.push_back(index++); - column_setting_name.insertDefault(); - column_setting_name_null_map.push_back(true); - column_value.insertDefault(); - column_value_null_map.push_back(true); - column_min.insertDefault(); - column_min_null_map.push_back(true); - column_max.insertDefault(); - column_max_null_map.push_back(true); - column_readonly.push_back(0); - column_readonly_null_map.push_back(true); const String & parent_profile_str = *parent_profile; column_inherit_profile.insertData(parent_profile_str.data(), parent_profile_str.length()); column_inherit_profile_null_map.push_back(false); + inserted_inherit_profile = true; } } - if (!element.setting_name.empty() - && (!element.value.isNull() || !element.min_value.isNull() || !element.max_value.isNull() || element.readonly)) + if (inserted_setting_name || inserted_inherit_profile) { - const auto & setting_name = element.setting_name; - column_index.push_back(index++); - column_setting_name.insertData(setting_name.data(), setting_name.size()); - column_setting_name_null_map.push_back(false); - - if (element.value.isNull()) + switch (owner_type) { - column_value.insertDefault(); - column_value_null_map.push_back(true); - } - else - { - String str = Settings::valueToStringUtil(setting_name, element.value); - column_value.insertData(str.data(), str.length()); - column_value_null_map.push_back(false); + case EntityType::SETTINGS_PROFILE: + { + column_profile_name.insertData(owner_name.data(), owner_name.length()); + column_profile_name_null_map.push_back(false); + break; + } + case EntityType::USER: + { + column_user_name.insertData(owner_name.data(), owner_name.length()); + column_user_name_null_map.push_back(false); + break; + } + case EntityType::ROLE: + { + column_role_name.insertData(owner_name.data(), owner_name.length()); + column_role_name_null_map.push_back(false); + break; + } + default: + assert(false); } - if (element.min_value.isNull()) - { - column_min.insertDefault(); - column_min_null_map.push_back(true); - } - else - { - String str = Settings::valueToStringUtil(setting_name, element.min_value); - column_min.insertData(str.data(), str.length()); - column_min_null_map.push_back(false); - } + column_index.push_back(current_index); - if 
(element.max_value.isNull()) - { - column_max.insertDefault(); - column_max_null_map.push_back(true); - } - else - { - String str = Settings::valueToStringUtil(setting_name, element.max_value); - column_max.insertData(str.data(), str.length()); - column_max_null_map.push_back(false); - } - - if (element.readonly) - { - column_readonly.push_back(*element.readonly); - column_readonly_null_map.push_back(false); - } - else - { - column_readonly.push_back(0); - column_readonly_null_map.push_back(true); - } - - column_inherit_profile.insertDefault(); - column_inherit_profile_null_map.push_back(true); + for (auto & res_column : res_columns) + res_column->insertManyDefaults(new_num_rows - res_column->size()); } }; diff --git a/src/Storages/System/StorageSystemStackTrace.cpp b/src/Storages/System/StorageSystemStackTrace.cpp index 9edcb1ede47..0b5e82a1f3d 100644 --- a/src/Storages/System/StorageSystemStackTrace.cpp +++ b/src/Storages/System/StorageSystemStackTrace.cpp @@ -33,12 +33,24 @@ namespace ErrorCodes namespace { - const pid_t expected_pid = getpid(); + // Initialized in StorageSystemStackTrace's ctor and used in signalHandler. + std::atomic expected_pid; const int sig = SIGRTMIN; std::atomic sequence_num = 0; /// For messages sent via pipe. + std::atomic data_ready_num = 0; + std::atomic signal_latch = false; /// Only needed for the thread sanitizer. - std::optional stack_trace; + /** Notes: + * Only one query from the table can be processed at a time. + * This is ensured by the mutex in the fillData function. + * We obtain information about threads by sending a signal and receiving info from the signal handler. + * Information is passed via global variables and a pipe is used for signaling. + * Actually we could send all information via the pipe, but we read from it with a timeout just in case, + * so it's convenient to use it only for signaling. + */ + + StackTrace stack_trace{NoCapture{}}; constexpr size_t max_query_id_size = 128; char query_id_data[max_query_id_size]; @@ -56,25 +68,34 @@ namespace return; /// Signal received too late. - if (info->si_value.sival_int != sequence_num.load(std::memory_order_relaxed)) + int notification_num = info->si_value.sival_int; + if (notification_num != sequence_num.load(std::memory_order_acquire)) + return; + + bool expected = false; + if (!signal_latch.compare_exchange_strong(expected, true, std::memory_order_acquire)) return; /// All these methods are signal-safe. const ucontext_t signal_context = *reinterpret_cast(context); - stack_trace.emplace(signal_context); + stack_trace = StackTrace(signal_context); StringRef query_id = CurrentThread::getQueryId(); query_id_size = std::min(query_id.size, max_query_id_size); if (query_id.data && query_id.size) memcpy(query_id_data, query_id.data, query_id_size); - int notification_num = info->si_value.sival_int; + /// This is unneeded (because we synchronize through the pipe) but makes TSan happy. + data_ready_num.store(notification_num, std::memory_order_release); + ssize_t res = ::write(notification_pipe.fds_rw[1], &notification_num, sizeof(notification_num)); /// We cannot do anything if write failed. (void)res; errno = saved_errno; + + signal_latch.store(false, std::memory_order_release); } /// Wait for data in pipe and read it. @@ -132,7 +153,7 @@ StorageSystemStackTrace::StorageSystemStackTrace(const StorageID & table_id_) notification_pipe.open(); /// Setup signal handler. 
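The signal handler above tags its answer with the request's sequence number and publishes it through data_ready_num with release semantics; the query side (shown a bit further down in this patch) accepts the globals only if the number it reads back with acquire matches what it sent, so a signal that arrives too late is simply ignored. A stripped-down, signal-free sketch of that handshake (not ClickHouse code; a plain thread stands in for the signal handler, and a polling loop for the pipe wait):

// Standalone sketch of the sequence_num / data_ready_num handshake used above.
#include <atomic>
#include <chrono>
#include <iostream>
#include <string>
#include <thread>

std::atomic<int> sequence_num{0};
std::atomic<int> data_ready_num{-1};
std::string payload;   /// Plain (non-atomic) data, published via the release store into data_ready_num.

void responder(int request_num)
{
    payload = "stack trace for request " + std::to_string(request_num);
    data_ready_num.store(request_num, std::memory_order_release);   /// Publish the result.
}

int main()
{
    for (int i = 0; i < 3; ++i)
    {
        int request_num = ++sequence_num;
        std::thread worker(responder, request_num);

        /// Wait with a timeout; on timeout the payload is simply not used.
        auto deadline = std::chrono::steady_clock::now() + std::chrono::milliseconds(100);
        while (data_ready_num.load(std::memory_order_acquire) != request_num
               && std::chrono::steady_clock::now() < deadline)
            std::this_thread::yield();

        if (data_ready_num.load(std::memory_order_acquire) == request_num)
            std::cout << payload << '\n';   /// Safe: this acquire pairs with the release in responder().
        else
            std::cout << "request " << request_num << " timed out\n";

        worker.join();
    }
}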
- + expected_pid = getpid(); struct sigaction sa{}; sa.sa_sigaction = signalHandler; sa.sa_flags = SA_SIGINFO; @@ -179,7 +200,7 @@ void StorageSystemStackTrace::fillData(MutableColumns & res_columns, const Conte pid_t tid = parse(it->path().filename()); sigval sig_value{}; - sig_value.sival_int = sequence_num.load(std::memory_order_relaxed); + sig_value.sival_int = sequence_num.load(std::memory_order_acquire); if (0 != ::sigqueue(tid, sig, sig_value)) { /// The thread may has been already finished. @@ -191,15 +212,15 @@ void StorageSystemStackTrace::fillData(MutableColumns & res_columns, const Conte /// Just in case we will wait for pipe with timeout. In case signal didn't get processed. - if (wait(100)) + if (wait(100) && sig_value.sival_int == data_ready_num.load(std::memory_order_acquire)) { - size_t stack_trace_size = stack_trace->getSize(); - size_t stack_trace_offset = stack_trace->getOffset(); + size_t stack_trace_size = stack_trace.getSize(); + size_t stack_trace_offset = stack_trace.getOffset(); Array arr; arr.reserve(stack_trace_size - stack_trace_offset); for (size_t i = stack_trace_offset; i < stack_trace_size; ++i) - arr.emplace_back(reinterpret_cast(stack_trace->getFramePointers()[i])); + arr.emplace_back(reinterpret_cast(stack_trace.getFramePointers()[i])); res_columns[0]->insert(tid); res_columns[1]->insertData(query_id_data, query_id_size); @@ -214,7 +235,11 @@ void StorageSystemStackTrace::fillData(MutableColumns & res_columns, const Conte res_columns[2]->insertDefault(); } - ++sequence_num; /// FYI: For signed Integral types, arithmetic is defined to use two’s complement representation. There are no undefined results. + /// Signed integer overflow is undefined behavior in both C and C++. However, according to + /// C++ standard, Atomic signed integer arithmetic is defined to use two's complement; there + /// are no undefined results. See https://en.cppreference.com/w/cpp/atomic/atomic and + /// http://eel.is/c++draft/atomics.types.generic#atomics.types.int-8 + ++sequence_num; } } diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 3ef09e5658f..132909438da 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -7,7 +7,7 @@ else () include (${ClickHouse_SOURCE_DIR}/cmake/add_check.cmake) endif () -install (PROGRAMS clickhouse-test clickhouse-test-server DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) +install (PROGRAMS clickhouse-test DESTINATION ${CMAKE_INSTALL_BINDIR} COMPONENT clickhouse) install ( DIRECTORY queries performance config DESTINATION ${CMAKE_INSTALL_DATAROOTDIR}/clickhouse-test @@ -17,30 +17,6 @@ install ( PATTERN ".gitignore" EXCLUDE ) -install (FILES server-test.xml DESTINATION ${CLICKHOUSE_ETC_DIR}/clickhouse-server COMPONENT clickhouse) -install (FILES client-test.xml DESTINATION ${CLICKHOUSE_ETC_DIR}/clickhouse-client COMPONENT clickhouse) - -if (ENABLE_TESTS) - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/CTestCustom.cmake ${ClickHouse_BINARY_DIR}) - - # maybe add --no-long ? 
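The replacement comment above notes that overflow on a plain signed int is undefined behaviour, whereas arithmetic on std::atomic integers is specified to wrap in two's complement, which is why the unbounded ++sequence_num is fine. A minimal standalone illustration of that guarantee (not part of the patch):

// Incrementing a plain int at INT_MAX is undefined behaviour; incrementing a
// std::atomic<int> is defined to wrap around (two's complement).
#include <atomic>
#include <iostream>
#include <limits>

int main()
{
    std::atomic<int> sequence_num{std::numeric_limits<int>::max()};
    ++sequence_num;                               /// Well-defined: wraps to the minimum value.
    std::cout << sequence_num.load() << '\n';     /// Prints -2147483648 on typical platforms.
}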
- # if you want disable some tests: env TEST_OPT0='--skip compile' - if(TEST_CMAKE_PARALLEL) - # NUMBER_OF_LOGICAL_CORES - if (TEST_CMAKE_PARALLEL GREATER 1) - set(TOTAL_TESTS ${TEST_CMAKE_PARALLEL}) - else() - set(TOTAL_TESTS ${NUMBER_OF_LOGICAL_CORES}) - endif() - foreach(proc RANGE 1 ${TOTAL_TESTS}) - add_test(NAME with_server${proc} COMMAND bash -c "env BUILD_DIR=${ClickHouse_BINARY_DIR} TEST_OPT0=--parallel=${proc}/${TOTAL_TESTS} ${CMAKE_CURRENT_SOURCE_DIR}/clickhouse-test-server") - endforeach() - else() - add_test(NAME with_server COMMAND bash -c "env BUILD_DIR=${ClickHouse_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/clickhouse-test-server") - endif() - -endif () - if (ENABLE_TEST_INTEGRATION) add_subdirectory (integration) endif () diff --git a/tests/CTestCustom.cmake b/tests/CTestCustom.cmake deleted file mode 100644 index 61bf7226506..00000000000 --- a/tests/CTestCustom.cmake +++ /dev/null @@ -1,5 +0,0 @@ -set(CTEST_CUSTOM_TESTS_IGNORE - example - example64 - capnp-heavy-tests-run -) diff --git a/tests/ci/ci_config.json b/tests/ci/ci_config.json index e599d170fb6..1efcf39601e 100644 --- a/tests/ci/ci_config.json +++ b/tests/ci/ci_config.json @@ -225,6 +225,18 @@ "with_coverage": false } }, + "Functional stateless tests (ANTLR debug)": { + "required_build_properties": { + "compiler": "clang-11", + "package_type": "deb", + "build_type": "debug", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, "Functional stateful tests (release)": { "required_build_properties": { "compiler": "clang-11", @@ -417,6 +429,18 @@ "with_coverage": false } }, + "Stress test (debug)": { + "required_build_properties": { + "compiler": "clang-11", + "package_type": "deb", + "build_type": "debug", + "sanitizer": "none", + "bundled": "bundled", + "splitted": "unsplitted", + "clang-tidy": "disable", + "with_coverage": false + } + }, "Integration tests (asan)": { "required_build_properties": { "compiler": "clang-11", diff --git a/tests/clickhouse-client.xml b/tests/clickhouse-client.xml index b6003ca2d09..b00c16f2c99 100644 --- a/tests/clickhouse-client.xml +++ b/tests/clickhouse-client.xml @@ -1,3 +1,3 @@ 100000 - + \ No newline at end of file diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 45cde00037f..94ad1a34de8 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -251,7 +251,7 @@ stop_time = None # def run_tests_array(all_tests, suite, suite_dir, suite_tmp_dir, run_total): def run_tests_array(all_tests_with_params): - all_tests, suite, suite_dir, suite_tmp_dir, run_total = all_tests_with_params + all_tests, suite, suite_dir, suite_tmp_dir = all_tests_with_params global exit_code global SERVER_DIED global stop_time @@ -571,6 +571,8 @@ def main(args): Note: if you are using unbundled mode, you also have to specify -c option.") build_flags = collect_build_flags(args.client) + if args.antlr: + build_flags.append('antlr') if args.use_skip_list: tests_to_skip_from_list = collect_tests_to_skip(args.skip_list_path, build_flags) @@ -688,7 +690,7 @@ def main(args): prefix, suffix = item.split('_', 1) try: - return reverse * int(prefix), suffix + return reverse * int(prefix) except ValueError: return 99997 @@ -698,6 +700,7 @@ def main(args): all_tests = [t for t in all_tests if any([re.search(r, t) for r in args.test])] all_tests.sort(key=key_func) + jobs = args.jobs parallel_tests = [] sequential_tests = [] for test in all_tests: @@ -706,35 +709,32 @@ def main(args): else: parallel_tests.append(test) - 
print("Found", len(parallel_tests), "parallel tests and", len(sequential_tests), "sequential tests") - run_n, run_total = args.parallel.split('/') - run_n = float(run_n) - run_total = float(run_total) - tests_n = len(parallel_tests) - if run_total > tests_n: - run_total = tests_n - if run_n > run_total: - continue + if jobs > 1 and len(parallel_tests) > 0: + print("Found", len(parallel_tests), "parallel tests and", len(sequential_tests), "sequential tests") + run_n, run_total = args.parallel.split('/') + run_n = float(run_n) + run_total = float(run_total) + tests_n = len(parallel_tests) + if run_total > tests_n: + run_total = tests_n - jobs = args.jobs - if jobs > tests_n: - jobs = tests_n - if jobs > run_total: - run_total = jobs + if jobs > tests_n: + jobs = tests_n + if jobs > run_total: + run_total = jobs - batch_size = len(parallel_tests) // jobs - parallel_tests_array = [] - for i in range(0, len(parallel_tests), batch_size): - parallel_tests_array.append((parallel_tests[i:i+batch_size], suite, suite_dir, suite_tmp_dir, run_total)) + batch_size = len(parallel_tests) // jobs + parallel_tests_array = [] + for i in range(0, len(parallel_tests), batch_size): + parallel_tests_array.append((parallel_tests[i:i+batch_size], suite, suite_dir, suite_tmp_dir)) - if jobs > 1: with closing(multiprocessing.Pool(processes=jobs)) as pool: pool.map(run_tests_array, parallel_tests_array) - run_tests_array((sequential_tests, suite, suite_dir, suite_tmp_dir, run_total)) + run_tests_array((sequential_tests, suite, suite_dir, suite_tmp_dir)) total_tests_run += len(sequential_tests) + len(parallel_tests) else: - run_tests_array((all_tests, suite, suite_dir, suite_tmp_dir, run_total)) + run_tests_array((all_tests, suite, suite_dir, suite_tmp_dir)) total_tests_run += len(all_tests) if args.hung_check: @@ -886,6 +886,7 @@ if __name__ == '__main__': group=parser.add_mutually_exclusive_group(required=False) group.add_argument('--shard', action='store_true', default=None, dest='shard', help='Run sharding related tests (required to clickhouse-server listen 127.0.0.2 127.0.0.3)') group.add_argument('--no-shard', action='store_false', default=None, dest='shard', help='Do not run shard related tests') + group.add_argument('--antlr', action='store_true', default=False, dest='antlr', help='Use new ANTLR parser in tests') args = parser.parse_args() @@ -965,6 +966,9 @@ if __name__ == '__main__': os.environ['CLICKHOUSE_URL_PARAMS'] += get_additional_client_options_url(args) + if args.antlr: + os.environ['CLICKHOUSE_CLIENT_OPT'] += ' --use_antlr_parser=1' + if args.extract_from_config is None: if os.access(args.binary + '-extract-from-config', os.X_OK): args.extract_from_config = args.binary + '-extract-from-config' diff --git a/tests/clickhouse-test-server b/tests/clickhouse-test-server index 7195abbfde8..a6b6bd86dfa 100755 --- a/tests/clickhouse-test-server +++ b/tests/clickhouse-test-server @@ -163,4 +163,4 @@ else $CLICKHOUSE_CLIENT_QUERY "SELECT event, value FROM system.events; SELECT metric, value FROM system.metrics; SELECT metric, value FROM system.asynchronous_metrics;" $CLICKHOUSE_CLIENT_QUERY "SELECT 'Still alive'" -fi +fi \ No newline at end of file diff --git a/tests/client-test.xml b/tests/client-test.xml index 936068a0923..ee84d41911f 100644 --- a/tests/client-test.xml +++ b/tests/client-test.xml @@ -1,4 +1,4 @@ - + 59000 59440 @@ -14,4 +14,4 @@ - + \ No newline at end of file diff --git a/tests/integration/test_adaptive_granularity_different_settings/test.py 
b/tests/integration/test_adaptive_granularity_different_settings/test.py index 55b7e1c91b8..ce8f32b6ec6 100644 --- a/tests/integration/test_adaptive_granularity_different_settings/test.py +++ b/tests/integration/test_adaptive_granularity_different_settings/test.py @@ -51,31 +51,3 @@ def test_attach_detach(start_cluster): assert node1.query("SELECT COUNT() FROM test") == "4\n" assert node2.query("SELECT COUNT() FROM test") == "4\n" - - -def test_mutate_with_mixed_granularity(start_cluster): - node3.query(""" - CREATE TABLE test (date Date, key UInt64, value1 String, value2 String) - ENGINE = MergeTree - ORDER BY key PARTITION BY date""") - - node3.query( - "INSERT INTO test SELECT toDate('2019-10-01') + number % 5, number, toString(number), toString(number * number) FROM numbers(500)") - - assert node3.query("SELECT COUNT() FROM test") == "500\n" - - node3.restart_with_latest_version() - - assert node3.query("SELECT COUNT() FROM test") == "500\n" - - node3.query("ALTER TABLE test MODIFY SETTING enable_mixed_granularity_parts = 1") - - node3.query( - "INSERT INTO test SELECT toDate('2019-10-01') + number % 5, number, toString(number), toString(number * number) FROM numbers(500, 500)") - - assert node3.query("SELECT COUNT() FROM test") == "1000\n" - assert node3.query("SELECT COUNT() FROM test WHERE key % 100 == 0") == "10\n" - - node3.query("ALTER TABLE test DELETE WHERE key % 100 == 0", settings={"mutations_sync": "2"}) - - assert node3.query("SELECT COUNT() FROM test WHERE key % 100 == 0") == "0\n" diff --git a/tests/integration/test_default_compression_codec/test.py b/tests/integration/test_default_compression_codec/test.py index 116454cc31f..4d111db59ab 100644 --- a/tests/integration/test_default_compression_codec/test.py +++ b/tests/integration/test_default_compression_codec/test.py @@ -9,6 +9,7 @@ cluster = ClickHouseCluster(__file__) node1 = cluster.add_instance('node1', main_configs=['configs/default_compression.xml', 'configs/wide_parts_only.xml'], with_zookeeper=True) node2 = cluster.add_instance('node2', main_configs=['configs/default_compression.xml', 'configs/wide_parts_only.xml'], with_zookeeper=True) node3 = cluster.add_instance('node3', main_configs=['configs/default_compression.xml', 'configs/wide_parts_only.xml'], image='yandex/clickhouse-server', tag='20.3.16', stay_alive=True, with_installed_binary=True) +node4 = cluster.add_instance('node4') @pytest.fixture(scope="module") def start_cluster(): @@ -228,3 +229,28 @@ def test_default_codec_version_update(start_cluster): "SELECT default_compression_codec FROM system.parts WHERE table = 'compression_table' and name = '2_2_2_1'") == "LZ4HC(5)\n" assert node3.query( "SELECT default_compression_codec FROM system.parts WHERE table = 'compression_table' and name = '3_3_3_1'") == "LZ4\n" + +def test_default_codec_for_compact_parts(start_cluster): + node4.query(""" + CREATE TABLE compact_parts_table ( + key UInt64, + data String + ) + ENGINE MergeTree ORDER BY tuple() + """) + + node4.query("INSERT INTO compact_parts_table VALUES (1, 'Hello world')") + assert node4.query("SELECT COUNT() FROM compact_parts_table") == "1\n" + + node4.query("ALTER TABLE compact_parts_table DETACH PART 'all_1_1_0'") + + node4.exec_in_container(["bash", "-c", "rm /var/lib/clickhouse/data/default/compact_parts_table/detached/all_1_1_0/default_compression_codec.txt"]) + + node4.query("ALTER TABLE compact_parts_table ATTACH PART 'all_1_1_0'") + + assert node4.query("SELECT COUNT() FROM compact_parts_table") == "1\n" + + node4.query("DETACH TABLE 
compact_parts_table") + node4.query("ATTACH TABLE compact_parts_table") + + assert node4.query("SELECT COUNT() FROM compact_parts_table") == "1\n" diff --git a/tests/performance/ColumnMap.xml b/tests/performance/ColumnMap.xml new file mode 100644 index 00000000000..96f2026f870 --- /dev/null +++ b/tests/performance/ColumnMap.xml @@ -0,0 +1,36 @@ + + + + 1 + + + + + key_suffix + + '' + '-miss' + + + + + +CREATE TABLE column_map_test +ENGINE = MergeTree ORDER BY number +AS +SELECT number, map +FROM +( + SELECT + number, + arrayMap(x -> toString(x), range(100)) AS keys, + arrayMap(x -> toString(x * x), range(100)) AS values, + cast((keys, values), 'Map(String, String)') AS map + FROM numbers(10000) +) + + + SELECT count() FROM column_map_test WHERE NOT ignore(arrayMap(x -> map[CONCAT(toString(x), {key_suffix})], range(0, 100, 10))) + + DROP TABLE IF EXISTS column_map_test + diff --git a/tests/performance/async_remote_read.xml b/tests/performance/async_remote_read.xml new file mode 100644 index 00000000000..7f0ee6473ab --- /dev/null +++ b/tests/performance/async_remote_read.xml @@ -0,0 +1,11 @@ + + + SELECT sum(x) + FROM + ( + SELECT sipHash64(sipHash64(sipHash64(number))) AS x + FROM remote('127.0.0.{{2,3,4,5}}', numbers(10000000)) + ) + SETTINGS max_threads = 2, max_distributed_connections = 2 + + diff --git a/tests/performance/constant_column_search.xml b/tests/performance/constant_column_search.xml index 94b41fbf907..cb76fd4cefb 100644 --- a/tests/performance/constant_column_search.xml +++ b/tests/performance/constant_column_search.xml @@ -43,7 +43,7 @@ - + diff --git a/tests/performance/countMatches.xml b/tests/performance/countMatches.xml index 72bd2b132c6..96905ba308f 100644 --- a/tests/performance/countMatches.xml +++ b/tests/performance/countMatches.xml @@ -9,14 +9,14 @@ SELECT count() FROM test.hits WHERE NOT ignore(countMatches(URL, 'yandex')) SETTINGS max_threads=2 SELECT count() FROM test.hits WHERE NOT ignore(countMatches(URL, 'yandex|google')) SETTINGS max_threads=2 - SELECT count() FROM test.hits WHERE NOT ignore(countMatches(URL, '(\\w+=\\w+)')) SETTINGS max_threads=2 + SELECT count() FROM test.hits WHERE NOT ignore(countMatches(URL, '(\\w+=\\w+)')) SETTINGS max_threads=4 SELECT count() FROM test.hits WHERE NOT ignore(countMatchesCaseInsensitive(URL, 'yandex')) SETTINGS max_threads=2 SELECT count() FROM test.hits WHERE NOT ignore(countMatchesCaseInsensitive(URL, 'yandex|google')) SETTINGS max_threads=2 - SELECT count() FROM test.hits WHERE NOT ignore(countMatchesCaseInsensitive(URL, '(\\w+=\\w+)')) SETTINGS max_threads=2 + SELECT count() FROM test.hits WHERE NOT ignore(countMatchesCaseInsensitive(URL, '(\\w+=\\w+)')) SETTINGS max_threads=4 SELECT count() FROM test.hits WHERE NOT ignore(countMatches(URL, '(?i)yandex')) SETTINGS max_threads=2 SELECT count() FROM test.hits WHERE NOT ignore(countMatches(URL, '(?i)yandex|google')) SETTINGS max_threads=2 - SELECT count() FROM test.hits WHERE NOT ignore(countMatches(URL, '(?i)(\\w+=\\w+)')) SETTINGS max_threads=2 + SELECT count() FROM test.hits WHERE NOT ignore(countMatches(URL, '(?i)(\\w+=\\w+)')) SETTINGS max_threads=4 diff --git a/tests/performance/general_purpose_hashes.xml b/tests/performance/general_purpose_hashes.xml index e37e1c34496..c54e1c2fb02 100644 --- a/tests/performance/general_purpose_hashes.xml +++ b/tests/performance/general_purpose_hashes.xml @@ -35,14 +35,14 @@ table_slow zeros(1000000) - zeros_mt(10000000) + zeros_mt(5000000) table numbers(100000000) - numbers_mt(1000000000) + numbers_mt(500000000) diff --git 
a/tests/performance/generate_table_function.xml b/tests/performance/generate_table_function.xml index 0628e120966..bc49a7de1bd 100644 --- a/tests/performance/generate_table_function.xml +++ b/tests/performance/generate_table_function.xml @@ -11,7 +11,7 @@ SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Tuple(Int32, Int64)', 0, 10, 10) LIMIT 1000000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Int8)', 0, 10, 10) LIMIT 100000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(Nullable(Int32))', 0, 10, 10) LIMIT 100000000); - SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Tuple(Int32, Array(Int64))', 0, 10, 10) LIMIT 1000000000); + SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Tuple(Int32, Array(Int64))', 0, 10, 10) LIMIT 500000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Nullable(String)', 0, 10, 10) LIMIT 100000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i Array(String)', 0, 10, 10) LIMIT 100000000); SELECT sum(NOT ignore(*)) FROM (SELECT * FROM generateRandom('i UUID', 0, 10, 10) LIMIT 1000000000); diff --git a/tests/performance/push_down_limit.xml b/tests/performance/push_down_limit.xml index 0611216410d..7e51d7d0763 100644 --- a/tests/performance/push_down_limit.xml +++ b/tests/performance/push_down_limit.xml @@ -1,7 +1,8 @@ CREATE VIEW numbers_view AS SELECT number from numbers_mt(100000000) order by number desc - select number from (select number from numbers(1500000000) order by -number) limit 10 + + select number from (select number from numbers(500000000) order by -number) limit 10 select number from (select number from numbers_mt(1500000000) order by -number) limit 10 select number from numbers_view limit 100 diff --git a/tests/performance/string_sort.xml b/tests/performance/string_sort.xml index 82a91892af4..23ec8cf3144 100644 --- a/tests/performance/string_sort.xml +++ b/tests/performance/string_sort.xml @@ -35,7 +35,7 @@ - + diff --git a/tests/performance/url_hits.xml b/tests/performance/url_hits.xml index 072fb5b94e7..f0ad6a786e0 100644 --- a/tests/performance/url_hits.xml +++ b/tests/performance/url_hits.xml @@ -1,13 +1,22 @@ hits_100m_single + hits_10m_single test.hits - func + func_slow + + URLHierarchy + URLPathHierarchy + + + + + func_fast protocol domain @@ -22,8 +31,6 @@ queryStringAndFragment extractURLParameters extractURLParameterNames - URLHierarchy - URLPathHierarchy decodeURLComponent cutWWW cutQueryString @@ -31,7 +38,9 @@ - SELECT count() FROM hits_100m_single WHERE NOT ignore({func}(URL)) + + SELECT count() FROM hits_100m_single WHERE NOT ignore({func_fast}(URL)) + SELECT count() FROM hits_10m_single WHERE NOT ignore({func_slow}(URL)) SELECT count() FROM test.hits WHERE NOT ignore(firstSignificantSubdomain(URL)) SETTINGS max_threads=1 diff --git a/tests/performance/website.xml b/tests/performance/website.xml index 9e7c8cdc536..ae497295ddb 100644 --- a/tests/performance/website.xml +++ b/tests/performance/website.xml @@ -36,7 +36,7 @@ SELECT SearchEngineID, SearchPhrase, count() AS c FROM {table} WHERE SearchPhrase != '' GROUP BY SearchEngineID, SearchPhrase ORDER BY c DESC LIMIT 10 SELECT UserID, count() FROM {table} GROUP BY UserID ORDER BY count() DESC LIMIT 10 SELECT UserID, SearchPhrase, count() FROM {table} GROUP BY UserID, SearchPhrase ORDER BY count() DESC LIMIT 10 -SELECT UserID, SearchPhrase, count() FROM {table} GROUP BY UserID, SearchPhrase LIMIT 10 +SELECT UserID, SearchPhrase, count() FROM 
hits_10m_single GROUP BY UserID, SearchPhrase LIMIT 10 SELECT UserID, toMinute(EventTime) AS m, SearchPhrase, count() FROM hits_10m_single GROUP BY UserID, m, SearchPhrase ORDER BY count() DESC LIMIT 10 SELECT count() FROM hits_100m_single WHERE UserID = 12345678901234567890 SELECT count() FROM hits_100m_single WHERE URL LIKE '%metrika%' diff --git a/tests/queries/0_stateless/00900_parquet_create_table_columns.pl b/tests/queries/0_stateless/00900_parquet_create_table_columns.pl deleted file mode 100755 index baba1f63aee..00000000000 --- a/tests/queries/0_stateless/00900_parquet_create_table_columns.pl +++ /dev/null @@ -1,54 +0,0 @@ -#!/usr/bin/env perl -package parquet_create_table_columns; -use strict; -no warnings 'experimental'; -use feature 'signatures'; -use JSON::XS; -#use Data::Dumper; - -sub file_read($file) { - open my $f, '<', $file or return; - local $/ = undef; - my $ret = <$f>; - close $f; - return $ret; -} - -our $type_parquet_logical_to_clickhouse = { - DECIMAL => 'Decimal128(1)', - TIMESTAMP_MICROS => 'DateTime', - TIMESTAMP_MILLIS => 'DateTime', -}; -our $type_parquet_physical_to_clickhouse = { - BOOLEAN => 'UInt8', - INT32 => 'Int32', - INT64 => 'Int64', - FLOAT => 'Float32', - DOUBLE => 'Float64', - BYTE_ARRAY => 'String', - FIXED_LEN_BYTE_ARRAY => 'String', # Maybe FixedString? - INT96 => 'Int64', # TODO! -}; - -sub columns ($json) { - my @list; - my %uniq; - for my $column (@{$json->{Columns}}) { - #warn Data::Dumper::Dumper $column; - my $name = $column->{'Name'}; - my $type = $type_parquet_logical_to_clickhouse->{$column->{'LogicalType'}} || $type_parquet_physical_to_clickhouse->{$column->{'PhysicalType'}}; - unless ($type) { - warn "Unknown type [$column->{'PhysicalType'}:$column->{'LogicalType'}] of column [$name]"; - } - $type = "Nullable($type)"; - $name .= $column->{'Id'} if $uniq{$name}++; # Names can be non-unique - push @list, {name => $name, type => $type}; - } - print join ', ', map {"`$_->{name}` $_->{type}"} @list; -} - -sub columns_file ($file) { - return columns(JSON::XS::decode_json(file_read($file))); -} - -columns_file(shift) unless caller; diff --git a/tests/queries/0_stateless/00900_parquet_load.reference b/tests/queries/0_stateless/00900_parquet_load.reference index 6cd2b1cf462..1c890119486 100644 --- a/tests/queries/0_stateless/00900_parquet_load.reference +++ b/tests/queries/0_stateless/00900_parquet_load.reference @@ -13,134 +13,220 @@ === Try load data from alltypes_plain.snappy.parquet 6 1 0 0 0 0 0 0 04/01/09 0 1238544000 7 0 1 1 1 10 1.1 10.1 04/01/09 1 1238544060 +=== Try load data from binary.parquet +\0 + + + + + + + +\b +\t +\n + === Try load data from byte_array_decimal.parquet -1.0 -2.0 -3.0 -4.0 -5.0 -6.0 -7.0 -8.0 -9.0 -10.0 -11.0 -12.0 -13.0 -14.0 -15.0 -16.0 -17.0 -18.0 -19.0 -20.0 -21.0 -22.0 -23.0 -24.0 +1.00 +2.00 +3.00 +4.00 +5.00 +6.00 +7.00 +8.00 +9.00 +10.00 +11.00 +12.00 +13.00 +14.00 +15.00 +16.00 +17.00 +18.00 +19.00 +20.00 +21.00 +22.00 +23.00 +24.00 === Try load data from datapage_v2.snappy.parquet -Code: 33. DB::Ex---tion: Error while reading Parquet data: IOError: Not yet implemented: Unsupported encoding.: data for INSERT was parsed from stdin +Code: 33. 
DB::ParsingEx---tion: Error while reading Parquet data: IOError: Not yet implemented: Unsupported encoding.: data for INSERT was parsed from stdin +=== Try load data from dict-page-offset-zero.parquet +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +1552 +=== Try load data from fixed_length_decimal.parquet +1.00 +2.00 +3.00 +4.00 +5.00 +6.00 +7.00 +8.00 +9.00 +10.00 +11.00 +12.00 +13.00 +14.00 +15.00 +16.00 +17.00 +18.00 +19.00 +20.00 +21.00 +22.00 +23.00 +24.00 === Try load data from fixed_length_decimal_1.parquet -1.0 -2.0 -3.0 -4.0 -5.0 -6.0 -7.0 -8.0 -9.0 -10.0 -11.0 -12.0 -13.0 -14.0 -15.0 -16.0 -17.0 -18.0 -19.0 -20.0 -21.0 -22.0 -23.0 -24.0 +1.00 +2.00 +3.00 +4.00 +5.00 +6.00 +7.00 +8.00 +9.00 +10.00 +11.00 +12.00 +13.00 +14.00 +15.00 +16.00 +17.00 +18.00 +19.00 +20.00 +21.00 +22.00 +23.00 +24.00 === Try load data from fixed_length_decimal_legacy.parquet -1.0 -2.0 -3.0 -4.0 -5.0 -6.0 -7.0 -8.0 -9.0 -10.0 -11.0 -12.0 -13.0 -14.0 -15.0 -16.0 -17.0 -18.0 -19.0 -20.0 -21.0 -22.0 -23.0 -24.0 +1.00 +2.00 +3.00 +4.00 +5.00 +6.00 +7.00 +8.00 +9.00 +10.00 +11.00 +12.00 +13.00 +14.00 +15.00 +16.00 +17.00 +18.00 +19.00 +20.00 +21.00 +22.00 +23.00 +24.00 +=== Try load data from hadoop_lz4_compressed.parquet +1593604800 abc 42 +1593604800 def 7.7 +1593604801 abc 42.125 +1593604801 def 7.7 === Try load data from int32_decimal.parquet -1.0 -2.0 -3.0 -4.0 -5.0 -6.0 -7.0 -8.0 -9.0 -10.0 -11.0 -12.0 -13.0 -14.0 -15.0 -16.0 -17.0 -18.0 -19.0 -20.0 -21.0 -22.0 -23.0 -24.0 +1.00 +2.00 +3.00 +4.00 +5.00 +6.00 +7.00 +8.00 +9.00 +10.00 +11.00 +12.00 +13.00 +14.00 +15.00 +16.00 +17.00 +18.00 +19.00 +20.00 +21.00 +22.00 +23.00 +24.00 === Try load data from int64_decimal.parquet -1.0 -2.0 -3.0 -4.0 -5.0 -6.0 -7.0 -8.0 -9.0 -10.0 -11.0 -12.0 -13.0 -14.0 -15.0 -16.0 -17.0 -18.0 -19.0 -20.0 -21.0 -22.0 -23.0 -24.0 +1.00 +2.00 +3.00 +4.00 +5.00 +6.00 +7.00 +8.00 +9.00 +10.00 +11.00 +12.00 +13.00 +14.00 +15.00 +16.00 +17.00 +18.00 +19.00 +20.00 +21.00 +22.00 +23.00 +24.00 +=== Try load data from list_columns.parquet +Code: 70. DB::Ex---tion: The type "list" of an input column "int64_list" is not supported for conversion from a Parquet data format: data for INSERT was parsed from stdin + === Try load data from nation.dict-malformed.parquet 0 ALGERIA 0 haggle. carefully final deposits detect slyly agai 1 ARGENTINA 1 al foxes promise slyly according to the regular accounts. bold requests alon @@ -168,23 +254,25 @@ Code: 33. DB::Ex---tion: Error while reading Parquet data: IOError: Not yet impl 23 UNITED KINGDOM 3 eans boost carefully special requests. accounts are. carefull 24 UNITED STATES 1 y final packages. slow foxes cajole quickly. quickly silent platelets breach ironic accounts. unusual pinto be === Try load data from nested_lists.snappy.parquet -Code: 8. DB::Ex---tion: Column "element" is not presented in input data: data for INSERT was parsed from stdin +Code: 70. DB::Ex---tion: The type "list" of an input column "a" is not supported for conversion from a Parquet data format: data for INSERT was parsed from stdin === Try load data from nested_maps.snappy.parquet -Code: 33. DB::Ex---tion: Error while reading Parquet data: NotImplemented: Reading lists of structs from Parquet files not yet supported: key_value: list not null> not null>> not null> not null: data for INSERT was parsed from stdin +Code: 70. 
DB::Ex---tion: The type "map" of an input column "a" is not supported for conversion from a Parquet data format: data for INSERT was parsed from stdin +=== Try load data from non_hadoop_lz4_compressed.parquet +1593604800 abc 42 +1593604800 def 7.7 +1593604801 abc 42.125 +1593604801 def 7.7 === Try load data from nonnullable.impala.parquet -Code: 8. DB::Ex---tion: Column "element" is not presented in input data: data for INSERT was parsed from stdin - +../contrib/arrow/cpp/src/arrow/array/array_nested.cc:192: Check failed: (self->list_type_->value_type()->id()) == (data->child_data[0]->type->id()) === Try load data from nullable.impala.parquet -Code: 8. DB::Ex---tion: Column "element" is not presented in input data: data for INSERT was parsed from stdin - +../contrib/arrow/cpp/src/arrow/array/array_nested.cc:192: Check failed: (self->list_type_->value_type()->id()) == (data->child_data[0]->type->id()) === Try load data from nulls.snappy.parquet -Code: 8. DB::Ex---tion: Column "b_c_int" is not presented in input data: data for INSERT was parsed from stdin - -=== Try load data from repeated_no_annotation.parquet -Code: 8. DB::Ex---tion: Column "number" is not presented in input data: data for INSERT was parsed from stdin +Code: 70. DB::Ex---tion: The type "struct" of an input column "b_struct" is not supported for conversion from a Parquet data format: data for INSERT was parsed from stdin +=== Try load data from single_nan.parquet +\N === Try load data from userdata1.parquet 1454486129 1 Amanda Jordan ajordan0@com.com Female 1.197.201.2 6759521864920116 Indonesia 3/8/1971 49756.53 Internal Auditor 1E+02 1454519043 2 Albert Freeman afreeman1@is.gd Male 218.111.175.34 Canada 1/16/1968 150280.17 Accountant IV diff --git a/tests/queries/0_stateless/00900_parquet_load.sh b/tests/queries/0_stateless/00900_parquet_load.sh index 346fa4f915c..43b738aab83 100755 --- a/tests/queries/0_stateless/00900_parquet_load.sh +++ b/tests/queries/0_stateless/00900_parquet_load.sh @@ -5,8 +5,6 @@ # TODO: Add more files. # -# To regenerate data install perl JSON::XS module: sudo apt install libjson-xs-perl - # Also 5 sample files from # wget https://github.com/Teradata/kylo/raw/master/samples/sample-data/parquet/userdata1.parquet # ... @@ -19,38 +17,46 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) . "$CUR_DIR"/../shell_config.sh CB_DIR=$(dirname "$CLICKHOUSE_CLIENT_BINARY") -[ "$CB_DIR" == "." ] && ROOT_DIR=$CUR_DIR/../../../.. -[ "$CB_DIR" != "." ] && BUILD_DIR=$CB_DIR/../.. -[ -z "$ROOT_DIR" ] && ROOT_DIR=$CB_DIR/../../.. +[ "$CB_DIR" == "." ] && ROOT_DIR=$CUR_DIR/../../.. +[ -z "$ROOT_DIR" ] && ROOT_DIR=$CB_DIR/../.. DATA_DIR=$CUR_DIR/data_parquet +[ -n "$ROOT_DIR" ] && [ -z "$PARQUET_READER" ] && PARQUET_READER="$ROOT_DIR"/contrib/arrow/cpp/build/release/parquet-reader + # To update: # cp $ROOT_DIR/contrib/arrow/cpp/submodules/parquet-testing/data/*.parquet $ROOT_DIR/contrib/arrow/python/pyarrow/tests/data/parquet/*.parquet $CUR_DIR/data_parquet/ -# BUG! nulls.snappy.parquet - parquet-reader shows wrong structure. Actual structure is {"type":"struct","fields":[{"name":"b_struct","type":{"type":"struct","fields":[{"name":"b_c_int","type":"integer","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]} -# why? repeated_no_annotation.parquet +# ClickHouse Parquet reader doesn't support such complex types, so I didn't burrow into the issue. 
+# There is a failure due to parsing nested arrays or nested maps with NULLs: +# ../contrib/arrow/cpp/src/arrow/array/array_nested.cc:192: Check failed: (self->list_type_->value_type()->id()) == (data->child_data[0]->type->id()) -for NAME in $(find "$DATA_DIR"/*.parquet -print0 | xargs -0 -n 1 basename | sort); do +# Strange behaviour for repeated_no_annotation.parquet around __builtin_expect, so this file was disabled: +# debug: +# ../contrib/arrow/cpp/src/arrow/array/array_nested.cc:193: Check failed: self->list_type_->value_type()->Equals(data->child_data[0]->type) +# release: +# Code: 349. DB::Ex---tion: Can not insert NULL data into non-nullable column "phoneNumbers": data for INSERT was parsed from stdin + +for NAME in $(find "$DATA_DIR"/*.parquet -print0 | xargs -0 -n 1 basename | LC_ALL=C sort); do echo === Try load data from "$NAME" JSON=$DATA_DIR/$NAME.json COLUMNS_FILE=$DATA_DIR/$NAME.columns # If you want change or add .parquet file - rm data_parquet/*.json data_parquet/*.columns - [ -n "$BUILD_DIR" ] && [ ! -s "$COLUMNS_FILE" ] && [ ! -s "$JSON" ] && "$BUILD_DIR"/contrib/arrow-cmake/parquet-reader --json "$DATA_DIR"/"$NAME" > "$JSON" - [ -n "$BUILD_DIR" ] && [ ! -s "$COLUMNS_FILE" ] && "$CUR_DIR"/00900_parquet_create_table_columns.pl "$JSON" > "$COLUMNS_FILE" + [ -n "$PARQUET_READER" ] && [ ! -s "$COLUMNS_FILE" ] && [ ! -s "$JSON" ] && "$PARQUET_READER" --json "$DATA_DIR"/"$NAME" > "$JSON" + [ ! -s "$COLUMNS_FILE" ] && "$CUR_DIR"/helpers/00900_parquet_create_table_columns.py "$JSON" > "$COLUMNS_FILE" # Debug only: - # [ -n "$BUILD_DIR" ] && $BUILD_DIR/contrib/arrow-cmake/parquet-reader $DATA_DIR/$NAME > $DATA_DIR/$NAME.dump + # [ -n "$PARQUET_READER" ] && $PARQUET_READER $DATA_DIR/$NAME > $DATA_DIR/$NAME.dump - #COLUMNS=`$CUR_DIR/00900_parquet_create_table_columns.pl $JSON` 2>&1 || continue + # COLUMNS=`$CUR_DIR/00900_parquet_create_table_columns.py $JSON` 2>&1 || continue COLUMNS=$(cat "$COLUMNS_FILE") || continue ${CLICKHOUSE_CLIENT} --query="DROP TABLE IF EXISTS parquet_load" ${CLICKHOUSE_CLIENT} --query="CREATE TABLE parquet_load ($COLUMNS) ENGINE = Memory" - # Some files is broken, exception is ok. + # Some files contain unsupported data structures, exception is ok. 
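The removed Perl helper earlier in this patch mapped the JSON emitted by parquet-reader --json to ClickHouse column types, and the test now calls a Python replacement (helpers/00900_parquet_create_table_columns.py) that is referenced but not shown here. A standalone C++ sketch of that mapping, reusing the type tables from the removed Perl script (the column list in main is made up for illustration):

// Standalone sketch: map Parquet logical/physical types to Nullable ClickHouse types.
#include <iostream>
#include <map>
#include <string>
#include <utility>
#include <vector>

/// Type tables taken from the removed Perl helper.
static const std::map<std::string, std::string> logical_to_clickhouse = {
    {"DECIMAL", "Decimal128(1)"}, {"TIMESTAMP_MICROS", "DateTime"}, {"TIMESTAMP_MILLIS", "DateTime"}};
static const std::map<std::string, std::string> physical_to_clickhouse = {
    {"BOOLEAN", "UInt8"}, {"INT32", "Int32"}, {"INT64", "Int64"}, {"FLOAT", "Float32"},
    {"DOUBLE", "Float64"}, {"BYTE_ARRAY", "String"}, {"FIXED_LEN_BYTE_ARRAY", "String"}, {"INT96", "Int64"}};

/// The logical type wins over the physical type; everything is wrapped in Nullable, as in the helper.
std::string toClickHouseType(const std::string & physical, const std::string & logical)
{
    std::string type;
    if (auto it = logical_to_clickhouse.find(logical); it != logical_to_clickhouse.end())
        type = it->second;
    else if (auto pit = physical_to_clickhouse.find(physical); pit != physical_to_clickhouse.end())
        type = pit->second;
    else
        type = "String";   /// Fallback for this sketch; the real helper warns about unknown types.
    return "Nullable(" + type + ")";
}

int main()
{
    /// Hypothetical columns, as parquet-reader --json would describe them.
    std::vector<std::pair<std::string, std::pair<std::string, std::string>>> columns = {
        {"id", {"INT32", "NONE"}}, {"price", {"FIXED_LEN_BYTE_ARRAY", "DECIMAL"}}, {"name", {"BYTE_ARRAY", "UTF8"}}};

    std::string ddl;
    for (const auto & [name, types] : columns)
    {
        if (!ddl.empty())
            ddl += ", ";
        ddl += "`" + name + "` " + toClickHouseType(types.first, types.second);
    }
    std::cout << ddl << '\n';   /// e.g. `id` Nullable(Int32), `price` Nullable(Decimal128(1)), `name` Nullable(String)
}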
cat "$DATA_DIR"/"$NAME" | ${CLICKHOUSE_CLIENT} --query="INSERT INTO parquet_load FORMAT Parquet" 2>&1 | sed 's/Exception/Ex---tion/' ${CLICKHOUSE_CLIENT} --query="SELECT * FROM parquet_load LIMIT 100" diff --git a/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.reference b/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.reference index 797f208c02b..7a08495654c 100644 --- a/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.reference +++ b/tests/queries/0_stateless/00933_test_fix_extra_seek_on_compressed_cache.reference @@ -1 +1 @@ -0 36 14 +0 36 13 diff --git a/tests/queries/0_stateless/00955_test_final_mark.sql b/tests/queries/0_stateless/00955_test_final_mark.sql index 50ca3d008f9..e020f10b71a 100644 --- a/tests/queries/0_stateless/00955_test_final_mark.sql +++ b/tests/queries/0_stateless/00955_test_final_mark.sql @@ -18,7 +18,7 @@ INSERT INTO mt_with_pk (d, x, y, z, `n.Age`, `n.Name`) VALUES (toDate('2018-10-0 SELECT COUNT(*) FROM mt_with_pk WHERE x > toDateTime('2018-10-01 23:57:57'); -SELECT sum(marks) FROM system.parts WHERE table = 'mt_with_pk' AND database = currentDatabase() AND active=1; +SELECT sum(marks) FROM system.parts WHERE table = 'mt_with_pk' AND database = currentDatabase() AND active=1 AND database = currentDatabase(); SELECT '===test merge==='; INSERT INTO mt_with_pk (d, x, y, z, `n.Age`, `n.Name`) VALUES (toDate('2018-10-01'), toDateTime('2018-10-01 07:57:57'), [4, 4, 4], 14, [111, 222], ['Lui', 'Dave']), (toDate('2018-10-01'), toDateTime('2018-10-01 08:57:57'), [5, 5, 5], 15, [333, 444], ['John', 'Mike']), (toDate('2018-10-01'), toDateTime('2018-10-01 09:57:57'), [6, 6, 6], 16, [555, 666, 777], ['Alex', 'Jim', 'Tom']); @@ -27,7 +27,7 @@ OPTIMIZE TABLE mt_with_pk FINAL; SELECT COUNT(*) FROM mt_with_pk WHERE x > toDateTime('2018-10-01 23:57:57'); -SELECT sum(marks) FROM system.parts WHERE table = 'mt_with_pk' AND database = currentDatabase() AND active=1; +SELECT sum(marks) FROM system.parts WHERE table = 'mt_with_pk' AND database = currentDatabase() AND active=1 AND database = currentDatabase(); SELECT '===test alter==='; ALTER TABLE mt_with_pk MODIFY COLUMN y Array(String); @@ -38,7 +38,7 @@ OPTIMIZE TABLE mt_with_pk FINAL; SELECT COUNT(*) FROM mt_with_pk WHERE x > toDateTime('2018-10-01 23:57:57'); -SELECT sum(marks) FROM system.parts WHERE table = 'mt_with_pk' AND database = currentDatabase() AND active=1; +SELECT sum(marks) FROM system.parts WHERE table = 'mt_with_pk' AND database = currentDatabase() AND active=1 AND database = currentDatabase(); SELECT '===test mutation==='; ALTER TABLE mt_with_pk UPDATE w = 0 WHERE 1 SETTINGS mutations_sync = 2; @@ -58,7 +58,7 @@ OPTIMIZE TABLE mt_with_pk FINAL; SELECT COUNT(*) FROM mt_with_pk WHERE z + w > 5000; -SELECT sum(marks) FROM system.parts WHERE table = 'mt_with_pk' AND database = currentDatabase() AND active=1; +SELECT sum(marks) FROM system.parts WHERE table = 'mt_with_pk' AND database = currentDatabase() AND active=1 AND database = currentDatabase(); DROP TABLE IF EXISTS mt_with_pk; @@ -119,7 +119,7 @@ INSERT INTO mt_without_pk (d, x, y, z, `n.Age`, `n.Name`) VALUES (toDate('2018-1 SELECT COUNT(*) FROM mt_without_pk WHERE x > toDateTime('2018-10-01 23:57:57'); -SELECT sum(marks) FROM system.parts WHERE table = 'mt_without_pk' AND active=1; +SELECT sum(marks) FROM system.parts WHERE table = 'mt_without_pk' AND active=1 AND database = currentDatabase(); INSERT INTO mt_without_pk (d, x, y, z, `n.Age`, `n.Name`) VALUES (toDate('2018-10-01'), 
toDateTime('2018-10-01 07:57:57'), [4, 4, 4], 14, [111, 222], ['Lui', 'Dave']), (toDate('2018-10-01'), toDateTime('2018-10-01 08:57:57'), [5, 5, 5], 15, [333, 444], ['John', 'Mike']), (toDate('2018-10-01'), toDateTime('2018-10-01 09:57:57'), [6, 6, 6], 16, [555, 666, 777], ['Alex', 'Jim', 'Tom']); @@ -127,7 +127,7 @@ OPTIMIZE TABLE mt_without_pk FINAL; SELECT COUNT(*) FROM mt_without_pk WHERE x > toDateTime('2018-10-01 23:57:57'); -SELECT sum(marks) FROM system.parts WHERE table = 'mt_without_pk' AND active=1; +SELECT sum(marks) FROM system.parts WHERE table = 'mt_without_pk' AND active=1 AND database = currentDatabase(); DROP TABLE IF EXISTS mt_without_pk; @@ -149,7 +149,7 @@ INSERT INTO mt_with_small_granularity (d, x, y, z, `n.Age`, `n.Name`) VALUES (to SELECT COUNT(*) FROM mt_with_small_granularity WHERE x > toDateTime('2018-10-01 23:57:57'); -SELECT sum(marks) FROM system.parts WHERE table = 'mt_with_small_granularity' AND active=1; +SELECT sum(marks) FROM system.parts WHERE table = 'mt_with_small_granularity' AND active=1 AND database = currentDatabase(); INSERT INTO mt_with_small_granularity (d, x, y, z, `n.Age`, `n.Name`) VALUES (toDate('2018-10-01'), toDateTime('2018-10-01 07:57:57'), [4, 4, 4], 14, [111, 222], ['Lui', 'Dave']), (toDate('2018-10-01'), toDateTime('2018-10-01 08:57:57'), [5, 5, 5], 15, [333, 444], ['John', 'Mike']), (toDate('2018-10-01'), toDateTime('2018-10-01 09:57:57'), [6, 6, 6], 16, [555, 666, 777], ['Alex', 'Jim', 'Tom']); @@ -157,6 +157,6 @@ OPTIMIZE TABLE mt_with_small_granularity FINAL; SELECT COUNT(*) FROM mt_with_small_granularity WHERE x > toDateTime('2018-10-01 23:57:57'); -SELECT sum(marks) FROM system.parts WHERE table = 'mt_with_small_granularity' AND active=1; +SELECT sum(marks) FROM system.parts WHERE table = 'mt_with_small_granularity' AND active=1 AND database = currentDatabase(); DROP TABLE IF EXISTS mt_with_small_granularity; diff --git a/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.reference b/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.reference index 099fe566817..186f2feab79 100644 --- a/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.reference +++ b/tests/queries/0_stateless/00961_checksums_in_system_parts_columns_table.reference @@ -1 +1 @@ -20000101_1_1_0 test_00961 5f2e2d4bbc14336f44037e3ac667f247 ed226557cd4e18ecf3ae06c6d5e6725c da96ff1e527a8a1f908ddf2b1d0af239 +20000101_1_1_0 test_00961 b5fce9c4ef1ca42ce4ed027389c208d2 fc3b062b646cd23d4c23d7f5920f89ae da96ff1e527a8a1f908ddf2b1d0af239 diff --git a/tests/queries/0_stateless/01016_simhash_minhash.reference b/tests/queries/0_stateless/01016_simhash_minhash.reference new file mode 100644 index 00000000000..edd5afc1af7 --- /dev/null +++ b/tests/queries/0_stateless/01016_simhash_minhash.reference @@ -0,0 +1,144 @@ +0 +2718169299 +2718169299 +1315333491 +1099965843 +5746351769509927967 +5746351769509927967 +8347269581771603092 +6041373934059725027 +(17178276249054052155,8864230932371215121) +(14133097226001036899,7985237721476952807) +(14133097226001036899,7985237721476952807) +(4661257206578284012,15229878657590021759) +(3087743741749030713,11631667950302077749) +(11923981719512934676,1193672187225825732) +(11923981719512934676,1193672187225825732) +(17970606678134635272,3825545538448404526) +(9422952829151664974,568010773615758889) +2548869326 +2548869326 +401385678 +401385710 +2652202579 +2652235347 +2984455347 +2984488115 +12804820948382413807 +12804820948919350245 +11651601468065149391 +11651600368014488527 
+18377198011227067677 +18233505035951822655 +5501050600367972694 +5501050600367972692 +(8590465925632898311,12699049311112305995) +(8590465925632898311,15828587343885202011) +(8590465925632898311,15824051019631343049) +(8590465925632898311,12699049311222825283) +(217966158370437743,14452995911556652133) +(217966158370437743,14452995911556652133) +(2170210914777151141,5341809779339553313) +(12469866236432988845,5341809779339553313) +(12271157076799061825,5514511977572226426) +(11639913962681153226,2767634094725305612) +(12271157075024394466,17994666970078080114) +(12271157077109587702,13572452308677868240) +(6252006845407214340,13538761942960976531) +(13795977174459370328,6392395597500134035) +(16118993428517222971,13602445809406467) +(16118993428517222971,13602445809406467) +uniqExact 6 +ngramSimhash +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 938403918 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 904817231 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 904849486 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 938469966 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 938404430 +ngramSimhashCaseInsensitive +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 938453071 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 938453599 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 938404430 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). 
In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 636382047 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 938388046 +ngramSimhashUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2400625214 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2669060670 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 2671174174 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 2669060798 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 2635506238 +ngramSimhashCaseInsensitiveUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2984307934 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 2967514366 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2715855070 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2967529694 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 2984290526 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2984306910 +wordShingleSimhash +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2384813566025024242 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 2393820766427040734 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 2421405261516400471 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2384883934767174398 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. 
All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2384813567165864670 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2384813567098766070 +wordShingleSimhashCaseInsensitive +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 11635224793909957342 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 11617192803208139478 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 11617192803208151794 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 11617192803208151766 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3006891407629799254 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. 
It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 11617263171950236406 +wordShingleSimhashUTF8 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 9097818277104946605 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 9084246141658271116 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 9084247241171471628 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 9088752215857929613 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 9093255814816009484 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 9084247481822285196 +wordShingleSimhashCaseInsensitiveUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 14788772559981154978 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 14497164445320454820 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 14500537785782895266 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 14787646625647636642 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 14500016612976573090 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 14787956717160870888 +ngramMinhash +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (15568933215262012353,16287411738807860353) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 
1 (9473589826959436958,14264235017873782379) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 (261441656340606110,13387826928927239258) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. 
It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 (261441656340606110,3305790294064680121) +ngramMinhashCaseInsensitive +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (15568933215262012353,16287411738807860353) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (9473589826959436958,14264235017873782379) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 (3051755284325985438,3305790294064680121) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 (3051755284325985438,13387826928927239258) +ngramMinhashUTF8 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 4 (309830857064065611,7476109060377919216) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. 
Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (309830856946430871,7521913981442105351) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (309830857559697399,7476109060377919216) +ngramMinhashCaseInsensitiveUTF8 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (13010809262502929096,2266175201446733829) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 5 (16827851229372179144,976408052548769549) +wordShingleMinhash +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 (14343822344862533053,11776483993821900250) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (18417749332128868312,11776483993821900250) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (18417749329907528200,14156831980621923226) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (4600092690178227586,11776483993821900250) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (12998011837685887081,1565093152297016105) +wordShingleMinhashCaseInsensitive +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. 
Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (12998011837880940480,1565093152297016105) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (1100751419997894255,15225006848401474458) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 (1100751419777226283,12993805708561478711) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (1260401089202135898,12993805709529540523) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (1638964264353944555,12993805708561478711) +wordShingleMinhashUTF8 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (742280067319112377,14237963017046410351) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. 
Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (7237654052534217600,14400297883226437452) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (742280067319112377,17574811665615962276) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (3458625375707825328,17574811665615962276) +wordShingleMinhashCaseInsensitiveUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (7032848390598450936,5104668712725998486) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (15582670464629505464,13034678298246801511) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. 
It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (9935434838523508980,7648038926638343017) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 (7032848390598450936,16870743692447971238) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 
1 (7302041809563941951,6856814412450461959) diff --git a/tests/queries/0_stateless/01016_simhash_minhash.sql b/tests/queries/0_stateless/01016_simhash_minhash.sql new file mode 100644 index 00000000000..61b9ac14259 --- /dev/null +++ b/tests/queries/0_stateless/01016_simhash_minhash.sql @@ -0,0 +1,111 @@ +SELECT ngramSimhash(''); +SELECT ngramSimhash('what a cute cat.'); +SELECT ngramSimhashCaseInsensitive('what a cute cat.'); +SELECT ngramSimhashUTF8('what a cute cat.'); +SELECT ngramSimhashCaseInsensitiveUTF8('what a cute cat.'); +SELECT wordShingleSimhash('what a cute cat.'); +SELECT wordShingleSimhashCaseInsensitive('what a cute cat.'); +SELECT wordShingleSimhashUTF8('what a cute cat.'); +SELECT wordShingleSimhashCaseInsensitiveUTF8('what a cute cat.'); + +SELECT ngramMinhash(''); +SELECT ngramMinhash('what a cute cat.'); +SELECT ngramMinhashCaseInsensitive('what a cute cat.'); +SELECT ngramMinhashUTF8('what a cute cat.'); +SELECT ngramMinhashCaseInsensitiveUTF8('what a cute cat.'); +SELECT wordShingleMinhash('what a cute cat.'); +SELECT wordShingleMinhashCaseInsensitive('what a cute cat.'); +SELECT wordShingleMinhashUTF8('what a cute cat.'); +SELECT wordShingleMinhashCaseInsensitiveUTF8('what a cute cat.'); + +DROP TABLE IF EXISTS defaults; +CREATE TABLE defaults +( + s String +)ENGINE = Memory(); + +INSERT INTO defaults values ('It is the latest occurrence of the Southeast European haze, the issue that occurs in constant intensity during every wet season. It has mainly been caused by forest fires resulting from illegal slash-and-burn clearing performed on behalf of the palm oil industry in Kazakhstan, principally on the islands, which then spread quickly in the dry season.') ('It is the latest occurrence of the Southeast Asian haze, the issue that occurs in constant intensity during every wet season. It has mainly been caused by forest fires resulting from illegal slash-and-burn clearing performed on behalf of the palm oil industry in Kazakhstan, principally on the islands, which then spread quickly in the dry season.'); + +SELECT ngramSimhash(s) FROM defaults; +SELECT ngramSimhashCaseInsensitive(s) FROM defaults; +SELECT ngramSimhashUTF8(s) FROM defaults; +SELECT ngramSimhashCaseInsensitiveUTF8(s) FROM defaults; +SELECT wordShingleSimhash(s) FROM defaults; +SELECT wordShingleSimhashCaseInsensitive(s) FROM defaults; +SELECT wordShingleSimhashUTF8(s) FROM defaults; +SELECT wordShingleSimhashCaseInsensitiveUTF8(s) FROM defaults; + +SELECT ngramMinhash(s) FROM defaults; +SELECT ngramMinhashCaseInsensitive(s) FROM defaults; +SELECT ngramMinhashUTF8(s) FROM defaults; +SELECT ngramMinhashCaseInsensitiveUTF8(s) FROM defaults; +SELECT wordShingleMinhash(s) FROM defaults; +SELECT wordShingleMinhashCaseInsensitive(s) FROM defaults; +SELECT wordShingleMinhashUTF8(s) FROM defaults; +SELECT wordShingleMinhashCaseInsensitiveUTF8(s) FROM defaults; + +TRUNCATE TABLE defaults; +INSERT INTO defaults SELECT arrayJoin(splitByString('\n\n', +'ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency. +ClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. 
Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes. +ClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. + +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. +ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system''s read and write availability. +ClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. + +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. +ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system''s read / write availability. +ClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. + +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. +ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system. +ClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. + +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. +ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system. +ClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. + +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. +ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system. +ClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.' +)); + +SELECT 'uniqExact', uniqExact(s) FROM defaults; + + +SELECT 'ngramSimhash'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimhash(s) as h FROM defaults GROUP BY h; +SELECT 'ngramSimhashCaseInsensitive'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimhashCaseInsensitive(s) as h FROM defaults GROUP BY h; +SELECT 'ngramSimhashUTF8'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimhashUTF8(s) as h FROM defaults GROUP BY h; +SELECT 'ngramSimhashCaseInsensitiveUTF8'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimhashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h; +SELECT 'wordShingleSimhash'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimhash(s) as h FROM defaults GROUP BY h; +SELECT 'wordShingleSimhashCaseInsensitive'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimhashCaseInsensitive(s) as h FROM defaults GROUP BY h; +SELECT 'wordShingleSimhashUTF8'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimhashUTF8(s) as h FROM defaults GROUP BY h; +SELECT 'wordShingleSimhashCaseInsensitiveUTF8'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimhashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h; + +SELECT 'ngramMinhash'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinhash(s) as h FROM defaults GROUP BY h; +SELECT 'ngramMinhashCaseInsensitive'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinhashCaseInsensitive(s) as h FROM defaults GROUP BY h; +SELECT 'ngramMinhashUTF8'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinhashUTF8(s) as h FROM defaults GROUP BY h; +SELECT 'ngramMinhashCaseInsensitiveUTF8'; +SELECT arrayStringConcat(groupArray(s), 
'\n:::::::\n'), count(), ngramMinhashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h; +SELECT 'wordShingleMinhash'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinhash(s) as h FROM defaults GROUP BY h; +SELECT 'wordShingleMinhashCaseInsensitive'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinhashCaseInsensitive(s) as h FROM defaults GROUP BY h; +SELECT 'wordShingleMinhashUTF8'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinhashUTF8(s) as h FROM defaults GROUP BY h; +SELECT 'wordShingleMinhashCaseInsensitiveUTF8'; +SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinhashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h; + +DROP TABLE defaults; diff --git a/tests/queries/0_stateless/01017_bithamming_distance.reference b/tests/queries/0_stateless/01017_bithamming_distance.reference new file mode 100644 index 00000000000..cc2d4f39154 --- /dev/null +++ b/tests/queries/0_stateless/01017_bithamming_distance.reference @@ -0,0 +1,15 @@ +1 +7 +63 +2 +1 +3 +5 +4 +6 +6 +6 +3 +5 +9 +9 diff --git a/tests/queries/0_stateless/01017_bithamming_distance.sql b/tests/queries/0_stateless/01017_bithamming_distance.sql new file mode 100644 index 00000000000..4b36894b97c --- /dev/null +++ b/tests/queries/0_stateless/01017_bithamming_distance.sql @@ -0,0 +1,20 @@ +SELECT bitHammingDistance(1, 5); +SELECT bitHammingDistance(100, 100000); +SELECT bitHammingDistance(-1, 1); + +DROP TABLE IF EXISTS defaults; +CREATE TABLE defaults +( + n1 UInt8, + n2 UInt16, + n3 UInt32, + n4 UInt64 +)ENGINE = Memory(); + +INSERT INTO defaults VALUES (1, 2, 3, 4) (12, 4345, 435, 1233) (45, 675, 32343, 54566) (90, 784, 9034, 778752); + +SELECT bitHammingDistance(4, n1) FROM defaults; +SELECT bitHammingDistance(n2, 100) FROM defaults; +SELECT bitHammingDistance(n3, n4) FROM defaults; + +DROP TABLE defaults; diff --git a/tests/queries/0_stateless/01017_tuplehamming_distance.reference b/tests/queries/0_stateless/01017_tuplehamming_distance.reference new file mode 100644 index 00000000000..017ffb0cd33 --- /dev/null +++ b/tests/queries/0_stateless/01017_tuplehamming_distance.reference @@ -0,0 +1,15 @@ +2 +1 +1 +0 +2 +2 +2 +2 +1 +2 +2 +2 +0 +2 +2 diff --git a/tests/queries/0_stateless/01017_tuplehamming_distance.sql b/tests/queries/0_stateless/01017_tuplehamming_distance.sql new file mode 100644 index 00000000000..d0ed1cee096 --- /dev/null +++ b/tests/queries/0_stateless/01017_tuplehamming_distance.sql @@ -0,0 +1,19 @@ +SELECT tupleHammingDistance((1, 2), (3, 4)); +SELECT tupleHammingDistance((120, 243), (120, 434)); +SELECT tupleHammingDistance((-12, 434), (434, 434)); + +DROP TABLE IF EXISTS defaults; +CREATE TABLE defaults +( + t1 Tuple(UInt16, UInt16), + t2 Tuple(UInt32, UInt32), + t3 Tuple(Int64, Int64) +)ENGINE = Memory(); + +INSERT INTO defaults VALUES ((12, 43), (12312, 43453) ,(-10, 32)) ((1, 4), (546, 12345), (546, 12345)) ((90, 9875), (43456, 234203), (1231, -123)) ((87, 987), (545645, 768354634), (9123, 909)); + +SELECT tupleHammingDistance((12, 43), t1) FROM defaults; +SELECT tupleHammingDistance(t2, (546, 456)) FROM defaults; +SELECT tupleHammingDistance(t2, t3) FROM defaults; + +DROP TABLE defaults; diff --git a/tests/queries/0_stateless/01051_system_stack_trace.reference b/tests/queries/0_stateless/01051_system_stack_trace.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01051_system_stack_trace.reference @@ -0,0 +1 @@ +1 diff --git 
a/tests/queries/0_stateless/01051_system_stack_trace.sql b/tests/queries/0_stateless/01051_system_stack_trace.sql new file mode 100644 index 00000000000..32d344fce7e --- /dev/null +++ b/tests/queries/0_stateless/01051_system_stack_trace.sql @@ -0,0 +1,2 @@ +-- at least this query should be present +SELECT count() > 0 FROM system.stack_trace WHERE query_id != ''; diff --git a/tests/queries/0_stateless/01076_parallel_alter_replicated_zookeeper.sh b/tests/queries/0_stateless/01076_parallel_alter_replicated_zookeeper.sh index 4f86d7c1666..c9e5d7b9447 100755 --- a/tests/queries/0_stateless/01076_parallel_alter_replicated_zookeeper.sh +++ b/tests/queries/0_stateless/01076_parallel_alter_replicated_zookeeper.sh @@ -103,8 +103,10 @@ done sleep 1 counter=0 +have_undone_mutations_query="select * from system.mutations where table like 'concurrent_mutate_mt_%' and is_done=0 and database='${CLICKHOUSE_DATABASE}'" +have_all_tables_query="select count() FROM system.tables WHERE name LIKE 'concurrent_mutate_mt_%' and database='${CLICKHOUSE_DATABASE}'" -while [[ $($CLICKHOUSE_CLIENT --query "select * from system.mutations where table like 'concurrent_mutate_mt_%' and is_done=0" 2>&1) ]]; do +while true ; do if [ "$counter" -gt 120 ] then break @@ -113,7 +115,13 @@ while [[ $($CLICKHOUSE_CLIENT --query "select * from system.mutations where tabl for i in $(seq $REPLICAS); do $CLICKHOUSE_CLIENT --query "ATTACH TABLE concurrent_mutate_mt_$i" 2> /dev/null done + counter=$(($counter + 1)) + + # no active mutations and all tables attached + if [[ -z $($CLICKHOUSE_CLIENT --query "$have_undone_mutations_query" 2>&1) && $($CLICKHOUSE_CLIENT --query "$have_all_tables_query" 2>&1) == "$REPLICAS" ]]; then + break + fi done for i in $(seq $REPLICAS); do diff --git a/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.reference b/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.reference index bcc7aebeae8..8984d35930a 100644 --- a/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.reference +++ b/tests/queries/0_stateless/01103_check_cpu_instructions_at_startup.reference @@ -2,4 +2,6 @@ Instruction check fail. The CPU does not support SSSE3 instruction set. Instruction check fail. The CPU does not support SSE4.1 instruction set. Instruction check fail. The CPU does not support SSE4.2 instruction set. Instruction check fail. The CPU does not support POPCNT instruction set. -MADV_DONTNEED does not zeroed page. 
jemalloc will be broken +: MADV_DONTNEED does not work (memset will be used instead) +: (This is the expected behaviour if you are running under QEMU) +1 diff --git a/tests/queries/0_stateless/01238_http_memory_tracking.sh b/tests/queries/0_stateless/01238_http_memory_tracking.sh index edace0c3530..b317a6c109b 100755 --- a/tests/queries/0_stateless/01238_http_memory_tracking.sh +++ b/tests/queries/0_stateless/01238_http_memory_tracking.sh @@ -13,7 +13,7 @@ ${CLICKHOUSE_CLIENT} --format Null -n <<<'SELECT sleepEachRow(1) FROM numbers(5) yes 'SELECT 1' 2>/dev/null | { head -n1000 } | { - xargs -i ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&max_memory_usage_for_user=$((1<<30))" -d '{}' + xargs -I{} ${CLICKHOUSE_CURL} -sS "${CLICKHOUSE_URL}&wait_end_of_query=1&max_memory_usage_for_user=$((1<<30))" -d '{}' } | grep -x -c 1 wait diff --git a/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql b/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql index 9bc50ae2fc7..d152326091b 100644 --- a/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql +++ b/tests/queries/0_stateless/01244_optimize_distributed_group_by_sharding_key.sql @@ -68,7 +68,7 @@ select 'OFFSET'; select count(), * from dist_01247 group by number offset 1; -- this will emulate different data on for different shards select 'WHERE LIMIT OFFSET'; -select count(), * from dist_01247 where number = _shard_num-1 group by number limit 1 offset 1; +select count(), * from dist_01247 where number = _shard_num-1 group by number order by number limit 1 offset 1; select 'LIMIT BY 1'; select count(), * from dist_01247 group by number order by number limit 1 by number; diff --git a/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.reference b/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.reference index 5d20150fdc1..f7c4f677e9a 100644 --- a/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.reference +++ b/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.reference @@ -20,9 +20,7 @@ attempt to parse with input_format_allow_errors_ratio=0.3 1 0 2 0 3 0 -4 0 5 0 -6 0 Return code: 0 ****************** attempt to parse with input_format_allow_errors_num=1 @@ -34,7 +32,5 @@ attempt to parse with input_format_allow_errors_num=2 1 0 2 0 3 0 -4 0 5 0 -6 0 Return code: 0 diff --git a/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.sh b/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.sh index 63207c7f4a8..7ae77eb9f0c 100755 --- a/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.sh +++ b/tests/queries/0_stateless/01355_CSV_input_format_allow_errors.sh @@ -11,34 +11,34 @@ cat "$SAMPLE_FILE" echo '******************' echo 'attempt to parse w/o flags' -cat "$SAMPLE_FILE" | clickhouse-local --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' 2>"$STD_ERROR_CAPTURED" +cat "$SAMPLE_FILE" | ${CLICKHOUSE_LOCAL} --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' 2>"$STD_ERROR_CAPTURED" echo "Return code: $?" 
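# The checks in this script share one pattern: capture stderr, then grep it for an
# expected error message. As a rough illustrative sketch only (sample.csv is a
# hypothetical file, not used by this test), the error-tolerance settings exercised
# below behave roughly like:
#   clickhouse-local --input-format=CSV --structure='num1 Int64, num2 Int64' \
#       --input_format_allow_errors_num=2 --query='SELECT * from table' < sample.csv
# i.e. up to 2 malformed rows are tolerated (and skipped) instead of aborting the parse.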
expected_error_message='is not like Int64' cat "$STD_ERROR_CAPTURED" | grep -q "$expected_error_message" && echo "OK: stderr contains a message '$expected_error_message'" || echo "FAILED: Error message is wrong" echo '******************' echo 'attempt to parse with input_format_allow_errors_ratio=0.1' -cat "$SAMPLE_FILE" | clickhouse-local --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_ratio=0.1 2>"$STD_ERROR_CAPTURED" +cat "$SAMPLE_FILE" | ${CLICKHOUSE_LOCAL} --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_ratio=0.1 2>"$STD_ERROR_CAPTURED" echo "Return code: $?" expected_error_message='Already have 1 errors out of 5 rows, which is 0.2' cat "$STD_ERROR_CAPTURED" | grep -q "$expected_error_message" && echo "OK: stderr contains a message '$expected_error_message'" || echo "FAILED: Error message is wrong" echo '******************' echo 'attempt to parse with input_format_allow_errors_ratio=0.3' -cat "$SAMPLE_FILE" | clickhouse-local --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_ratio=0.3 2>"$STD_ERROR_CAPTURED" +cat "$SAMPLE_FILE" | ${CLICKHOUSE_LOCAL} --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_ratio=0.3 2>"$STD_ERROR_CAPTURED" echo "Return code: $?" cat "$STD_ERROR_CAPTURED" echo '******************' echo 'attempt to parse with input_format_allow_errors_num=1' -cat "$SAMPLE_FILE" | clickhouse-local --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_num=1 2>"$STD_ERROR_CAPTURED" +cat "$SAMPLE_FILE" | ${CLICKHOUSE_LOCAL} --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_num=1 2>"$STD_ERROR_CAPTURED" echo "Return code: $?" expected_error_message='Already have 2 errors out of 7 rows' cat "$STD_ERROR_CAPTURED" | grep -q "$expected_error_message" && echo "OK: stderr contains a message '$expected_error_message'" || echo "FAILED: Error message is wrong" echo '******************' echo 'attempt to parse with input_format_allow_errors_num=2' -cat "$SAMPLE_FILE" | clickhouse-local --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_num=2 2>"$STD_ERROR_CAPTURED" +cat "$SAMPLE_FILE" | ${CLICKHOUSE_LOCAL} --input-format=CSV --structure='num1 Int64, num2 Int64' --query='SELECT * from table' --input_format_allow_errors_num=2 2>"$STD_ERROR_CAPTURED" echo "Return code: $?" cat "$STD_ERROR_CAPTURED" diff --git a/tests/queries/0_stateless/01383_log_broken_table.sh b/tests/queries/0_stateless/01383_log_broken_table.sh index 6cecd050a69..80efa7e3908 100755 --- a/tests/queries/0_stateless/01383_log_broken_table.sh +++ b/tests/queries/0_stateless/01383_log_broken_table.sh @@ -5,7 +5,7 @@ CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL=none . 
"$CURDIR"/../shell_config.sh -function test() +function test_func() { ENGINE=$1 MAX_MEM=4096 @@ -32,9 +32,9 @@ function test() $CLICKHOUSE_CLIENT --query "DROP TABLE log"; } -test TinyLog | grep -v -P '^(Memory limit|0\t0|File not found|[1-9]000000\t)' -test StripeLog | grep -v -P '^(Memory limit|0\t0|File not found|[1-9]000000\t)' -test Log | grep -v -P '^(Memory limit|0\t0|File not found|[1-9]000000\t)' +test_func TinyLog | grep -v -P '^(Memory limit|0\t0|File not found|[1-9]000000\t)' +test_func StripeLog | grep -v -P '^(Memory limit|0\t0|File not found|[1-9]000000\t)' +test_func Log | grep -v -P '^(Memory limit|0\t0|File not found|[1-9]000000\t)' rm "${CLICKHOUSE_TMP}/insert_result" rm "${CLICKHOUSE_TMP}/select_result" diff --git a/tests/queries/0_stateless/01415_sticking_mutations.sh b/tests/queries/0_stateless/01415_sticking_mutations.sh index 9ae1ef03d03..ce34cd09ca3 100755 --- a/tests/queries/0_stateless/01415_sticking_mutations.sh +++ b/tests/queries/0_stateless/01415_sticking_mutations.sh @@ -43,7 +43,7 @@ function check_sticky_mutations() $CLICKHOUSE_CLIENT --query "SYSTEM START MERGES sticking_mutations" - # just to be sure, that previous mutations finished + # Just to be sure, that previous mutations finished $CLICKHOUSE_CLIENT --query "ALTER TABLE sticking_mutations DELETE WHERE value2 % 31 == 0 SETTINGS mutations_sync = 1" $CLICKHOUSE_CLIENT --query "OPTIMIZE TABLE sticking_mutations FINAL" diff --git a/tests/queries/0_stateless/01505_pipeline_executor_UAF.sh b/tests/queries/0_stateless/01505_pipeline_executor_UAF.sh index 283e6662a43..f259badea8c 100755 --- a/tests/queries/0_stateless/01505_pipeline_executor_UAF.sh +++ b/tests/queries/0_stateless/01505_pipeline_executor_UAF.sh @@ -5,7 +5,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) # Regression for UAF in ThreadPool. # (Triggered under TSAN) -for i in {1..10}; do +for _ in {1..10}; do ${CLICKHOUSE_LOCAL} -q 'select * from numbers_mt(100000000) settings max_threads=100 FORMAT Null' # Binding to specific CPU is not required, but this makes the test more reliable. taskset --cpu-list 0 ${CLICKHOUSE_LOCAL} -q 'select * from numbers_mt(100000000) settings max_threads=100 FORMAT Null' diff --git a/tests/queries/0_stateless/01514_distributed_cancel_query_on_error.sh b/tests/queries/0_stateless/01514_distributed_cancel_query_on_error.sh index f251ff6138b..b0abd99d38c 100755 --- a/tests/queries/0_stateless/01514_distributed_cancel_query_on_error.sh +++ b/tests/queries/0_stateless/01514_distributed_cancel_query_on_error.sh @@ -16,6 +16,6 @@ opts=( ) ${CLICKHOUSE_CLIENT} "${opts[@]}" -q "SELECT groupArray(repeat('a', if(_shard_num == 2, 100000, 1))), number%100000 k from remote('127.{2,3}', system.numbers) GROUP BY k LIMIT 10e6" |& { # the query should fail earlier on 127.3 and 127.2 should not even go to the memory limit exceeded error. - fgrep -q 'DB::Exception: Received from 127.3:9000. DB::Exception: Memory limit (for query) exceeded:' + grep -F -q 'DB::Exception: Received from 127.3:9000. 
DB::Exception: Memory limit (for query) exceeded:' # while if this will not correctly then it will got the exception from the 127.2:9000 and fail } diff --git a/tests/queries/0_stateless/01560_DateTime_and_DateTime64_comparision.reference b/tests/queries/0_stateless/01560_DateTime_and_DateTime64_comparision.reference new file mode 100644 index 00000000000..088030bbc28 --- /dev/null +++ b/tests/queries/0_stateless/01560_DateTime_and_DateTime64_comparision.reference @@ -0,0 +1,3 @@ +-1 DateTime64(1, \'UTC\') < 1 1 1 <= 1 1 1 = 0 0 0 >= 0 0 0 > 0 0 0 != 1 1 1 +0 DateTime64(1, \'UTC\') < 0 0 0 <= 1 1 1 = 1 1 1 >= 1 1 1 > 0 0 0 != 0 0 0 +1 DateTime64(1, \'UTC\') < 0 0 0 <= 0 0 0 = 0 0 0 >= 1 1 1 > 1 1 1 != 1 1 1 diff --git a/tests/queries/0_stateless/01560_DateTime_and_DateTime64_comparision.sql b/tests/queries/0_stateless/01560_DateTime_and_DateTime64_comparision.sql new file mode 100644 index 00000000000..c5e677570ea --- /dev/null +++ b/tests/queries/0_stateless/01560_DateTime_and_DateTime64_comparision.sql @@ -0,0 +1,43 @@ +SELECT + n, + toTypeName(dt64) AS dt64_typename, + + '<', + dt64 < dt, + toDateTime(dt64) < dt, + dt64 < toDateTime64(dt, 1, 'UTC'), + + '<=', + dt64 <= dt, + toDateTime(dt64) <= dt, + dt64 <= toDateTime64(dt, 1, 'UTC'), + + '=', + dt64 = dt, + toDateTime(dt64) = dt, + dt64 = toDateTime64(dt, 1, 'UTC'), + + '>=', + dt64 >= dt, + toDateTime(dt64) >= dt, + dt64 >= toDateTime64(dt, 1, 'UTC'), + + '>', + dt64 > dt, + toDateTime(dt64) > dt, + dt64 > toDateTime64(dt, 1, 'UTC'), + + '!=', + dt64 != dt, + toDateTime(dt64) != dt, + dt64 != toDateTime64(dt, 1, 'UTC') +FROM +( + WITH toDateTime('2015-05-18 07:40:11') as value + SELECT + number - 1 as n, + toDateTime64(value, 1, 'UTC') AS dt64, + value - n as dt + FROM system.numbers + LIMIT 3 +) diff --git a/tests/queries/0_stateless/01561_Date_and_DateTime64_comparision.reference b/tests/queries/0_stateless/01561_Date_and_DateTime64_comparision.reference new file mode 100644 index 00000000000..e5183ec6a8a --- /dev/null +++ b/tests/queries/0_stateless/01561_Date_and_DateTime64_comparision.reference @@ -0,0 +1,3 @@ +-1 DateTime64(1, \'UTC\') < 1 1 1 <= 1 1 1 = 0 0 0 >= 0 0 0 > 0 0 0 != 1 1 1 +0 DateTime64(1, \'UTC\') < 0 0 0 <= 0 1 0 = 0 1 0 >= 1 1 1 > 1 0 1 != 1 0 1 +1 DateTime64(1, \'UTC\') < 0 0 0 <= 0 0 0 = 0 0 0 >= 1 1 1 > 1 1 1 != 1 1 1 diff --git a/tests/queries/0_stateless/01561_Date_and_DateTime64_comparision.sql b/tests/queries/0_stateless/01561_Date_and_DateTime64_comparision.sql new file mode 100644 index 00000000000..7e75d871e07 --- /dev/null +++ b/tests/queries/0_stateless/01561_Date_and_DateTime64_comparision.sql @@ -0,0 +1,43 @@ +SELECT + n, + toTypeName(dt64) AS dt64_typename, + + '<', + dt64 < d, + toDate(dt64) < d, + dt64 < toDateTime64(d, 1, 'UTC'), + + '<=', + dt64 <= d, + toDate(dt64) <= d, + dt64 <= toDateTime64(d, 1, 'UTC'), + + '=', + dt64 = d, + toDate(dt64) = d, + dt64 = toDateTime64(d, 1, 'UTC'), + + '>=', + dt64 >= d, + toDate(dt64) >= d, + dt64 >= toDateTime64(d, 1, 'UTC'), + + '>', + dt64 > d, + toDate(dt64) > d, + dt64 > toDateTime64(d, 1, 'UTC'), + + '!=', + dt64 != d, + toDate(dt64) != d, + dt64 != toDateTime64(d, 1, 'UTC') +FROM +( + WITH toDateTime('2019-09-16 19:20:11') as val + SELECT + number - 1 as n, + toDateTime64(val, 1, 'UTC') AS dt64, + toDate(val, 'UTC') - n as d + FROM system.numbers + LIMIT 3 +) diff --git a/tests/queries/0_stateless/01581_deduplicate_by_columns_local.sql b/tests/queries/0_stateless/01581_deduplicate_by_columns_local.sql index 2ad9eea7aa4..0f10052667c 100644 --- 
a/tests/queries/0_stateless/01581_deduplicate_by_columns_local.sql +++ b/tests/queries/0_stateless/01581_deduplicate_by_columns_local.sql @@ -120,3 +120,7 @@ INSERT INTO partial_duplicates SELECT * FROM source_data; OPTIMIZE TABLE partial_duplicates FINAL DEDUPLICATE BY COLUMNS('.*k'); SELECT * FROM partial_duplicates; TRUNCATE partial_duplicates; + +DROP TABLE full_duplicates; +DROP TABLE partial_duplicates; +DROP TABLE source_data; diff --git a/tests/queries/0_stateless/01581_deduplicate_by_columns_replicated.reference b/tests/queries/0_stateless/01581_deduplicate_by_columns_replicated.reference index 20fdcf82c66..d39ecc52c10 100644 --- a/tests/queries/0_stateless/01581_deduplicate_by_columns_replicated.reference +++ b/tests/queries/0_stateless/01581_deduplicate_by_columns_replicated.reference @@ -1,47 +1,47 @@ check that we have a data -r1 1 1001 3 2 2 -r1 1 2001 1 1 1 -r1 2 1002 1 1 1 -r1 2 2002 1 1 1 -r1 3 1003 2 2 2 -r1 4 1004 2 2 2 -r1 5 2005 2 2 1 -r1 9 1002 1 1 1 -r2 1 1001 3 2 2 -r2 1 2001 1 1 1 -r2 2 1002 1 1 1 -r2 2 2002 1 1 1 -r2 3 1003 2 2 2 -r2 4 1004 2 2 2 -r2 5 2005 2 2 1 -r2 9 1002 1 1 1 +r1 1 1001 3 2 +r1 1 2001 1 1 +r1 2 1002 1 1 +r1 2 2002 1 1 +r1 3 1003 2 2 +r1 4 1004 2 2 +r1 5 2005 2 2 +r1 9 1002 1 1 +r2 1 1001 3 2 +r2 1 2001 1 1 +r2 2 1002 1 1 +r2 2 2002 1 1 +r2 3 1003 2 2 +r2 4 1004 2 2 +r2 5 2005 2 2 +r2 9 1002 1 1 after old OPTIMIZE DEDUPLICATE -r1 1 1001 3 2 2 -r1 1 2001 1 1 1 -r1 2 1002 1 1 1 -r1 2 2002 1 1 1 -r1 3 1003 2 2 2 -r1 4 1004 2 2 2 -r1 5 2005 2 2 1 -r1 9 1002 1 1 1 -r2 1 1001 3 2 2 -r2 1 2001 1 1 1 -r2 2 1002 1 1 1 -r2 2 2002 1 1 1 -r2 3 1003 2 2 2 -r2 4 1004 2 2 2 -r2 5 2005 2 2 1 -r2 9 1002 1 1 1 +r1 1 1001 2 2 +r1 1 2001 1 1 +r1 2 1002 1 1 +r1 2 2002 1 1 +r1 3 1003 2 2 +r1 4 1004 2 2 +r1 5 2005 2 2 +r1 9 1002 1 1 +r2 1 1001 2 2 +r2 1 2001 1 1 +r2 2 1002 1 1 +r2 2 2002 1 1 +r2 3 1003 2 2 +r2 4 1004 2 2 +r2 5 2005 2 2 +r2 9 1002 1 1 check data again after multiple deduplications with new syntax -r1 1 1001 1 1 1 -r1 2 1002 1 1 1 -r1 3 1003 1 1 1 -r1 4 1004 1 1 1 -r1 5 2005 1 1 1 -r1 9 1002 1 1 1 -r2 1 1001 1 1 1 -r2 2 1002 1 1 1 -r2 3 1003 1 1 1 -r2 4 1004 1 1 1 -r2 5 2005 1 1 1 -r2 9 1002 1 1 1 +r1 1 1001 1 1 +r1 2 1002 1 1 +r1 3 1003 1 1 +r1 4 1004 1 1 +r1 5 2005 1 1 +r1 9 1002 1 1 +r2 1 1001 1 1 +r2 2 1002 1 1 +r2 3 1003 1 1 +r2 4 1004 1 1 +r2 5 2005 1 1 +r2 9 1002 1 1 diff --git a/tests/queries/0_stateless/01581_deduplicate_by_columns_replicated.sql b/tests/queries/0_stateless/01581_deduplicate_by_columns_replicated.sql index b10f8b44483..9779dca90a2 100644 --- a/tests/queries/0_stateless/01581_deduplicate_by_columns_replicated.sql +++ b/tests/queries/0_stateless/01581_deduplicate_by_columns_replicated.sql @@ -3,57 +3,50 @@ --- replicated case -- Just in case if previous tests run left some stuff behind. 
-DROP TABLE IF EXISTS replicated_deduplicate_by_columns_r1; -DROP TABLE IF EXISTS replicated_deduplicate_by_columns_r2; +DROP TABLE IF EXISTS replicated_deduplicate_by_columns_r1 SYNC; +DROP TABLE IF EXISTS replicated_deduplicate_by_columns_r2 SYNC; SET replication_alter_partitions_sync = 2; -- IRL insert_replica_id were filled from hostname CREATE TABLE IF NOT EXISTS replicated_deduplicate_by_columns_r1 ( - id Int32, val UInt32, unique_value UInt64 MATERIALIZED rowNumberInBlock(), insert_replica_id UInt8 MATERIALIZED randConstant() + id Int32, val UInt32, unique_value UInt64 MATERIALIZED rowNumberInBlock() ) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_01581/replicated_deduplicate', 'r1') ORDER BY id; CREATE TABLE IF NOT EXISTS replicated_deduplicate_by_columns_r2 ( - id Int32, val UInt32, unique_value UInt64 MATERIALIZED rowNumberInBlock(), insert_replica_id UInt8 MATERIALIZED randConstant() + id Int32, val UInt32, unique_value UInt64 MATERIALIZED rowNumberInBlock() ) ENGINE=ReplicatedMergeTree('/clickhouse/tables/test_01581/replicated_deduplicate', 'r2') ORDER BY id; -SYSTEM STOP REPLICATED SENDS; -SYSTEM STOP FETCHES; -SYSTEM STOP REPLICATION QUEUES; - -- insert some data, 2 records: (3, 1003), (4, 1004) are duplicated and have difference in unique_value / insert_replica_id -- (1, 1001), (5, 2005) has full duplicates INSERT INTO replicated_deduplicate_by_columns_r1 VALUES (1, 1001), (1, 1001), (2, 1002), (3, 1003), (4, 1004), (1, 2001), (9, 1002); INSERT INTO replicated_deduplicate_by_columns_r2 VALUES (1, 1001), (2, 2002), (3, 1003), (4, 1004), (5, 2005), (5, 2005); -SYSTEM START REPLICATION QUEUES; -SYSTEM START FETCHES; -SYSTEM START REPLICATED SENDS; - --- wait for syncing replicas +-- make sure that all data is present on all replicas SYSTEM SYNC REPLICA replicated_deduplicate_by_columns_r2; SYSTEM SYNC REPLICA replicated_deduplicate_by_columns_r1; SELECT 'check that we have a data'; -SELECT 'r1', id, val, count(), uniqExact(unique_value), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r1 GROUP BY id, val ORDER BY id, val; -SELECT 'r2', id, val, count(), uniqExact(unique_value), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r2 GROUP BY id, val ORDER BY id, val; +SELECT 'r1', id, val, count(), uniqExact(unique_value) FROM replicated_deduplicate_by_columns_r1 GROUP BY id, val ORDER BY id, val; +SELECT 'r2', id, val, count(), uniqExact(unique_value) FROM replicated_deduplicate_by_columns_r2 GROUP BY id, val ORDER BY id, val; + -- NOTE: here and below we need FINAL to force deduplication in such a small set of data in only 1 part. 
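For readers skimming the diff, this is the OPTIMIZE ... DEDUPLICATE BY form exercised below, shown on a throwaway non-replicated table. A sketch only: it assumes a running server with clickhouse-client on PATH, and the table/column names are illustrative, not part of the test.

```bash
clickhouse-client --multiquery --query "
    DROP TABLE IF EXISTS dedup_sketch;
    CREATE TABLE dedup_sketch (id Int32, val UInt32, meta UInt64) ENGINE = MergeTree ORDER BY id;
    INSERT INTO dedup_sketch VALUES (1, 10, 111), (1, 10, 222), (2, 20, 333);
    -- collapse rows that agree on (id, val), ignoring differences in meta;
    -- FINAL forces the merge even though all rows sit in a single part
    OPTIMIZE TABLE dedup_sketch FINAL DEDUPLICATE BY id, val;
    SELECT count() FROM dedup_sketch; -- 2 rows remain
    DROP TABLE dedup_sketch;"
```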
-- that should remove full duplicates OPTIMIZE TABLE replicated_deduplicate_by_columns_r1 FINAL DEDUPLICATE; SELECT 'after old OPTIMIZE DEDUPLICATE'; -SELECT 'r1', id, val, count(), uniqExact(unique_value), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r1 GROUP BY id, val ORDER BY id, val; -SELECT 'r2', id, val, count(), uniqExact(unique_value), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r2 GROUP BY id, val ORDER BY id, val; +SELECT 'r1', id, val, count(), uniqExact(unique_value) FROM replicated_deduplicate_by_columns_r1 GROUP BY id, val ORDER BY id, val; +SELECT 'r2', id, val, count(), uniqExact(unique_value) FROM replicated_deduplicate_by_columns_r2 GROUP BY id, val ORDER BY id, val; OPTIMIZE TABLE replicated_deduplicate_by_columns_r1 FINAL DEDUPLICATE BY id, val; OPTIMIZE TABLE replicated_deduplicate_by_columns_r1 FINAL DEDUPLICATE BY COLUMNS('[id, val]'); -OPTIMIZE TABLE replicated_deduplicate_by_columns_r1 FINAL DEDUPLICATE BY COLUMNS('[i]') EXCEPT(unique_value, insert_replica_id); +OPTIMIZE TABLE replicated_deduplicate_by_columns_r1 FINAL DEDUPLICATE BY COLUMNS('[i]') EXCEPT(unique_value); SELECT 'check data again after multiple deduplications with new syntax'; -SELECT 'r1', id, val, count(), uniqExact(unique_value), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r1 GROUP BY id, val ORDER BY id, val; -SELECT 'r2', id, val, count(), uniqExact(unique_value), uniqExact(insert_replica_id) FROM replicated_deduplicate_by_columns_r2 GROUP BY id, val ORDER BY id, val; +SELECT 'r1', id, val, count(), uniqExact(unique_value) FROM replicated_deduplicate_by_columns_r1 GROUP BY id, val ORDER BY id, val; +SELECT 'r2', id, val, count(), uniqExact(unique_value) FROM replicated_deduplicate_by_columns_r2 GROUP BY id, val ORDER BY id, val; -- cleanup the mess DROP TABLE replicated_deduplicate_by_columns_r1; diff --git a/tests/queries/0_stateless/01583_parallel_parsing_exception_with_offset.reference b/tests/queries/0_stateless/01583_parallel_parsing_exception_with_offset.reference new file mode 100644 index 00000000000..d86bac9de59 --- /dev/null +++ b/tests/queries/0_stateless/01583_parallel_parsing_exception_with_offset.reference @@ -0,0 +1 @@ +OK diff --git a/tests/queries/0_stateless/01583_parallel_parsing_exception_with_offset.sh b/tests/queries/0_stateless/01583_parallel_parsing_exception_with_offset.sh new file mode 100755 index 00000000000..caa180b1e0f --- /dev/null +++ b/tests/queries/0_stateless/01583_parallel_parsing_exception_with_offset.sh @@ -0,0 +1,14 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. 
"$CURDIR"/../shell_config.sh + +CLICKHOUSE_CLIENT=$(echo ${CLICKHOUSE_CLIENT} | sed 's/'"--send_logs_level=${CLICKHOUSE_CLIENT_SERVER_LOGS_LEVEL}"'/--send_logs_level=none/g') + +$CLICKHOUSE_CLIENT --query="DROP TABLE IF EXISTS check;" + +$CLICKHOUSE_CLIENT --query="CREATE TABLE check (x UInt64) ENGINE = Memory;" + +(seq 1 2000000; echo 'hello'; seq 1 20000000) | $CLICKHOUSE_CLIENT --input_format_parallel_parsing=1 --min_chunk_bytes_for_parallel_parsing=1000 --query="INSERT INTO check(x) FORMAT TSV " 2>&1 | grep -q "(at row 2000001)" && echo 'OK' || echo 'FAIL' ||: + +$CLICKHOUSE_CLIENT --query="DROP TABLE check;" diff --git a/tests/queries/0_stateless/01591_window_functions.reference b/tests/queries/0_stateless/01591_window_functions.reference new file mode 100644 index 00000000000..6c78108d734 --- /dev/null +++ b/tests/queries/0_stateless/01591_window_functions.reference @@ -0,0 +1,149 @@ +set allow_experimental_window_functions = 1; + +-- just something basic + +select number, count() over (partition by intDiv(number, 3) order by number) from numbers(10); + +-- proper calculation across blocks + +0 1 +1 2 +2 3 +3 1 +4 2 +5 3 +6 1 +7 2 +8 3 +9 1 +select number, max(number) over (partition by intDiv(number, 3) order by number desc) from numbers(10) settings max_block_size = 2; + +-- not a window function + +2 2 +1 2 +0 2 +5 5 +4 5 +3 5 +8 8 +7 8 +6 8 +9 9 +select number, abs(number) over (partition by toString(intDiv(number, 3))) from numbers(10); -- { serverError 63 } + +-- no partition by + +select number, avg(number) over (order by number) from numbers(10); + +-- no order by + +0 0 +1 0.5 +2 1 +3 1.5 +4 2 +5 2.5 +6 3 +7 3.5 +8 4 +9 4.5 +select number, quantileExact(number) over (partition by intDiv(number, 3)) from numbers(10); + +-- can add an alias after window spec + +0 0 +1 1 +2 1 +3 3 +4 4 +5 4 +6 6 +7 7 +8 7 +9 9 +select number, quantileExact(number) over (partition by intDiv(number, 3)) q from numbers(10); + +-- can't reference it yet -- the window functions are calculated at the +-- last stage of select, after all other functions. 
+ +0 0 +1 1 +2 1 +3 3 +4 4 +5 4 +6 6 +7 7 +8 7 +9 9 +select q * 10, quantileExact(number) over (partition by intDiv(number, 3)) q from numbers(10); -- { serverError 47 } + +-- should work in ORDER BY though + +select number, max(number) over (partition by intDiv(number, 3) order by number desc) m from numbers(10) order by m desc, number; + +-- this one doesn't work yet -- looks like the column names clash, and the +-- window count() is overwritten with aggregate count() +-- select number, count(), count() over (partition by intDiv(number, 3)) from numbers(10) group by number order by count() desc; + +-- different windows +-- an explain test would also be helpful, but it's too immature now and I don't +-- want to change reference all the time + +9 9 +6 8 +7 8 +8 8 +3 5 +4 5 +5 5 +0 2 +1 2 +2 2 +select number, max(number) over (partition by intDiv(number, 3) order by number desc), count(number) over (partition by intDiv(number, 5) order by number) as m from numbers(31) order by number settings max_block_size = 2; + +-- two functions over the same window +-- an explain test would also be helpful, but it's too immature now and I don't +-- want to change reference all the time + +0 2 1 +1 2 2 +2 2 3 +3 5 4 +4 5 5 +5 5 1 +6 8 2 +7 8 3 +8 8 4 +9 11 5 +10 11 1 +11 11 2 +12 14 3 +13 14 4 +14 14 5 +15 17 1 +16 17 2 +17 17 3 +18 20 4 +19 20 5 +20 20 1 +21 23 2 +22 23 3 +23 23 4 +24 26 5 +25 26 1 +26 26 2 +27 29 3 +28 29 4 +29 29 5 +30 30 1 +select number, max(number) over (partition by intDiv(number, 3) order by number desc), count(number) over (partition by intDiv(number, 3) order by number desc) as m from numbers(7) order by number settings max_block_size = 2; + +0 2 3 +1 2 2 +2 2 1 +3 5 3 +4 5 2 +5 5 1 +6 6 1 diff --git a/tests/queries/0_stateless/01591_window_functions.sql b/tests/queries/0_stateless/01591_window_functions.sql new file mode 100644 index 00000000000..a28d435d3f8 --- /dev/null +++ b/tests/queries/0_stateless/01591_window_functions.sql @@ -0,0 +1,42 @@ +-- { echo } + +set allow_experimental_window_functions = 1; + +-- just something basic +select number, count() over (partition by intDiv(number, 3) order by number) from numbers(10); + +-- proper calculation across blocks +select number, max(number) over (partition by intDiv(number, 3) order by number desc) from numbers(10) settings max_block_size = 2; + +-- not a window function +select number, abs(number) over (partition by toString(intDiv(number, 3))) from numbers(10); -- { serverError 63 } + +-- no partition by +select number, avg(number) over (order by number) from numbers(10); + +-- no order by +select number, quantileExact(number) over (partition by intDiv(number, 3)) from numbers(10); + +-- can add an alias after window spec +select number, quantileExact(number) over (partition by intDiv(number, 3)) q from numbers(10); + +-- can't reference it yet -- the window functions are calculated at the +-- last stage of select, after all other functions. 
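Before the failing case that follows, here is a standalone run of the basic form being introduced. This is a sketch, assuming a clickhouse-local build that already has the experimental window functions; the expected output matches the first block of the reference above.

```bash
# count() restarts in each 3-element partition and grows with the ORDER BY inside it.
clickhouse-local --allow_experimental_window_functions=1 --query "
    SELECT number,
           count() OVER (PARTITION BY intDiv(number, 3) ORDER BY number) AS c
    FROM numbers(6)"
# Prints: 0 1, 1 2, 2 3, 3 1, 4 2, 5 3
```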
+select q * 10, quantileExact(number) over (partition by intDiv(number, 3)) q from numbers(10); -- { serverError 47 } + +-- should work in ORDER BY though +select number, max(number) over (partition by intDiv(number, 3) order by number desc) m from numbers(10) order by m desc, number; + +-- this one doesn't work yet -- looks like the column names clash, and the +-- window count() is overwritten with aggregate count() +-- select number, count(), count() over (partition by intDiv(number, 3)) from numbers(10) group by number order by count() desc; + +-- different windows +-- an explain test would also be helpful, but it's too immature now and I don't +-- want to change reference all the time +select number, max(number) over (partition by intDiv(number, 3) order by number desc), count(number) over (partition by intDiv(number, 5) order by number) as m from numbers(31) order by number settings max_block_size = 2; + +-- two functions over the same window +-- an explain test would also be helpful, but it's too immature now and I don't +-- want to change reference all the time +select number, max(number) over (partition by intDiv(number, 3) order by number desc), count(number) over (partition by intDiv(number, 3) order by number desc) as m from numbers(7) order by number settings max_block_size = 2; diff --git a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.reference b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.reference new file mode 100644 index 00000000000..81c7e6e4df0 --- /dev/null +++ b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.reference @@ -0,0 +1,2 @@ +849 +102400 diff --git a/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql new file mode 100644 index 00000000000..a73045f5a6f --- /dev/null +++ b/tests/queries/0_stateless/01605_adaptive_granularity_block_borders.sql @@ -0,0 +1,28 @@ +DROP TABLE IF EXISTS adaptive_table; + +--- If granularity of consequent blocks differs a lot, then adaptive +--- granularity will adjust amout of marks correctly. Data for test empirically +--- derived, it's quite hard to get good parameters. + +CREATE TABLE adaptive_table( + key UInt64, + value String +) ENGINE MergeTree() +ORDER BY key +SETTINGS index_granularity_bytes=1048576, min_bytes_for_wide_part = 0, enable_vertical_merge_algorithm = 0; + +SET max_block_size=900; + +-- There are about 900 marks for our settings. +INSERT INTO adaptive_table SELECT number, if(number > 700, randomPrintableASCII(102400), randomPrintableASCII(1)) FROM numbers(10000); + +OPTIMIZE TABLE adaptive_table FINAL; + +SELECT marks FROM system.parts WHERE table = 'adaptive_table' and database=currentDatabase() and active; + +-- If we have computed granularity incorrectly than we will exceed this limit. 
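The memory cap set just below only works because adaptive granularity keeps each granule near index_granularity_bytes rather than at the default fixed row count. A standalone sketch of that behaviour, assuming a running server with clickhouse-client on PATH; the table name and data sizes here are illustrative and not the test's parameters.

```bash
clickhouse-client --multiquery --query "
    DROP TABLE IF EXISTS granularity_sketch;
    CREATE TABLE granularity_sketch (key UInt64, value String)
        ENGINE = MergeTree ORDER BY key
        SETTINGS index_granularity_bytes = 1048576, min_bytes_for_wide_part = 0;
    -- ~1 KiB rows: granules are capped by bytes, so each one holds roughly 1024 rows
    -- instead of the default 8192, and the marks count grows accordingly
    INSERT INTO granularity_sketch SELECT number, randomPrintableASCII(1024) FROM numbers(20000);
    SELECT marks, rows FROM system.parts
    WHERE database = currentDatabase() AND table = 'granularity_sketch' AND active;
    DROP TABLE granularity_sketch;"
```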
+SET max_memory_usage='30M'; + +SELECT max(length(value)) FROM adaptive_table; + +DROP TABLE IF EXISTS adaptive_table; diff --git a/tests/queries/0_stateless/01605_dictinct_two_level.reference b/tests/queries/0_stateless/01605_dictinct_two_level.reference new file mode 100644 index 00000000000..50d1615e1aa --- /dev/null +++ b/tests/queries/0_stateless/01605_dictinct_two_level.reference @@ -0,0 +1,20 @@ +['0'] +['1'] +['2'] +['3'] +['4'] +['5'] +['6'] +['7'] +['8'] +['9'] +test.com ['foo3223','foo6455','foo382','foo5566','foo1037'] +test.com0 ['foo0'] +test.com0.0001 ['foo1'] +test.com0.0002 ['foo2'] +test.com0.0003 ['foo3'] +test.com0.0004 ['foo4'] +test.com0.0005 ['foo5'] +test.com0.0006 ['foo6'] +test.com0.0007 ['foo7'] +test.com0.0008 ['foo8'] diff --git a/tests/queries/0_stateless/01605_dictinct_two_level.sql b/tests/queries/0_stateless/01605_dictinct_two_level.sql new file mode 100644 index 00000000000..5f20ae590c5 --- /dev/null +++ b/tests/queries/0_stateless/01605_dictinct_two_level.sql @@ -0,0 +1,25 @@ +SET group_by_two_level_threshold_bytes = 1; +SET group_by_two_level_threshold = 1; + +SELECT groupArray(DISTINCT toString(number % 10)) FROM numbers_mt(50000) + GROUP BY number ORDER BY number LIMIT 10 + SETTINGS max_threads = 2, max_block_size = 2000; + +DROP TABLE IF EXISTS dictinct_two_level; + +CREATE TABLE dictinct_two_level ( + time DateTime64(3), + domain String, + subdomain String +) ENGINE = MergeTree ORDER BY time; + +INSERT INTO dictinct_two_level SELECT 1546300800000, 'test.com', concat('foo', toString(number % 10000)) from numbers(10000); +INSERT INTO dictinct_two_level SELECT 1546300800000, concat('test.com', toString(number / 10000)) , concat('foo', toString(number % 10000)) from numbers(10000); + +SELECT + domain, groupArraySample(5, 11111)(DISTINCT subdomain) AS example_subdomains +FROM dictinct_two_level +GROUP BY domain ORDER BY domain, example_subdomains +LIMIT 10; + +DROP TABLE IF EXISTS dictinct_two_level; diff --git a/tests/queries/0_stateless/01605_drop_settings_profile_while_assigned.reference b/tests/queries/0_stateless/01605_drop_settings_profile_while_assigned.reference new file mode 100644 index 00000000000..47942812a11 --- /dev/null +++ b/tests/queries/0_stateless/01605_drop_settings_profile_while_assigned.reference @@ -0,0 +1,2 @@ +\N test_01605 \N 0 \N \N \N \N \N test_01605 +PROFILE DROPPED diff --git a/tests/queries/0_stateless/01605_drop_settings_profile_while_assigned.sql b/tests/queries/0_stateless/01605_drop_settings_profile_while_assigned.sql new file mode 100644 index 00000000000..c9205d7fd89 --- /dev/null +++ b/tests/queries/0_stateless/01605_drop_settings_profile_while_assigned.sql @@ -0,0 +1,8 @@ +CREATE USER OR REPLACE 'test_01605'; +CREATE SETTINGS PROFILE OR REPLACE 'test_01605'; +ALTER USER 'test_01605' SETTINGS PROFILE 'test_01605'; +SELECT * FROM system.settings_profile_elements WHERE user_name='test_01605' OR profile_name='test_01605'; +DROP SETTINGS PROFILE 'test_01605'; +SELECT 'PROFILE DROPPED'; +SELECT * FROM system.settings_profile_elements WHERE user_name='test_01605' OR profile_name='test_01605'; +DROP USER 'test_01605'; diff --git a/tests/queries/0_stateless/01607_arrays_as_nested_csv.reference b/tests/queries/0_stateless/01607_arrays_as_nested_csv.reference new file mode 100644 index 00000000000..d96f6b50a19 --- /dev/null +++ b/tests/queries/0_stateless/01607_arrays_as_nested_csv.reference @@ -0,0 +1,4 @@ +['Hello','world','42" TV'] +['Hello','world','42" TV'] +['Hello','world','42" TV'] +['Hello','world','42" TV'] diff 
--git a/tests/queries/0_stateless/01607_arrays_as_nested_csv.sh b/tests/queries/0_stateless/01607_arrays_as_nested_csv.sh new file mode 100755 index 00000000000..2f150e934d0 --- /dev/null +++ b/tests/queries/0_stateless/01607_arrays_as_nested_csv.sh @@ -0,0 +1,27 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +. "$CURDIR"/../shell_config.sh + +${CLICKHOUSE_CLIENT} --multiquery --query " +DROP TABLE IF EXISTS test; +CREATE TABLE test (a Array(String)) ENGINE = Memory; +" + +${CLICKHOUSE_CLIENT} --query "INSERT INTO test FORMAT CSV" < "$1" diff --git a/tests/queries/0_stateless/01615_random_one_shard_insertion.reference b/tests/queries/0_stateless/01615_random_one_shard_insertion.reference new file mode 100644 index 00000000000..448a73c4789 --- /dev/null +++ b/tests/queries/0_stateless/01615_random_one_shard_insertion.reference @@ -0,0 +1,8 @@ +0 +0 +1 +1 +2 +2 +3 +3 diff --git a/tests/queries/0_stateless/01615_random_one_shard_insertion.sql b/tests/queries/0_stateless/01615_random_one_shard_insertion.sql new file mode 100644 index 00000000000..7d07629feda --- /dev/null +++ b/tests/queries/0_stateless/01615_random_one_shard_insertion.sql @@ -0,0 +1,22 @@ +drop table if exists shard; +drop table if exists distr; + +create table shard (id Int32) engine = MergeTree order by cityHash64(id); +create table distr as shard engine Distributed (test_cluster_two_shards_localhost, currentDatabase(), shard); + +insert into distr (id) values (0), (1); -- { serverError 55; } + +set insert_distributed_sync = 1; + +insert into distr (id) values (0), (1); -- { serverError 55; } + +set insert_distributed_sync = 0; +set insert_distributed_one_random_shard = 1; + +insert into distr (id) values (0), (1); +insert into distr (id) values (2), (3); + +select * from distr order by id; + +drop table if exists shard; +drop table if exists distr; diff --git a/tests/queries/0_stateless/01616_untuple_access_field.reference b/tests/queries/0_stateless/01616_untuple_access_field.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/01616_untuple_access_field.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/01616_untuple_access_field.sql b/tests/queries/0_stateless/01616_untuple_access_field.sql new file mode 100644 index 00000000000..569efca5349 --- /dev/null +++ b/tests/queries/0_stateless/01616_untuple_access_field.sql @@ -0,0 +1 @@ +select _ut_1 from (select untuple((1,2))); diff --git a/tests/queries/0_stateless/data_parquet/alltypes_dictionary.parquet.columns b/tests/queries/0_stateless/data_parquet/alltypes_dictionary.parquet.columns index e13d779fda2..cbc891b2ca7 100644 --- a/tests/queries/0_stateless/data_parquet/alltypes_dictionary.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/alltypes_dictionary.parquet.columns @@ -1 +1 @@ -`id` Nullable(Int32), `bool_col` Nullable(UInt8), `tinyint_col` Nullable(Int32), `smallint_col` Nullable(Int32), `int_col` Nullable(Int32), `bigint_col` Nullable(Int64), `float_col` Nullable(Float32), `double_col` Nullable(Float64), `date_string_col` Nullable(String), `string_col` Nullable(String), `timestamp_col` Nullable(Int64) \ No newline at end of file +`id` Nullable(Int32), `bool_col` Nullable(UInt8), `tinyint_col` Nullable(Int32), `smallint_col` Nullable(Int32), `int_col` Nullable(Int32), `bigint_col` Nullable(Int64), `float_col` Nullable(Float32), `double_col` Nullable(Float64), `date_string_col` Nullable(String), `string_col` Nullable(String), `timestamp_col` 
Nullable(Int64) diff --git a/tests/queries/0_stateless/data_parquet/alltypes_plain.parquet.columns b/tests/queries/0_stateless/data_parquet/alltypes_plain.parquet.columns index e13d779fda2..cbc891b2ca7 100644 --- a/tests/queries/0_stateless/data_parquet/alltypes_plain.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/alltypes_plain.parquet.columns @@ -1 +1 @@ -`id` Nullable(Int32), `bool_col` Nullable(UInt8), `tinyint_col` Nullable(Int32), `smallint_col` Nullable(Int32), `int_col` Nullable(Int32), `bigint_col` Nullable(Int64), `float_col` Nullable(Float32), `double_col` Nullable(Float64), `date_string_col` Nullable(String), `string_col` Nullable(String), `timestamp_col` Nullable(Int64) \ No newline at end of file +`id` Nullable(Int32), `bool_col` Nullable(UInt8), `tinyint_col` Nullable(Int32), `smallint_col` Nullable(Int32), `int_col` Nullable(Int32), `bigint_col` Nullable(Int64), `float_col` Nullable(Float32), `double_col` Nullable(Float64), `date_string_col` Nullable(String), `string_col` Nullable(String), `timestamp_col` Nullable(Int64) diff --git a/tests/queries/0_stateless/data_parquet/alltypes_plain.snappy.parquet.columns b/tests/queries/0_stateless/data_parquet/alltypes_plain.snappy.parquet.columns index e13d779fda2..cbc891b2ca7 100644 --- a/tests/queries/0_stateless/data_parquet/alltypes_plain.snappy.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/alltypes_plain.snappy.parquet.columns @@ -1 +1 @@ -`id` Nullable(Int32), `bool_col` Nullable(UInt8), `tinyint_col` Nullable(Int32), `smallint_col` Nullable(Int32), `int_col` Nullable(Int32), `bigint_col` Nullable(Int64), `float_col` Nullable(Float32), `double_col` Nullable(Float64), `date_string_col` Nullable(String), `string_col` Nullable(String), `timestamp_col` Nullable(Int64) \ No newline at end of file +`id` Nullable(Int32), `bool_col` Nullable(UInt8), `tinyint_col` Nullable(Int32), `smallint_col` Nullable(Int32), `int_col` Nullable(Int32), `bigint_col` Nullable(Int64), `float_col` Nullable(Float32), `double_col` Nullable(Float64), `date_string_col` Nullable(String), `string_col` Nullable(String), `timestamp_col` Nullable(Int64) diff --git a/tests/queries/0_stateless/data_parquet/binary.parquet b/tests/queries/0_stateless/data_parquet/binary.parquet new file mode 100644 index 00000000000..fc8c04669d1 Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/binary.parquet differ diff --git a/tests/queries/0_stateless/data_parquet/binary.parquet.columns b/tests/queries/0_stateless/data_parquet/binary.parquet.columns new file mode 100644 index 00000000000..8f3b137647f --- /dev/null +++ b/tests/queries/0_stateless/data_parquet/binary.parquet.columns @@ -0,0 +1 @@ +`foo` Nullable(String) diff --git a/tests/queries/0_stateless/data_parquet/byte_array_decimal.parquet.columns b/tests/queries/0_stateless/data_parquet/byte_array_decimal.parquet.columns index 668abaf93c3..cb2a97de8c4 100644 --- a/tests/queries/0_stateless/data_parquet/byte_array_decimal.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/byte_array_decimal.parquet.columns @@ -1 +1 @@ -`value` Nullable(Decimal128(1)) \ No newline at end of file +`value` Nullable(Decimal(4, 2)) diff --git a/tests/queries/0_stateless/data_parquet/datapage_v2.snappy.parquet.columns b/tests/queries/0_stateless/data_parquet/datapage_v2.snappy.parquet.columns index 90f3f85a99d..d9e51028f22 100644 --- a/tests/queries/0_stateless/data_parquet/datapage_v2.snappy.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/datapage_v2.snappy.parquet.columns @@ 
-1 +1 @@ -`a` Nullable(String), `b` Nullable(Int32), `c` Nullable(Float64), `d` Nullable(UInt8), `element` Nullable(Int32) \ No newline at end of file +`a` Nullable(String), `b` Nullable(Int32), `c` Nullable(Float64), `d` Nullable(UInt8), `e` Nullable(Int32) diff --git a/tests/queries/0_stateless/data_parquet/dict-page-offset-zero.parquet b/tests/queries/0_stateless/data_parquet/dict-page-offset-zero.parquet new file mode 100644 index 00000000000..f9dbd7fd5e2 Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/dict-page-offset-zero.parquet differ diff --git a/tests/queries/0_stateless/data_parquet/dict-page-offset-zero.parquet.columns b/tests/queries/0_stateless/data_parquet/dict-page-offset-zero.parquet.columns new file mode 100644 index 00000000000..1ea0876ce95 --- /dev/null +++ b/tests/queries/0_stateless/data_parquet/dict-page-offset-zero.parquet.columns @@ -0,0 +1 @@ +`l_partkey` Nullable(Int32) diff --git a/tests/queries/0_stateless/data_parquet/fixed_length_decimal.parquet b/tests/queries/0_stateless/data_parquet/fixed_length_decimal.parquet new file mode 100644 index 00000000000..69fce531e4d Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/fixed_length_decimal.parquet differ diff --git a/tests/queries/0_stateless/data_parquet/fixed_length_decimal.parquet.columns b/tests/queries/0_stateless/data_parquet/fixed_length_decimal.parquet.columns new file mode 100644 index 00000000000..469105337a6 --- /dev/null +++ b/tests/queries/0_stateless/data_parquet/fixed_length_decimal.parquet.columns @@ -0,0 +1 @@ +`value` Nullable(Decimal(25, 2)) diff --git a/tests/queries/0_stateless/data_parquet/fixed_length_decimal_1.parquet.columns b/tests/queries/0_stateless/data_parquet/fixed_length_decimal_1.parquet.columns index 668abaf93c3..469105337a6 100644 --- a/tests/queries/0_stateless/data_parquet/fixed_length_decimal_1.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/fixed_length_decimal_1.parquet.columns @@ -1 +1 @@ -`value` Nullable(Decimal128(1)) \ No newline at end of file +`value` Nullable(Decimal(25, 2)) diff --git a/tests/queries/0_stateless/data_parquet/fixed_length_decimal_legacy.parquet.columns b/tests/queries/0_stateless/data_parquet/fixed_length_decimal_legacy.parquet.columns index 668abaf93c3..5e61877db58 100644 --- a/tests/queries/0_stateless/data_parquet/fixed_length_decimal_legacy.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/fixed_length_decimal_legacy.parquet.columns @@ -1 +1 @@ -`value` Nullable(Decimal128(1)) \ No newline at end of file +`value` Nullable(Decimal(13, 2)) diff --git a/tests/queries/0_stateless/data_parquet/hadoop_lz4_compressed.parquet b/tests/queries/0_stateless/data_parquet/hadoop_lz4_compressed.parquet new file mode 100644 index 00000000000..b5fadcd49c3 Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/hadoop_lz4_compressed.parquet differ diff --git a/tests/queries/0_stateless/data_parquet/hadoop_lz4_compressed.parquet.columns b/tests/queries/0_stateless/data_parquet/hadoop_lz4_compressed.parquet.columns new file mode 100644 index 00000000000..5a0c330c88f --- /dev/null +++ b/tests/queries/0_stateless/data_parquet/hadoop_lz4_compressed.parquet.columns @@ -0,0 +1 @@ +`c0` Nullable(Int64), `c1` Nullable(String), `v11` Nullable(Float64) diff --git a/tests/queries/0_stateless/data_parquet/int32_decimal.parquet.columns b/tests/queries/0_stateless/data_parquet/int32_decimal.parquet.columns index 668abaf93c3..cb2a97de8c4 100644 --- 
a/tests/queries/0_stateless/data_parquet/int32_decimal.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/int32_decimal.parquet.columns @@ -1 +1 @@ -`value` Nullable(Decimal128(1)) \ No newline at end of file +`value` Nullable(Decimal(4, 2)) diff --git a/tests/queries/0_stateless/data_parquet/int64_decimal.parquet.columns b/tests/queries/0_stateless/data_parquet/int64_decimal.parquet.columns index 668abaf93c3..3624a571970 100644 --- a/tests/queries/0_stateless/data_parquet/int64_decimal.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/int64_decimal.parquet.columns @@ -1 +1 @@ -`value` Nullable(Decimal128(1)) \ No newline at end of file +`value` Nullable(Decimal(10, 2)) diff --git a/tests/queries/0_stateless/data_parquet/list_columns.parquet b/tests/queries/0_stateless/data_parquet/list_columns.parquet new file mode 100644 index 00000000000..ecd7597e2fb Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/list_columns.parquet differ diff --git a/tests/queries/0_stateless/data_parquet/list_columns.parquet.columns b/tests/queries/0_stateless/data_parquet/list_columns.parquet.columns new file mode 100644 index 00000000000..86745c2f074 --- /dev/null +++ b/tests/queries/0_stateless/data_parquet/list_columns.parquet.columns @@ -0,0 +1 @@ +`int64_list` Nullable(Int64), `utf8_list` Nullable(String) diff --git a/tests/queries/0_stateless/data_parquet/nation.dict-malformed.parquet.columns b/tests/queries/0_stateless/data_parquet/nation.dict-malformed.parquet.columns index dbfe40811d7..34513a1a8e2 100644 --- a/tests/queries/0_stateless/data_parquet/nation.dict-malformed.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/nation.dict-malformed.parquet.columns @@ -1 +1 @@ -`nation_key` Nullable(Int32), `name` Nullable(String), `region_key` Nullable(Int32), `comment_col` Nullable(String) \ No newline at end of file +`nation_key` Nullable(Int32), `name` Nullable(String), `region_key` Nullable(Int32), `comment_col` Nullable(String) diff --git a/tests/queries/0_stateless/data_parquet/nested_lists.snappy.parquet.columns b/tests/queries/0_stateless/data_parquet/nested_lists.snappy.parquet.columns index e939719c71c..6d55d46dd5b 100644 --- a/tests/queries/0_stateless/data_parquet/nested_lists.snappy.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/nested_lists.snappy.parquet.columns @@ -1 +1 @@ -`element` Nullable(String), `b` Nullable(Int32) \ No newline at end of file +`a` Nullable(String), `b` Nullable(Int32) diff --git a/tests/queries/0_stateless/data_parquet/nested_maps.snappy.parquet.columns b/tests/queries/0_stateless/data_parquet/nested_maps.snappy.parquet.columns index c0ac26b2478..d5e9599431b 100644 --- a/tests/queries/0_stateless/data_parquet/nested_maps.snappy.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/nested_maps.snappy.parquet.columns @@ -1 +1 @@ -`key` Nullable(String), `key1` Nullable(Int32), `value` Nullable(UInt8), `b` Nullable(Int32), `c` Nullable(Float64) \ No newline at end of file +`a` Tuple(Nullable(String), Nullable(Int32), Nullable(UInt8)), `b` Nullable(Int32), `c` Nullable(Float64) diff --git a/tests/queries/0_stateless/data_parquet/non_hadoop_lz4_compressed.parquet b/tests/queries/0_stateless/data_parquet/non_hadoop_lz4_compressed.parquet new file mode 100644 index 00000000000..cfbdc7ef2db Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/non_hadoop_lz4_compressed.parquet differ diff --git a/tests/queries/0_stateless/data_parquet/non_hadoop_lz4_compressed.parquet.columns 
b/tests/queries/0_stateless/data_parquet/non_hadoop_lz4_compressed.parquet.columns new file mode 100644 index 00000000000..5a0c330c88f --- /dev/null +++ b/tests/queries/0_stateless/data_parquet/non_hadoop_lz4_compressed.parquet.columns @@ -0,0 +1 @@ +`c0` Nullable(Int64), `c1` Nullable(String), `v11` Nullable(Float64) diff --git a/tests/queries/0_stateless/data_parquet/nonnullable.impala.parquet.columns b/tests/queries/0_stateless/data_parquet/nonnullable.impala.parquet.columns index c12ef33a2bd..6d724200aec 100644 --- a/tests/queries/0_stateless/data_parquet/nonnullable.impala.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/nonnullable.impala.parquet.columns @@ -1 +1 @@ -`ID` Nullable(Int64), `element` Nullable(Int32), `element2` Nullable(Int32), `key` Nullable(String), `value` Nullable(Int32), `key5` Nullable(String), `value6` Nullable(Int32), `a` Nullable(Int32), `element8` Nullable(Int32), `e` Nullable(Int32), `f` Nullable(String), `key11` Nullable(String), `element12` Nullable(Float64) \ No newline at end of file +`ID` Nullable(Int64), `Int_Array` Nullable(Int32), `int_array_array` Nullable(Int32), `Int_Map` Tuple(Nullable(String), Nullable(Int32)), `int_map_array` Tuple(Nullable(String), Nullable(Int32)), `nested_Struct` Tuple(Nullable(Int32), Nullable(Int32), Nullable(Int32), Nullable(String), Nullable(String), Nullable(Float64)) diff --git a/tests/queries/0_stateless/data_parquet/nullable.impala.parquet.columns b/tests/queries/0_stateless/data_parquet/nullable.impala.parquet.columns index f9d7716b7f1..b5e122585d7 100644 --- a/tests/queries/0_stateless/data_parquet/nullable.impala.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/nullable.impala.parquet.columns @@ -1 +1 @@ -`id` Nullable(Int64), `element` Nullable(Int32), `element2` Nullable(Int32), `key` Nullable(String), `value` Nullable(Int32), `key5` Nullable(String), `value6` Nullable(Int32), `A` Nullable(Int32), `element8` Nullable(Int32), `E` Nullable(Int32), `F` Nullable(String), `key11` Nullable(String), `element12` Nullable(Float64) \ No newline at end of file +`id` Nullable(Int64), `int_array` Nullable(Int32), `int_array_Array` Nullable(Int32), `int_map` Tuple(Nullable(String), Nullable(Int32)), `int_Map_Array` Tuple(Nullable(String), Nullable(Int32)), `nested_struct` Tuple(Nullable(Int32), Nullable(Int32), Nullable(Int32), Nullable(String), Nullable(String), Nullable(Float64)) diff --git a/tests/queries/0_stateless/data_parquet/nulls.snappy.parquet.columns b/tests/queries/0_stateless/data_parquet/nulls.snappy.parquet.columns index b3824c5c7b8..a99b8b80eac 100644 --- a/tests/queries/0_stateless/data_parquet/nulls.snappy.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/nulls.snappy.parquet.columns @@ -1 +1 @@ -`b_c_int` Nullable(Int32) \ No newline at end of file +`b_struct` Nullable(Int32) diff --git a/tests/queries/0_stateless/data_parquet/repeated_no_annotation.parquet.columns b/tests/queries/0_stateless/data_parquet/repeated_no_annotation.parquet.columns deleted file mode 100644 index 68517e0b499..00000000000 --- a/tests/queries/0_stateless/data_parquet/repeated_no_annotation.parquet.columns +++ /dev/null @@ -1 +0,0 @@ -`id` Nullable(Int32), `number` Nullable(Int64), `kind` Nullable(String) \ No newline at end of file diff --git a/tests/queries/0_stateless/data_parquet/repeated_no_annotation.parquet b/tests/queries/0_stateless/data_parquet/repeated_no_annotation.parquet.disabled similarity index 100% rename from tests/queries/0_stateless/data_parquet/repeated_no_annotation.parquet 
rename to tests/queries/0_stateless/data_parquet/repeated_no_annotation.parquet.disabled diff --git a/tests/queries/0_stateless/data_parquet/single_nan.parquet b/tests/queries/0_stateless/data_parquet/single_nan.parquet new file mode 100644 index 00000000000..84dac10f00d Binary files /dev/null and b/tests/queries/0_stateless/data_parquet/single_nan.parquet differ diff --git a/tests/queries/0_stateless/data_parquet/single_nan.parquet.columns b/tests/queries/0_stateless/data_parquet/single_nan.parquet.columns new file mode 100644 index 00000000000..f2bb48365be --- /dev/null +++ b/tests/queries/0_stateless/data_parquet/single_nan.parquet.columns @@ -0,0 +1 @@ +`mycol` Nullable(Float64) diff --git a/tests/queries/0_stateless/data_parquet/userdata1.parquet.columns b/tests/queries/0_stateless/data_parquet/userdata1.parquet.columns index a1fa01e30ae..93e617f0467 100644 --- a/tests/queries/0_stateless/data_parquet/userdata1.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/userdata1.parquet.columns @@ -1 +1 @@ -`registration_dttm` Nullable(Int64), `id` Nullable(Int32), `first_name` Nullable(String), `last_name` Nullable(String), `email` Nullable(String), `gender` Nullable(String), `ip_address` Nullable(String), `cc` Nullable(String), `country` Nullable(String), `birthdate` Nullable(String), `salary` Nullable(Float64), `title` Nullable(String), `comments` Nullable(String) \ No newline at end of file +`registration_dttm` Nullable(Int64), `id` Nullable(Int32), `first_name` Nullable(String), `last_name` Nullable(String), `email` Nullable(String), `gender` Nullable(String), `ip_address` Nullable(String), `cc` Nullable(String), `country` Nullable(String), `birthdate` Nullable(String), `salary` Nullable(Float64), `title` Nullable(String), `comments` Nullable(String) diff --git a/tests/queries/0_stateless/data_parquet/userdata2.parquet.columns b/tests/queries/0_stateless/data_parquet/userdata2.parquet.columns index a1fa01e30ae..93e617f0467 100644 --- a/tests/queries/0_stateless/data_parquet/userdata2.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/userdata2.parquet.columns @@ -1 +1 @@ -`registration_dttm` Nullable(Int64), `id` Nullable(Int32), `first_name` Nullable(String), `last_name` Nullable(String), `email` Nullable(String), `gender` Nullable(String), `ip_address` Nullable(String), `cc` Nullable(String), `country` Nullable(String), `birthdate` Nullable(String), `salary` Nullable(Float64), `title` Nullable(String), `comments` Nullable(String) \ No newline at end of file +`registration_dttm` Nullable(Int64), `id` Nullable(Int32), `first_name` Nullable(String), `last_name` Nullable(String), `email` Nullable(String), `gender` Nullable(String), `ip_address` Nullable(String), `cc` Nullable(String), `country` Nullable(String), `birthdate` Nullable(String), `salary` Nullable(Float64), `title` Nullable(String), `comments` Nullable(String) diff --git a/tests/queries/0_stateless/data_parquet/userdata3.parquet.columns b/tests/queries/0_stateless/data_parquet/userdata3.parquet.columns index a1fa01e30ae..93e617f0467 100644 --- a/tests/queries/0_stateless/data_parquet/userdata3.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/userdata3.parquet.columns @@ -1 +1 @@ -`registration_dttm` Nullable(Int64), `id` Nullable(Int32), `first_name` Nullable(String), `last_name` Nullable(String), `email` Nullable(String), `gender` Nullable(String), `ip_address` Nullable(String), `cc` Nullable(String), `country` Nullable(String), `birthdate` Nullable(String), `salary` Nullable(Float64), `title` 
Nullable(String), `comments` Nullable(String) \ No newline at end of file +`registration_dttm` Nullable(Int64), `id` Nullable(Int32), `first_name` Nullable(String), `last_name` Nullable(String), `email` Nullable(String), `gender` Nullable(String), `ip_address` Nullable(String), `cc` Nullable(String), `country` Nullable(String), `birthdate` Nullable(String), `salary` Nullable(Float64), `title` Nullable(String), `comments` Nullable(String) diff --git a/tests/queries/0_stateless/data_parquet/userdata4.parquet.columns b/tests/queries/0_stateless/data_parquet/userdata4.parquet.columns index a1fa01e30ae..93e617f0467 100644 --- a/tests/queries/0_stateless/data_parquet/userdata4.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/userdata4.parquet.columns @@ -1 +1 @@ -`registration_dttm` Nullable(Int64), `id` Nullable(Int32), `first_name` Nullable(String), `last_name` Nullable(String), `email` Nullable(String), `gender` Nullable(String), `ip_address` Nullable(String), `cc` Nullable(String), `country` Nullable(String), `birthdate` Nullable(String), `salary` Nullable(Float64), `title` Nullable(String), `comments` Nullable(String) \ No newline at end of file +`registration_dttm` Nullable(Int64), `id` Nullable(Int32), `first_name` Nullable(String), `last_name` Nullable(String), `email` Nullable(String), `gender` Nullable(String), `ip_address` Nullable(String), `cc` Nullable(String), `country` Nullable(String), `birthdate` Nullable(String), `salary` Nullable(Float64), `title` Nullable(String), `comments` Nullable(String) diff --git a/tests/queries/0_stateless/data_parquet/userdata5.parquet.columns b/tests/queries/0_stateless/data_parquet/userdata5.parquet.columns index a1fa01e30ae..93e617f0467 100644 --- a/tests/queries/0_stateless/data_parquet/userdata5.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/userdata5.parquet.columns @@ -1 +1 @@ -`registration_dttm` Nullable(Int64), `id` Nullable(Int32), `first_name` Nullable(String), `last_name` Nullable(String), `email` Nullable(String), `gender` Nullable(String), `ip_address` Nullable(String), `cc` Nullable(String), `country` Nullable(String), `birthdate` Nullable(String), `salary` Nullable(Float64), `title` Nullable(String), `comments` Nullable(String) \ No newline at end of file +`registration_dttm` Nullable(Int64), `id` Nullable(Int32), `first_name` Nullable(String), `last_name` Nullable(String), `email` Nullable(String), `gender` Nullable(String), `ip_address` Nullable(String), `cc` Nullable(String), `country` Nullable(String), `birthdate` Nullable(String), `salary` Nullable(Float64), `title` Nullable(String), `comments` Nullable(String) diff --git a/tests/queries/0_stateless/data_parquet/v0.7.1.all-named-index.parquet.columns b/tests/queries/0_stateless/data_parquet/v0.7.1.all-named-index.parquet.columns index bcb204a577a..3f152e35001 100644 --- a/tests/queries/0_stateless/data_parquet/v0.7.1.all-named-index.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/v0.7.1.all-named-index.parquet.columns @@ -1 +1 @@ -`carat` Nullable(Float64), `depth` Nullable(Float64), `table` Nullable(Float64), `price` Nullable(Int64), `x` Nullable(Float64), `y` Nullable(Float64), `z` Nullable(Float64), `cut` Nullable(String), `color` Nullable(String), `clarity` Nullable(String) \ No newline at end of file +`carat` Nullable(Float64), `depth` Nullable(Float64), `table` Nullable(Float64), `price` Nullable(Int64), `x` Nullable(Float64), `y` Nullable(Float64), `z` Nullable(Float64), `cut` Nullable(String), `color` Nullable(String), `clarity` 
Nullable(String) diff --git a/tests/queries/0_stateless/data_parquet/v0.7.1.column-metadata-handling.parquet.columns b/tests/queries/0_stateless/data_parquet/v0.7.1.column-metadata-handling.parquet.columns index b79ebb7e612..3d08da2522c 100644 --- a/tests/queries/0_stateless/data_parquet/v0.7.1.column-metadata-handling.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/v0.7.1.column-metadata-handling.parquet.columns @@ -1 +1 @@ -`a` Nullable(Int64), `b` Nullable(Float64), `c` Nullable(DateTime), `index` Nullable(String), `__index_level_1__` Nullable(DateTime) \ No newline at end of file +`a` Nullable(Int64), `b` Nullable(Float64), `c` Nullable(DateTime), `index` Nullable(String), `__index_level_1__` Nullable(DateTime) diff --git a/tests/queries/0_stateless/data_parquet/v0.7.1.parquet.columns b/tests/queries/0_stateless/data_parquet/v0.7.1.parquet.columns index 70c607038ee..57a97f5fce9 100644 --- a/tests/queries/0_stateless/data_parquet/v0.7.1.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/v0.7.1.parquet.columns @@ -1 +1 @@ -`carat` Nullable(Float64), `cut` Nullable(String), `color` Nullable(String), `clarity` Nullable(String), `depth` Nullable(Float64), `table` Nullable(Float64), `price` Nullable(Int64), `x` Nullable(Float64), `y` Nullable(Float64), `z` Nullable(Float64), `__index_level_0__` Nullable(Int64) \ No newline at end of file +`carat` Nullable(Float64), `cut` Nullable(String), `color` Nullable(String), `clarity` Nullable(String), `depth` Nullable(Float64), `table` Nullable(Float64), `price` Nullable(Int64), `x` Nullable(Float64), `y` Nullable(Float64), `z` Nullable(Float64), `__index_level_0__` Nullable(Int64) diff --git a/tests/queries/0_stateless/data_parquet/v0.7.1.some-named-index.parquet.columns b/tests/queries/0_stateless/data_parquet/v0.7.1.some-named-index.parquet.columns index cde1175cb0d..50b4cb1dfbc 100644 --- a/tests/queries/0_stateless/data_parquet/v0.7.1.some-named-index.parquet.columns +++ b/tests/queries/0_stateless/data_parquet/v0.7.1.some-named-index.parquet.columns @@ -1 +1 @@ -`carat` Nullable(Float64), `depth` Nullable(Float64), `table` Nullable(Float64), `price` Nullable(Int64), `x` Nullable(Float64), `y` Nullable(Float64), `z` Nullable(Float64), `cut` Nullable(String), `__index_level_1__` Nullable(String), `clarity` Nullable(String) \ No newline at end of file +`carat` Nullable(Float64), `depth` Nullable(Float64), `table` Nullable(Float64), `price` Nullable(Int64), `x` Nullable(Float64), `y` Nullable(Float64), `z` Nullable(Float64), `cut` Nullable(String), `__index_level_1__` Nullable(String), `clarity` Nullable(String) diff --git a/tests/queries/0_stateless/helpers/00900_parquet_create_table_columns.py b/tests/queries/0_stateless/helpers/00900_parquet_create_table_columns.py new file mode 100755 index 00000000000..1a41da8c8b4 --- /dev/null +++ b/tests/queries/0_stateless/helpers/00900_parquet_create_table_columns.py @@ -0,0 +1,88 @@ +#!/usr/bin/env python3 + +import json +import sys + +TYPE_PARQUET_CONVERTED_TO_CLICKHOUSE = { + "TIMESTAMP_MICROS": "DateTime", + "TIMESTAMP_MILLIS": "DateTime", + "UTF8": "String", +} + +TYPE_PARQUET_PHYSICAL_TO_CLICKHOUSE = { + "BOOLEAN": "UInt8", + "INT32": "Int32", + "INT64": "Int64", + "FLOAT": "Float32", + "DOUBLE": "Float64", + "BYTE_ARRAY": "String", + "INT96": "Int64", # TODO! 
+} + +def read_file(filename): + with open(filename, "rb") as f: + return f.read().decode("raw_unicode_escape") + +def get_column_name(column): + return column["Name"].split(".", 1)[0] + +def resolve_clickhouse_column_type(column): + column_name = get_column_name(column) + logical_type = column.get("LogicalType", {}) + converted_type = column.get("ConvertedType", "").upper() + physical_type = column.get("PhysicalType", "").upper() + if logical_type and logical_type.get("Type", "").upper() == "DECIMAL": + precision = int(logical_type["precision"]) + scale = int(logical_type["scale"]) + if precision < 1 or precision > 76: + raise RuntimeError("Column {} has invalid Decimal precision {}".format(column_name, precision)) + if precision > 38: + raise RuntimeError("Column {} has unsupported Decimal precision {}".format(column_name, precision)) + if scale < 0 or scale > precision: + raise RuntimeError("Column {} has invalid Decimal scale {} for precision {}".format(column_name, scale, precision)) + return "Decimal({}, {})".format(precision, scale) + if converted_type and converted_type != "NONE": + result_type = TYPE_PARQUET_CONVERTED_TO_CLICKHOUSE.get(converted_type) + if result_type: + return result_type + raise RuntimeError("Column {} has unknown ConvertedType: {}".format(column_name, converted_type)) + if physical_type and physical_type != "NONE": + result_type = TYPE_PARQUET_PHYSICAL_TO_CLICKHOUSE.get(physical_type) + if result_type: + return result_type + raise RuntimeError("Column {} has unknown PhysicalType: {}".format(column_name, physical_type)) + raise RuntimeError("Column {} has invalid types: ConvertedType={}, PhysicalType={}".format(column_name, converted_type, physical_type)) + +def dump_columns(obj): + descr_by_column_name = {} + columns_descr = [] + for column in obj["Columns"]: + column_name = get_column_name(column) + column_type = resolve_clickhouse_column_type(column) + result_type = "Nullable({})".format(column_type) + if column_name in descr_by_column_name: + descr = descr_by_column_name[column_name] + descr["types"].append(result_type) + else: + descr = { + "name": column_name, + "types": [result_type], + } + descr_by_column_name[column_name] = descr + columns_descr.append(descr) + + # Make tuples from nested types. CH Server doesn't support such Arrow type but it makes Server Exceptions more relevant. 
+ def _format_type(types): + if len(types) == 1: + return types[0] + else: + return "Tuple({})".format(", ".join(types)) + + print(", ".join(map(lambda descr: "`{}` {}".format(descr["name"], _format_type(descr["types"])), columns_descr))) + +def dump_columns_from_file(filename): + dump_columns(json.loads(read_file(filename), strict=False)) + +if __name__ == "__main__": + filename = sys.argv[1] + dump_columns_from_file(filename) diff --git a/tests/queries/query_test.py b/tests/queries/query_test.py index 34a23c49d36..c4e7e613175 100644 --- a/tests/queries/query_test.py +++ b/tests/queries/query_test.py @@ -20,6 +20,7 @@ SKIP_LIST = [ "00505_shard_secure", "00506_union_distributed", # flaky "00646_url_engine", + "00821_distributed_storage_with_join_on.sql", # flaky "00834_cancel_http_readonly_queries_on_client_close", "00933_test_fix_extra_seek_on_compressed_cache", "00965_logs_level_bugfix", @@ -31,6 +32,7 @@ SKIP_LIST = [ "01018_ip_dictionary", "01023_materialized_view_query_context", # flaky "01035_lc_empty_part_bug", # flaky + "01037_polygon_dicts_simple_functions.sh", # flaky "01046_materialized_view_with_join_over_distributed", # flaky "01050_clickhouse_dict_source_with_subquery", "01053_ssd_dictionary", @@ -45,6 +47,7 @@ SKIP_LIST = [ "01103_check_cpu_instructions_at_startup", "01114_database_atomic", "01148_zookeeper_path_macros_unfolding", + "01193_metadata_loading.sh", # flaky "01274_alter_rename_column_distributed", # flaky "01280_ssd_complex_key_dictionary", "01293_client_interactive_vertical_multiline", # expect-test @@ -58,6 +61,7 @@ SKIP_LIST = [ "01320_create_sync_race_condition_zookeeper", "01355_CSV_input_format_allow_errors", "01370_client_autocomplete_word_break_characters", # expect-test + "01375_storage_file_tsv_csv_with_names_write_prefix", # flaky "01376_GROUP_BY_injective_elimination_dictGet", "01393_benchmark_secure_port", "01418_custom_settings", diff --git a/tests/queries/skip_list.json b/tests/queries/skip_list.json index f3cc522be97..14c5cd5a55e 100644 --- a/tests/queries/skip_list.json +++ b/tests/queries/skip_list.json @@ -107,6 +107,151 @@ "01508_partition_pruning", /// bug, shoud be fixed "01482_move_to_prewhere_and_cast" /// bug, shoud be fixed ], + "antlr": [ + "00763_create_query_as_table_engine_bug", + "00765_sql_compatibility_aliases", + "00826_cross_to_inner_join", + "00834_not_between", + "00939_limit_by_offset", + "00969_columns_clause", + "00975_values_list", + "00976_system_stop_ttl_merges", + "00978_table_function_values_alias", + "00980_merge_alter_settings", + "00980_zookeeper_merge_tree_alter_settings", + "00982_array_enumerate_uniq_ranked", + "00984_materialized_view_to_columns", + "00988_constraints_replication_zookeeper", + "00995_order_by_with_fill", + "01001_enums_in_in_section", + "01011_group_uniq_array_memsan", + "01011_test_create_as_skip_indices", + "01015_attach_part", + "01015_database_bad_tables", + "01017_uniqCombined_memory_usage", + "01019_alter_materialized_view_query", + "01021_tuple_parser", + "01025_array_compact_generic", + "01033_quota_dcl", + "01034_with_fill_and_push_down_predicate", + "01039_row_policy_dcl", + "01039_test_setting_parse", + "01045_dictionaries_restrictions", + "01048_exists_query", + "01055_compact_parts_1", + "01056_create_table_as", + "01066_bit_count", + "01070_materialize_ttl", + "01070_mutations_with_dependencies", + "01073_grant_and_revoke", + "01073_show_tables_not_like", + "01074_partial_revokes", + "01075_allowed_client_hosts", + "01089_alter_settings_old_format", + "01095_tpch_like_smoke", + 
"01109_exchange_tables", + "01109_sc0rp10_string_hash_map_zero_bytes", + "01110_dictionary_layout_without_arguments", + "01114_materialize_clear_index_compact_parts", + "01115_join_with_dictionary", + "01117_comma_and_others_join_mix", + "01125_dict_ddl_cannot_add_column", + "01130_in_memory_parts", + "01144_multiple_joins_rewriter_v2_and_lambdas", + "01144_multiword_data_types", + "01145_with_fill_const", + "01149_zookeeper_mutation_stuck_after_replace_partition", + "01188_attach_table_from_path", + "01190_full_attach_syntax", + "01191_rename_dictionary", + "01210_drop_view", + "01213_alter_rename_column", + "01232_untuple", + "01244_optimize_distributed_group_by_sharding_key", + "01254_dict_load_after_detach_attach", + "01256_misspell_layout_name_podshumok", + "01257_dictionary_mismatch_types", + "01267_alter_default_key_columns_zookeeper", + "01268_mv_scalars", + "01269_create_with_null", + "01271_show_privileges", + "01272_offset_without_limit", + "01275_parallel_mv", + "01277_alter_rename_column_constraint_zookeeper", + "01280_min_map_max_map", + "01280_null_in", + "01280_ttl_where_group_by_negative", + "01280_unicode_whitespaces_lexer", + "01292_create_user", + "01293_create_role", + "01293_pretty_max_value_width", + "01293_show_settings", + "01294_create_settings_profile", + "01295_create_row_policy", + "01296_create_row_policy_in_current_database", + "01297_create_quota", + "01308_row_policy_and_trivial_count_query", + "01318_map_add_map_subtract", + "01322_any_input_optimize", + "01324_if_transform_strings_to_enum", + "01337_mysql_global_variables", + "01355_alter_column_with_order", + "01355_ilike", + "01373_is_zero_or_null", + "01374_if_nullable_filimonov", + "01378_alter_rename_with_ttl_zookeeper", + "01379_with_fill_several_columns", + "01397_in_bad_arguments", + "01415_table_function_view", + "01419_merge_tree_settings_sanity_check", + "01430_modify_sample_by_zookeeper", + "01447_json_strings", + "01449_json_compact_strings", + "01451_detach_drop_part", + "01451_replicated_detach_drop_and_quorum", + "01451_replicated_detach_drop_part", + "01457_create_as_table_function_structure", + "01460_allow_dollar_and_number_in_identifier", + "01463_test_alter_live_view_refresh", + "01465_ttl_recompression", + "01470_columns_transformers", + "01470_explain", + "01470_show_databases_like", + "01470_test_insert_select_asterisk", + "01491_nested_multiline_comments", + "01493_table_function_null", + "01495_subqueries_in_with_statement", + "01495_subqueries_in_with_statement_2", + "01495_subqueries_in_with_statement_3", + "01504_compression_multiple_streams", + "01508_explain_header", + "01515_mv_and_array_join_optimisation_bag", + "01516_create_table_primary_key", + "01517_drop_mv_with_inner_table", + "01523_interval_operator_support_string_literal", + "01525_select_with_offset_fetch_clause", + "01526_client_start_and_exit", + "01529_union_distinct_and_setting_union_default_mode", + "01530_drop_database_atomic_sync", + "01532_execute_merges_on_single_replica", + "01532_primary_key_without_order_by_zookeeper", + "01551_mergetree_read_in_order_spread", + "01552_dict_fixedstring", + "01554_bloom_filter_index_big_integer_uuid", + "01556_explain_select_with_union_query", + "01561_aggregate_functions_of_key_with_join", + "01562_optimize_monotonous_functions_in_order_by", + "01581_deduplicate_by_columns_local", + "01581_deduplicate_by_columns_replicated", + "01590_countSubstrings", + "01593_insert_settings", + "01596_setting_limit_offset", + "01601_detach_permanently", + 
"01603_read_with_backoff_bug", + "01604_explain_ast_of_nonselect_query", + "01605_skip_idx_compact_parts", + "01606_git_import" + ], "parallel": [ /// Pessimistic list of tests which work badly in parallel. diff --git a/tests/server-test.xml b/tests/server-test.xml index cf713ca03f5..0b5e8f760a8 100644 --- a/tests/server-test.xml +++ b/tests/server-test.xml @@ -1,5 +1,5 @@ - + trace @@ -140,4 +140,4 @@ [hidden] - + \ No newline at end of file diff --git a/utils/check-style/check-style b/utils/check-style/check-style index 6727d5bd243..d3653deb980 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -131,4 +131,8 @@ find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | # Forbid stringstream because it's easy to use them incorrectly and hard to debug possible issues find $ROOT_PATH/{src,programs,utils} -name '*.h' -or -name '*.cpp' | grep -vP $EXCLUDE_DIRS | - xargs grep 'std::[io]\?stringstream' | grep -v "STYLE_CHECK_ALLOW_STD_STRING_STREAM" && echo "Use WriteBufferFromOwnString or ReadBufferFromString instead of std::stringstream" + xargs grep -P 'std::[io]?stringstream' | grep -v "STYLE_CHECK_ALLOW_STD_STRING_STREAM" && echo "Use WriteBufferFromOwnString or ReadBufferFromString instead of std::stringstream" + +# Conflict markers +find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | + xargs grep -P '^(<<<<<<<|=======|>>>>>>>)$' | grep -P '.' && echo "Conflict markers are found in files" diff --git a/utils/github/backport.py b/utils/github/backport.py index f303be23ac4..c51c84e6680 100644 --- a/utils/github/backport.py +++ b/utils/github/backport.py @@ -1,8 +1,13 @@ # -*- coding: utf-8 -*- -from clickhouse.utils.github.cherrypick import CherryPick -from clickhouse.utils.github.query import Query as RemoteRepo -from clickhouse.utils.github.local import Repository as LocalRepo +try: + from clickhouse.utils.github.cherrypick import CherryPick + from clickhouse.utils.github.query import Query as RemoteRepo + from clickhouse.utils.github.local import Repository as LocalRepo +except: + from .cherrypick import CherryPick + from .query import Query as RemoteRepo + from .local import Repository as LocalRepo import argparse import logging @@ -20,9 +25,25 @@ class Backport: def getPullRequests(self, from_commit): return self._gh.get_pull_requests(from_commit) - def execute(self, repo, until_commit, number, run_cherrypick): + def getBranchesWithLTS(self): + branches = [] + for pull_request in self._gh.find_pull_requests("release-lts"): + if not pull_request['merged'] and not pull_request['closed']: + branches.append(pull_request['headRefName']) + return branches + + def execute(self, repo, until_commit, number, run_cherrypick, find_lts=False): repo = LocalRepo(repo, 'origin', self.default_branch_name) - branches = repo.get_release_branches()[-number:] # [(branch_name, base_commit)] + all_branches = repo.get_release_branches() # [(branch_name, base_commit)] + + last_branches = set([branch[0] for branch in all_branches[-number:]]) + lts_branches = set(self.getBranchesWithLTS() if find_lts else []) + + branches = [] + # iterate over all branches to preserve their precedence. 
+ for branch in all_branches: + if branch in last_branches or branch in lts_branches: + branches.append(branch) if not branches: logging.info('No release branches found!') @@ -95,6 +116,7 @@ if __name__ == "__main__": parser.add_argument('--repo', type=str, required=True, help='path to full repository', metavar='PATH') parser.add_argument('--til', type=str, help='check PRs from HEAD til this commit', metavar='COMMIT') parser.add_argument('-n', type=int, dest='number', help='number of last release branches to consider') + parser.add_argument('--lts', action='store_true', help='consider branches with LTS') parser.add_argument('--dry-run', action='store_true', help='do not create or merge any PRs', default=False) parser.add_argument('--verbose', '-v', action='store_true', help='more verbose output', default=False) args = parser.parse_args() @@ -106,4 +128,4 @@ if __name__ == "__main__": cherrypick_run = lambda token, pr, branch: CherryPick(token, 'ClickHouse', 'ClickHouse', 'core', pr, branch).execute(args.repo, args.dry_run) bp = Backport(args.token, 'ClickHouse', 'ClickHouse', 'core') - bp.execute(args.repo, args.til, args.number, cherrypick_run) + bp.execute(args.repo, args.til, args.number, cherrypick_run, args.lts) diff --git a/utils/github/cherrypick.py b/utils/github/cherrypick.py index e03c18305c4..89072b316b2 100644 --- a/utils/github/cherrypick.py +++ b/utils/github/cherrypick.py @@ -14,7 +14,10 @@ Second run checks PR from previous run to be merged or at least being mergeable. Third run creates PR from backport branch (with merged previous PR) to release branch. ''' -from clickhouse.utils.github.query import Query as RemoteRepo +try: + from clickhouse.utils.github.query import Query as RemoteRepo +except: + from .query import Query as RemoteRepo import argparse from enum import Enum diff --git a/utils/github/query.py b/utils/github/query.py index ac3ce5bffa9..628d3a12dfd 100644 --- a/utils/github/query.py +++ b/utils/github/query.py @@ -39,6 +39,7 @@ class Query: baseRefName closed + headRefName id mergeable merged @@ -158,6 +159,24 @@ class Query: else: return {} + def find_pull_requests(self, label_name): + ''' + Get all pull-requests filtered by label name + ''' + _QUERY = ''' + repository(owner: "{owner}" name: "{name}") {{ + pullRequests(first: {min_page_size} labels: "{label_name}") {{ + nodes {{ + {pull_request_data} + }} + }} + }} + ''' + + query = _QUERY.format(owner=self._owner, name=self._name, label_name=label_name, + pull_request_data=self._PULL_REQUEST, min_page_size=self._min_page_size) + return self._run(query)['repository']['pullRequests']['nodes'] + def get_pull_requests(self, before_commit): ''' Get all merged pull-requests from the HEAD of default branch to the last commit (excluding) @@ -342,130 +361,6 @@ class Query: query = _SET_LABEL.format(pr_id=pull_request['id'], label_id=labels[0]['id']) self._run(query, is_mutation=True) - # OLD METHODS - - # _LABELS = ''' - # repository(owner: "ClickHouse" name: "ClickHouse") {{ - # pullRequest(number: {number}) {{ - # labels(first: {max_page_size} {next}) {{ - # pageInfo {{ - # hasNextPage - # endCursor - # }} - # nodes {{ - # name - # color - # }} - # }} - # }} - # }} - # ''' - # def get_labels(self, pull_request): - # '''Fetchs all labels for given pull-request - - # Args: - # pull_request: JSON object returned by `get_pull_requests()` - - # Returns: - # labels: a list of JSON nodes with the name and color fields - # ''' - # labels = [label for label in pull_request['labels']['nodes']] - # not_end = 
pull_request['labels']['pageInfo']['hasNextPage'] - # query = Query._LABELS.format(number = pull_request['number'], - # max_page_size = self._max_page_size, - # next=f'after: "{pull_request["labels"]["pageInfo"]["endCursor"]}"') - - # while not_end: - # result = self._run(query)['repository']['pullRequest']['labels'] - # not_end = result['pageInfo']['hasNextPage'] - # query = Query._LABELS.format(number=pull_request['number'], - # max_page_size=self._max_page_size, - # next=f'after: "{result["pageInfo"]["endCursor"]}"') - - # labels += [label for label in result['nodes']] - - # return labels - - # _TIMELINE = ''' - # repository(owner: "ClickHouse" name: "ClickHouse") {{ - # pullRequest(number: {number}) {{ - # timeline(first: {max_page_size} {next}) {{ - # pageInfo {{ - # hasNextPage - # endCursor - # }} - # nodes {{ - # ... on CrossReferencedEvent {{ - # isCrossRepository - # source {{ - # ... on PullRequest {{ - # number - # baseRefName - # merged - # labels(first: {max_page_size}) {{ - # pageInfo {{ - # hasNextPage - # endCursor - # }} - # nodes {{ - # name - # color - # }} - # }} - # }} - # }} - # target {{ - # ... on PullRequest {{ - # number - # }} - # }} - # }} - # }} - # }} - # }} - # }} - # ''' - # def get_timeline(self, pull_request): - # '''Fetchs all cross-reference events from pull-request's timeline - - # Args: - # pull_request: JSON object returned by `get_pull_requests()` - - # Returns: - # events: a list of JSON nodes for CrossReferenceEvent - # ''' - # events = [event for event in pull_request['timeline']['nodes'] if event and event['source']] - # not_end = pull_request['timeline']['pageInfo']['hasNextPage'] - # query = Query._TIMELINE.format(number = pull_request['number'], - # max_page_size = self._max_page_size, - # next=f'after: "{pull_request["timeline"]["pageInfo"]["endCursor"]}"') - - # while not_end: - # result = self._run(query)['repository']['pullRequest']['timeline'] - # not_end = result['pageInfo']['hasNextPage'] - # query = Query._TIMELINE.format(number=pull_request['number'], - # max_page_size=self._max_page_size, - # next=f'after: "{result["pageInfo"]["endCursor"]}"') - - # events += [event for event in result['nodes'] if event and event['source']] - - # return events - - # _DEFAULT = ''' - # repository(owner: "ClickHouse", name: "ClickHouse") { - # defaultBranchRef { - # name - # } - # } - # ''' - # def get_default_branch(self): - # '''Get short name of the default branch - - # Returns: - # name (string): branch name - # ''' - # return self._run(Query._DEFAULT)['repository']['defaultBranchRef']['name'] - def _run(self, query, is_mutation=False): from requests.adapters import HTTPAdapter from urllib3.util.retry import Retry diff --git a/utils/list-versions/version_date.tsv b/utils/list-versions/version_date.tsv index cec9dbf1b08..e7e4b78c99b 100644 --- a/utils/list-versions/version_date.tsv +++ b/utils/list-versions/version_date.tsv @@ -1,9 +1,12 @@ +v20.12.4.5-stable 2020-12-24 v20.12.3.3-stable 2020-12-09 v20.12.2.1-stable 2020-12-09 +v20.11.6.6-stable 2020-12-24 v20.11.5.18-stable 2020-12-06 v20.11.4.13-stable 2020-11-20 v20.11.3.3-stable 2020-11-13 v20.11.2.1-stable 2020-11-11 +v20.10.7.4-stable 2020-12-24 v20.10.6.27-stable 2020-12-06 v20.10.5.10-stable 2020-11-20 v20.10.4.1-stable 2020-11-13 @@ -15,6 +18,7 @@ v20.9.5.5-stable 2020-11-13 v20.9.4.76-stable 2020-10-29 v20.9.3.45-stable 2020-10-09 v20.9.2.20-stable 2020-09-22 +v20.8.10.13-lts 2020-12-24 v20.8.9.6-lts 2020-12-10 v20.8.8.2-lts 2020-12-07 v20.8.7.15-lts 2020-11-20 diff --git 
a/website/README.md b/website/README.md index c8881ad5eb9..c2702c82157 100644 --- a/website/README.md +++ b/website/README.md @@ -6,6 +6,12 @@ ClickHouse website is built alongside it's documentation via [docs/tools](https: cd ../docs/tools sudo apt install python-3 pip pip3 install -r requirements.txt + +# This is needed only when documentation is included +sudo npm install -g purify-css amphtml-validator +sudo apt install wkhtmltopdf +virtualenv build + ./build.py --skip-multi-page --skip-single-page --skip-amp --skip-pdf --skip-blog --skip-git-log --skip-docs --skip-test-templates --livereload 8080 # Open the web browser and go to http://localhost:8080/ diff --git a/website/main.html b/website/main.html index 82b0f222344..ff59f6564ee 100644 --- a/website/main.html +++ b/website/main.html @@ -31,7 +31,7 @@ {% set description = description.replace('¶','')[0:120] %} {% endif %} -{% set data_version = config.extra.version_prefix or 'master' %} +{% set data_version = 'master' %} {% set data_single_page = 'true' if config.extra.single_page else 'false' %} {% if is_amp %} diff --git a/website/templates/common_meta.html b/website/templates/common_meta.html index 89a650fba6a..0c4f5c30785 100644 --- a/website/templates/common_meta.html +++ b/website/templates/common_meta.html @@ -32,15 +32,15 @@ {% endif %} -{% if config and (config.extra.single_page or config.extra.version_prefix) %} +{% if config and config.extra.single_page %} {% endif %} {% if config and page and not is_blog %} {% for code, name in config.extra.languages.items() %} - + {% endfor %} - + {% endif %} {% for prefetch_item in prefetch_items %} diff --git a/website/templates/docs/amp.html b/website/templates/docs/amp.html index 9a68cc18e11..e5794465dfc 100644 --- a/website/templates/docs/amp.html +++ b/website/templates/docs/amp.html @@ -7,7 +7,7 @@ {% include "templates/docs/ld_json.html" %} - {% if config.extra.single_page or config.extra.version_prefix %} + {% if config.extra.single_page %} {% endif %} diff --git a/website/templates/docs/nav.html b/website/templates/docs/nav.html index b3c8b3a3181..047914c2b95 100644 --- a/website/templates/docs/nav.html +++ b/website/templates/docs/nav.html @@ -40,7 +40,7 @@ - {% if not single_page %}