diff --git a/cmake/find_hyperscan.cmake b/cmake/find_hyperscan.cmake
index 826ee555d53..a3e0b6bc9bc 100644
--- a/cmake/find_hyperscan.cmake
+++ b/cmake/find_hyperscan.cmake
@@ -1,7 +1,33 @@
 if (HAVE_SSSE3)
+    option (ENABLE_HYPERSCAN "Enable hyperscan" ON)
+endif ()
+# Everything below is skipped unless ENABLE_HYPERSCAN is on; the option itself is only declared when HAVE_SSSE3 is set.
+if (ENABLE_HYPERSCAN)
+# Choose between the bundled copy in contrib/hyperscan and a hyperscan installed on the system.
+option (USE_INTERNAL_HYPERSCAN_LIBRARY "Set to FALSE to use system hyperscan instead of the bundled" ${NOT_UNBUNDLED})
+# Without the submodule checkout the bundled copy cannot be used: remember that and switch to the system lookup.
+if (NOT EXISTS "${ClickHouse_SOURCE_DIR}/contrib/hyperscan/CMakeLists.txt")
+    if (USE_INTERNAL_HYPERSCAN_LIBRARY)
+        message (WARNING "submodule contrib/hyperscan is missing. to fix try run: \n git submodule update --init --recursive")
+    endif ()
+   set (MISSING_INTERNAL_HYPERSCAN_LIBRARY 1)
+   set (USE_INTERNAL_HYPERSCAN_LIBRARY 0)
+endif ()
+# System lookup: the library is named "hs"; the header is searched for both as hs/hs.h and as plain hs.h.
+if (NOT USE_INTERNAL_HYPERSCAN_LIBRARY)
+    find_library (HYPERSCAN_LIBRARY hs)
+    find_path (HYPERSCAN_INCLUDE_DIR NAMES hs/hs.h hs.h PATHS ${HYPERSCAN_INCLUDE_PATHS})
+endif ()
+# If both a library and an include dir are known, use them; otherwise use the bundled sources when they are present.
+if (HYPERSCAN_LIBRARY AND HYPERSCAN_INCLUDE_DIR)
+    set (USE_HYPERSCAN 1)
+elseif (NOT MISSING_INTERNAL_HYPERSCAN_LIBRARY)
     set (HYPERSCAN_INCLUDE_DIR ${ClickHouse_SOURCE_DIR}/contrib/hyperscan/src)
     set (HYPERSCAN_LIBRARY hs)
     set (USE_HYPERSCAN 1)
     set (USE_INTERNAL_HYPERSCAN_LIBRARY 1)
-    message (STATUS "Using hyperscan: ${HYPERSCAN_INCLUDE_DIR} " : ${HYPERSCAN_LIBRARY})
 endif()
+# Printed for every outcome; this file does not set USE_HYPERSCAN when neither variant was usable.
+message (STATUS "Using hyperscan=${USE_HYPERSCAN}: ${HYPERSCAN_INCLUDE_DIR} : ${HYPERSCAN_LIBRARY}")
+
+endif ()  # ENABLE_HYPERSCAN
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index dee9b3765de..03daaf8907b 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -305,6 +305,6 @@ if (USE_BASE64)
     add_subdirectory (base64-cmake)
 endif()
 
-if (USE_HYPERSCAN)
+if (USE_INTERNAL_HYPERSCAN_LIBRARY)  # contrib/hyperscan is built only when the bundled copy was selected
     add_subdirectory (hyperscan)
 endif()
diff --git a/contrib/boost b/contrib/boost
index 32abf16beb7..471ea208abb 160000
--- a/contrib/boost
+++ b/contrib/boost
@@ -1 +1 @@
-Subproject commit 32abf16beb7bb8b243a4d100ccdd6acb271738c4
+Subproject commit 471ea208abb92a5cba7d3a08a819bb728f27e95f
diff --git a/contrib/librdkafka b/contrib/librdkafka
index 73295a702cd..8695b9d63ac 160000
--- a/contrib/librdkafka
+++ b/contrib/librdkafka
@@ -1 +1 @@
-Subproject commit 73295a702cd1c85c11749ade500d713db7099cca
+Subproject commit 8695b9d63ac0fe1b891b511d5b36302ffc84d4e2
diff --git a/dbms/programs/client/Client.cpp b/dbms/programs/client/Client.cpp
index 24336822f88..1568f1cc544 100644
--- a/dbms/programs/client/Client.cpp
+++ b/dbms/programs/client/Client.cpp
@@ -42,6 +42,7 @@
 #include <IO/ReadBufferFromString.h>
 #include <IO/ReadHelpers.h>
 #include <IO/WriteHelpers.h>
+#include <IO/Operators.h>
 #include <IO/UseSSL.h>
 #include <DataStreams/AsynchronousBlockInputStream.h>
 #include <DataStreams/AddingDefaultsBlockInputStream.h>
@@ -1314,6 +1315,9 @@ private:
 
         /// Received data block is immediately displayed to the user.
         block_out_stream->flush();
+
+        /// Restore progress bar after data block.
+        writeProgress();
     }
 
 
@@ -1353,8 +1357,8 @@ private:
 
     void clearProgress()
     {
-        std::cerr << RESTORE_CURSOR_POSITION CLEAR_TO_END_OF_LINE;
         written_progress_chars = 0;
+        std::cerr << RESTORE_CURSOR_POSITION CLEAR_TO_END_OF_LINE;  /// Go back to the saved cursor position and clear to the end of the line.
     }
 
 
@@ -1363,6 +1367,9 @@ private:
         if (!need_render_progress)
             return;
 
+        /// Output all progress bar commands to stderr at once to avoid flicker.
+        WriteBufferFromFileDescriptor message(STDERR_FILENO, 1024);
+
         static size_t increment = 0;
         static const char * indicators[8] =
         {
@@ -1377,13 +1384,15 @@ private:
         };
 
         if (written_progress_chars)
-            clearProgress();
+            message << RESTORE_CURSOR_POSITION CLEAR_TO_END_OF_LINE;
         else
-            std::cerr << SAVE_CURSOR_POSITION;
+            message << SAVE_CURSOR_POSITION;
+
+        message << DISABLE_LINE_WRAPPING;
+
+        size_t prefix_size = message.count();
 
-        std::stringstream message;
         message << indicators[increment % 8]
-            << std::fixed << std::setprecision(3)
             << " Progress: ";
 
         message
@@ -1398,8 +1407,7 @@ private:
         else
             message << ". ";
 
-        written_progress_chars = message.str().size() - (increment % 8 == 7 ? 10 : 13);
-        std::cerr << DISABLE_LINE_WRAPPING << message.rdbuf();
+        written_progress_chars = message.count() - prefix_size - (increment % 8 == 7 ? 10 : 13);    /// Don't count invisible output (escape sequences).
 
         /// If the approximate number of rows to process is known, we can display a progress bar and percentage.
         if (progress.total_rows > 0)
@@ -1421,19 +1429,21 @@ private:
                     if (width_of_progress_bar > 0)
                     {
                         std::string bar = UnicodeBar::render(UnicodeBar::getWidth(progress.rows, 0, total_rows_corrected, width_of_progress_bar));
-                        std::cerr << "\033[0;32m" << bar << "\033[0m";
+                        message << "\033[0;32m" << bar << "\033[0m";
                         if (width_of_progress_bar > static_cast<ssize_t>(bar.size() / UNICODE_BAR_CHAR_SIZE))
-                        std::cerr << std::string(width_of_progress_bar - bar.size() / UNICODE_BAR_CHAR_SIZE, ' ');
+                            message << std::string(width_of_progress_bar - bar.size() / UNICODE_BAR_CHAR_SIZE, ' ');
                     }
                 }
             }
 
             /// Underestimate percentage a bit to avoid displaying 100%.
-            std::cerr << ' ' << (99 * progress.rows / total_rows_corrected) << '%';
+            message << ' ' << (99 * progress.rows / total_rows_corrected) << '%';
         }
 
-        std::cerr << ENABLE_LINE_WRAPPING;
+        message << ENABLE_LINE_WRAPPING;
         ++increment;
+
+        message.next();
     }
 
 
diff --git a/dbms/src/Common/Volnitsky.h b/dbms/src/Common/Volnitsky.h
index d8fc42245bf..bce37e655cd 100644
--- a/dbms/src/Common/Volnitsky.h
+++ b/dbms/src/Common/Volnitsky.h
@@ -516,7 +516,7 @@ public:
     template <typename ResultType, typename CountCharsCallback>
     void searchFirstPosition(const ColumnString::Chars & haystack_data, const ColumnString::Offsets & haystack_offsets, const CountCharsCallback & count_chars_callback, ResultType & ans)
     {
-        auto callback = [this, &count_chars_callback](const UInt8 * haystack, const UInt8 * haystack_end) -> size_t
+        auto callback = [this, &count_chars_callback](const UInt8 * haystack, const UInt8 * haystack_end) -> UInt64
         {
             return this->searchOneFirstPosition(haystack, haystack_end, count_chars_callback);
         };
@@ -676,11 +676,11 @@ private:
     }
 
     template <typename CountCharsCallback>
-    inline size_t searchOneFirstPosition(const UInt8 * haystack, const UInt8 * haystack_end, const CountCharsCallback & callback) const
+    inline UInt64 searchOneFirstPosition(const UInt8 * haystack, const UInt8 * haystack_end, const CountCharsCallback & callback) const
     {
         const size_t fallback_size = fallback_needles.size();
 
-        size_t ans = std::numeric_limits<size_t>::max();
+        UInt64 ans = std::numeric_limits<UInt64>::max();
 
         for (size_t i = 0; i < fallback_size; ++i)
             if (auto pos = fallback_searchers[fallback_needles[i]].search(haystack, haystack_end); pos != haystack_end)
@@ -705,7 +705,7 @@ private:
                 }
             }
         }
-        if (ans == std::numeric_limits<size_t>::max())
+        if (ans == std::numeric_limits<UInt64>::max())
             return 0;
         return ans;
     }
diff --git a/dbms/src/Common/config.h.in b/dbms/src/Common/config.h.in
index 0b31466d522..c323afe369e 100644
--- a/dbms/src/Common/config.h.in
+++ b/dbms/src/Common/config.h.in
@@ -24,6 +24,7 @@
 #cmakedefine01 USE_CPUINFO
 #cmakedefine01 USE_BROTLI
 #cmakedefine01 USE_SSL
+#cmakedefine01 USE_HYPERSCAN
 
 #cmakedefine01 CLICKHOUSE_SPLIT_BINARY
 #cmakedefine01 LLVM_HAS_RTTI
diff --git a/dbms/src/Formats/CapnProtoRowInputStream.cpp b/dbms/src/Formats/CapnProtoRowInputStream.cpp
index 692d1ec8dee..23a1c899838 100644
--- a/dbms/src/Formats/CapnProtoRowInputStream.cpp
+++ b/dbms/src/Formats/CapnProtoRowInputStream.cpp
@@ -206,28 +206,42 @@ CapnProtoRowInputStream::CapnProtoRowInputStream(ReadBuffer & istr_, const Block
     createActions(list, root);
 }
 
+// Reads exactly one serialized Cap'n Proto message from istr and returns it as one contiguous word array.
+kj::Array<capnp::word> CapnProtoRowInputStream::readMessage()
+{
+    // The stream starts with a 32-bit field holding the number of segments minus one.
+    uint32_t segment_count;
+    istr.readStrict(reinterpret_cast<char*>(&segment_count), sizeof(uint32_t));
+    // Prefix = that field plus one 32-bit size per segment. Do the math in size_t: in 32-bit arithmetic a count
+    // near UINT32_MAX wraps to a tiny value, the prefix is under-allocated and the loop below writes past its end.
+    const size_t prefix_size = (2 + static_cast<size_t>(segment_count)) * sizeof(uint32_t);
+    const size_t words_prefix_size = (static_cast<size_t>(segment_count) + 1) / 2 + 1;
+    auto prefix = kj::heapArray<capnp::word>(words_prefix_size);
+    auto prefix_chars = prefix.asChars();
+    ::memcpy(prefix_chars.begin(), &segment_count, sizeof(uint32_t));
+    // Read the size of each segment into the slots that follow the count field.
+    for (size_t i = 0; i <= segment_count; ++i)
+        istr.readStrict(prefix_chars.begin() + ((i + 1) * sizeof(uint32_t)), sizeof(uint32_t));
+
+    // The completed prefix determines the size of the whole message, prefix included.
+    const auto expected_words = capnp::expectedSizeInWordsFromPrefix(prefix);
+    const auto expected_bytes = expected_words * sizeof(capnp::word);
+    const auto data_size = expected_bytes - prefix_size;
+    auto msg = kj::heapArray<capnp::word>(expected_words);
+    auto msg_chars = msg.asChars();
+
+    // Copy the prefix bytes already consumed, then read the remainder straight from the stream.
+    ::memcpy(msg_chars.begin(), prefix_chars.begin(), prefix_size);
+    istr.readStrict(msg_chars.begin() + prefix_size, data_size);
+    return msg;
+}
 
 bool CapnProtoRowInputStream::read(MutableColumns & columns, RowReadExtension &)
 {
     if (istr.eof())
         return false;
 
-    // Read from underlying buffer directly
-    auto buf = istr.buffer();
-    auto base = reinterpret_cast<const capnp::word *>(istr.position());
-
-    // Check if there's enough bytes in the buffer to read the full message
-    kj::Array<capnp::word> heap_array;
-    auto array = kj::arrayPtr(base, buf.size() - istr.offset());
-    auto expected_words = capnp::expectedSizeInWordsFromPrefix(array);
-    if (expected_words * sizeof(capnp::word) > array.size())
-    {
-        // We'll need to reassemble the message in a contiguous buffer
-        heap_array = kj::heapArray<capnp::word>(expected_words);
-        istr.readStrict(heap_array.asChars().begin(), heap_array.asChars().size());
-        array = heap_array.asPtr();
-    }
-
+    auto array = readMessage();
 
 #if CAPNP_VERSION >= 8000
     capnp::UnalignedFlatArrayMessageReader msg(array);
@@ -281,13 +295,6 @@ bool CapnProtoRowInputStream::read(MutableColumns & columns, RowReadExtension &)
         }
     }
 
-    // Advance buffer position if used directly
-    if (heap_array.size() == 0)
-    {
-        auto parsed = (msg.getEnd() - base) * sizeof(capnp::word);
-        istr.position() += parsed;
-    }
-
     return true;
 }
 
diff --git a/dbms/src/Formats/CapnProtoRowInputStream.h b/dbms/src/Formats/CapnProtoRowInputStream.h
index c40827522aa..02548b5a0fc 100644
--- a/dbms/src/Formats/CapnProtoRowInputStream.h
+++ b/dbms/src/Formats/CapnProtoRowInputStream.h
@@ -38,6 +38,8 @@ public:
     bool read(MutableColumns & columns, RowReadExtension &) override;
 
 private:
+    kj::Array<capnp::word> readMessage();
+
     // Build a traversal plan from a sorted list of fields
     void createActions(const NestedFieldList & sortedFields, capnp::StructSchema reader);
 
diff --git a/dbms/src/Functions/FunctionsStringSearch.cpp b/dbms/src/Functions/FunctionsStringSearch.cpp
index 0af2d3d7007..7c22afc9020 100644
--- a/dbms/src/Functions/FunctionsStringSearch.cpp
+++ b/dbms/src/Functions/FunctionsStringSearch.cpp
@@ -1,8 +1,6 @@
-#include <Functions/FunctionsStringSearch.h>
+#include "FunctionsStringSearch.h"
 
 #include <Columns/ColumnFixedString.h>
-#include <Common/config.h>
-
 #include <DataTypes/DataTypeFixedString.h>
 #include <Functions/FunctionFactory.h>
 #include <Functions/Regexps.h>
@@ -11,12 +9,16 @@
 #include <re2/stringpiece.h>
 #include <Poco/UTF8String.h>
 #include <Common/Volnitsky.h>
-
 #include <algorithm>
 #include <memory>
 
-#ifdef __SSSE3__
-#    include <hs.h>
+#include <Common/config.h>
+#if USE_HYPERSCAN
+#   if __has_include(<hs/hs.h>)
+#       include <hs/hs.h>
+#   else
+#       include <hs.h>
+#   endif
 #endif
 
 #if USE_RE2_ST
@@ -617,7 +619,7 @@ struct MultiMatchAnyImpl
     {
         (void)FindAny;
         (void)FindAnyIndex;
-#ifdef __SSSE3__
+#if USE_HYPERSCAN
         using ScratchPtr = std::unique_ptr<hs_scratch_t, DB::MultiRegexps::HyperscanDeleter<decltype(&hs_free_scratch), &hs_free_scratch>>;
 
         const auto & hyperscan_regex = MultiRegexps::get<FindAnyIndex>(needles);
@@ -670,7 +672,7 @@ struct MultiMatchAnyImpl
                     res[i] = j + 1;
             }
         }
-#endif // __SSSE3__
+#endif // USE_HYPERSCAN
     }
 };
 
diff --git a/dbms/src/Functions/Regexps.h b/dbms/src/Functions/Regexps.h
index f5ad738425c..f6a37f94ddc 100644
--- a/dbms/src/Functions/Regexps.h
+++ b/dbms/src/Functions/Regexps.h
@@ -5,13 +5,17 @@
 #include <Common/OptimizedRegularExpression.h>
 #include <Common/ProfileEvents.h>
 #include <common/StringRef.h>
-
 #include <memory>
 #include <string>
 #include <vector>
 
-#ifdef __SSSE3__
-#    include <hs.h>
+#include <Common/config.h>
+#if USE_HYPERSCAN
+#   if __has_include(<hs/hs.h>)
+#       include <hs/hs.h>
+#   else
+#       include <hs.h>
+#   endif
 #endif
 
 namespace ProfileEvents
@@ -63,7 +67,7 @@ namespace Regexps
     }
 }
 
-#ifdef __SSSE3__
+#if USE_HYPERSCAN
 
 namespace MultiRegexps
 {
@@ -139,6 +143,6 @@ namespace MultiRegexps
     }
 }
 
-#endif // __SSSE3__
+#endif // USE_HYPERSCAN
 
 }
diff --git a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
index 0a1243758cf..0bf593a6078 100644
--- a/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
+++ b/dbms/src/Interpreters/ClusterProxy/SelectStreamFactory.cpp
@@ -58,7 +58,7 @@ namespace
 
 BlockInputStreamPtr createLocalStream(const ASTPtr & query_ast, const Context & context, QueryProcessingStage::Enum processed_stage)
 {
-    InterpreterSelectQuery interpreter{query_ast, context, Names{}, processed_stage};
+    InterpreterSelectQuery interpreter{query_ast, context, SelectQueryOptions(processed_stage)};
     BlockInputStreamPtr stream = interpreter.execute().in;
 
     /** Materialization is needed, since from remote servers the constants come materialized.
diff --git a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp
index 57a08994426..b6b731d8662 100644
--- a/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp
+++ b/dbms/src/Interpreters/ExecuteScalarSubqueriesVisitor.cpp
@@ -76,7 +76,7 @@ void ExecuteScalarSubqueriesMatcher::visit(const ASTSubquery & subquery, ASTPtr
 
     ASTPtr subquery_select = subquery.children.at(0);
     BlockIO res = InterpreterSelectWithUnionQuery(
-        subquery_select, subquery_context, {}, QueryProcessingStage::Complete, data.subquery_depth + 1).execute();
+        subquery_select, subquery_context, SelectQueryOptions(QueryProcessingStage::Complete, data.subquery_depth + 1)).execute();
 
     Block block;
     try
diff --git a/dbms/src/Interpreters/InterpreterExplainQuery.cpp b/dbms/src/Interpreters/InterpreterExplainQuery.cpp
index 971de38d11a..3994ccb5cc7 100644
--- a/dbms/src/Interpreters/InterpreterExplainQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterExplainQuery.cpp
@@ -51,7 +51,8 @@ BlockInputStreamPtr InterpreterExplainQuery::executeImpl()
     }
     else if (ast.getKind() == ASTExplainQuery::AnalyzedSyntax)
     {
-        InterpreterSelectWithUnionQuery interpreter(ast.children.at(0), context, {}, QueryProcessingStage::FetchColumns, 0, true, true);
+        InterpreterSelectWithUnionQuery interpreter(ast.children.at(0), context,
+                                                    SelectQueryOptions(QueryProcessingStage::FetchColumns).analyze().modify());
         interpreter.getQuery()->format(IAST::FormatSettings(ss, false));
     }
 
diff --git a/dbms/src/Interpreters/InterpreterFactory.cpp b/dbms/src/Interpreters/InterpreterFactory.cpp
index b2497481361..54611860227 100644
--- a/dbms/src/Interpreters/InterpreterFactory.cpp
+++ b/dbms/src/Interpreters/InterpreterFactory.cpp
@@ -84,12 +84,12 @@ std::unique_ptr<IInterpreter> InterpreterFactory::get(ASTPtr & query, Context &
     {
         /// This is internal part of ASTSelectWithUnionQuery.
         /// Even if there is SELECT without union, it is represented by ASTSelectWithUnionQuery with single ASTSelectQuery as a child.
-        return std::make_unique<InterpreterSelectQuery>(query, context, Names{}, stage);
+        return std::make_unique<InterpreterSelectQuery>(query, context, SelectQueryOptions(stage));
     }
     else if (query->as<ASTSelectWithUnionQuery>())
     {
         ProfileEvents::increment(ProfileEvents::SelectQuery);
-        return std::make_unique<InterpreterSelectWithUnionQuery>(query, context, Names{}, stage);
+        return std::make_unique<InterpreterSelectWithUnionQuery>(query, context, SelectQueryOptions(stage));
     }
     else if (query->as<ASTInsertQuery>())
     {
diff --git a/dbms/src/Interpreters/InterpreterInsertQuery.cpp b/dbms/src/Interpreters/InterpreterInsertQuery.cpp
index 444e41632fd..f74bd026090 100644
--- a/dbms/src/Interpreters/InterpreterInsertQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterInsertQuery.cpp
@@ -128,7 +128,7 @@ BlockIO InterpreterInsertQuery::execute()
     if (query.select)
     {
         /// Passing 1 as subquery_depth will disable limiting size of intermediate result.
-        InterpreterSelectWithUnionQuery interpreter_select{query.select, context, {}, QueryProcessingStage::Complete, 1};
+        InterpreterSelectWithUnionQuery interpreter_select{query.select, context, SelectQueryOptions(QueryProcessingStage::Complete, 1)};
 
         res.in = interpreter_select.execute().in;
 
diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.cpp b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
index 0dfddc10dea..182927b7104 100644
--- a/dbms/src/Interpreters/InterpreterSelectQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterSelectQuery.cpp
@@ -78,13 +78,9 @@ namespace ErrorCodes
 InterpreterSelectQuery::InterpreterSelectQuery(
     const ASTPtr & query_ptr_,
     const Context & context_,
-    const Names & required_result_column_names,
-    QueryProcessingStage::Enum to_stage_,
-    size_t subquery_depth_,
-    bool only_analyze_,
-    bool modify_inplace)
-    : InterpreterSelectQuery(
-          query_ptr_, context_, nullptr, nullptr, required_result_column_names, to_stage_, subquery_depth_, only_analyze_, modify_inplace)
+    const SelectQueryOptions & options,
+    const Names & required_result_column_names)
+    : InterpreterSelectQuery(query_ptr_, context_, nullptr, nullptr, options, required_result_column_names)
 {
 }
 
@@ -92,23 +88,17 @@ InterpreterSelectQuery::InterpreterSelectQuery(
     const ASTPtr & query_ptr_,
     const Context & context_,
     const BlockInputStreamPtr & input_,
-    QueryProcessingStage::Enum to_stage_,
-    bool only_analyze_,
-    bool modify_inplace)
-    : InterpreterSelectQuery(query_ptr_, context_, input_, nullptr, Names{}, to_stage_, 0, only_analyze_, modify_inplace)
-{
-}
+    const SelectQueryOptions & options)
+    : InterpreterSelectQuery(query_ptr_, context_, input_, nullptr, options.copy().noSubquery())
+{}
 
 InterpreterSelectQuery::InterpreterSelectQuery(
     const ASTPtr & query_ptr_,
     const Context & context_,
     const StoragePtr & storage_,
-    QueryProcessingStage::Enum to_stage_,
-    bool only_analyze_,
-    bool modify_inplace)
-    : InterpreterSelectQuery(query_ptr_, context_, nullptr, storage_, Names{}, to_stage_, 0, only_analyze_, modify_inplace)
-{
-}
+    const SelectQueryOptions & options)
+    : InterpreterSelectQuery(query_ptr_, context_, nullptr, storage_, options.copy().noSubquery())
+{}
 
 InterpreterSelectQuery::~InterpreterSelectQuery() = default;
 
@@ -133,17 +123,12 @@ InterpreterSelectQuery::InterpreterSelectQuery(
     const Context & context_,
     const BlockInputStreamPtr & input_,
     const StoragePtr & storage_,
-    const Names & required_result_column_names,
-    QueryProcessingStage::Enum to_stage_,
-    size_t subquery_depth_,
-    bool only_analyze_,
-    bool modify_inplace)
+    const SelectQueryOptions & options_,
+    const Names & required_result_column_names)
+    : options(options_)
     /// NOTE: the query almost always should be cloned because it will be modified during analysis.
-    : query_ptr(modify_inplace ? query_ptr_ : query_ptr_->clone())
+    , query_ptr(options.modify_inplace ? query_ptr_ : query_ptr_->clone())
     , context(context_)
-    , to_stage(to_stage_)
-    , subquery_depth(subquery_depth_)
-    , only_analyze(only_analyze_)
     , storage(storage_)
     , input(input_)
     , log(&Logger::get("InterpreterSelectQuery"))
@@ -151,7 +136,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
     initSettings();
     const Settings & settings = context.getSettingsRef();
 
-    if (settings.max_subquery_depth && subquery_depth > settings.max_subquery_depth)
+    if (settings.max_subquery_depth && options.subquery_depth > settings.max_subquery_depth)
         throw Exception("Too deep subqueries. Maximum: " + settings.max_subquery_depth.toString(),
             ErrorCodes::TOO_DEEP_SUBQUERIES);
 
@@ -189,7 +174,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
     {
         /// Read from subquery.
         interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
-            table_expression, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1, only_analyze, modify_inplace);
+            table_expression, getSubqueryContext(context), options.subquery(), required_columns);
 
         source_header = interpreter_subquery->getSampleBlock();
     }
@@ -215,13 +200,14 @@ InterpreterSelectQuery::InterpreterSelectQuery(
     if (storage)
         table_lock = storage->lockStructureForShare(false, context.getCurrentQueryId());
 
-    syntax_analyzer_result = SyntaxAnalyzer(context, subquery_depth).analyze(
+    syntax_analyzer_result = SyntaxAnalyzer(context, options).analyze(
         query_ptr, source_header.getNamesAndTypesList(), required_result_column_names, storage);
     query_analyzer = std::make_unique<ExpressionAnalyzer>(
         query_ptr, syntax_analyzer_result, context, NamesAndTypesList(),
-        NameSet(required_result_column_names.begin(), required_result_column_names.end()), subquery_depth, !only_analyze);
+        NameSet(required_result_column_names.begin(), required_result_column_names.end()),
+        options.subquery_depth, !options.only_analyze);
 
-    if (!only_analyze)
+    if (!options.only_analyze)
     {
         if (query.sample_size() && (input || !storage || !storage->supportsSampling()))
             throw Exception("Illegal SAMPLE: table doesn't support sampling", ErrorCodes::SAMPLING_NOT_SUPPORTED);
@@ -238,7 +224,7 @@ InterpreterSelectQuery::InterpreterSelectQuery(
                 context.addExternalTable(it.first, it.second);
     }
 
-    if (!only_analyze || modify_inplace)
+    if (!options.only_analyze || options.modify_inplace)
     {
         if (query_analyzer->isRewriteSubqueriesPredicate())
         {
@@ -247,11 +233,8 @@ InterpreterSelectQuery::InterpreterSelectQuery(
                 interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
                     table_expression,
                     getSubqueryContext(context),
-                    required_columns,
-                    QueryProcessingStage::Complete,
-                    subquery_depth + 1,
-                    only_analyze,
-                    modify_inplace);
+                    options.subquery(),
+                    required_columns);
         }
     }
 
@@ -304,7 +287,7 @@ Block InterpreterSelectQuery::getSampleBlock()
 BlockIO InterpreterSelectQuery::execute()
 {
     Pipeline pipeline;
-    executeImpl(pipeline, input, only_analyze);
+    executeImpl(pipeline, input, options.only_analyze);
     executeUnion(pipeline);
 
     BlockIO res;
@@ -315,7 +298,7 @@ BlockIO InterpreterSelectQuery::execute()
 BlockInputStreams InterpreterSelectQuery::executeWithMultipleStreams()
 {
     Pipeline pipeline;
-    executeImpl(pipeline, input, only_analyze);
+    executeImpl(pipeline, input, options.only_analyze);
     return pipeline.streams;
 }
 
@@ -325,10 +308,10 @@ InterpreterSelectQuery::AnalysisResult InterpreterSelectQuery::analyzeExpression
 
     /// Do I need to perform the first part of the pipeline - running on remote servers during distributed processing.
     res.first_stage = from_stage < QueryProcessingStage::WithMergeableState
-        && to_stage >= QueryProcessingStage::WithMergeableState;
+        && options.to_stage >= QueryProcessingStage::WithMergeableState;
     /// Do I need to execute the second part of the pipeline - running on the initiating server during distributed processing.
     res.second_stage = from_stage <= QueryProcessingStage::WithMergeableState
-        && to_stage > QueryProcessingStage::WithMergeableState;
+        && options.to_stage > QueryProcessingStage::WithMergeableState;
 
     /** First we compose a chain of actions and remember the necessary steps from it.
         *  Regardless of from_stage and to_stage, we will compose a complete sequence of actions to perform optimization and
@@ -553,16 +536,16 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt
         expressions = analyzeExpressions(from_stage, false);
 
         if (from_stage == QueryProcessingStage::WithMergeableState &&
-            to_stage == QueryProcessingStage::WithMergeableState)
+            options.to_stage == QueryProcessingStage::WithMergeableState)
             throw Exception("Distributed on Distributed is not supported", ErrorCodes::NOT_IMPLEMENTED);
 
         /** Read the data from Storage. from_stage - to what stage the request was completed in Storage. */
         executeFetchColumns(from_stage, pipeline, expressions.prewhere_info, expressions.columns_to_remove_after_prewhere);
 
-        LOG_TRACE(log, QueryProcessingStage::toString(from_stage) << " -> " << QueryProcessingStage::toString(to_stage));
+        LOG_TRACE(log, QueryProcessingStage::toString(from_stage) << " -> " << QueryProcessingStage::toString(options.to_stage));
     }
 
-    if (to_stage > QueryProcessingStage::FetchColumns)
+    if (options.to_stage > QueryProcessingStage::FetchColumns)
     {
         /// Do I need to aggregate in a separate row rows that have not passed max_rows_to_group_by.
         bool aggregate_overflow_row =
@@ -575,7 +558,7 @@ void InterpreterSelectQuery::executeImpl(Pipeline & pipeline, const BlockInputSt
         /// Do I need to immediately finalize the aggregate functions after the aggregation?
         bool aggregate_final =
             expressions.need_aggregate &&
-            to_stage > QueryProcessingStage::WithMergeableState &&
+            options.to_stage > QueryProcessingStage::WithMergeableState &&
             !query.group_by_with_totals && !query.group_by_with_rollup && !query.group_by_with_cube;
 
         if (expressions.first_stage)
@@ -938,7 +921,7 @@ void InterpreterSelectQuery::executeFetchColumns(
 
     /// Limitation on the number of columns to read.
     /// It's not applied in 'only_analyze' mode, because the query could be analyzed without removal of unnecessary columns.
-    if (!only_analyze && settings.max_columns_to_read && required_columns.size() > settings.max_columns_to_read)
+    if (!options.only_analyze && settings.max_columns_to_read && required_columns.size() > settings.max_columns_to_read)
         throw Exception("Limit for number of columns to read exceeded. "
             "Requested: " + toString(required_columns.size())
             + ", maximum: " + settings.max_columns_to_read.toString(),
@@ -1000,7 +983,8 @@ void InterpreterSelectQuery::executeFetchColumns(
                 throw Exception("Subquery expected", ErrorCodes::LOGICAL_ERROR);
 
             interpreter_subquery = std::make_unique<InterpreterSelectWithUnionQuery>(
-                subquery, getSubqueryContext(context), required_columns, QueryProcessingStage::Complete, subquery_depth + 1, only_analyze);
+                subquery, getSubqueryContext(context),
+                options.copy().subquery().noModify(), required_columns);
 
             if (query_analyzer->hasAggregation())
                 interpreter_subquery->ignoreWithTotals();
@@ -1057,7 +1041,7 @@ void InterpreterSelectQuery::executeFetchColumns(
               *  additionally on each remote server, because these limits are checked per block of data processed,
               *  and remote servers may process way more blocks of data than are received by initiator.
               */
-            if (to_stage == QueryProcessingStage::Complete)
+            if (options.to_stage == QueryProcessingStage::Complete)
             {
                 limits.min_execution_speed = settings.min_execution_speed;
                 limits.max_execution_speed = settings.max_execution_speed;
@@ -1072,7 +1056,7 @@ void InterpreterSelectQuery::executeFetchColumns(
             {
                 stream->setLimits(limits);
 
-                if (to_stage == QueryProcessingStage::Complete)
+                if (options.to_stage == QueryProcessingStage::Complete)
                     stream->setQuota(quota);
             });
         }
diff --git a/dbms/src/Interpreters/InterpreterSelectQuery.h b/dbms/src/Interpreters/InterpreterSelectQuery.h
index fa4651c12ff..4d8c4a7a39b 100644
--- a/dbms/src/Interpreters/InterpreterSelectQuery.h
+++ b/dbms/src/Interpreters/InterpreterSelectQuery.h
@@ -3,12 +3,13 @@
 #include <memory>
 
 #include <Core/QueryProcessingStage.h>
+#include <Parsers/ASTSelectQuery.h>
 #include <DataStreams/IBlockInputStream.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/ExpressionActions.h>
 #include <Interpreters/ExpressionAnalyzer.h>
 #include <Interpreters/IInterpreter.h>
-#include <Parsers/ASTSelectQuery.h>
+#include <Interpreters/SelectQueryOptions.h>
 #include <Storages/SelectQueryInfo.h>
 
 
@@ -23,6 +24,7 @@ class InterpreterSelectWithUnionQuery;
 struct SyntaxAnalyzerResult;
 using SyntaxAnalyzerResultPtr = std::shared_ptr<const SyntaxAnalyzerResult>;
 
+
 /** Interprets the SELECT query. Returns the stream of blocks with the results of the query before `to_stage` stage.
   */
 class InterpreterSelectQuery : public IInterpreter
@@ -32,14 +34,6 @@ public:
      * query_ptr
      * - A query AST to interpret.
      *
-     * to_stage
-     * - the stage to which the query is to be executed. By default - till to the end.
-     *   You can perform till the intermediate aggregation state, which are combined from different servers for distributed query processing.
-     *
-     * subquery_depth
-     * - to control the limit on the depth of nesting of subqueries. For subqueries, a value that is incremented by one is passed;
-     *   for INSERT SELECT, a value 1 is passed instead of 0.
-     *
      * required_result_column_names
      * - don't calculate all columns except the specified ones from the query
      *  - it is used to remove calculation (and reading) of unnecessary columns from subqueries.
@@ -49,29 +43,22 @@ public:
     InterpreterSelectQuery(
         const ASTPtr & query_ptr_,
         const Context & context_,
-        const Names & required_result_column_names = Names{},
-        QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
-        size_t subquery_depth_ = 0,
-        bool only_analyze_ = false,
-        bool modify_inplace = false);
+        const SelectQueryOptions &,
+        const Names & required_result_column_names = Names{});
 
     /// Read data not from the table specified in the query, but from the prepared source `input`.
     InterpreterSelectQuery(
         const ASTPtr & query_ptr_,
         const Context & context_,
         const BlockInputStreamPtr & input_,
-        QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
-        bool only_analyze_ = false,
-        bool modify_inplace = false);
+        const SelectQueryOptions & = {});
 
     /// Read data not from the table specified in the query, but from the specified `storage_`.
     InterpreterSelectQuery(
         const ASTPtr & query_ptr_,
         const Context & context_,
         const StoragePtr & storage_,
-        QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
-        bool only_analyze_ = false,
-        bool modify_inplace = false);
+        const SelectQueryOptions & = {});
 
     ~InterpreterSelectQuery() override;
 
@@ -93,11 +80,8 @@ private:
         const Context & context_,
         const BlockInputStreamPtr & input_,
         const StoragePtr & storage_,
-        const Names & required_result_column_names,
-        QueryProcessingStage::Enum to_stage_,
-        size_t subquery_depth_,
-        bool only_analyze_,
-        bool modify_inplace);
+        const SelectQueryOptions &,
+        const Names & required_result_column_names = {});
 
     ASTSelectQuery & getSelectQuery() { return query_ptr->as<ASTSelectQuery &>(); }
 
@@ -223,10 +207,9 @@ private:
       */
     void initSettings();
 
+    const SelectQueryOptions options;
     ASTPtr query_ptr;
     Context context;
-    QueryProcessingStage::Enum to_stage;
-    size_t subquery_depth = 0;
     NamesAndTypesList source_columns;
     SyntaxAnalyzerResultPtr syntax_analyzer_result;
     std::unique_ptr<ExpressionAnalyzer> query_analyzer;
@@ -234,9 +217,6 @@ private:
     /// How many streams we ask for storage to produce, and in how many threads we will do further processing.
     size_t max_streams = 1;
 
-    /// The object was created only for query analysis.
-    bool only_analyze = false;
-
     /// List of columns to read to execute the query.
     Names required_columns;
     /// Structure of query source (table, subquery, etc).
diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp
index 34918023d15..4561affbd10 100644
--- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp
+++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.cpp
@@ -26,15 +26,11 @@ namespace ErrorCodes
 InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
     const ASTPtr & query_ptr_,
     const Context & context_,
-    const Names & required_result_column_names,
-    QueryProcessingStage::Enum to_stage_,
-    size_t subquery_depth_,
-    bool only_analyze,
-    bool modify_inplace)
-    : query_ptr(query_ptr_),
-    context(context_),
-    to_stage(to_stage_),
-    subquery_depth(subquery_depth_)
+    const SelectQueryOptions & options_,
+    const Names & required_result_column_names)
+    : options(options_),
+    query_ptr(query_ptr_),
+    context(context_)
 {
     const auto & ast = query_ptr->as<ASTSelectWithUnionQuery &>();
 
@@ -57,7 +53,7 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
         /// We use it to determine positions of 'required_result_column_names' in SELECT clause.
 
         Block full_result_header = InterpreterSelectQuery(
-            ast.list_of_selects->children.at(0), context, Names(), to_stage, subquery_depth, true).getSampleBlock();
+            ast.list_of_selects->children.at(0), context, options.copy().analyze().noModify()).getSampleBlock();
 
         std::vector<size_t> positions_of_required_result_columns(required_result_column_names.size());
         for (size_t required_result_num = 0, size = required_result_column_names.size(); required_result_num < size; ++required_result_num)
@@ -66,7 +62,7 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
         for (size_t query_num = 1; query_num < num_selects; ++query_num)
         {
             Block full_result_header_for_current_select = InterpreterSelectQuery(
-                ast.list_of_selects->children.at(query_num), context, Names(), to_stage, subquery_depth, true).getSampleBlock();
+                ast.list_of_selects->children.at(query_num), context, options.copy().analyze().noModify()).getSampleBlock();
 
             if (full_result_header_for_current_select.columns() != full_result_header.columns())
                 throw Exception("Different number of columns in UNION ALL elements:\n"
@@ -89,11 +85,8 @@ InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
         nested_interpreters.emplace_back(std::make_unique<InterpreterSelectQuery>(
             ast.list_of_selects->children.at(query_num),
             context,
-            current_required_result_column_names,
-            to_stage,
-            subquery_depth,
-            only_analyze,
-            modify_inplace));
+            options,
+            current_required_result_column_names));
     }
 
     /// Determine structure of the result.
@@ -179,7 +172,7 @@ Block InterpreterSelectWithUnionQuery::getSampleBlock(
         return cache[key];
     }
 
-    return cache[key] = InterpreterSelectWithUnionQuery(query_ptr, context, {}, QueryProcessingStage::Complete, 0, true).getSampleBlock();
+    return cache[key] = InterpreterSelectWithUnionQuery(query_ptr, context, SelectQueryOptions().analyze()).getSampleBlock();
 }
 
 
diff --git a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h
index 44131a9d100..84d562a5308 100644
--- a/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h
+++ b/dbms/src/Interpreters/InterpreterSelectWithUnionQuery.h
@@ -3,6 +3,7 @@
 #include <Core/QueryProcessingStage.h>
 #include <Interpreters/Context.h>
 #include <Interpreters/IInterpreter.h>
+#include <Interpreters/SelectQueryOptions.h>
 
 
 namespace DB
@@ -19,11 +20,8 @@ public:
     InterpreterSelectWithUnionQuery(
         const ASTPtr & query_ptr_,
         const Context & context_,
-        const Names & required_result_column_names = Names{},
-        QueryProcessingStage::Enum to_stage_ = QueryProcessingStage::Complete,
-        size_t subquery_depth_ = 0,
-        bool only_analyze = false,
-        bool modify_inplace = false);
+        const SelectQueryOptions &,
+        const Names & required_result_column_names = {});
 
     ~InterpreterSelectWithUnionQuery() override;
 
@@ -43,10 +41,9 @@ public:
     ASTPtr getQuery() const { return query_ptr; }
 
 private:
+    const SelectQueryOptions options;
     ASTPtr query_ptr;
     Context context;
-    QueryProcessingStage::Enum to_stage;
-    size_t subquery_depth;
 
     std::vector<std::unique_ptr<InterpreterSelectQuery>> nested_interpreters;
 
diff --git a/dbms/src/Interpreters/MutationsInterpreter.cpp b/dbms/src/Interpreters/MutationsInterpreter.cpp
index e9f9310f32f..6415799af10 100644
--- a/dbms/src/Interpreters/MutationsInterpreter.cpp
+++ b/dbms/src/Interpreters/MutationsInterpreter.cpp
@@ -72,7 +72,7 @@ bool MutationsInterpreter::isStorageTouchedByMutations() const
     context_copy.getSettingsRef().merge_tree_uniform_read_distribution = 0;
     context_copy.getSettingsRef().max_threads = 1;
 
-    BlockInputStreamPtr in = InterpreterSelectQuery(select, context_copy, storage, QueryProcessingStage::Complete).execute().in;
+    BlockInputStreamPtr in = InterpreterSelectQuery(select, context_copy, storage).execute().in;
 
     Block block = in->read();
     if (!block.rows())
@@ -367,7 +367,7 @@ void MutationsInterpreter::prepare(bool dry_run)
         select->children.push_back(where_expression);
     }
 
-    interpreter_select = std::make_unique<InterpreterSelectQuery>(select, context, storage, QueryProcessingStage::Complete, dry_run);
+    interpreter_select = std::make_unique<InterpreterSelectQuery>(select, context, storage, SelectQueryOptions().analyze(dry_run));
 
     is_prepared = true;
 }
diff --git a/dbms/src/Interpreters/SelectQueryOptions.h b/dbms/src/Interpreters/SelectQueryOptions.h
new file mode 100644
index 00000000000..0cf5827be3c
--- /dev/null
+++ b/dbms/src/Interpreters/SelectQueryOptions.h
@@ -0,0 +1,76 @@
+#pragma once
+
+#include <Core/QueryProcessingStage.h>
+
+namespace DB
+{
+
+/**
+ * to_stage
+ * - the stage to which the query is to be executed. By default - till to the end.
+ *   You can perform till the intermediate aggregation state, which are combined from different servers for distributed query processing.
+ *
+ * subquery_depth
+ * - to control the limit on the depth of nesting of subqueries. For subqueries, a value that is incremented by one is passed;
+ *   for INSERT SELECT, a value 1 is passed instead of 0.
+ *
+ * only_analyze
+ * - the object was created only for query analysis.
+ *
+ * remove_duplicates
+ * - enabled by interpretSubquery() for subqueries: SyntaxAnalyzer renames and removes duplicated result columns, as there's no need to pass them in results that are used indirectly.
+ */
+struct SelectQueryOptions
+{
+    QueryProcessingStage::Enum to_stage;
+    size_t subquery_depth;
+    bool only_analyze;
+    bool modify_inplace;
+    bool remove_duplicates;
+
+    SelectQueryOptions(QueryProcessingStage::Enum stage = QueryProcessingStage::Complete, size_t depth = 0)
+        : to_stage(stage)
+        , subquery_depth(depth)
+        , only_analyze(false)
+        , modify_inplace(false)
+        , remove_duplicates(false)
+    {}
+
+    SelectQueryOptions copy() const { return *this; }
+
+    SelectQueryOptions subquery() const
+    {
+        SelectQueryOptions out = *this;
+        out.to_stage = QueryProcessingStage::Complete;
+        ++out.subquery_depth;
+        return out;
+    }
+
+    SelectQueryOptions & analyze(bool value = true)
+    {
+        only_analyze = value;
+        return *this;
+    }
+
+    SelectQueryOptions & modify(bool value = true)
+    {
+        modify_inplace = value;
+        return *this;
+    }
+
+    SelectQueryOptions & noModify() { return modify(false); }
+
+    SelectQueryOptions & removeDuplicates(bool value = true)
+    {
+        remove_duplicates = value;
+        return *this;
+    }
+
+    SelectQueryOptions & noSubquery()
+    {
+        subquery_depth = 0;
+        return *this;
+    }
+};
+
+}
diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp
index e54026deb65..5219f7cbdaf 100644
--- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp
+++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp
@@ -123,24 +123,69 @@ bool hasArrayJoin(const ASTPtr & ast)
     return false;
 }
 
+/// Gives unique aliases (name_1, name_2, ...) to repeated names in the SELECT list instead of dropping them, to keep the number of columns for 'GLOBAL IN (SELECT 1 AS a, a)'
+void renameDuplicatedColumns(const ASTSelectQuery * select_query)
+{
+    ASTs & elements = select_query->select_expression_list->children;
+
+    std::set<String> all_column_names;
+    std::set<String> assigned_column_names;
+
+    for (auto & expr : elements)
+        all_column_names.insert(expr->getAliasOrColumnName());
+
+    for (auto & expr : elements)
+    {
+        auto name = expr->getAliasOrColumnName();
+
+        if (!assigned_column_names.insert(name).second)
+        {
+            size_t i = 1;
+            while (all_column_names.end() != all_column_names.find(name + "_" + toString(i)))
+                ++i;
+
+            name = name + "_" + toString(i);
+            expr = expr->clone();   /// Cancels fuse of the same expressions in the tree.
+            expr->setAlias(name);
+
+            all_column_names.insert(name);
+            assigned_column_names.insert(name);
+        }
+    }
+}
+
 /// Sometimes we have to calculate more columns in SELECT clause than will be returned from query.
 /// This is the case when we have DISTINCT or arrayJoin: we require more columns in SELECT even if we need less columns in result.
-void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns)
+/// Also we have to remove duplicates in case of GLOBAL subqueries. Their results are placed into tables, so duplicates are impossible.
+void removeUnneededColumnsFromSelectClause(const ASTSelectQuery * select_query, const Names & required_result_columns, bool remove_dups)
 {
-    if (required_result_columns.empty())
-        return;
-
     ASTs & elements = select_query->select_expression_list->children;
 
+    std::map<String, size_t> required_columns_with_duplicate_count;
+
+    if (!required_result_columns.empty())
+    {
+        /// Some columns may be queried multiple times, like SELECT x, y, y FROM table.
+        for (const auto & name : required_result_columns)
+        {
+            if (remove_dups)
+                required_columns_with_duplicate_count[name] = 1;
+            else
+                ++required_columns_with_duplicate_count[name];
+        }
+    }
+    else if (remove_dups)
+    {
+        /// Even if we have no requirements there could be duplicates because of asterisks, e.g. SELECT *, t.*
+        for (const auto & elem : elements)
+            required_columns_with_duplicate_count.emplace(elem->getAliasOrColumnName(), 1);
+    }
+    else
+        return;
+
     ASTs new_elements;
     new_elements.reserve(elements.size());
 
-    /// Some columns may be queried multiple times, like SELECT x, y, y FROM table.
-    /// In that case we keep them exactly same number of times.
-    std::map<String, size_t> required_columns_with_duplicate_count;
-    for (const auto & name : required_result_columns)
-        ++required_columns_with_duplicate_count[name];
-
     for (const auto & elem : elements)
     {
         String name = elem->getAliasOrColumnName();
@@ -645,6 +690,9 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
 
     if (select_query)
     {
+        if (remove_duplicates)
+            renameDuplicatedColumns(select_query);
+
         if (const ASTTablesInSelectQueryElement * node = select_query->join())
         {
             if (settings.enable_optimize_predicate_expression)
@@ -688,7 +736,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze(
     /// Must be after 'normalizeTree' (after expanding aliases, for aliases not get lost)
     ///  and before 'executeScalarSubqueries', 'analyzeAggregation', etc. to avoid excessive calculations.
     if (select_query)
-        removeUnneededColumnsFromSelectClause(select_query, required_result_columns);
+        removeUnneededColumnsFromSelectClause(select_query, required_result_columns, remove_duplicates);
 
     /// Executing scalar subqueries - replacing them with constant values.
     executeScalarSubqueries(query, context, subquery_depth);
diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.h b/dbms/src/Interpreters/SyntaxAnalyzer.h
index 4450881ee68..90ab01c2397 100644
--- a/dbms/src/Interpreters/SyntaxAnalyzer.h
+++ b/dbms/src/Interpreters/SyntaxAnalyzer.h
@@ -2,6 +2,7 @@
 
 #include <Interpreters/AnalyzedJoin.h>
 #include <Interpreters/Aliases.h>
+#include <Interpreters/SelectQueryOptions.h>
 
 namespace DB
 {
@@ -55,9 +56,10 @@ using SyntaxAnalyzerResultPtr = std::shared_ptr<const SyntaxAnalyzerResult>;
 class SyntaxAnalyzer
 {
 public:
-    SyntaxAnalyzer(const Context & context_, size_t subquery_depth_ = 0)
+    SyntaxAnalyzer(const Context & context_, const SelectQueryOptions & select_options = {})
         : context(context_)
-        , subquery_depth(subquery_depth_)
+        , subquery_depth(select_options.subquery_depth)
+        , remove_duplicates(select_options.remove_duplicates)
     {}
 
     SyntaxAnalyzerResultPtr analyze(
@@ -69,6 +71,7 @@ public:
 private:
     const Context & context;
     size_t subquery_depth;
+    bool remove_duplicates;
 };
 
 }
diff --git a/dbms/src/Interpreters/interpretSubquery.cpp b/dbms/src/Interpreters/interpretSubquery.cpp
index 4f5b2c1c939..93f9c728712 100644
--- a/dbms/src/Interpreters/interpretSubquery.cpp
+++ b/dbms/src/Interpreters/interpretSubquery.cpp
@@ -41,6 +41,8 @@ std::shared_ptr<InterpreterSelectWithUnionQuery> interpretSubquery(
     subquery_settings.extremes = 0;
     subquery_context.setSettings(subquery_settings);
 
+    auto subquery_options = SelectQueryOptions(QueryProcessingStage::Complete, subquery_depth).subquery();
+
     ASTPtr query;
     if (table || function)
     {
@@ -83,48 +85,10 @@ std::shared_ptr<InterpreterSelectWithUnionQuery> interpretSubquery(
     else
     {
         query = subquery->children.at(0);
-
-        /** Columns with the same name can be specified in a subquery. For example, SELECT x, x FROM t
-          * This is bad, because the result of such a query can not be saved to the table, because the table can not have the same name columns.
-          * Saving to the table is required for GLOBAL subqueries.
-          *
-          * To avoid this situation, we will rename the same columns.
-          */
-
-        std::set<std::string> all_column_names;
-        std::set<std::string> assigned_column_names;
-
-        if (const auto * select_with_union = query->as<ASTSelectWithUnionQuery>())
-        {
-            if (const auto * select = select_with_union->list_of_selects->children.at(0)->as<ASTSelectQuery>())
-            {
-                for (auto & expr : select->select_expression_list->children)
-                    all_column_names.insert(expr->getAliasOrColumnName());
-
-                for (auto & expr : select->select_expression_list->children)
-                {
-                    auto name = expr->getAliasOrColumnName();
-
-                    if (!assigned_column_names.insert(name).second)
-                    {
-                        size_t i = 1;
-                        while (all_column_names.end() != all_column_names.find(name + "_" + toString(i)))
-                            ++i;
-
-                        name = name + "_" + toString(i);
-                        expr = expr->clone();   /// Cancels fuse of the same expressions in the tree.
-                        expr->setAlias(name);
-
-                        all_column_names.insert(name);
-                        assigned_column_names.insert(name);
-                    }
-                }
-            }
-        }
+        subquery_options.removeDuplicates();
     }
 
-    return std::make_shared<InterpreterSelectWithUnionQuery>(
-        query, subquery_context, required_source_columns, QueryProcessingStage::Complete, subquery_depth + 1);
+    return std::make_shared<InterpreterSelectWithUnionQuery>(query, subquery_context, subquery_options, required_source_columns);
 }
 
 }
diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp
index 96705c3ab39..03a39ba3bd7 100644
--- a/dbms/src/Storages/AlterCommands.cpp
+++ b/dbms/src/Storages/AlterCommands.cpp
@@ -205,7 +205,7 @@ void AlterCommand::apply(ColumnsDescription & columns_description, IndicesDescri
     }
     else if (type == MODIFY_ORDER_BY)
     {
-        if (!primary_key_ast)
+        if (!primary_key_ast && order_by_ast)
         {
             /// Primary and sorting key become independent after this ALTER so we have to
             /// save the old ORDER BY expression as the new primary key.
diff --git a/dbms/src/Storages/StorageBuffer.cpp b/dbms/src/Storages/StorageBuffer.cpp
index c83dccc2521..b92d90231c1 100644
--- a/dbms/src/Storages/StorageBuffer.cpp
+++ b/dbms/src/Storages/StorageBuffer.cpp
@@ -25,6 +25,8 @@
 #include <Poco/Ext/ThreadNumber.h>
 
 #include <ext/range.h>
+#include <DataStreams/FilterBlockInputStream.h>
+#include <DataStreams/ExpressionBlockInputStream.h>
 
 
 namespace ProfileEvents
@@ -221,7 +223,21 @@ BlockInputStreams StorageBuffer::read(
       */
     if (processed_stage > QueryProcessingStage::FetchColumns)
         for (auto & stream : streams_from_buffers)
-            stream = InterpreterSelectQuery(query_info.query, context, stream, processed_stage).execute().in;
+            stream = InterpreterSelectQuery(query_info.query, context, stream, SelectQueryOptions(processed_stage)).execute().in;
+
+    if (query_info.prewhere_info)
+    {
+        for (auto & stream : streams_from_buffers)
+            stream = std::make_shared<FilterBlockInputStream>(stream, query_info.prewhere_info->prewhere_actions,
+                    query_info.prewhere_info->prewhere_column_name, query_info.prewhere_info->remove_prewhere_column);
+
+        if (query_info.prewhere_info->alias_actions)
+        {
+            for (auto & stream : streams_from_buffers)
+                stream = std::make_shared<ExpressionBlockInputStream>(stream, query_info.prewhere_info->alias_actions);
+
+        }
+    }
 
     streams_from_dst.insert(streams_from_dst.end(), streams_from_buffers.begin(), streams_from_buffers.end());
     return streams_from_dst;
diff --git a/dbms/src/Storages/StorageBuffer.h b/dbms/src/Storages/StorageBuffer.h
index f32a4c72c43..9e7127f8417 100644
--- a/dbms/src/Storages/StorageBuffer.h
+++ b/dbms/src/Storages/StorageBuffer.h
@@ -74,7 +74,15 @@ public:
     void rename(const String & /*new_path_to_db*/, const String & /*new_database_name*/, const String & new_table_name) override { name = new_table_name; }
 
     bool supportsSampling() const override { return true; }
-    bool supportsPrewhere() const override { return false; }
+    /// PREWHERE is supported only when a destination table exists, is not this buffer itself, and supports PREWHERE.
+    bool supportsPrewhere() const override
+    {
+        if (no_destination)
+            return false;
+        auto destination = global_context.tryGetTable(destination_database, destination_table);
+        const bool is_other_table = destination && destination.get() != this;
+        return is_other_table && destination->supportsPrewhere();
+    }
     bool supportsFinal() const override { return true; }
     bool supportsIndexForIn() const override { return true; }
 
diff --git a/dbms/src/Storages/StorageDistributed.cpp b/dbms/src/Storages/StorageDistributed.cpp
index 27963a914b2..61fa4c0d3e6 100644
--- a/dbms/src/Storages/StorageDistributed.cpp
+++ b/dbms/src/Storages/StorageDistributed.cpp
@@ -286,7 +286,8 @@ BlockInputStreams StorageDistributed::read(
     const auto & modified_query_ast = rewriteSelectQuery(
         query_info.query, remote_database, remote_table, remote_table_function_ptr);
 
-    Block header = materializeBlock(InterpreterSelectQuery(query_info.query, context, Names{}, processed_stage).getSampleBlock());
+    Block header = materializeBlock(
+        InterpreterSelectQuery(query_info.query, context, SelectQueryOptions(processed_stage)).getSampleBlock());
 
     ClusterProxy::SelectStreamFactory select_stream_factory = remote_table_function_ptr
         ? ClusterProxy::SelectStreamFactory(
diff --git a/dbms/src/Storages/StorageMerge.cpp b/dbms/src/Storages/StorageMerge.cpp
index 609aa38dd1c..7572a865af4 100644
--- a/dbms/src/Storages/StorageMerge.cpp
+++ b/dbms/src/Storages/StorageMerge.cpp
@@ -274,7 +274,7 @@ BlockInputStreams StorageMerge::createSourceStreams(const SelectQueryInfo & quer
     if (!storage)
         return BlockInputStreams{
             InterpreterSelectQuery(modified_query_info.query, modified_context, std::make_shared<OneBlockInputStream>(header),
-                                   processed_stage, true).execute().in};
+                                   SelectQueryOptions(processed_stage).analyze()).execute().in};
 
     BlockInputStreams source_streams;
 
@@ -295,7 +295,7 @@ BlockInputStreams StorageMerge::createSourceStreams(const SelectQueryInfo & quer
         modified_context.getSettingsRef().max_threads = UInt64(streams_num);
         modified_context.getSettingsRef().max_streams_to_max_threads_ratio = 1;
 
-        InterpreterSelectQuery interpreter{modified_query_info.query, modified_context, Names{}, processed_stage};
+        InterpreterSelectQuery interpreter{modified_query_info.query, modified_context, SelectQueryOptions(processed_stage)};
         BlockInputStreamPtr interpreter_stream = interpreter.execute().in;
 
         /** Materialization is needed, since from distributed storage the constants come materialized.
@@ -429,7 +429,7 @@ Block StorageMerge::getQueryHeader(
         case QueryProcessingStage::Complete:
             return materializeBlock(InterpreterSelectQuery(
                 query_info.query, context, std::make_shared<OneBlockInputStream>(getSampleBlockForColumns(column_names)),
-                                       processed_stage, true).getSampleBlock());
+                SelectQueryOptions(processed_stage).analyze()).getSampleBlock());
     }
     throw Exception("Logical Error: unknown processed stage.", ErrorCodes::LOGICAL_ERROR);
 }
diff --git a/dbms/src/Storages/StorageView.cpp b/dbms/src/Storages/StorageView.cpp
index 21476febbd2..baa56f8cd00 100644
--- a/dbms/src/Storages/StorageView.cpp
+++ b/dbms/src/Storages/StorageView.cpp
@@ -63,7 +63,7 @@ BlockInputStreams StorageView::read(
             current_inner_query = new_inner_query;
     }
 
-    res = InterpreterSelectWithUnionQuery(current_inner_query, context, column_names).executeWithMultipleStreams();
+    res = InterpreterSelectWithUnionQuery(current_inner_query, context, {}, column_names).executeWithMultipleStreams();
 
     /// It's expected that the columns read from storage are not constant.
     /// Because method 'getSampleBlockForColumns' is used to obtain a structure of result in InterpreterSelectQuery.
diff --git a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in
index e2e4b397d0b..97358ac02c9 100644
--- a/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in
+++ b/dbms/src/Storages/System/StorageSystemBuildOptions.generated.cpp.in
@@ -56,6 +56,7 @@ const char * auto_config_build[]
     "USE_PROTOBUF", "@USE_PROTOBUF@",
     "USE_BROTLI", "@USE_BROTLI@",
     "USE_SSL", "@USE_SSL@",
+    "USE_HYPERSCAN", "@USE_HYPERSCAN@",
 
     nullptr, nullptr
 };
diff --git a/dbms/tests/clickhouse-test b/dbms/tests/clickhouse-test
index b17f408f959..242dc17de0b 100755
--- a/dbms/tests/clickhouse-test
+++ b/dbms/tests/clickhouse-test
@@ -404,6 +404,8 @@ def main(args):
 
 
 def find_binary(name):
+    if os.path.exists(name) and os.access(name, os.X_OK):
+        return True
     paths = os.environ.get("PATH").split(':')
     for path in paths:
         if os.access(os.path.join(path, name), os.X_OK):
@@ -416,7 +418,7 @@ if __name__ == '__main__':
     parser=ArgumentParser(description='ClickHouse functional tests')
     parser.add_argument('-q', '--queries', help='Path to queries dir')
     parser.add_argument('--tmp', help='Path to tmp dir')
-    parser.add_argument('-b', '--binary', default='clickhouse', help='Main clickhouse binary')
+    parser.add_argument('-b', '--binary', default='clickhouse', help='Path to clickhouse binary or name of binary in PATH')
     parser.add_argument('-c', '--client', help='Client program')
     parser.add_argument('--extract_from_config', help='extract-from-config program')
     parser.add_argument('--configclient', help='Client config (if you use not default ports)')
diff --git a/dbms/tests/clickhouse-test-server b/dbms/tests/clickhouse-test-server
index b324a270473..80a1db4a153 100755
--- a/dbms/tests/clickhouse-test-server
+++ b/dbms/tests/clickhouse-test-server
@@ -9,18 +9,18 @@ ROOT_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && cd ../.. && pwd)
 DATA_DIR=${DATA_DIR:=`mktemp -d /tmp/clickhouse.test..XXXXX`}
 DATA_DIR_PATTERN=${DATA_DIR_PATTERN:=/tmp/clickhouse} # path from config file, will be replaced to temporary
 LOG_DIR=${LOG_DIR:=$DATA_DIR/log}
-export CLICKHOUSE_BINARY=${CLICKHOUSE_BINARY:="clickhouse"}
-( [ -x "$ROOT_DIR/dbms/programs/${CLICKHOUSE_BINARY}-server" ] || [ -x "$ROOT_DIR/dbms/programs/${CLICKHOUSE_BINARY}" ] ) && BUILD_DIR=${BUILD_DIR:=$ROOT_DIR} # Build without separate build dir
+export CLICKHOUSE_BINARY_NAME=${CLICKHOUSE_BINARY_NAME:="clickhouse"}
+( [ -x "$ROOT_DIR/dbms/programs/${CLICKHOUSE_BINARY_NAME}-server" ] || [ -x "$ROOT_DIR/dbms/programs/${CLICKHOUSE_BINARY_NAME}" ] ) && BUILD_DIR=${BUILD_DIR:=$ROOT_DIR} # Build without separate build dir
 [ -d "$ROOT_DIR/build${BUILD_TYPE}" ] && BUILD_DIR=${BUILD_DIR:=$ROOT_DIR/build${BUILD_TYPE}}
 BUILD_DIR=${BUILD_DIR:=$ROOT_DIR}
-[ -x ${CLICKHOUSE_BINARY}-server" ] && [ -x ${CLICKHOUSE_BINARY}-client" ] && BIN_DIR= # Allow run in /usr/bin
-( [ -x "$BUILD_DIR/dbms/programs/${CLICKHOUSE_BINARY}" ] || [ -x "$BUILD_DIR/dbms/programs/${CLICKHOUSE_BINARY}-server" ] ) && BIN_DIR=${BIN_DIR:=$BUILD_DIR/dbms/programs/}
-[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}-server" ] && CLICKHOUSE_SERVER=${CLICKHOUSE_SERVER:=$BIN_DIR/${CLICKHOUSE_BINARY}-server}
-[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}" ] && CLICKHOUSE_SERVER=${CLICKHOUSE_SERVER:=$BIN_DIR/${CLICKHOUSE_BINARY} server}
-[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}-client" ] && CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:=$BIN_DIR/${CLICKHOUSE_BINARY}-client}
-[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}" ] && CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:=$BIN_DIR/${CLICKHOUSE_BINARY} client}
-[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}-extract-from-config" ] && CLICKHOUSE_EXTRACT=${CLICKHOUSE_EXTRACT:=$BIN_DIR/${CLICKHOUSE_BINARY}-extract-from-config}
-[ -x "$BIN_DIR/${CLICKHOUSE_BINARY}" ] && CLICKHOUSE_EXTRACT=${CLICKHOUSE_EXTRACT:=$BIN_DIR/${CLICKHOUSE_BINARY} extract-from-config}
+[ -x "${CLICKHOUSE_BINARY_NAME}-server" ] && [ -x "${CLICKHOUSE_BINARY_NAME}-client" ] && BIN_DIR= # Allow run in /usr/bin
+( [ -x "$BUILD_DIR/dbms/programs/${CLICKHOUSE_BINARY_NAME}" ] || [ -x "$BUILD_DIR/dbms/programs/${CLICKHOUSE_BINARY_NAME}-server" ] ) && BIN_DIR=${BIN_DIR:=$BUILD_DIR/dbms/programs/}
+[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-server" ] && CLICKHOUSE_SERVER=${CLICKHOUSE_SERVER:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-server}
+[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}" ] && CLICKHOUSE_SERVER=${CLICKHOUSE_SERVER:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME} server}
+[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-client" ] && CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-client}
+[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}" ] && CLICKHOUSE_CLIENT=${CLICKHOUSE_CLIENT:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME} client}
+[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-extract-from-config" ] && CLICKHOUSE_EXTRACT=${CLICKHOUSE_EXTRACT:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME}-extract-from-config}
+[ -x "$BIN_DIR/${CLICKHOUSE_BINARY_NAME}" ] && CLICKHOUSE_EXTRACT=${CLICKHOUSE_EXTRACT:=$BIN_DIR/${CLICKHOUSE_BINARY_NAME} extract-from-config}
 
 [ -f "$CUR_DIR/server-test.xml" ] && CONFIG_DIR=${CONFIG_DIR=$CUR_DIR}/
 CONFIG_CLIENT_DIR=${CONFIG_CLIENT_DIR=$CONFIG_DIR}
@@ -131,7 +131,7 @@ else
     TEST_DICT=${TEST_DICT=1}
     CLICKHOUSE_CLIENT_QUERY="${CLICKHOUSE_CLIENT} --config ${CLICKHOUSE_CONFIG_CLIENT} --port $CLICKHOUSE_PORT_TCP -m -n -q"
     $CLICKHOUSE_CLIENT_QUERY 'SELECT * from system.build_options; SELECT * FROM system.clusters;'
-    CLICKHOUSE_TEST="env ${TEST_DIR}clickhouse-test --binary ${BIN_DIR}${CLICKHOUSE_BINARY} --configclient $CLICKHOUSE_CONFIG_CLIENT --configserver $CLICKHOUSE_CONFIG --tmp $DATA_DIR/tmp --queries $QUERIES_DIR $TEST_OPT0 $TEST_OPT"
+    CLICKHOUSE_TEST="env ${TEST_DIR}clickhouse-test --binary ${BIN_DIR}${CLICKHOUSE_BINARY_NAME} --configclient $CLICKHOUSE_CONFIG_CLIENT --configserver $CLICKHOUSE_CONFIG --tmp $DATA_DIR/tmp --queries $QUERIES_DIR $TEST_OPT0 $TEST_OPT"
     CLICKHOUSE_PERFORMANCE_TEST="${BIN_DIR}clickhouse-performance-test --port $CLICKHOUSE_PORT_TCP --recursive $CUR_DIR/performance --skip-tags=long"
     if [ "${TEST_RUN_STRESS}" ]; then
         # Running test in parallel will fail some results (tests can create/fill/drop same tables)
diff --git a/dbms/tests/queries/0_stateless/00850_global_join_dups.reference b/dbms/tests/queries/0_stateless/00850_global_join_dups.reference
index b261da18d51..dcaae3f745e 100644
--- a/dbms/tests/queries/0_stateless/00850_global_join_dups.reference
+++ b/dbms/tests/queries/0_stateless/00850_global_join_dups.reference
@@ -1,2 +1,7 @@
 1
 0
+0
+0	0
+0
+0	0
+0	0
diff --git a/dbms/tests/queries/0_stateless/00850_global_join_dups.sql b/dbms/tests/queries/0_stateless/00850_global_join_dups.sql
index faf0397374a..d9b749abd5d 100644
--- a/dbms/tests/queries/0_stateless/00850_global_join_dups.sql
+++ b/dbms/tests/queries/0_stateless/00850_global_join_dups.sql
@@ -35,49 +35,49 @@ GLOBAL INNER JOIN
 ) USING dummy;
 
 
--- SET asterisk_left_columns_only = 0;
---
--- SELECT * FROM remote('127.0.0.2', system.one)
--- GLOBAL INNER JOIN
--- (
---    SELECT *, dummy
---    FROM ( SELECT dummy FROM remote('127.0.0.2', system.one) ) t1
---    GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2
---    USING dummy
--- ) USING dummy;
---
--- SELECT * FROM remote('127.0.0.2', system.one)
--- GLOBAL INNER JOIN
--- (
---   SELECT *, t1.*, t2.*
---    FROM ( SELECT toUInt8(1) AS dummy ) t1
---    INNER JOIN ( SELECT toUInt8(1) AS dummy ) t2
---    USING dummy
--- ) USING dummy;
---
--- SELECT * FROM remote('127.0.0.2', system.one)
--- GLOBAL INNER JOIN
--- (
---    SELECT *, dummy
---    FROM ( SELECT toUInt8(1) AS dummy ) t1
---    INNER JOIN ( SELECT toUInt8(1) AS dummy ) t2
---    USING dummy
--- ) USING dummy;
---
--- SELECT * FROM remote('127.0.0.2', system.one)
--- GLOBAL INNER JOIN
--- (
---     SELECT *
---     FROM ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t1
---     GLOBAL INNER JOIN ( SELECT toUInt8(1) AS dummy ) t2
---     USING dummy
--- ) USING dummy;
---
--- SELECT * FROM remote('127.0.0.2', system.one)
--- GLOBAL INNER JOIN
--- (
---     SELECT *
---     FROM ( SELECT toUInt8(1) AS dummy ) t1
---     GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2
---     USING dummy
--- ) USING dummy;
+SET asterisk_left_columns_only = 0;
+
+SELECT * FROM remote('127.0.0.2', system.one)
+GLOBAL INNER JOIN
+(
+    SELECT *, dummy
+    FROM ( SELECT dummy FROM remote('127.0.0.2', system.one) ) t1
+    GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2
+    USING dummy
+) USING dummy;
+
+SELECT * FROM remote('127.0.0.2', system.one)
+GLOBAL INNER JOIN
+(
+    SELECT *, t1.*, t2.*
+    FROM ( SELECT toUInt8(0) AS dummy ) t1
+    INNER JOIN ( SELECT toUInt8(0) AS dummy ) t2
+    USING dummy
+) USING dummy;
+
+SELECT * FROM remote('127.0.0.2', system.one)
+GLOBAL INNER JOIN
+(
+    SELECT *, dummy
+    FROM ( SELECT toUInt8(0) AS dummy ) t1
+    INNER JOIN ( SELECT toUInt8(0) AS dummy ) t2
+    USING dummy
+) USING dummy;
+
+SELECT * FROM remote('127.0.0.2', system.one)
+GLOBAL INNER JOIN
+(
+    SELECT *, dummy as other
+    FROM ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t1
+    GLOBAL INNER JOIN ( SELECT toUInt8(0) AS dummy ) t2
+    USING dummy
+) USING dummy;
+
+SELECT * FROM remote('127.0.0.2', system.one)
+GLOBAL INNER JOIN
+(
+    SELECT *, dummy, dummy as other
+    FROM ( SELECT toUInt8(0) AS dummy ) t1
+    GLOBAL INNER JOIN ( SELECT dummy FROM remote('127.0.0.3', system.one) ) t2
+    USING dummy
+) USING dummy;
diff --git a/dbms/tests/queries/0_stateless/00910_buffer_prewhere.reference b/dbms/tests/queries/0_stateless/00910_buffer_prewhere.reference
new file mode 100644
index 00000000000..d00491fd7e5
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00910_buffer_prewhere.reference
@@ -0,0 +1 @@
+1
diff --git a/dbms/tests/queries/0_stateless/00910_buffer_prewhere.sql b/dbms/tests/queries/0_stateless/00910_buffer_prewhere.sql
new file mode 100644
index 00000000000..d7d9813c5f5
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00910_buffer_prewhere.sql
@@ -0,0 +1,7 @@
+DROP DATABASE IF EXISTS test_buffer;
+CREATE DATABASE test_buffer;
+CREATE TABLE test_buffer.mt (uid UInt64, ts DateTime, val Float64) ENGINE = MergeTree PARTITION BY toDate(ts) ORDER BY (uid, ts);
+CREATE TABLE test_buffer.buf as test_buffer.mt ENGINE = Buffer(test_buffer, mt, 2, 10, 60, 10000, 100000, 1000000, 10000000);
+INSERT INTO test_buffer.buf VALUES (1, '2019-03-01 10:00:00', 0.5), (2, '2019-03-02 10:00:00', 0.15), (1, '2019-03-03 10:00:00', 0.25);
+SELECT count() from test_buffer.buf prewhere ts > toDateTime('2019-03-01 12:00:00') and ts < toDateTime('2019-03-02 12:00:00');
+DROP DATABASE test_buffer;
diff --git a/debian/daemons b/dbms/tests/queries/0_stateless/00910_crash_when_distributed_modify_order_by.reference
similarity index 100%
rename from debian/daemons
rename to dbms/tests/queries/0_stateless/00910_crash_when_distributed_modify_order_by.reference
diff --git a/dbms/tests/queries/0_stateless/00910_crash_when_distributed_modify_order_by.sql b/dbms/tests/queries/0_stateless/00910_crash_when_distributed_modify_order_by.sql
new file mode 100644
index 00000000000..89ea9cbcb28
--- /dev/null
+++ b/dbms/tests/queries/0_stateless/00910_crash_when_distributed_modify_order_by.sql
@@ -0,0 +1,7 @@
+DROP TABLE IF EXISTS test.union1;
+DROP TABLE IF EXISTS test.union2;
+CREATE TABLE test.union1 ( date Date, a Int32, b Int32, c Int32, d Int32) ENGINE = MergeTree(date, (a, date), 8192);
+CREATE TABLE test.union2 ( date Date, a Int32, b Int32, c Int32, d Int32) ENGINE = Distributed(test_shard_localhost, 'test', 'union1');
+ALTER TABLE test.union2 MODIFY ORDER BY a; -- { serverError 48 }
+DROP TABLE test.union1;
+DROP TABLE test.union2;
diff --git a/dbms/tests/queries/bugs/all_join.sql b/dbms/tests/queries/bugs/all_join.sql
deleted file mode 100644
index 8b63adf36f0..00000000000
--- a/dbms/tests/queries/bugs/all_join.sql
+++ /dev/null
@@ -1,24 +0,0 @@
-drop table if exists test.persons;
-drop table if exists test.children;
-
-create table test.persons (
-    id String,
-    name String
-) engine MergeTree order by id;
-
-create table test.children (
-    id String,
-    childName String
-) engine MergeTree order by id;
-
-insert into test.persons (id, name) values ('1', 'John'), ('2', 'Jack'), ('3', 'Daniel'), ('4', 'James'), ('5', 'Amanda');
-insert into test.children (id, childName) values ('1', 'Robert'), ('1', 'Susan'), ('3', 'Sarah'), ('4', 'David'), ('4', 'Joseph'), ('5', 'Robert');
-
-
-select * from test.persons all inner join test.children using id;
-
-select * from test.persons all inner join (select * from test.children) as j using id;
-
-select * from (select * from test.persons) as s all inner join (select * from test.children) as j using id;
-
-
diff --git a/dbms/tests/queries/bugs/missing_scalar_subquery_removal.sql b/dbms/tests/queries/bugs/missing_scalar_subquery_removal.sql
new file mode 100644
index 00000000000..40bf433f5a3
--- /dev/null
+++ b/dbms/tests/queries/bugs/missing_scalar_subquery_removal.sql
@@ -0,0 +1 @@
+SELECT a FROM (SELECT 1 AS a, (SELECT count() FROM system.numbers) AS b);
diff --git a/debian/dupload.conf.in b/debian/dupload.conf.in
deleted file mode 100644
index ca1973b3af4..00000000000
--- a/debian/dupload.conf.in
+++ /dev/null
@@ -1,11 +0,0 @@
-package config;
-
-$default_host = "metrika";
-
-$cfg{'metrika'} = {
-    fqdn => "",
-    method => "scpb",
-    incoming => "/repo/metrika/mini-dinstall/incoming/",
-    dinstall_runs => 0,
-    login => "@AUTHOR@"
-};
diff --git a/docs/en/interfaces/http.md b/docs/en/interfaces/http.md
index 8dece39249f..a72a2e4a06e 100644
--- a/docs/en/interfaces/http.md
+++ b/docs/en/interfaces/http.md
@@ -128,16 +128,29 @@ echo 'DROP TABLE t' | curl 'http://localhost:8123/' --data-binary @-
 
 For successful requests that don't return a data table, an empty response body is returned.
 
-You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you will need to use the special clickhouse-compressor program to work with it (it is installed with the clickhouse-client package).
+You can use the internal ClickHouse compression format when transmitting data. The compressed data has a non-standard format, and you will need to use the special `clickhouse-compressor` program to work with it (it is installed with the `clickhouse-client` package). To increase the efficiency of the data insertion, you may disable the server-side checksum verification with the [http_native_compression_disable_checksumming_on_decompress](../operations/settings/settings.md#settings-http_native_compression_disable_checksumming_on_decompress) setting.
 
-If you specified 'compress=1' in the URL, the server will compress the data it sends you.
-If you specified 'decompress=1' in the URL, the server will decompress the same data that you pass in the POST method.
+If you specify `compress=1` in the URL, the server compresses the data it sends you.
+If you specify `decompress=1` in the URL, the server decompresses the same data that you pass in the `POST` method.
 
-It is also possible to use the standard gzip-based HTTP compression. To send a POST request compressed using gzip, append the request header `Content-Encoding: gzip`.
-In order for ClickHouse to compress the response using gzip, you must append `Accept-Encoding: gzip` to the request headers, and enable the ClickHouse setting `enable_http_compression`.
+It is also possible to use the standard `gzip`-based [HTTP compression](https://en.wikipedia.org/wiki/HTTP_compression). To send a `POST` request compressed using `gzip`, append the request header `Content-Encoding: gzip`.
+In order for ClickHouse to compress the response using `gzip`, you must append `Accept-Encoding: gzip` to the request headers, and enable the ClickHouse [enable_http_compression](../operations/settings/settings.md#settings-enable_http_compression) setting. You can configure the compression level of the data with the [http_zlib_compression_level](../operations/settings/settings.md#settings-http_zlib_compression_level) setting.
 
 You can use this to reduce network traffic when transmitting a large amount of data, or for creating dumps that are immediately compressed.
 
+Examples of sending the data with compression:
+
+```bash
+#Receiving compressed data from the server:
+curl -vsS "http://localhost:8123/?enable_http_compression=1" -d 'SELECT number FROM system.numbers LIMIT 10' -H 'Accept-Encoding: gzip'
+
+#Sending compressed data to the server:
+echo "SELECT 1" | gzip -c | curl -sS --data-binary @- -H 'Content-Encoding: gzip' 'http://localhost:8123/'
+```
+
+!!! note "Note"
+    Some HTTP clients can decompress data (`gzip` and `deflate`) from the server by default and you may get the decompressed data even if you use the compression settings correctly.
+
 You can use the 'database' URL parameter to specify the default database.
 
 ```bash
diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md
index 3e9ecef9d17..00c5d476771 100644
--- a/docs/en/operations/settings/settings.md
+++ b/docs/en/operations/settings/settings.md
@@ -79,6 +79,41 @@ Enable or disable fsync when writing .sql files. Enabled by default.
 
 It makes sense to disable it if the server has millions of tiny table chunks that are constantly being created and destroyed.
 
+## enable_http_compression {#settings-enable_http_compression}
+
+Enables/disables compression of the data in the response to an HTTP request.
+
+For more information, read the [HTTP interface description](../../interfaces/http.md).
+
+Possible values:
+
+- 0 — The functionality is disabled.
+- 1 — The functionality is enabled.
+
+Default value: 0.
+
+## http_zlib_compression_level {#settings-http_zlib_compression_level}
+
+Sets the level of the compression of the data in the response to an HTTP request if [enable_http_compression = 1](#settings-enable_http_compression).
+
+Possible values: numbers from 1 to 9.
+
+Default value: 3.
+
+
+## http_native_compression_disable_checksumming_on_decompress {#settings-http_native_compression_disable_checksumming_on_decompress}
+
+Enables/disables the verification of the checksum when uncompressing the HTTP POST data from the client. Used only for ClickHouse native format of compression (neither `gzip` nor `deflate`).
+
+For more information, read the [HTTP interface description](../../interfaces/http.md).
+
+Possible values:
+
+- 0 — Checksum verification is performed.
+- 1 — Checksum verification is skipped.
+
+Default value: 0.
+
 ## input_format_allow_errors_num
 
 Sets the maximum number of acceptable errors when reading from text formats (CSV, TSV, etc.).
diff --git a/docs/en/operations/table_engines/mergetree.md b/docs/en/operations/table_engines/mergetree.md
index 9846e9fd8e4..ee08913dcd1 100644
--- a/docs/en/operations/table_engines/mergetree.md
+++ b/docs/en/operations/table_engines/mergetree.md
@@ -191,9 +191,7 @@ added dimensions.
 In this case it makes sense to leave only a few columns in the primary key that will provide efficient
 range scans and add the remaining dimension columns to the sorting key tuple.
 
-[ALTER of the sorting key](../../query_language/alter.md) is a
-lightweight operation because when a new column is simultaneously added to the table and to the sorting key
-data parts need not be changed (they remain sorted by the new sorting key expression).
+[ALTER of the sorting key](../../query_language/alter.md) is a lightweight operation because when a new column is simultaneously added to the table and to the sorting key, existing data parts don't need to be changed. Since the old sorting key is a prefix of the new sorting key and there is no data in the just added column, the data at the moment of table modification is sorted by both the old and the new sorting key. 
 
 ### Use of Indexes and Partitions in Queries
 
diff --git a/docs/en/query_language/functions/higher_order_functions.md b/docs/en/query_language/functions/higher_order_functions.md
index dde52c05b7a..ca8612ddab5 100644
--- a/docs/en/query_language/functions/higher_order_functions.md
+++ b/docs/en/query_language/functions/higher_order_functions.md
@@ -119,11 +119,31 @@ SELECT arraySort((x, y) -> y, ['hello', 'world'], [2, 1]);
 └────────────────────┘
 ```
 
+Note that NULLs and NaNs go last (NaNs go before NULLs). For example:
+ 
+``` sql
+SELECT arraySort([1, nan, 2, NULL, 3, nan, 4, NULL])
+```
+```
+┌─arraySort([1, nan, 2, NULL, 3, nan, 4, NULL])─┐
+│ [1,2,3,4,nan,nan,NULL,NULL]                   │
+└───────────────────────────────────────────────┘
+```
+
 ### arrayReverseSort(\[func,\] arr1, ...)
 
 Returns an array as result of sorting the elements of `arr1` in descending order. If the `func` function is specified, sorting order is determined by the result of the function `func` applied to the elements of array (arrays)  
 
-
+Note that NULLs and NaNs go last (NaNs go before NULLs). For example:
+ 
+``` sql
+SELECT arrayReverseSort([1, nan, 2, NULL, 3, nan, 4, NULL])
+```
+```
+┌─arrayReverseSort([1, nan, 2, NULL, 3, nan, 4, NULL])─┐
+│ [4,3,2,1,nan,nan,NULL,NULL]                          │
+└──────────────────────────────────────────────────────┘
+```
 
 
 
diff --git a/docs/en/query_language/operators.md b/docs/en/query_language/operators.md
index 61e008c2ba8..602668bf30d 100644
--- a/docs/en/query_language/operators.md
+++ b/docs/en/query_language/operators.md
@@ -51,6 +51,8 @@ Groups of operators are listed in order of priority (the higher it is in the lis
 
 `a BETWEEN b AND c` – The same as `a >= b AND a <= c.`
 
+`a NOT BETWEEN b AND c` – The same as `a < b OR a > c.`
+
 ## Operators for Working With Data Sets
 
 *See the section [IN operators](select.md#select-in-operators).*
diff --git a/docs/en/query_language/select.md b/docs/en/query_language/select.md
index 1e4f287bfcd..8b48d931d9e 100644
--- a/docs/en/query_language/select.md
+++ b/docs/en/query_language/select.md
@@ -761,11 +761,12 @@ DISTINCT is not supported if SELECT has at least one array column.
 ### LIMIT Clause
 
 `LIMIT m` allows you to select the first `m` rows from the result.
-`LIMIT n`, m allows you to select the first `m` rows from the result after skipping the first `n` rows.
+
+`LIMIT n, m` allows you to select the first `m` rows from the result after skipping the first `n` rows. The `LIMIT m OFFSET n` syntax is also supported.
 
 `n` and `m` must be non-negative integers.
 
-If there isn't an ORDER BY clause that explicitly sorts results, the result may be arbitrary and nondeterministic.
+If there isn't an `ORDER BY` clause that explicitly sorts results, the result may be arbitrary and nondeterministic.
 
 ### UNION ALL Clause
 
diff --git a/docs/ru/operations/table_engines/mergetree.md b/docs/ru/operations/table_engines/mergetree.md
index 9182d23a4c8..aeb29c270e1 100644
--- a/docs/ru/operations/table_engines/mergetree.md
+++ b/docs/ru/operations/table_engines/mergetree.md
@@ -189,7 +189,7 @@ ClickHouse не требует уникального первичного кл
 В этом сценарии имеет смысл оставить в первичном ключе всего несколько столбцов, которые обеспечат эффективную
 фильтрацию по индексу, а остальные столбцы-измерения добавить в выражение ключа сортировки.
 
-[ALTER ключа сортировки](../../query_language/alter.md) — легкая операция, так как при одновременном добавлении нового столбца в таблицу и ключ сортировки не нужно изменять
+[ALTER ключа сортировки](../../query_language/alter.md) — легкая операция, так как при одновременном добавлении нового столбца в таблицу и в ключ сортировки, не нужно изменять
 данные кусков (они остаются упорядоченными и по новому выражению ключа).
 
 ### Использование индексов и партиций в запросах
diff --git a/docs/ru/query_language/functions/higher_order_functions.md b/docs/ru/query_language/functions/higher_order_functions.md
index e26546f11df..f5586cda6ab 100644
--- a/docs/ru/query_language/functions/higher_order_functions.md
+++ b/docs/ru/query_language/functions/higher_order_functions.md
@@ -61,6 +61,7 @@ SELECT
 Вернуть первый элемент массива arr1, для которого функция func возвращает не 0.
 
 ### arrayFirstIndex(func, arr1, ...)
+
 Вернуть индекс первого элемента массива arr1, для которого функция func возвращает не 0.
 
 ### arrayCumSum(\[func,\] arr1, ...)
@@ -98,8 +99,31 @@ SELECT arraySort((x, y) -> y, ['hello', 'world'], [2, 1]);
 └────────────────────┘
 ```
 
+`NULL` и `NaN` будут последними в массиве (при этом `NaN` будет перед `NULL`). Например:
+
+``` sql
+SELECT arraySort([1, nan, 2, NULL, 3, nan, 4, NULL])
+```
+```
+┌─arraySort([1, nan, 2, NULL, 3, nan, 4, NULL])─┐
+│ [1,2,3,4,nan,nan,NULL,NULL]                   │
+└───────────────────────────────────────────────┘
+```
+
 ### arrayReverseSort(\[func,\] arr1, ...)
 
 Возвращает отсортированный в нисходящем порядке массив `arr1`. Если задана функция `func`, то порядок сортировки определяется результатом применения функции `func` на элементы массива (массивов).  
 
+`NULL` и `NaN` будут последними в массиве (при этом `NaN` будет перед `NULL`). Например:
+ 
+``` sql
+SELECT arrayReverseSort([1, nan, 2, NULL, 3, nan, 4, NULL])
+```
+```
+┌─arrayReverseSort([1, nan, 2, NULL, 3, nan, 4, NULL])─┐
+│ [4,3,2,1,nan,nan,NULL,NULL]                          │
+└──────────────────────────────────────────────────────┘
+```
+
+
 [Оригинальная статья](https://clickhouse.yandex/docs/ru/query_language/functions/higher_order_functions/) <!--hide-->
diff --git a/docs/ru/query_language/operators.md b/docs/ru/query_language/operators.md
index edfc2513d28..89c6dd4c176 100644
--- a/docs/ru/query_language/operators.md
+++ b/docs/ru/query_language/operators.md
@@ -51,6 +51,8 @@
 
 `a BETWEEN b AND c` - равнозначно `a >= b AND a <= c`
 
+`a NOT BETWEEN b AND c` - равнозначно `a < b OR a > c`
+
 ## Операторы для работы с множествами
 
 *Смотрите раздел [Операторы IN](select.md#select-in-operators).*
diff --git a/docs/ru/query_language/select.md b/docs/ru/query_language/select.md
index b9c899f6532..153e20bd8df 100644
--- a/docs/ru/query_language/select.md
+++ b/docs/ru/query_language/select.md
@@ -714,12 +714,13 @@ WHERE и HAVING отличаются тем, что WHERE выполняется
 
 ### Секция LIMIT
 
-LIMIT m позволяет выбрать из результата первые m строк.
-LIMIT n, m позволяет выбрать из результата первые m строк после пропуска первых n строк.
+`LIMIT m` позволяет выбрать из результата первые `m` строк.
 
-n и m должны быть неотрицательными целыми числами.
+`LIMIT n, m` позволяет выбрать из результата первые `m` строк после пропуска первых `n` строк. Синтаксис `LIMIT m OFFSET n` также поддерживается. 
 
-При отсутствии секции ORDER BY, однозначно сортирующей результат, результат может быть произвольным и может являться недетерминированным.
+`n` и `m` должны быть неотрицательными целыми числами.
+
+При отсутствии секции `ORDER BY`, однозначно сортирующей результат, результат может быть произвольным и может являться недетерминированным.
 
 ### Секция UNION ALL
 
diff --git a/docs/tools/README.md b/docs/tools/README.md
index 9b4dd26dd14..6332a22f164 100644
--- a/docs/tools/README.md
+++ b/docs/tools/README.md
@@ -2,7 +2,7 @@
 
 ClickHouse documentation is built using [build.py](build.py) script that uses [mkdocs](https://www.mkdocs.org) library and it's dependencies to separately build all version of documentations (all languages in either single and multi page mode) as static HTMLs. The results are then put in correct directory structure. It can also generate PDF version.
 
-Finally [the infrustructure](../website) that builds ClickHouse [official website](https://clickhouse.yandex) just puts that directory structure into the same Docker container together with rest of website and deploys it to Yandex private cloud.
+[release.sh](release.sh) also pulls static files needed for [official ClickHouse website](https://clickhouse.yandex) from [../../website](../../website) folder, packs them alongside docs into Docker container and tries to deploy it (possible only from Yandex private network).
 
 ## How to check if the documentation will look fine?
 
diff --git a/docs/tools/build.py b/docs/tools/build.py
index e7e1c777b42..ff89b437ffc 100755
--- a/docs/tools/build.py
+++ b/docs/tools/build.py
@@ -21,6 +21,7 @@ from mkdocs import exceptions
 from mkdocs.commands import build as mkdocs_build
 
 from concatenate import concatenate
+from website import build_website, minify_website
 import mdx_clickhouse
 import test
 
@@ -96,7 +97,7 @@ def build_for_lang(lang, args):
             site_name=site_names.get(lang, site_names['en']),
             site_url='https://clickhouse.yandex/docs/%s/' % lang,
             docs_dir=os.path.join(args.docs_dir, lang),
-            site_dir=os.path.join(args.output_dir, lang),
+            site_dir=os.path.join(args.docs_output_dir, lang),
             strict=True,
             theme=theme_cfg,
             copyright='©2016–2019 Yandex LLC',
@@ -168,7 +169,7 @@ def build_single_page_version(lang, args, cfg):
 
                 mkdocs_build.build(cfg)
 
-                single_page_output_path = os.path.join(args.docs_dir, args.output_dir, lang, 'single')
+                single_page_output_path = os.path.join(args.docs_dir, args.docs_output_dir, lang, 'single')
 
                 if os.path.exists(single_page_output_path):
                     shutil.rmtree(single_page_output_path)
@@ -212,29 +213,40 @@ def build_redirects(args):
             to_path = '/docs/$1/' + to_path.replace('.md', '/')
             rewrites.append(' '.join(['rewrite', from_path, to_path, 'permanent;']))
 
-    with open(os.path.join(args.output_dir, 'redirects.conf'), 'w') as f:
+    with open(os.path.join(args.docs_output_dir, 'redirects.conf'), 'w') as f:
         f.write('\n'.join(rewrites))
 
 
 def build(args):
+    if os.path.exists(args.output_dir):
+        shutil.rmtree(args.output_dir)
+
+    if not args.skip_website:
+        build_website(args)
+
     for lang in args.lang.split(','):
         build_for_lang(lang, args)
 
     build_redirects(args)
 
+    if not args.skip_website:
+        minify_website(args)
 
 if __name__ == '__main__':
     arg_parser = argparse.ArgumentParser()
     arg_parser.add_argument('--lang', default='en,ru,zh,fa')
     arg_parser.add_argument('--docs-dir', default='.')
     arg_parser.add_argument('--theme-dir', default='mkdocs-material-theme')
+    arg_parser.add_argument('--website-dir', default=os.path.join('..', 'website'))
     arg_parser.add_argument('--output-dir', default='build')
     arg_parser.add_argument('--skip-single-page', action='store_true')
     arg_parser.add_argument('--skip-pdf', action='store_true')
+    arg_parser.add_argument('--skip-website', action='store_true')
     arg_parser.add_argument('--save-raw-single-page', type=str)
     arg_parser.add_argument('--verbose', action='store_true')
 
     args = arg_parser.parse_args()
+    args.docs_output_dir = os.path.join(args.output_dir, 'docs')
     os.chdir(os.path.join(os.path.dirname(__file__), '..'))
 
     logging.basicConfig(
diff --git a/website/release.sh b/docs/tools/release.sh
similarity index 87%
rename from website/release.sh
rename to docs/tools/release.sh
index 83e25563a57..e671dd8cea0 100755
--- a/website/release.sh
+++ b/docs/tools/release.sh
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 set -ex
 BASE_DIR=$(dirname $(readlink -f $0))
-cd "${BASE_DIR}"
+BUILD_DIR="${BASE_DIR}/../build"
 IMAGE="clickhouse/website"
 if [[ -z "$1" ]]
 then
@@ -12,12 +12,12 @@ fi
 FULL_NAME="${IMAGE}:${TAG}"
 REMOTE_NAME="registry.yandex.net/${FULL_NAME}"
 DOCKER_HASH="$2"
-GULP="$BASE_DIR/node_modules/gulp/bin/gulp.js"
 if [[ -z "$1" ]]
 then
-    $GULP clean
-    $GULP build
-    docker build -t "${FULL_NAME}" "${BASE_DIR}"
+    source "${BASE_DIR}/venv/bin/activate"
+    python "${BASE_DIR}/build.py"
+    cd "${BUILD_DIR}"
+    docker build -t "${FULL_NAME}" "${BUILD_DIR}"
     docker tag "${FULL_NAME}" "${REMOTE_NAME}"
     DOCKER_HASH=$(docker push "${REMOTE_NAME}" | tail -1 | awk '{print $3;}')
     docker rmi "${FULL_NAME}"
diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt
index 85cd355dbdc..f0df3b8ff36 100644
--- a/docs/tools/requirements.txt
+++ b/docs/tools/requirements.txt
@@ -6,11 +6,14 @@ certifi==2017.11.5
 chardet==3.0.4
 click==6.7
 CommonMark==0.5.4
+cssmin==0.2.0
 docutils==0.14
 futures==3.2.0
+htmlmin==0.1.12
 idna==2.6
 imagesize==0.7.1
 Jinja2==2.10
+jsmin==2.2.2
 livereload==2.5.1
 Markdown==2.6.11
 MarkupSafe==1.0
@@ -18,7 +21,7 @@ mkdocs==1.0.4
 Pygments==2.2.0
 python-slugify==1.2.6
 pytz==2017.3
-PyYAML==4.2b1
+PyYAML==3.12
 recommonmark==0.4.0
 requests==2.21.0
 singledispatch==3.4.0.3
diff --git a/docs/tools/website.py b/docs/tools/website.py
new file mode 100644
index 00000000000..0605058a6c6
--- /dev/null
+++ b/docs/tools/website.py
@@ -0,0 +1,45 @@
+import logging
+import os
+import shutil
+
+import cssmin
+import htmlmin
+import jsmin
+
+def build_website(args):
+    """Copy the static website from args.website_dir into args.output_dir.
+
+    Markdown files, shell scripts and anything named build, docs, public or
+    node_modules are excluded from the copy.
+    """
+    logging.info('Building website')
+    # Names matching any of these glob patterns are skipped at every level.
+    excluded = shutil.ignore_patterns(
+        '*.md', '*.sh', 'build', 'docs', 'public', 'node_modules'
+    )
+    shutil.copytree(
+        args.website_dir, args.output_dir, ignore=excluded
+    )
+
+def minify_website(args):
+    """Minify every .html, .css and .js file under args.output_dir in place."""
+    for root, _, filenames in os.walk(args.output_dir):
+        for filename in filenames:
+            path = os.path.join(root, filename)
+            # str.endswith accepts a tuple of suffixes; other files are kept as is.
+            if not filename.endswith(('.html', '.css', '.js')):
+                continue
+
+            logging.info('Minifying %s', path)
+            # Binary mode: read() returns bytes on Python 2 and 3 alike, so the
+            # explicit UTF-8 decode below is valid and newlines are untouched.
+            with open(path, 'rb') as f:
+                content = f.read().decode('utf-8')
+            if filename.endswith('.html'):
+                content = htmlmin.minify(content, remove_empty_space=False)
+            elif filename.endswith('.css'):
+                content = cssmin.cssmin(content)
+            elif filename.endswith('.js'):
+                content = jsmin.jsmin(content)
+            with open(path, 'wb') as f:
+                f.write(content.encode('utf-8'))
diff --git a/utils/build/build_debian_unbundled.sh b/utils/build/build_debian_unbundled.sh
index 0d9ae74f169..41c951c4bae 100755
--- a/utils/build/build_debian_unbundled.sh
+++ b/utils/build/build_debian_unbundled.sh
@@ -22,5 +22,5 @@ env TEST_RUN=1 \
     `# Use all possible contrib libs from system` \
     `# psmisc - killall` \
     `# gdb - symbol test in pbuilder` \
-    EXTRAPACKAGES="psmisc libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev libboost-regex-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev libunwind-dev libgsasl7-dev libxml2-dev libbrotli-dev $EXTRAPACKAGES" \
+    EXTRAPACKAGES="psmisc libboost-program-options-dev libboost-system-dev libboost-filesystem-dev libboost-thread-dev libboost-regex-dev zlib1g-dev liblz4-dev libdouble-conversion-dev libsparsehash-dev librdkafka-dev libpoco-dev unixodbc-dev libsparsehash-dev libgoogle-perftools-dev libzstd-dev libre2-dev libunwind-dev googletest libcctz-dev libcapnp-dev libjemalloc-dev libssl-dev libunwind-dev libgsasl7-dev libxml2-dev libbrotli-dev libhyperscan-dev $EXTRAPACKAGES" \
     pdebuild --configfile $ROOT_DIR/debian/.pbuilderrc $PDEBUILD_OPT
diff --git a/website/Dockerfile b/website/Dockerfile
index b66e0c8da34..ee4f9ffccdc 100644
--- a/website/Dockerfile
+++ b/website/Dockerfile
@@ -1,4 +1,4 @@
 FROM nginx:mainline
-COPY public /usr/share/nginx/html
+COPY . /usr/share/nginx/html/public
 COPY nginx/nginx.conf /etc/nginx/nginx.conf
 COPY nginx/default.conf /etc/nginx/conf.d/default.conf
diff --git a/website/README.md b/website/README.md
index d6abca119c2..26bb1dceab5 100644
--- a/website/README.md
+++ b/website/README.md
@@ -1,15 +1,2 @@
-ClickHouse website quickstart:
+ClickHouse website is built alongside its documentation via [docs/tools](https://github.com/yandex/ClickHouse/tree/master/docs/tools), see [README.md there](https://github.com/yandex/ClickHouse/tree/master/docs/tools/README.md).
 
-On Linux, do the following:
-```
-sudo apt-get install nodejs
-sudo ln -s /usr/bin/nodejs /usr/bin/node
-sudo npm install gulp-cli -g
-sudo npm install gulp -D
-```
-
-1. Make sure you have `npm`, `docker` and `python` installed and available in your `$PATH`.
-2. Run `setup\_gulp.sh` once to install build prerequisites via npm.
-3. Use `gulp build` to minify website to "public" subfolder or just `gulp` to run local webserver with livereload serving it (note: livereload browser extension is required to make it actually reload pages on edits automatically).
-4. There's Dockerfile that can be used to build and run ClickHouse website inside docker.
-5. Deployment to https://clickhouse.yandex/ is managed by `release.sh`, but it is only usable from inside Yandex private network.
diff --git a/website/gulpfile.js b/website/gulpfile.js
deleted file mode 100644
index ca254bf681f..00000000000
--- a/website/gulpfile.js
+++ /dev/null
@@ -1,154 +0,0 @@
-var gulp = require('gulp');
-var concat = require('gulp-concat');
-var uglify = require('gulp-uglify');
-var cleanCss = require('gulp-clean-css');
-var imagemin = require('gulp-imagemin');
-var sourcemaps = require('gulp-sourcemaps');
-var htmlmin = require('gulp-htmlmin');
-var minifyInline = require('gulp-minify-inline');
-var del = require('del');
-var connect = require('gulp-connect');
-var run = require('gulp-run');
-
-var outputDir = 'public';
-var docsDir = '../docs';
-
-var paths = {
-    htmls: [
-        '**/*.html',
-        '!deprecated/reference_ru.html',
-        '!deprecated/reference_en.html',
-        '!node_modules/**/*.html',
-        '!presentations/**/*.html',
-        '!public/**/*.html'],
-    reference: ['deprecated/reference_ru.html', 'deprecated/reference_en.html'],
-    docs: [docsDir + '/build/**/*'],
-    docstxt: ['docs/**/*.txt', 'docs/redirects.conf'],
-    docsjson: ['docs/**/*.json'],
-    docsxml: ['docs/**/*.xml'],
-    docspdf: ['docs/**/*.pdf'],
-    docssitemap: ['sitemap.xml', 'sitemap_static.xml'],
-    scripts: [
-        '**/*.js',
-        '!gulpfile.js',
-        '!node_modules/**/*.js',
-        '!presentations/**/*.js',
-        '!public/**/*.js'],
-    styles: [
-        '**/*.css',
-        '!node_modules/**/*.css',
-        '!presentations/**/*.css',
-        '!public/**/*.css'],
-    images: [
-        '**/*.{jpg,jpeg,png,gif,svg,ico}',
-        '!node_modules/**/*.{jpg,jpeg,png,gif,svg,ico}',
-        '!presentations/**/*.{jpg,jpeg,png,gif,svg,ico}',
-        '!public/**/*.{jpg,jpeg,png,gif,svg,ico}'],
-    robotstxt: ['robots.txt'],
-    presentations: ['presentations/**/*']
-};
-
-gulp.task('clean', function () {
-    return del([outputDir + '/**']);
-});
-
-gulp.task('reference', [], function () {
-    return gulp.src(paths.reference)
-        .pipe(minifyInline())
-        .pipe(gulp.dest(outputDir + '/deprecated'))
-});
-
-gulp.task('docs', [], function () {
-    run('cd ' + docsDir + '/tools; ./build.py');
-    return gulp.src(paths.docs)
-        .pipe(gulp.dest(outputDir + '/../docs'))
-});
-
-gulp.task('docstxt', ['docs'], function () {
-    return gulp.src(paths.docstxt)
-        .pipe(gulp.dest(outputDir + '/docs'))
-});
-
-gulp.task('docsjson', ['docs'], function () {
-    return gulp.src(paths.docsjson)
-        .pipe(gulp.dest(outputDir + '/docs'))
-});
-
-gulp.task('docsxml', ['docs'], function () {
-    return gulp.src(paths.docsxml)
-        .pipe(gulp.dest(outputDir + '/docs'))
-});
-
-gulp.task('docspdf', ['docs'], function () {
-    return gulp.src(paths.docspdf)
-        .pipe(gulp.dest(outputDir + '/docs'))
-});
-
-gulp.task('docssitemap', [], function () {
-    return gulp.src(paths.docssitemap)
-        .pipe(gulp.dest(outputDir + '/docs'))
-});
-
-gulp.task('presentations', [], function () {
-    return gulp.src(paths.presentations)
-        .pipe(gulp.dest(outputDir + '/presentations'))
-});
-
-gulp.task('robotstxt', [], function () {
-    return gulp.src(paths.robotstxt)
-        .pipe(gulp.dest(outputDir))
-});
-
-gulp.task('htmls', ['docs', 'docstxt', 'docsjson', 'docsxml', 'docspdf', 'docssitemap'], function () {
-    return gulp.src(paths.htmls)
-        .pipe(htmlmin({collapseWhitespace: true}))
-        .pipe(minifyInline())
-        .pipe(gulp.dest(outputDir))
-});
-
-gulp.task('sourcemaps', ['docs'], function () {
-    return gulp.src(paths.scripts)
-        .pipe(sourcemaps.init())
-        .pipe(uglify())
-        .pipe(sourcemaps.write())
-        .pipe(gulp.dest(outputDir))
-});
-
-gulp.task('scripts', ['docs'], function () {
-    return gulp.src(paths.scripts)
-        .pipe(uglify())
-        .pipe(gulp.dest(outputDir))
-});
-
-gulp.task('styles', ['docs'], function () {
-    return gulp.src(paths.styles)
-        .pipe(cleanCss())
-        .pipe(gulp.dest(outputDir))
-});
-
-gulp.task('images', ['docs'], function () {
-    return gulp.src(paths.images)
-        .pipe(imagemin({optimizationLevel: 9}))
-        .pipe(gulp.dest(outputDir))
-});
-
-gulp.task('watch', function () {
-    gulp.watch(paths.htmls, ['htmls']);
-    gulp.watch(paths.docs, ['docs']);
-    gulp.watch(paths.reference, ['reference']);
-    gulp.watch(paths.scripts, ['scripts']);
-    gulp.watch(paths.images, ['images']);
-});
-
-gulp.task('connect', function() {
-    connect.server({
-        root: outputDir,
-        port: 8080,
-        keepalive: true,
-        livereload: true
-    })
-});
-
-gulp.task('build', ['htmls', 'robotstxt', 'reference', 'scripts', 'styles', 'images', 'presentations']);
-
-gulp.task('default', ['build', 'connect']);
diff --git a/website/setup_gulp.sh b/website/setup_gulp.sh
deleted file mode 100755
index 06398ccc3e4..00000000000
--- a/website/setup_gulp.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/usr/bin/env bash
-set -ex
-grep require gulpfile.js | awk -F\' '{print $2;}' | xargs npm install