From f801772a862505650fb612137a14bf6f32cc3642 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 23 Sep 2022 13:35:22 +0200 Subject: [PATCH 001/262] 40907 Parameterized views as table functions Implementation: * Updated parsers by adding a bool allow_query_parameters while creating ordinary view, which is used in interpreters to allow query parameters in SELECT. * Added a check in ActionsVisitor if multiple parameters have same names while creating parameterised view. * Added bool in StorageView to represent parameterized view. * Updated processing of SELECT with parameter values to check for views and added substitution of values in the query parameters. Testing: * Added a test tests/queries/0_stateless/02428_parameterized_view.sql Documentation: * Updated the english documentation for VIEW. --- .../sql-reference/statements/create/view.md | 16 +++- src/Interpreters/ActionsVisitor.cpp | 25 +++++- src/Interpreters/Context.cpp | 79 ++++++++++++++----- src/Interpreters/ExpressionAnalyzer.cpp | 19 ++++- src/Interpreters/InterpreterSelectQuery.cpp | 3 + src/Interpreters/QueryNormalizer.cpp | 19 ++++- src/Interpreters/QueryNormalizer.h | 3 + src/Parsers/ASTCreateQuery.cpp | 7 ++ src/Parsers/ASTCreateQuery.h | 4 +- src/Parsers/ASTSelectQuery.cpp | 51 ++++++++++++ src/Parsers/ASTSelectQuery.h | 7 +- src/Parsers/ASTSelectWithUnionQuery.cpp | 22 ++++++ src/Parsers/ASTSelectWithUnionQuery.h | 6 ++ src/Parsers/ExpressionListParsers.cpp | 2 +- src/Parsers/ExpressionListParsers.h | 6 ++ src/Parsers/ParserCreateQuery.cpp | 3 + src/Parsers/ParserSelectQuery.cpp | 1 + src/Parsers/ParserSelectQuery.h | 7 ++ src/Parsers/ParserSelectWithUnionQuery.cpp | 2 +- src/Parsers/ParserSelectWithUnionQuery.h | 3 + src/Parsers/ParserUnionQueryElement.cpp | 2 +- src/Parsers/ParserUnionQueryElement.h | 7 ++ src/Storages/StorageView.cpp | 19 ++++- src/Storages/StorageView.h | 6 ++ .../02428_parameterized_view.reference | 3 + .../0_stateless/02428_parameterized_view.sql | 32 ++++++++ 26 files changed, 320 insertions(+), 34 deletions(-) create mode 100644 tests/queries/0_stateless/02428_parameterized_view.reference create mode 100644 tests/queries/0_stateless/02428_parameterized_view.sql diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index da68ca05bbb..a7b3f4ef762 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -36,6 +36,18 @@ This query is fully equivalent to using the subquery: SELECT a, b, c FROM (SELECT ...) ``` +## Parameterized View +This is similar to a normal view, but it can be created with parameters instead of literals and can be used as a table function by substituting the values of the parameters. + +``` sql +CREATE VIEW view AS SELECT * FROM TABLE WHERE Column1={column1:datatype1} and Column2={column2:datatype2} ... +``` +The above creates a view for the table, which can be used as a table function by substituting value1 & value2 as shown below. + +``` sql +SELECT * FROM view(column1=value1, column2=value2 ...) +``` + ## Materialized View ``` sql @@ -74,9 +86,7 @@ To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop This is an experimental feature that may change in backwards-incompatible ways in the future releases. Enable usage of live views and `WATCH` query using [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view) setting. Input the command `set allow_experimental_live_view = 1`. 
::: -```sql -CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... -``` + Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query. diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 54faf37f236..0ebc6857779 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include @@ -742,9 +743,29 @@ std::optional ActionsMatcher::getNameAndTypeFromAST(const ASTPt return NameAndTypePair(child_column_name, node->result_type); if (!data.only_consts) - throw Exception("Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), - ErrorCodes::UNKNOWN_IDENTIFIER); + { + bool has_query_parameter = false; + std::queue astQueue; + astQueue.push(ast); + + while (!astQueue.empty()) + { + auto current = astQueue.front(); + astQueue.pop(); + + if (auto * ast_query_parameter = current->as()) + has_query_parameter = true; + + for (auto astChild : current->children) + astQueue.push(astChild); + } + + if (!has_query_parameter) + throw Exception( + "Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), + ErrorCodes::UNKNOWN_IDENTIFIER); + } return {}; } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 39af21ef027..77c67822aa4 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -93,6 +93,8 @@ #include #include #include +#include +#include #if USE_ROCKSDB #include @@ -136,6 +138,7 @@ namespace ErrorCodes extern const int INVALID_SETTING_VALUE; extern const int UNKNOWN_READ_METHOD; extern const int NOT_IMPLEMENTED; + extern const int UNKNOWN_FUNCTION; } @@ -1129,32 +1132,72 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) if (!res) { - TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); - if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) + try { - const auto & insertion_table = getInsertionTable(); - if (!insertion_table.empty()) + TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); + if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) { - const auto & structure_hint - = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; - table_function_ptr->setStructureHint(structure_hint); + const auto & insertion_table = getInsertionTable(); + if (!insertion_table.empty()) + { + const auto & structure_hint + = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; + table_function_ptr->setStructureHint(structure_hint); + } } - } - res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); + res = table_function_ptr->execute(table_expression, shared_from_this(), 
table_function_ptr->getName()); - /// Since ITableFunction::parseArguments() may change table_expression, i.e.: - /// - /// remote('127.1', system.one) -> remote('127.1', 'system.one'), - /// - auto new_hash = table_expression->getTreeHash(); - if (hash != new_hash) + /// Since ITableFunction::parseArguments() may change table_expression, i.e.: + /// + /// remote('127.1', system.one) -> remote('127.1', 'system.one'), + /// + auto new_hash = table_expression->getTreeHash(); + if (hash != new_hash) + { + key = toString(new_hash.first) + '_' + toString(new_hash.second); + table_function_results[key] = res; + } + + return res; + }catch (DB::Exception &table_function_exception) { - key = toString(new_hash.first) + '_' + toString(new_hash.second); - table_function_results[key] = res; + if (table_function_exception.code() == ErrorCodes::UNKNOWN_FUNCTION) + { + if (auto ast_function = table_expression->as()) + { + try + { + res = DatabaseCatalog::instance().getTable({getCurrentDatabase(), ast_function->name}, getQueryContext()); + if (res.get()->isView() && res->as()->isParameterizedView()) + return res; + else + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Not a parameterized view {}", + ast_function->name); + } + } + catch (DB::Exception &view_exception) + { + if (view_exception.code() == ErrorCodes::UNKNOWN_TABLE) + throw Exception( + ErrorCodes::UNKNOWN_FUNCTION, + "Unknown table function {} OR Unknown parameterized view {}", + table_function_exception.message(), + view_exception.message()); + else + throw; + } + } + else + throw; + } + else + throw; } - return res; } return res; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 9daa42bf499..d56cc47a34b 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1286,6 +1286,9 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, getRootActions(select_query->where(), only_types, step.actions()); + if (select_query->allow_query_parameters && select_query->hasQueryParameters()) + return true; + auto where_column_name = select_query->where()->getColumnName(); step.addRequiredOutput(where_column_name); @@ -1902,10 +1905,15 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( ExpressionActions( before_where, ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample); - auto & column_elem = before_where_sample.getByName(query.where()->getColumnName()); - /// If the filter column is a constant, record it. - if (column_elem.column) - where_constant_filter_description = ConstantFilterDescription(*column_elem.column); + + if (!(query.allow_query_parameters && query.hasQueryParameters())) + { + auto & column_elem + = before_where_sample.getByName(query.where()->getColumnName()); + /// If the filter column is a constant, record it. 
+ if (column_elem.column) + where_constant_filter_description = ConstantFilterDescription(*column_elem.column); + } } } chain.addStep(); @@ -2066,6 +2074,9 @@ void ExpressionAnalysisResult::finalize( ssize_t & having_step_num, const ASTSelectQuery & query) { + if (query.allow_query_parameters && query.hasQueryParameters()) + return; + if (prewhere_step_num >= 0) { const ExpressionActionsChain::Step & step = *chain.steps.at(prewhere_step_num); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index a77882c85d2..88df37fca34 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -500,7 +500,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it. ASTPtr view_table; if (view) + { view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot); + view->replaceQueryParameters(query_ptr, getSelectQuery().getQueryParameterValues()); + } syntax_analyzer_result = TreeRewriter(context).analyzeSelect( query_ptr, diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index 2a8b256c3d1..280904c9aa9 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -122,6 +122,16 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) } } +void QueryNormalizer::visit(ASTQueryParameter & node, const ASTPtr & ast, Data & data) +{ + auto it_alias = data.aliases.find(node.name); + if (it_alias != data.aliases.end()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Same alias used multiple times {} ", backQuote(node.name)); + + data.aliases[node.name] =ast; +} + + void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr &, Data & data) { /// normalize JOIN ON section @@ -142,6 +152,8 @@ static bool needVisitChild(const ASTPtr & child) /// special visitChildren() for ASTSelectQuery void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr &, Data & data) { + data.allow_query_parameters = select.allow_query_parameters; + for (auto & child : select.children) { if (needVisitChild(child)) @@ -257,7 +269,12 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) else if (auto * node_select = ast->as()) visit(*node_select, ast, data); else if (auto * node_param = ast->as()) - throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); + { + if (data.allow_query_parameters) + visit(*node_param, ast, data); + else + throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); + } else if (auto * node_function = ast->as()) if (node_function->parameters) visit(node_function->parameters, data); diff --git a/src/Interpreters/QueryNormalizer.h b/src/Interpreters/QueryNormalizer.h index f532d869789..ffd2c46ca77 100644 --- a/src/Interpreters/QueryNormalizer.h +++ b/src/Interpreters/QueryNormalizer.h @@ -13,6 +13,7 @@ class ASTSelectQuery; class ASTIdentifier; struct ASTTablesInSelectQueryElement; class Context; +class ASTQueryParameter; class QueryNormalizer @@ -52,6 +53,7 @@ public: /// It's Ok to have "c + 1 AS c" in queries, but not in table definition const bool allow_self_aliases; /// for constructs like "SELECT column + 1 AS column" + bool allow_query_parameters; Data(Aliases & aliases_, const NameSet & source_columns_set_, bool ignore_alias_, ExtractedSettings && settings_, bool 
allow_self_aliases_) : aliases(aliases_) @@ -80,6 +82,7 @@ private: static void visit(ASTIdentifier &, ASTPtr &, Data &); static void visit(ASTTablesInSelectQueryElement &, const ASTPtr &, Data &); static void visit(ASTSelectQuery &, const ASTPtr &, Data &); + static void visit(ASTQueryParameter &, const ASTPtr &, Data &); static void visitChildren(IAST * node, Data & data); }; diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index a277960643b..0fd7ca098e1 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -449,4 +449,11 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat } } +bool ASTCreateQuery::isParameterizedView() const +{ + if (is_ordinary_view && select && select->hasQueryParameters()) + return true; + return false; +} + } diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index f3729b1523f..2a6da778211 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -24,7 +24,7 @@ public: IAST * sample_by = nullptr; IAST * ttl_table = nullptr; ASTSetQuery * settings = nullptr; - + bool allow_query_parameters = false; String getID(char) const override { return "Storage definition"; } @@ -120,6 +120,8 @@ public: bool isView() const { return is_ordinary_view || is_materialized_view || is_live_view || is_window_view; } + bool isParameterizedView() const; + QueryKind getQueryKind() const override { return QueryKind::Create; } protected: diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 76849653b4e..b97c3dbc585 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB @@ -474,4 +475,54 @@ void ASTSelectQuery::setFinal() // NOLINT method can be made const tables_element.table_expression->as().final = true; } +bool ASTSelectQuery::hasQueryParameters() const +{ + std::queue queue; + queue.push(this->clone()); + + while (!queue.empty()) + { + auto ast = queue.front(); + queue.pop(); + + if (ast->as()) + return true; + + for (auto child : ast->children) + queue.push(child); + } + return false; +} + +NameToNameMap ASTSelectQuery::getQueryParameterValues() const +{ + NameToNameMap parameter_values; + std::queue queue; + queue.push(this->clone()); + + while (!queue.empty()) + { + auto ast = queue.front(); + queue.pop(); + if (auto expression_list = ast->as()) + { + if (expression_list->children.size() == 2) + { + if (auto identifier = expression_list->children[0]->as()) + { + if (auto literal = expression_list->children[1]->as()) + { + + parameter_values[identifier->name()] = toString(literal->value); + } + } + } + } + for (auto child : ast->children) + queue.push(child); + } + + return parameter_values; +} + } diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 5e3af545f12..e8eed092472 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -2,7 +2,9 @@ #include #include - +#include +#include +#include namespace DB { @@ -88,6 +90,7 @@ public: bool group_by_with_constant_keys = false; bool group_by_with_grouping_sets = false; bool limit_with_ties = false; + bool allow_query_parameters = false; ASTPtr & refSelect() { return getExpression(Expression::SELECT); } ASTPtr & refTables() { return getExpression(Expression::TABLES); } @@ -142,6 +145,8 @@ public: void setFinal(); QueryKind getQueryKind() const override { return QueryKind::Select; } + bool hasQueryParameters() const; + NameToNameMap 
getQueryParameterValues() const; protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index bc413fbe16d..11ac252aee2 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -86,4 +86,26 @@ bool ASTSelectWithUnionQuery::hasNonDefaultUnionMode() const || set_of_modes.contains(SelectUnionMode::EXCEPT_DISTINCT); } +bool ASTSelectWithUnionQuery::hasQueryParameters() const +{ + std::queue queue; + queue.push(this->clone()); + + while (!queue.empty()) + { + auto current = queue.front(); + queue.pop(); + + if (auto * select = current->as()) + { + if (select->hasQueryParameters()) + return true; + } + + for (auto child : current->children) + queue.push(child); + } + return false; +} + } diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index 457a3361b1e..ef8e50c47fd 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -2,6 +2,8 @@ #include #include +#include +#include namespace DB { @@ -31,6 +33,10 @@ public: /// Consider any mode other than ALL as non-default. bool hasNonDefaultUnionMode() const; + + bool hasQueryParameters() const; + + }; } diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 4e88e5c68e6..59b5b8b98cb 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -121,7 +121,7 @@ bool ParserList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserUnionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserUnionQueryElement elem_parser; + ParserUnionQueryElement elem_parser(allow_query_parameters); ParserKeyword s_union_parser("UNION"); ParserKeyword s_all_parser("ALL"); ParserKeyword s_distinct_parser("DISTINCT"); diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 05c7ec946ee..b0f6e66c213 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -84,6 +84,11 @@ private: class ParserUnionList : public IParserBase { public: + ParserUnionList(bool allow_query_parameters_=false) + : allow_query_parameters(allow_query_parameters_) + { + } + template static bool parseUtil(Pos & pos, const ElemFunc & parse_element, const SepFunc & parse_separator) { @@ -108,6 +113,7 @@ public: } auto getUnionModes() const { return union_modes; } + bool allow_query_parameters; protected: const char * getName() const override { return "list of union elements"; } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 08240abe8c6..26dcfd5079f 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -1285,7 +1285,10 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec is_materialized_view = true; } else + { is_ordinary_view = true; + select_p.allow_query_parameters = true; + } if (!s_view.ignore(pos, expected)) return false; diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index cf335270734..61381573421 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -34,6 +34,7 @@ namespace ErrorCodes bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { auto select_query = std::make_shared(); + select_query->allow_query_parameters = 
allow_query_parameters; node = select_query; ParserKeyword s_select("SELECT"); diff --git a/src/Parsers/ParserSelectQuery.h b/src/Parsers/ParserSelectQuery.h index deac25df57d..708b051e046 100644 --- a/src/Parsers/ParserSelectQuery.h +++ b/src/Parsers/ParserSelectQuery.h @@ -9,6 +9,13 @@ namespace DB class ParserSelectQuery : public IParserBase { +public: + ParserSelectQuery(bool allow_query_parameters_=false) + : allow_query_parameters(allow_query_parameters_) + { + } + bool allow_query_parameters; + protected: const char * getName() const override { return "SELECT query"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/ParserSelectWithUnionQuery.cpp b/src/Parsers/ParserSelectWithUnionQuery.cpp index 532a9e20735..39204ee457d 100644 --- a/src/Parsers/ParserSelectWithUnionQuery.cpp +++ b/src/Parsers/ParserSelectWithUnionQuery.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr list_node; - ParserUnionList parser; + ParserUnionList parser(allow_query_parameters); if (!parser.parse(pos, list_node, expected)) return false; diff --git a/src/Parsers/ParserSelectWithUnionQuery.h b/src/Parsers/ParserSelectWithUnionQuery.h index 0bf2946e429..6edf8a8d60e 100644 --- a/src/Parsers/ParserSelectWithUnionQuery.h +++ b/src/Parsers/ParserSelectWithUnionQuery.h @@ -8,6 +8,9 @@ namespace DB class ParserSelectWithUnionQuery : public IParserBase { +public: + bool allow_query_parameters = false; + protected: const char * getName() const override { return "SELECT query, possibly with UNION"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/ParserUnionQueryElement.cpp b/src/Parsers/ParserUnionQueryElement.cpp index efd022e6362..0ddaa323404 100644 --- a/src/Parsers/ParserUnionQueryElement.cpp +++ b/src/Parsers/ParserUnionQueryElement.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserUnionQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (!ParserSubquery().parse(pos, node, expected) && !ParserSelectQuery().parse(pos, node, expected)) + if (!ParserSubquery().parse(pos, node, expected) && !ParserSelectQuery(allow_query_parameters).parse(pos, node, expected)) return false; if (const auto * ast_subquery = node->as()) diff --git a/src/Parsers/ParserUnionQueryElement.h b/src/Parsers/ParserUnionQueryElement.h index 6b63c62c85b..a3fd47c496b 100644 --- a/src/Parsers/ParserUnionQueryElement.h +++ b/src/Parsers/ParserUnionQueryElement.h @@ -9,6 +9,13 @@ namespace DB class ParserUnionQueryElement : public IParserBase { +public: + ParserUnionQueryElement(bool allow_query_parameters_=false) + : allow_query_parameters(allow_query_parameters_) + { + } + bool allow_query_parameters; + protected: const char * getName() const override { return "SELECT query, subquery, possibly with UNION"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index adaf1c4e404..32d0a08777d 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -23,6 +23,8 @@ #include #include +#include + namespace DB { @@ -99,6 +101,7 @@ StorageView::StorageView( SelectQueryDescription description; description.inner_query = query.select->ptr(); + is_parameterized_view = query.isParameterizedView(); storage_metadata.setSelectQuery(description); setInMemoryMetadata(storage_metadata); } @@ -173,6 +176,15 @@ static ASTTableExpression * 
getFirstTableExpression(ASTSelectQuery & select_quer return select_element->table_expression->as(); } +void StorageView::replaceQueryParameters(ASTPtr & outer_query, const NameToNameMap & parameter_values) +{ + if (is_parameterized_view) + { + ReplaceQueryParameterVisitor visitor(parameter_values); + visitor.visit(outer_query); + } +} + void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name) { ASTTableExpression * table_expression = getFirstTableExpression(outer_query); @@ -185,8 +197,11 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ auto table_function_name = table_expression->table_function->as()->name; if (table_function_name == "view" || table_function_name == "viewIfPermitted") table_expression->database_and_table_name = std::make_shared("__view"); - if (table_function_name == "merge") + else if (table_function_name == "merge") table_expression->database_and_table_name = std::make_shared("__merge"); + else + table_expression->database_and_table_name = std::make_shared(table_function_name); + } if (!table_expression->database_and_table_name) throw Exception("Logical error: incorrect table expression", ErrorCodes::LOGICAL_ERROR); @@ -204,6 +219,8 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ for (auto & child : table_expression->children) if (child.get() == view_name.get()) child = view_query; + else if (child.get() && child->as() && child->as()->name == table_expression->table_function->as()->name) + child = view_query; } ASTPtr StorageView::restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name) diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 31c96addd08..297847e83bf 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -19,6 +19,7 @@ public: std::string getName() const override { return "View"; } bool isView() const override { return true; } + bool isParameterizedView() const { return is_parameterized_view; } /// It is passed inside the query and solved at its level. 
bool supportsSampling() const override { return true; } @@ -34,6 +35,8 @@ public: size_t max_block_size, unsigned num_streams) override; + void replaceQueryParameters(ASTPtr & outer_query, const NameToNameMap & parameter_values); + static void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot) { replaceWithSubquery(select_query, metadata_snapshot->getSelectQuery().inner_query->clone(), view_name); @@ -41,6 +44,9 @@ public: static void replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name); static ASTPtr restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name); + +protected: + bool is_parameterized_view; }; } diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference new file mode 100644 index 00000000000..d9afe5ff69c --- /dev/null +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -0,0 +1,3 @@ +20 +50 +10 diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql new file mode 100644 index 00000000000..0153ed95428 --- /dev/null +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -0,0 +1,32 @@ +DROP TABLE IF EXISTS v1; +DROP TABLE IF EXISTS Catalog; + +CREATE TABLE Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; + +INSERT INTO Catalog VALUES ('Pen', 10, 3); +INSERT INTO Catalog VALUES ('Book', 50, 2); +INSERT INTO Catalog VALUES ('Paper', 20, 1); + +CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price={price:UInt64}; +SELECT Price FROM v1(price=20); + +SELECT Price FROM v123(price=20); -- { serverError UNKNOWN_FUNCTION } + +CREATE VIEW v10 AS SELECT * FROM Catalog WHERE Price=10; +SELECT Price FROM v10(price=10); -- { serverError BAD_ARGUMENTS } + + +CREATE VIEW v2 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64}; +SELECT Price FROM v2(price=50,quantity=2); + +SELECT Price FROM v2(price=50); -- { serverError UNKNOWN_QUERY_PARAMETER} + +CREATE VIEW v3 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity=3; +SELECT Price FROM v3(price=10); + +CREATE VIEW v4 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={price:UInt64}; -- {serverError BAD_ARGUMENTS} + +DROP TABLE v1; +DROP TABLE v2; +DROP TABLE v3; +DROP TABLE Catalog; From bb451b2b27f444b98dd2b1def3e360e660f1216e Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 23 Sep 2022 13:35:22 +0200 Subject: [PATCH 002/262] 40907 Parameterized views as table functions Implementation: * Updated parsers by adding a bool allow_query_parameters while creating ordinary view, which is used in interpreters to allow query parameters in SELECT. * Added a check in ActionsVisitor if multiple parameters have same names while creating parameterised view. * Added bool in StorageView to represent parameterized view. * Updated processing of SELECT with parameter values to check for views and added substitution of values in the query parameters. Testing: * Added a test tests/queries/0_stateless/02428_parameterized_view.sql Documentation: * Updated the english documentation for VIEW. 
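As a concrete illustration of what this change set enables, the usage below follows the test added in tests/queries/0_stateless/02428_parameterized_view.sql (the Catalog table, the view v1, and the expected result 20 all come from that test):

``` sql
-- Source table from the test added by this patch.
CREATE TABLE Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory;
INSERT INTO Catalog VALUES ('Pen', 10, 3);
INSERT INTO Catalog VALUES ('Book', 50, 2);
INSERT INTO Catalog VALUES ('Paper', 20, 1);

-- The WHERE clause of the view uses a query parameter instead of a literal.
CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price={price:UInt64};

-- The view is then used as a table function; the supplied value is substituted for {price:UInt64}.
SELECT Price FROM v1(price=20);   -- returns 20
```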
--- .../sql-reference/statements/create/view.md | 16 +++- src/Interpreters/ActionsVisitor.cpp | 25 +++++- src/Interpreters/Context.cpp | 79 ++++++++++++++----- src/Interpreters/ExpressionAnalyzer.cpp | 19 ++++- src/Interpreters/InterpreterSelectQuery.cpp | 3 + src/Interpreters/QueryNormalizer.cpp | 19 ++++- src/Interpreters/QueryNormalizer.h | 3 + src/Parsers/ASTCreateQuery.cpp | 7 ++ src/Parsers/ASTCreateQuery.h | 4 +- src/Parsers/ASTSelectQuery.cpp | 51 ++++++++++++ src/Parsers/ASTSelectQuery.h | 7 +- src/Parsers/ASTSelectWithUnionQuery.cpp | 22 ++++++ src/Parsers/ASTSelectWithUnionQuery.h | 6 ++ src/Parsers/ExpressionListParsers.cpp | 2 +- src/Parsers/ExpressionListParsers.h | 6 ++ src/Parsers/ParserCreateQuery.cpp | 3 + src/Parsers/ParserSelectQuery.cpp | 1 + src/Parsers/ParserSelectQuery.h | 7 ++ src/Parsers/ParserSelectWithUnionQuery.cpp | 2 +- src/Parsers/ParserSelectWithUnionQuery.h | 3 + src/Parsers/ParserUnionQueryElement.cpp | 2 +- src/Parsers/ParserUnionQueryElement.h | 7 ++ src/Storages/StorageView.cpp | 19 ++++- src/Storages/StorageView.h | 6 ++ .../02428_parameterized_view.reference | 3 + .../0_stateless/02428_parameterized_view.sql | 32 ++++++++ 26 files changed, 320 insertions(+), 34 deletions(-) create mode 100644 tests/queries/0_stateless/02428_parameterized_view.reference create mode 100644 tests/queries/0_stateless/02428_parameterized_view.sql diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index da68ca05bbb..a7b3f4ef762 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -36,6 +36,18 @@ This query is fully equivalent to using the subquery: SELECT a, b, c FROM (SELECT ...) ``` +## Parameterized View +This is similar to a normal view, but it can be created with parameters instead of literals and can be used as a table function by substituting the values of the parameters. + +``` sql +CREATE VIEW view AS SELECT * FROM TABLE WHERE Column1={column1:datatype1} and Column2={column2:datatype2} ... +``` +The above creates a view for the table, which can be used as a table function by substituting value1 & value2 as shown below. + +``` sql +SELECT * FROM view(column1=value1, column2=value2 ...) +``` + ## Materialized View ``` sql @@ -74,9 +86,7 @@ To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop This is an experimental feature that may change in backwards-incompatible ways in the future releases. Enable usage of live views and `WATCH` query using [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view) setting. Input the command `set allow_experimental_live_view = 1`. ::: -```sql -CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... -``` + Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query. 
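To make the generic {column:datatype} syntax from the Parameterized View section above concrete, here is a sketch with two typed parameters, based on the Catalog table used in the test shipped with this patch (the view name below is illustrative, not part of the patch):

``` sql
-- Each parameter has a name and a data type; both appear in the view definition.
CREATE VIEW catalog_filtered AS
    SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64};

-- All parameters declared in the view must be supplied when it is used as a table function.
SELECT Price FROM catalog_filtered(price=50, quantity=2);   -- returns 50
```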
diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 54faf37f236..0ebc6857779 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include @@ -742,9 +743,29 @@ std::optional ActionsMatcher::getNameAndTypeFromAST(const ASTPt return NameAndTypePair(child_column_name, node->result_type); if (!data.only_consts) - throw Exception("Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), - ErrorCodes::UNKNOWN_IDENTIFIER); + { + bool has_query_parameter = false; + std::queue astQueue; + astQueue.push(ast); + + while (!astQueue.empty()) + { + auto current = astQueue.front(); + astQueue.pop(); + + if (auto * ast_query_parameter = current->as()) + has_query_parameter = true; + + for (auto astChild : current->children) + astQueue.push(astChild); + } + + if (!has_query_parameter) + throw Exception( + "Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), + ErrorCodes::UNKNOWN_IDENTIFIER); + } return {}; } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index be47338541c..e3276f6006d 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -94,6 +94,8 @@ #include #include #include +#include +#include #if USE_ROCKSDB #include @@ -137,6 +139,7 @@ namespace ErrorCodes extern const int INVALID_SETTING_VALUE; extern const int UNKNOWN_READ_METHOD; extern const int NOT_IMPLEMENTED; + extern const int UNKNOWN_FUNCTION; } @@ -1131,32 +1134,72 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) if (!res) { - TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); - if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) + try { - const auto & insertion_table = getInsertionTable(); - if (!insertion_table.empty()) + TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); + if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) { - const auto & structure_hint - = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; - table_function_ptr->setStructureHint(structure_hint); + const auto & insertion_table = getInsertionTable(); + if (!insertion_table.empty()) + { + const auto & structure_hint + = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; + table_function_ptr->setStructureHint(structure_hint); + } } - } - res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); + res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); - /// Since ITableFunction::parseArguments() may change table_expression, i.e.: - /// - /// remote('127.1', system.one) -> remote('127.1', 'system.one'), - /// - auto new_hash = table_expression->getTreeHash(); - if (hash != new_hash) + /// Since ITableFunction::parseArguments() may change table_expression, i.e.: + /// + /// remote('127.1', system.one) -> remote('127.1', 'system.one'), + /// + auto new_hash = table_expression->getTreeHash(); + if (hash != new_hash) + { + key = toString(new_hash.first) + '_' + toString(new_hash.second); + 
table_function_results[key] = res; + } + + return res; + }catch (DB::Exception &table_function_exception) { - key = toString(new_hash.first) + '_' + toString(new_hash.second); - table_function_results[key] = res; + if (table_function_exception.code() == ErrorCodes::UNKNOWN_FUNCTION) + { + if (auto ast_function = table_expression->as()) + { + try + { + res = DatabaseCatalog::instance().getTable({getCurrentDatabase(), ast_function->name}, getQueryContext()); + if (res.get()->isView() && res->as()->isParameterizedView()) + return res; + else + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Not a parameterized view {}", + ast_function->name); + } + } + catch (DB::Exception &view_exception) + { + if (view_exception.code() == ErrorCodes::UNKNOWN_TABLE) + throw Exception( + ErrorCodes::UNKNOWN_FUNCTION, + "Unknown table function {} OR Unknown parameterized view {}", + table_function_exception.message(), + view_exception.message()); + else + throw; + } + } + else + throw; + } + else + throw; } - return res; } return res; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 9daa42bf499..d56cc47a34b 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1286,6 +1286,9 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, getRootActions(select_query->where(), only_types, step.actions()); + if (select_query->allow_query_parameters && select_query->hasQueryParameters()) + return true; + auto where_column_name = select_query->where()->getColumnName(); step.addRequiredOutput(where_column_name); @@ -1902,10 +1905,15 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( ExpressionActions( before_where, ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample); - auto & column_elem = before_where_sample.getByName(query.where()->getColumnName()); - /// If the filter column is a constant, record it. - if (column_elem.column) - where_constant_filter_description = ConstantFilterDescription(*column_elem.column); + + if (!(query.allow_query_parameters && query.hasQueryParameters())) + { + auto & column_elem + = before_where_sample.getByName(query.where()->getColumnName()); + /// If the filter column is a constant, record it. + if (column_elem.column) + where_constant_filter_description = ConstantFilterDescription(*column_elem.column); + } } } chain.addStep(); @@ -2066,6 +2074,9 @@ void ExpressionAnalysisResult::finalize( ssize_t & having_step_num, const ASTSelectQuery & query) { + if (query.allow_query_parameters && query.hasQueryParameters()) + return; + if (prewhere_step_num >= 0) { const ExpressionActionsChain::Step & step = *chain.steps.at(prewhere_step_num); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index a77882c85d2..88df37fca34 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -500,7 +500,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it. 
ASTPtr view_table; if (view) + { view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot); + view->replaceQueryParameters(query_ptr, getSelectQuery().getQueryParameterValues()); + } syntax_analyzer_result = TreeRewriter(context).analyzeSelect( query_ptr, diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index 2a8b256c3d1..280904c9aa9 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -122,6 +122,16 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) } } +void QueryNormalizer::visit(ASTQueryParameter & node, const ASTPtr & ast, Data & data) +{ + auto it_alias = data.aliases.find(node.name); + if (it_alias != data.aliases.end()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Same alias used multiple times {} ", backQuote(node.name)); + + data.aliases[node.name] =ast; +} + + void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr &, Data & data) { /// normalize JOIN ON section @@ -142,6 +152,8 @@ static bool needVisitChild(const ASTPtr & child) /// special visitChildren() for ASTSelectQuery void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr &, Data & data) { + data.allow_query_parameters = select.allow_query_parameters; + for (auto & child : select.children) { if (needVisitChild(child)) @@ -257,7 +269,12 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) else if (auto * node_select = ast->as()) visit(*node_select, ast, data); else if (auto * node_param = ast->as()) - throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); + { + if (data.allow_query_parameters) + visit(*node_param, ast, data); + else + throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); + } else if (auto * node_function = ast->as()) if (node_function->parameters) visit(node_function->parameters, data); diff --git a/src/Interpreters/QueryNormalizer.h b/src/Interpreters/QueryNormalizer.h index f532d869789..ffd2c46ca77 100644 --- a/src/Interpreters/QueryNormalizer.h +++ b/src/Interpreters/QueryNormalizer.h @@ -13,6 +13,7 @@ class ASTSelectQuery; class ASTIdentifier; struct ASTTablesInSelectQueryElement; class Context; +class ASTQueryParameter; class QueryNormalizer @@ -52,6 +53,7 @@ public: /// It's Ok to have "c + 1 AS c" in queries, but not in table definition const bool allow_self_aliases; /// for constructs like "SELECT column + 1 AS column" + bool allow_query_parameters; Data(Aliases & aliases_, const NameSet & source_columns_set_, bool ignore_alias_, ExtractedSettings && settings_, bool allow_self_aliases_) : aliases(aliases_) @@ -80,6 +82,7 @@ private: static void visit(ASTIdentifier &, ASTPtr &, Data &); static void visit(ASTTablesInSelectQueryElement &, const ASTPtr &, Data &); static void visit(ASTSelectQuery &, const ASTPtr &, Data &); + static void visit(ASTQueryParameter &, const ASTPtr &, Data &); static void visitChildren(IAST * node, Data & data); }; diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index a277960643b..0fd7ca098e1 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -449,4 +449,11 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat } } +bool ASTCreateQuery::isParameterizedView() const +{ + if (is_ordinary_view && select && select->hasQueryParameters()) + return true; + return false; +} + } diff --git 
a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index f3729b1523f..2a6da778211 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -24,7 +24,7 @@ public: IAST * sample_by = nullptr; IAST * ttl_table = nullptr; ASTSetQuery * settings = nullptr; - + bool allow_query_parameters = false; String getID(char) const override { return "Storage definition"; } @@ -120,6 +120,8 @@ public: bool isView() const { return is_ordinary_view || is_materialized_view || is_live_view || is_window_view; } + bool isParameterizedView() const; + QueryKind getQueryKind() const override { return QueryKind::Create; } protected: diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 76849653b4e..b97c3dbc585 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB @@ -474,4 +475,54 @@ void ASTSelectQuery::setFinal() // NOLINT method can be made const tables_element.table_expression->as().final = true; } +bool ASTSelectQuery::hasQueryParameters() const +{ + std::queue queue; + queue.push(this->clone()); + + while (!queue.empty()) + { + auto ast = queue.front(); + queue.pop(); + + if (ast->as()) + return true; + + for (auto child : ast->children) + queue.push(child); + } + return false; +} + +NameToNameMap ASTSelectQuery::getQueryParameterValues() const +{ + NameToNameMap parameter_values; + std::queue queue; + queue.push(this->clone()); + + while (!queue.empty()) + { + auto ast = queue.front(); + queue.pop(); + if (auto expression_list = ast->as()) + { + if (expression_list->children.size() == 2) + { + if (auto identifier = expression_list->children[0]->as()) + { + if (auto literal = expression_list->children[1]->as()) + { + + parameter_values[identifier->name()] = toString(literal->value); + } + } + } + } + for (auto child : ast->children) + queue.push(child); + } + + return parameter_values; +} + } diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 5e3af545f12..e8eed092472 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -2,7 +2,9 @@ #include #include - +#include +#include +#include namespace DB { @@ -88,6 +90,7 @@ public: bool group_by_with_constant_keys = false; bool group_by_with_grouping_sets = false; bool limit_with_ties = false; + bool allow_query_parameters = false; ASTPtr & refSelect() { return getExpression(Expression::SELECT); } ASTPtr & refTables() { return getExpression(Expression::TABLES); } @@ -142,6 +145,8 @@ public: void setFinal(); QueryKind getQueryKind() const override { return QueryKind::Select; } + bool hasQueryParameters() const; + NameToNameMap getQueryParameterValues() const; protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index bc413fbe16d..11ac252aee2 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -86,4 +86,26 @@ bool ASTSelectWithUnionQuery::hasNonDefaultUnionMode() const || set_of_modes.contains(SelectUnionMode::EXCEPT_DISTINCT); } +bool ASTSelectWithUnionQuery::hasQueryParameters() const +{ + std::queue queue; + queue.push(this->clone()); + + while (!queue.empty()) + { + auto current = queue.front(); + queue.pop(); + + if (auto * select = current->as()) + { + if (select->hasQueryParameters()) + return true; + } + + for (auto child : current->children) + 
queue.push(child); + } + return false; +} + } diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index 457a3361b1e..ef8e50c47fd 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -2,6 +2,8 @@ #include #include +#include +#include namespace DB { @@ -31,6 +33,10 @@ public: /// Consider any mode other than ALL as non-default. bool hasNonDefaultUnionMode() const; + + bool hasQueryParameters() const; + + }; } diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 4e88e5c68e6..59b5b8b98cb 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -121,7 +121,7 @@ bool ParserList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserUnionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserUnionQueryElement elem_parser; + ParserUnionQueryElement elem_parser(allow_query_parameters); ParserKeyword s_union_parser("UNION"); ParserKeyword s_all_parser("ALL"); ParserKeyword s_distinct_parser("DISTINCT"); diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 05c7ec946ee..b0f6e66c213 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -84,6 +84,11 @@ private: class ParserUnionList : public IParserBase { public: + ParserUnionList(bool allow_query_parameters_=false) + : allow_query_parameters(allow_query_parameters_) + { + } + template static bool parseUtil(Pos & pos, const ElemFunc & parse_element, const SepFunc & parse_separator) { @@ -108,6 +113,7 @@ public: } auto getUnionModes() const { return union_modes; } + bool allow_query_parameters; protected: const char * getName() const override { return "list of union elements"; } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 08240abe8c6..26dcfd5079f 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -1285,7 +1285,10 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec is_materialized_view = true; } else + { is_ordinary_view = true; + select_p.allow_query_parameters = true; + } if (!s_view.ignore(pos, expected)) return false; diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index cf335270734..61381573421 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -34,6 +34,7 @@ namespace ErrorCodes bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { auto select_query = std::make_shared(); + select_query->allow_query_parameters = allow_query_parameters; node = select_query; ParserKeyword s_select("SELECT"); diff --git a/src/Parsers/ParserSelectQuery.h b/src/Parsers/ParserSelectQuery.h index deac25df57d..708b051e046 100644 --- a/src/Parsers/ParserSelectQuery.h +++ b/src/Parsers/ParserSelectQuery.h @@ -9,6 +9,13 @@ namespace DB class ParserSelectQuery : public IParserBase { +public: + ParserSelectQuery(bool allow_query_parameters_=false) + : allow_query_parameters(allow_query_parameters_) + { + } + bool allow_query_parameters; + protected: const char * getName() const override { return "SELECT query"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/ParserSelectWithUnionQuery.cpp b/src/Parsers/ParserSelectWithUnionQuery.cpp index 532a9e20735..39204ee457d 100644 --- a/src/Parsers/ParserSelectWithUnionQuery.cpp +++ 
b/src/Parsers/ParserSelectWithUnionQuery.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr list_node; - ParserUnionList parser; + ParserUnionList parser(allow_query_parameters); if (!parser.parse(pos, list_node, expected)) return false; diff --git a/src/Parsers/ParserSelectWithUnionQuery.h b/src/Parsers/ParserSelectWithUnionQuery.h index 0bf2946e429..6edf8a8d60e 100644 --- a/src/Parsers/ParserSelectWithUnionQuery.h +++ b/src/Parsers/ParserSelectWithUnionQuery.h @@ -8,6 +8,9 @@ namespace DB class ParserSelectWithUnionQuery : public IParserBase { +public: + bool allow_query_parameters = false; + protected: const char * getName() const override { return "SELECT query, possibly with UNION"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/ParserUnionQueryElement.cpp b/src/Parsers/ParserUnionQueryElement.cpp index efd022e6362..0ddaa323404 100644 --- a/src/Parsers/ParserUnionQueryElement.cpp +++ b/src/Parsers/ParserUnionQueryElement.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserUnionQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (!ParserSubquery().parse(pos, node, expected) && !ParserSelectQuery().parse(pos, node, expected)) + if (!ParserSubquery().parse(pos, node, expected) && !ParserSelectQuery(allow_query_parameters).parse(pos, node, expected)) return false; if (const auto * ast_subquery = node->as()) diff --git a/src/Parsers/ParserUnionQueryElement.h b/src/Parsers/ParserUnionQueryElement.h index 6b63c62c85b..a3fd47c496b 100644 --- a/src/Parsers/ParserUnionQueryElement.h +++ b/src/Parsers/ParserUnionQueryElement.h @@ -9,6 +9,13 @@ namespace DB class ParserUnionQueryElement : public IParserBase { +public: + ParserUnionQueryElement(bool allow_query_parameters_=false) + : allow_query_parameters(allow_query_parameters_) + { + } + bool allow_query_parameters; + protected: const char * getName() const override { return "SELECT query, subquery, possibly with UNION"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index adaf1c4e404..32d0a08777d 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -23,6 +23,8 @@ #include #include +#include + namespace DB { @@ -99,6 +101,7 @@ StorageView::StorageView( SelectQueryDescription description; description.inner_query = query.select->ptr(); + is_parameterized_view = query.isParameterizedView(); storage_metadata.setSelectQuery(description); setInMemoryMetadata(storage_metadata); } @@ -173,6 +176,15 @@ static ASTTableExpression * getFirstTableExpression(ASTSelectQuery & select_quer return select_element->table_expression->as(); } +void StorageView::replaceQueryParameters(ASTPtr & outer_query, const NameToNameMap & parameter_values) +{ + if (is_parameterized_view) + { + ReplaceQueryParameterVisitor visitor(parameter_values); + visitor.visit(outer_query); + } +} + void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name) { ASTTableExpression * table_expression = getFirstTableExpression(outer_query); @@ -185,8 +197,11 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ auto table_function_name = table_expression->table_function->as()->name; if (table_function_name == "view" || table_function_name == "viewIfPermitted") table_expression->database_and_table_name = std::make_shared("__view"); - if 
(table_function_name == "merge") + else if (table_function_name == "merge") table_expression->database_and_table_name = std::make_shared("__merge"); + else + table_expression->database_and_table_name = std::make_shared(table_function_name); + } if (!table_expression->database_and_table_name) throw Exception("Logical error: incorrect table expression", ErrorCodes::LOGICAL_ERROR); @@ -204,6 +219,8 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ for (auto & child : table_expression->children) if (child.get() == view_name.get()) child = view_query; + else if (child.get() && child->as() && child->as()->name == table_expression->table_function->as()->name) + child = view_query; } ASTPtr StorageView::restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name) diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 31c96addd08..297847e83bf 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -19,6 +19,7 @@ public: std::string getName() const override { return "View"; } bool isView() const override { return true; } + bool isParameterizedView() const { return is_parameterized_view; } /// It is passed inside the query and solved at its level. bool supportsSampling() const override { return true; } @@ -34,6 +35,8 @@ public: size_t max_block_size, unsigned num_streams) override; + void replaceQueryParameters(ASTPtr & outer_query, const NameToNameMap & parameter_values); + static void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot) { replaceWithSubquery(select_query, metadata_snapshot->getSelectQuery().inner_query->clone(), view_name); @@ -41,6 +44,9 @@ public: static void replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name); static ASTPtr restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name); + +protected: + bool is_parameterized_view; }; } diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference new file mode 100644 index 00000000000..d9afe5ff69c --- /dev/null +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -0,0 +1,3 @@ +20 +50 +10 diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql new file mode 100644 index 00000000000..0153ed95428 --- /dev/null +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -0,0 +1,32 @@ +DROP TABLE IF EXISTS v1; +DROP TABLE IF EXISTS Catalog; + +CREATE TABLE Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; + +INSERT INTO Catalog VALUES ('Pen', 10, 3); +INSERT INTO Catalog VALUES ('Book', 50, 2); +INSERT INTO Catalog VALUES ('Paper', 20, 1); + +CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price={price:UInt64}; +SELECT Price FROM v1(price=20); + +SELECT Price FROM v123(price=20); -- { serverError UNKNOWN_FUNCTION } + +CREATE VIEW v10 AS SELECT * FROM Catalog WHERE Price=10; +SELECT Price FROM v10(price=10); -- { serverError BAD_ARGUMENTS } + + +CREATE VIEW v2 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64}; +SELECT Price FROM v2(price=50,quantity=2); + +SELECT Price FROM v2(price=50); -- { serverError UNKNOWN_QUERY_PARAMETER} + +CREATE VIEW v3 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity=3; +SELECT Price FROM v3(price=10); + +CREATE VIEW v4 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND 
Quantity={price:UInt64}; -- {serverError BAD_ARGUMENTS} + +DROP TABLE v1; +DROP TABLE v2; +DROP TABLE v3; +DROP TABLE Catalog; From 456baddbc74a1e1d783647ee9779ad676da08171 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Sat, 24 Sep 2022 12:01:18 +0200 Subject: [PATCH 003/262] 40907 Parameterized views as table functions Implementation * Fix for Build fails - updated conversion of Field to String and includes --- src/Parsers/ASTSelectQuery.cpp | 5 ++++- src/Parsers/ASTSelectQuery.h | 3 --- src/Parsers/ASTSelectWithUnionQuery.cpp | 2 ++ src/Parsers/ASTSelectWithUnionQuery.h | 2 -- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index b97c3dbc585..b187dc74f02 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -8,7 +8,10 @@ #include #include #include +#include +#include +#include namespace DB { @@ -513,7 +516,7 @@ NameToNameMap ASTSelectQuery::getQueryParameterValues() const if (auto literal = expression_list->children[1]->as()) { - parameter_values[identifier->name()] = toString(literal->value); + parameter_values[identifier->name()] = convertFieldToString(literal->value); } } } diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index e8eed092472..8ece05808f7 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -2,9 +2,6 @@ #include #include -#include -#include -#include namespace DB { diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index 11ac252aee2..c38e4e2c747 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -3,7 +3,9 @@ #include #include #include +#include +#include #include namespace DB diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index ef8e50c47fd..8d93760426a 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -2,8 +2,6 @@ #include #include -#include -#include namespace DB { From 4d414b69b86875dc72e3a458cb021389d33e476c Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 26 Sep 2022 10:30:28 +0200 Subject: [PATCH 004/262] 40907 Parameterized views as table functions Implementation * Fix for clang-today build fails - updated to use const reference in ASTSelectQuery.cpp --- src/Parsers/ASTSelectQuery.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index b187dc74f02..3be3f6c5c9a 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -507,13 +507,13 @@ NameToNameMap ASTSelectQuery::getQueryParameterValues() const { auto ast = queue.front(); queue.pop(); - if (auto expression_list = ast->as()) + if (auto * expression_list = ast->as()) { if (expression_list->children.size() == 2) { - if (auto identifier = expression_list->children[0]->as()) + if (auto * identifier = expression_list->children[0]->as()) { - if (auto literal = expression_list->children[1]->as()) + if (auto * literal = expression_list->children[1]->as()) { parameter_values[identifier->name()] = convertFieldToString(literal->value); @@ -521,7 +521,7 @@ NameToNameMap ASTSelectQuery::getQueryParameterValues() const } } } - for (auto child : ast->children) + for (const auto & child : ast->children) queue.push(child); } From 4fb1dffb9e04d44e9074d7cc8012d84cc96fc662 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 26 Sep 2022 13:08:54 +0200 Subject: 
[PATCH 005/262] 40907 Parameterized views as table functions Implementation * Fix for clang-today build fails - updated to use const reference in ASTSelectQuery.cpp & ASTSelectWithUnionQuery.cpp --- src/Parsers/ASTSelectQuery.cpp | 2 +- src/Parsers/ASTSelectWithUnionQuery.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 3be3f6c5c9a..7537628b3a5 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -491,7 +491,7 @@ bool ASTSelectQuery::hasQueryParameters() const if (ast->as()) return true; - for (auto child : ast->children) + for (const auto & child : ast->children) queue.push(child); } return false; diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index c38e4e2c747..76fe9582615 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -104,7 +104,7 @@ bool ASTSelectWithUnionQuery::hasQueryParameters() const return true; } - for (auto child : current->children) + for (const auto & child : current->children) queue.push(child); } return false; From 038352bfce1a24c1620067f4f5d2a160e69aea8b Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 23 Sep 2022 13:35:22 +0200 Subject: [PATCH 006/262] 40907 Parameterized views as table functions Implementation: * Updated parsers by adding a bool allow_query_parameters while creating ordinary view, which is used in interpreters to allow query parameters in SELECT. * Added a check in ActionsVisitor if multiple parameters have same names while creating parameterised view. * Added bool in StorageView to represent parameterized view. * Updated processing of SELECT with parameter values to check for views and added substitution of values in the query parameters. Testing: * Added a test tests/queries/0_stateless/02428_parameterized_view.sql Documentation: * Updated the english documentation for VIEW. 
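The implementation points above describe the whole flow: the CREATE VIEW parser keeps `{name:type}` placeholders as query parameters, StorageView remembers that the view is parameterized, and the SELECT path substitutes the supplied values before analysis. As a quick orientation, here is a minimal SQL sketch of that behaviour, reusing the `Catalog` table from the regression test added in this patch; the view name `priced` is only illustrative.

``` sql
-- Same table as tests/queries/0_stateless/02428_parameterized_view.sql.
CREATE TABLE Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory;
INSERT INTO Catalog VALUES ('Pen', 10, 3), ('Book', 50, 2), ('Paper', 20, 1);

-- CREATE VIEW is parsed with allow_query_parameters, so {price:UInt64} is kept
-- as a query parameter instead of being rejected as an unknown identifier.
CREATE VIEW priced AS SELECT * FROM Catalog WHERE Price = {price:UInt64};

-- The view is then usable as a table function: the value 20 is substituted
-- for {price:UInt64} before the SELECT is analyzed.
SELECT Name, Price FROM priced(price=20);   -- returns the 'Paper' row
```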
--- .../sql-reference/statements/create/view.md | 16 +++- src/Interpreters/ActionsVisitor.cpp | 25 +++++- src/Interpreters/Context.cpp | 79 ++++++++++++++----- src/Interpreters/ExpressionAnalyzer.cpp | 19 ++++- src/Interpreters/InterpreterSelectQuery.cpp | 3 + src/Interpreters/QueryNormalizer.cpp | 19 ++++- src/Interpreters/QueryNormalizer.h | 3 + src/Parsers/ASTCreateQuery.cpp | 7 ++ src/Parsers/ASTCreateQuery.h | 4 +- src/Parsers/ASTSelectQuery.cpp | 51 ++++++++++++ src/Parsers/ASTSelectQuery.h | 7 +- src/Parsers/ASTSelectWithUnionQuery.cpp | 22 ++++++ src/Parsers/ASTSelectWithUnionQuery.h | 6 ++ src/Parsers/ExpressionListParsers.cpp | 2 +- src/Parsers/ExpressionListParsers.h | 6 ++ src/Parsers/ParserCreateQuery.cpp | 3 + src/Parsers/ParserSelectQuery.cpp | 1 + src/Parsers/ParserSelectQuery.h | 7 ++ src/Parsers/ParserSelectWithUnionQuery.cpp | 2 +- src/Parsers/ParserSelectWithUnionQuery.h | 3 + src/Parsers/ParserUnionQueryElement.cpp | 2 +- src/Parsers/ParserUnionQueryElement.h | 7 ++ src/Storages/StorageView.cpp | 19 ++++- src/Storages/StorageView.h | 6 ++ .../02428_parameterized_view.reference | 3 + .../0_stateless/02428_parameterized_view.sql | 32 ++++++++ 26 files changed, 320 insertions(+), 34 deletions(-) create mode 100644 tests/queries/0_stateless/02428_parameterized_view.reference create mode 100644 tests/queries/0_stateless/02428_parameterized_view.sql diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index da68ca05bbb..a7b3f4ef762 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -36,6 +36,18 @@ This query is fully equivalent to using the subquery: SELECT a, b, c FROM (SELECT ...) ``` +## Parameterized View +This is similar to normal view but can be created with parameter instead of literals and can be used as table functions by substituting the values of the parametes. + +``` sql +CREATE VIEW view AS SELECT * FROM TABLE WHERE Column1={column1:datatype1} and Column2={column2:datatype2} ... +``` +The above creates a view for table which can be used as table function by substituting value1 & value2 as show below. + +``` sql +SELECT * FROM view(column1=value1, column2=value2 ...) +``` + ## Materialized View ``` sql @@ -74,9 +86,7 @@ To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop This is an experimental feature that may change in backwards-incompatible ways in the future releases. Enable usage of live views and `WATCH` query using [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view) setting. Input the command `set allow_experimental_live_view = 1`. ::: -```sql -CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... -``` + Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query. 
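Beyond the happy path shown in the documentation hunk above, the regression test in this patch also pins down the expected failure modes. The SQL below is a sketch of those cases against the same `Catalog` table; the view names are illustrative, and the expected errors follow the test expectations rather than exact message text.

``` sql
-- A view without parameters cannot be called with them.
CREATE VIEW v_plain AS SELECT * FROM Catalog WHERE Price = 10;
SELECT * FROM v_plain(price=10);                 -- BAD_ARGUMENTS: not a parameterized view

-- Every declared parameter has to be supplied.
CREATE VIEW v_two AS SELECT * FROM Catalog
WHERE Price = {price:UInt64} AND Quantity = {quantity:UInt64};
SELECT * FROM v_two(price=50);                   -- UNKNOWN_QUERY_PARAMETER: quantity missing
SELECT * FROM v_two(price=50, quantity=2);       -- OK

-- Neither a table function nor an existing view.
SELECT * FROM no_such_view(price=20);            -- UNKNOWN_FUNCTION

-- Reusing a parameter name inside one view definition is rejected at CREATE time.
CREATE VIEW v_dup AS SELECT * FROM Catalog
WHERE Price = {price:UInt64} AND Quantity = {price:UInt64};   -- BAD_ARGUMENTS
```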
diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 54faf37f236..0ebc6857779 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include @@ -742,9 +743,29 @@ std::optional ActionsMatcher::getNameAndTypeFromAST(const ASTPt return NameAndTypePair(child_column_name, node->result_type); if (!data.only_consts) - throw Exception("Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), - ErrorCodes::UNKNOWN_IDENTIFIER); + { + bool has_query_parameter = false; + std::queue astQueue; + astQueue.push(ast); + + while (!astQueue.empty()) + { + auto current = astQueue.front(); + astQueue.pop(); + + if (auto * ast_query_parameter = current->as()) + has_query_parameter = true; + + for (auto astChild : current->children) + astQueue.push(astChild); + } + + if (!has_query_parameter) + throw Exception( + "Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), + ErrorCodes::UNKNOWN_IDENTIFIER); + } return {}; } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index be47338541c..e3276f6006d 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -94,6 +94,8 @@ #include #include #include +#include +#include #if USE_ROCKSDB #include @@ -137,6 +139,7 @@ namespace ErrorCodes extern const int INVALID_SETTING_VALUE; extern const int UNKNOWN_READ_METHOD; extern const int NOT_IMPLEMENTED; + extern const int UNKNOWN_FUNCTION; } @@ -1131,32 +1134,72 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) if (!res) { - TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); - if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) + try { - const auto & insertion_table = getInsertionTable(); - if (!insertion_table.empty()) + TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); + if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) { - const auto & structure_hint - = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; - table_function_ptr->setStructureHint(structure_hint); + const auto & insertion_table = getInsertionTable(); + if (!insertion_table.empty()) + { + const auto & structure_hint + = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; + table_function_ptr->setStructureHint(structure_hint); + } } - } - res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); + res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); - /// Since ITableFunction::parseArguments() may change table_expression, i.e.: - /// - /// remote('127.1', system.one) -> remote('127.1', 'system.one'), - /// - auto new_hash = table_expression->getTreeHash(); - if (hash != new_hash) + /// Since ITableFunction::parseArguments() may change table_expression, i.e.: + /// + /// remote('127.1', system.one) -> remote('127.1', 'system.one'), + /// + auto new_hash = table_expression->getTreeHash(); + if (hash != new_hash) + { + key = toString(new_hash.first) + '_' + toString(new_hash.second); + 
table_function_results[key] = res; + } + + return res; + }catch (DB::Exception &table_function_exception) { - key = toString(new_hash.first) + '_' + toString(new_hash.second); - table_function_results[key] = res; + if (table_function_exception.code() == ErrorCodes::UNKNOWN_FUNCTION) + { + if (auto ast_function = table_expression->as()) + { + try + { + res = DatabaseCatalog::instance().getTable({getCurrentDatabase(), ast_function->name}, getQueryContext()); + if (res.get()->isView() && res->as()->isParameterizedView()) + return res; + else + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Not a parameterized view {}", + ast_function->name); + } + } + catch (DB::Exception &view_exception) + { + if (view_exception.code() == ErrorCodes::UNKNOWN_TABLE) + throw Exception( + ErrorCodes::UNKNOWN_FUNCTION, + "Unknown table function {} OR Unknown parameterized view {}", + table_function_exception.message(), + view_exception.message()); + else + throw; + } + } + else + throw; + } + else + throw; } - return res; } return res; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 9daa42bf499..d56cc47a34b 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1286,6 +1286,9 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, getRootActions(select_query->where(), only_types, step.actions()); + if (select_query->allow_query_parameters && select_query->hasQueryParameters()) + return true; + auto where_column_name = select_query->where()->getColumnName(); step.addRequiredOutput(where_column_name); @@ -1902,10 +1905,15 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( ExpressionActions( before_where, ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample); - auto & column_elem = before_where_sample.getByName(query.where()->getColumnName()); - /// If the filter column is a constant, record it. - if (column_elem.column) - where_constant_filter_description = ConstantFilterDescription(*column_elem.column); + + if (!(query.allow_query_parameters && query.hasQueryParameters())) + { + auto & column_elem + = before_where_sample.getByName(query.where()->getColumnName()); + /// If the filter column is a constant, record it. + if (column_elem.column) + where_constant_filter_description = ConstantFilterDescription(*column_elem.column); + } } } chain.addStep(); @@ -2066,6 +2074,9 @@ void ExpressionAnalysisResult::finalize( ssize_t & having_step_num, const ASTSelectQuery & query) { + if (query.allow_query_parameters && query.hasQueryParameters()) + return; + if (prewhere_step_num >= 0) { const ExpressionActionsChain::Step & step = *chain.steps.at(prewhere_step_num); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index a77882c85d2..88df37fca34 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -500,7 +500,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it. 
ASTPtr view_table; if (view) + { view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot); + view->replaceQueryParameters(query_ptr, getSelectQuery().getQueryParameterValues()); + } syntax_analyzer_result = TreeRewriter(context).analyzeSelect( query_ptr, diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index 2a8b256c3d1..280904c9aa9 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -122,6 +122,16 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) } } +void QueryNormalizer::visit(ASTQueryParameter & node, const ASTPtr & ast, Data & data) +{ + auto it_alias = data.aliases.find(node.name); + if (it_alias != data.aliases.end()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Same alias used multiple times {} ", backQuote(node.name)); + + data.aliases[node.name] =ast; +} + + void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr &, Data & data) { /// normalize JOIN ON section @@ -142,6 +152,8 @@ static bool needVisitChild(const ASTPtr & child) /// special visitChildren() for ASTSelectQuery void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr &, Data & data) { + data.allow_query_parameters = select.allow_query_parameters; + for (auto & child : select.children) { if (needVisitChild(child)) @@ -257,7 +269,12 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) else if (auto * node_select = ast->as()) visit(*node_select, ast, data); else if (auto * node_param = ast->as()) - throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); + { + if (data.allow_query_parameters) + visit(*node_param, ast, data); + else + throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); + } else if (auto * node_function = ast->as()) if (node_function->parameters) visit(node_function->parameters, data); diff --git a/src/Interpreters/QueryNormalizer.h b/src/Interpreters/QueryNormalizer.h index f532d869789..ffd2c46ca77 100644 --- a/src/Interpreters/QueryNormalizer.h +++ b/src/Interpreters/QueryNormalizer.h @@ -13,6 +13,7 @@ class ASTSelectQuery; class ASTIdentifier; struct ASTTablesInSelectQueryElement; class Context; +class ASTQueryParameter; class QueryNormalizer @@ -52,6 +53,7 @@ public: /// It's Ok to have "c + 1 AS c" in queries, but not in table definition const bool allow_self_aliases; /// for constructs like "SELECT column + 1 AS column" + bool allow_query_parameters; Data(Aliases & aliases_, const NameSet & source_columns_set_, bool ignore_alias_, ExtractedSettings && settings_, bool allow_self_aliases_) : aliases(aliases_) @@ -80,6 +82,7 @@ private: static void visit(ASTIdentifier &, ASTPtr &, Data &); static void visit(ASTTablesInSelectQueryElement &, const ASTPtr &, Data &); static void visit(ASTSelectQuery &, const ASTPtr &, Data &); + static void visit(ASTQueryParameter &, const ASTPtr &, Data &); static void visitChildren(IAST * node, Data & data); }; diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index a277960643b..0fd7ca098e1 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -449,4 +449,11 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat } } +bool ASTCreateQuery::isParameterizedView() const +{ + if (is_ordinary_view && select && select->hasQueryParameters()) + return true; + return false; +} + } diff --git 
a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index f3729b1523f..2a6da778211 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -24,7 +24,7 @@ public: IAST * sample_by = nullptr; IAST * ttl_table = nullptr; ASTSetQuery * settings = nullptr; - + bool allow_query_parameters = false; String getID(char) const override { return "Storage definition"; } @@ -120,6 +120,8 @@ public: bool isView() const { return is_ordinary_view || is_materialized_view || is_live_view || is_window_view; } + bool isParameterizedView() const; + QueryKind getQueryKind() const override { return QueryKind::Create; } protected: diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 76849653b4e..b97c3dbc585 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB @@ -474,4 +475,54 @@ void ASTSelectQuery::setFinal() // NOLINT method can be made const tables_element.table_expression->as().final = true; } +bool ASTSelectQuery::hasQueryParameters() const +{ + std::queue queue; + queue.push(this->clone()); + + while (!queue.empty()) + { + auto ast = queue.front(); + queue.pop(); + + if (ast->as()) + return true; + + for (auto child : ast->children) + queue.push(child); + } + return false; +} + +NameToNameMap ASTSelectQuery::getQueryParameterValues() const +{ + NameToNameMap parameter_values; + std::queue queue; + queue.push(this->clone()); + + while (!queue.empty()) + { + auto ast = queue.front(); + queue.pop(); + if (auto expression_list = ast->as()) + { + if (expression_list->children.size() == 2) + { + if (auto identifier = expression_list->children[0]->as()) + { + if (auto literal = expression_list->children[1]->as()) + { + + parameter_values[identifier->name()] = toString(literal->value); + } + } + } + } + for (auto child : ast->children) + queue.push(child); + } + + return parameter_values; +} + } diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 5e3af545f12..e8eed092472 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -2,7 +2,9 @@ #include #include - +#include +#include +#include namespace DB { @@ -88,6 +90,7 @@ public: bool group_by_with_constant_keys = false; bool group_by_with_grouping_sets = false; bool limit_with_ties = false; + bool allow_query_parameters = false; ASTPtr & refSelect() { return getExpression(Expression::SELECT); } ASTPtr & refTables() { return getExpression(Expression::TABLES); } @@ -142,6 +145,8 @@ public: void setFinal(); QueryKind getQueryKind() const override { return QueryKind::Select; } + bool hasQueryParameters() const; + NameToNameMap getQueryParameterValues() const; protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index bc413fbe16d..11ac252aee2 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -86,4 +86,26 @@ bool ASTSelectWithUnionQuery::hasNonDefaultUnionMode() const || set_of_modes.contains(SelectUnionMode::EXCEPT_DISTINCT); } +bool ASTSelectWithUnionQuery::hasQueryParameters() const +{ + std::queue queue; + queue.push(this->clone()); + + while (!queue.empty()) + { + auto current = queue.front(); + queue.pop(); + + if (auto * select = current->as()) + { + if (select->hasQueryParameters()) + return true; + } + + for (auto child : current->children) + 
queue.push(child); + } + return false; +} + } diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index 457a3361b1e..ef8e50c47fd 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -2,6 +2,8 @@ #include #include +#include +#include namespace DB { @@ -31,6 +33,10 @@ public: /// Consider any mode other than ALL as non-default. bool hasNonDefaultUnionMode() const; + + bool hasQueryParameters() const; + + }; } diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 4e88e5c68e6..59b5b8b98cb 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -121,7 +121,7 @@ bool ParserList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserUnionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserUnionQueryElement elem_parser; + ParserUnionQueryElement elem_parser(allow_query_parameters); ParserKeyword s_union_parser("UNION"); ParserKeyword s_all_parser("ALL"); ParserKeyword s_distinct_parser("DISTINCT"); diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 05c7ec946ee..b0f6e66c213 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -84,6 +84,11 @@ private: class ParserUnionList : public IParserBase { public: + ParserUnionList(bool allow_query_parameters_=false) + : allow_query_parameters(allow_query_parameters_) + { + } + template static bool parseUtil(Pos & pos, const ElemFunc & parse_element, const SepFunc & parse_separator) { @@ -108,6 +113,7 @@ public: } auto getUnionModes() const { return union_modes; } + bool allow_query_parameters; protected: const char * getName() const override { return "list of union elements"; } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 08240abe8c6..26dcfd5079f 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -1285,7 +1285,10 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec is_materialized_view = true; } else + { is_ordinary_view = true; + select_p.allow_query_parameters = true; + } if (!s_view.ignore(pos, expected)) return false; diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index cf335270734..61381573421 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -34,6 +34,7 @@ namespace ErrorCodes bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { auto select_query = std::make_shared(); + select_query->allow_query_parameters = allow_query_parameters; node = select_query; ParserKeyword s_select("SELECT"); diff --git a/src/Parsers/ParserSelectQuery.h b/src/Parsers/ParserSelectQuery.h index deac25df57d..708b051e046 100644 --- a/src/Parsers/ParserSelectQuery.h +++ b/src/Parsers/ParserSelectQuery.h @@ -9,6 +9,13 @@ namespace DB class ParserSelectQuery : public IParserBase { +public: + ParserSelectQuery(bool allow_query_parameters_=false) + : allow_query_parameters(allow_query_parameters_) + { + } + bool allow_query_parameters; + protected: const char * getName() const override { return "SELECT query"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/ParserSelectWithUnionQuery.cpp b/src/Parsers/ParserSelectWithUnionQuery.cpp index 532a9e20735..39204ee457d 100644 --- a/src/Parsers/ParserSelectWithUnionQuery.cpp +++ 
b/src/Parsers/ParserSelectWithUnionQuery.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr list_node; - ParserUnionList parser; + ParserUnionList parser(allow_query_parameters); if (!parser.parse(pos, list_node, expected)) return false; diff --git a/src/Parsers/ParserSelectWithUnionQuery.h b/src/Parsers/ParserSelectWithUnionQuery.h index 0bf2946e429..6edf8a8d60e 100644 --- a/src/Parsers/ParserSelectWithUnionQuery.h +++ b/src/Parsers/ParserSelectWithUnionQuery.h @@ -8,6 +8,9 @@ namespace DB class ParserSelectWithUnionQuery : public IParserBase { +public: + bool allow_query_parameters = false; + protected: const char * getName() const override { return "SELECT query, possibly with UNION"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/ParserUnionQueryElement.cpp b/src/Parsers/ParserUnionQueryElement.cpp index efd022e6362..0ddaa323404 100644 --- a/src/Parsers/ParserUnionQueryElement.cpp +++ b/src/Parsers/ParserUnionQueryElement.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserUnionQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (!ParserSubquery().parse(pos, node, expected) && !ParserSelectQuery().parse(pos, node, expected)) + if (!ParserSubquery().parse(pos, node, expected) && !ParserSelectQuery(allow_query_parameters).parse(pos, node, expected)) return false; if (const auto * ast_subquery = node->as()) diff --git a/src/Parsers/ParserUnionQueryElement.h b/src/Parsers/ParserUnionQueryElement.h index 6b63c62c85b..a3fd47c496b 100644 --- a/src/Parsers/ParserUnionQueryElement.h +++ b/src/Parsers/ParserUnionQueryElement.h @@ -9,6 +9,13 @@ namespace DB class ParserUnionQueryElement : public IParserBase { +public: + ParserUnionQueryElement(bool allow_query_parameters_=false) + : allow_query_parameters(allow_query_parameters_) + { + } + bool allow_query_parameters; + protected: const char * getName() const override { return "SELECT query, subquery, possibly with UNION"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index adaf1c4e404..32d0a08777d 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -23,6 +23,8 @@ #include #include +#include + namespace DB { @@ -99,6 +101,7 @@ StorageView::StorageView( SelectQueryDescription description; description.inner_query = query.select->ptr(); + is_parameterized_view = query.isParameterizedView(); storage_metadata.setSelectQuery(description); setInMemoryMetadata(storage_metadata); } @@ -173,6 +176,15 @@ static ASTTableExpression * getFirstTableExpression(ASTSelectQuery & select_quer return select_element->table_expression->as(); } +void StorageView::replaceQueryParameters(ASTPtr & outer_query, const NameToNameMap & parameter_values) +{ + if (is_parameterized_view) + { + ReplaceQueryParameterVisitor visitor(parameter_values); + visitor.visit(outer_query); + } +} + void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name) { ASTTableExpression * table_expression = getFirstTableExpression(outer_query); @@ -185,8 +197,11 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ auto table_function_name = table_expression->table_function->as()->name; if (table_function_name == "view" || table_function_name == "viewIfPermitted") table_expression->database_and_table_name = std::make_shared("__view"); - if 
(table_function_name == "merge") + else if (table_function_name == "merge") table_expression->database_and_table_name = std::make_shared("__merge"); + else + table_expression->database_and_table_name = std::make_shared(table_function_name); + } if (!table_expression->database_and_table_name) throw Exception("Logical error: incorrect table expression", ErrorCodes::LOGICAL_ERROR); @@ -204,6 +219,8 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ for (auto & child : table_expression->children) if (child.get() == view_name.get()) child = view_query; + else if (child.get() && child->as() && child->as()->name == table_expression->table_function->as()->name) + child = view_query; } ASTPtr StorageView::restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name) diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 31c96addd08..297847e83bf 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -19,6 +19,7 @@ public: std::string getName() const override { return "View"; } bool isView() const override { return true; } + bool isParameterizedView() const { return is_parameterized_view; } /// It is passed inside the query and solved at its level. bool supportsSampling() const override { return true; } @@ -34,6 +35,8 @@ public: size_t max_block_size, unsigned num_streams) override; + void replaceQueryParameters(ASTPtr & outer_query, const NameToNameMap & parameter_values); + static void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot) { replaceWithSubquery(select_query, metadata_snapshot->getSelectQuery().inner_query->clone(), view_name); @@ -41,6 +44,9 @@ public: static void replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name); static ASTPtr restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name); + +protected: + bool is_parameterized_view; }; } diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference new file mode 100644 index 00000000000..d9afe5ff69c --- /dev/null +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -0,0 +1,3 @@ +20 +50 +10 diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql new file mode 100644 index 00000000000..0153ed95428 --- /dev/null +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -0,0 +1,32 @@ +DROP TABLE IF EXISTS v1; +DROP TABLE IF EXISTS Catalog; + +CREATE TABLE Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; + +INSERT INTO Catalog VALUES ('Pen', 10, 3); +INSERT INTO Catalog VALUES ('Book', 50, 2); +INSERT INTO Catalog VALUES ('Paper', 20, 1); + +CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price={price:UInt64}; +SELECT Price FROM v1(price=20); + +SELECT Price FROM v123(price=20); -- { serverError UNKNOWN_FUNCTION } + +CREATE VIEW v10 AS SELECT * FROM Catalog WHERE Price=10; +SELECT Price FROM v10(price=10); -- { serverError BAD_ARGUMENTS } + + +CREATE VIEW v2 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64}; +SELECT Price FROM v2(price=50,quantity=2); + +SELECT Price FROM v2(price=50); -- { serverError UNKNOWN_QUERY_PARAMETER} + +CREATE VIEW v3 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity=3; +SELECT Price FROM v3(price=10); + +CREATE VIEW v4 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND 
Quantity={price:UInt64}; -- {serverError BAD_ARGUMENTS} + +DROP TABLE v1; +DROP TABLE v2; +DROP TABLE v3; +DROP TABLE Catalog; From 1e3be976adb3d75fb927d2bb93196f4f2d21ef7d Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Sat, 24 Sep 2022 12:01:18 +0200 Subject: [PATCH 007/262] 40907 Parameterized views as table functions Implementation * Fix for Build fails - updated conversion of Field to String and includes --- src/Parsers/ASTSelectQuery.cpp | 5 ++++- src/Parsers/ASTSelectQuery.h | 3 --- src/Parsers/ASTSelectWithUnionQuery.cpp | 2 ++ src/Parsers/ASTSelectWithUnionQuery.h | 2 -- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index b97c3dbc585..b187dc74f02 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -8,7 +8,10 @@ #include #include #include +#include +#include +#include namespace DB { @@ -513,7 +516,7 @@ NameToNameMap ASTSelectQuery::getQueryParameterValues() const if (auto literal = expression_list->children[1]->as()) { - parameter_values[identifier->name()] = toString(literal->value); + parameter_values[identifier->name()] = convertFieldToString(literal->value); } } } diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index e8eed092472..8ece05808f7 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -2,9 +2,6 @@ #include #include -#include -#include -#include namespace DB { diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index 11ac252aee2..c38e4e2c747 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -3,7 +3,9 @@ #include #include #include +#include +#include #include namespace DB diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index ef8e50c47fd..8d93760426a 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -2,8 +2,6 @@ #include #include -#include -#include namespace DB { From 21de85d67cc9de7942317e1c33f1d58bc551045e Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 26 Sep 2022 10:30:28 +0200 Subject: [PATCH 008/262] 40907 Parameterized views as table functions Implementation * Fix for clang-today build fails - updated to use const reference in ASTSelectQuery.cpp --- src/Parsers/ASTSelectQuery.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index b187dc74f02..3be3f6c5c9a 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -507,13 +507,13 @@ NameToNameMap ASTSelectQuery::getQueryParameterValues() const { auto ast = queue.front(); queue.pop(); - if (auto expression_list = ast->as()) + if (auto * expression_list = ast->as()) { if (expression_list->children.size() == 2) { - if (auto identifier = expression_list->children[0]->as()) + if (auto * identifier = expression_list->children[0]->as()) { - if (auto literal = expression_list->children[1]->as()) + if (auto * literal = expression_list->children[1]->as()) { parameter_values[identifier->name()] = convertFieldToString(literal->value); @@ -521,7 +521,7 @@ NameToNameMap ASTSelectQuery::getQueryParameterValues() const } } } - for (auto child : ast->children) + for (const auto & child : ast->children) queue.push(child); } From 8b3c4ac50471d76bf0a7c3ae67b6bc70b288d63e Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 26 Sep 2022 13:08:54 +0200 Subject: 
[PATCH 009/262] 40907 Parameterized views as table functions Implementation * Fix for clang-today build fails - updated to use const reference in ASTSelectQuery.cpp & ASTSelectWithUnionQuery.cpp --- src/Parsers/ASTSelectQuery.cpp | 2 +- src/Parsers/ASTSelectWithUnionQuery.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 3be3f6c5c9a..7537628b3a5 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -491,7 +491,7 @@ bool ASTSelectQuery::hasQueryParameters() const if (ast->as()) return true; - for (auto child : ast->children) + for (const auto & child : ast->children) queue.push(child); } return false; diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index c38e4e2c747..76fe9582615 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -104,7 +104,7 @@ bool ASTSelectWithUnionQuery::hasQueryParameters() const return true; } - for (auto child : current->children) + for (const auto & child : current->children) queue.push(child); } return false; From bbc33a54b2e8b63870f08d91eba02f178aaeae6f Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Tue, 27 Sep 2022 16:30:59 +0200 Subject: [PATCH 010/262] Addressed review comments - 40907 Parameterized views as table functions Implementation * Fix for clang-today build fails - updated to use const in Context.cpp & const function in ActionsVisitior.cpp * Updated to use QueryParameterVisitor to check if query has query parameters * Updated executeTableFunction to check if table/table exists instead of try-catch approach * Fixed small review comments and style comments. Documentation: * Addressed review comments and added the LIVE view part which was removed by mistake in the previous commits. --- .../sql-reference/statements/create/view.md | 6 +- src/Interpreters/ActionsVisitor.cpp | 27 +---- src/Interpreters/Context.cpp | 107 +++++++----------- src/Interpreters/ExpressionAnalyzer.cpp | 5 +- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Interpreters/QueryNormalizer.cpp | 2 +- src/Interpreters/QueryParameterVisitor.cpp | 16 ++- src/Interpreters/QueryParameterVisitor.h | 3 + src/Parsers/ASTSelectQuery.cpp | 23 +--- src/Parsers/ASTSelectWithUnionQuery.cpp | 19 +--- src/Parsers/ExpressionListParsers.h | 2 +- src/Parsers/ParserSelectQuery.h | 2 +- src/Parsers/ParserUnionQueryElement.h | 3 +- src/Storages/StorageView.cpp | 8 +- src/Storages/StorageView.h | 2 +- 15 files changed, 89 insertions(+), 138 deletions(-) diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index a7b3f4ef762..23df3f72318 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -42,7 +42,7 @@ This is similar to normal view but can be created with parameter instead of lite ``` sql CREATE VIEW view AS SELECT * FROM TABLE WHERE Column1={column1:datatype1} and Column2={column2:datatype2} ... ``` -The above creates a view for table which can be used as table function by substituting value1 & value2 as show below. +The above creates a view for table which can be used as table function by substituting parameters as show below. ``` sql SELECT * FROM view(column1=value1, column2=value2 ...) 
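Since the generic `view(column1=value1, column2=value2 ...)` form above can be hard to picture, here is a concrete sketch using the `Catalog` table from the regression test; the view name `sales_by` is illustrative, and the rewritten query shown in the comment is only an approximation of the subquery replacement performed by `StorageView::replaceWithSubquery`.

``` sql
CREATE VIEW sales_by AS
SELECT * FROM Catalog WHERE Price = {price:UInt64} AND Quantity = {quantity:UInt64};

-- The call is parsed as a table function; the (name = value) pairs are collected
-- into a parameter map and substituted into the stored SELECT, so it behaves like:
--   SELECT * FROM (SELECT * FROM Catalog WHERE Price = 50 AND Quantity = 2)
SELECT * FROM sales_by(price=50, quantity=2);
```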
@@ -86,7 +86,9 @@ To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop This is an experimental feature that may change in backwards-incompatible ways in the future releases. Enable usage of live views and `WATCH` query using [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view) setting. Input the command `set allow_experimental_live_view = 1`. ::: - +```sql +CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... +``` Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query. diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 0ebc6857779..148d8e4d30b 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -54,6 +54,7 @@ #include #include #include +#include namespace DB @@ -742,29 +743,11 @@ std::optional ActionsMatcher::getNameAndTypeFromAST(const ASTPt if (const auto * node = index.tryGetNode(child_column_name)) return NameAndTypePair(child_column_name, node->result_type); - if (!data.only_consts) + if (!data.only_consts && analyzeReceiveQueryParams(ast).empty()) { - bool has_query_parameter = false; - - std::queue astQueue; - astQueue.push(ast); - - while (!astQueue.empty()) - { - auto current = astQueue.front(); - astQueue.pop(); - - if (auto * ast_query_parameter = current->as()) - has_query_parameter = true; - - for (auto astChild : current->children) - astQueue.push(astChild); - } - - if (!has_query_parameter) - throw Exception( - "Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), - ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception( + "Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), + ErrorCodes::UNKNOWN_IDENTIFIER); } return {}; } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index e3276f6006d..45a73e09909 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1127,82 +1127,55 @@ void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) { - auto hash = table_expression->getTreeHash(); - String key = toString(hash.first) + '_' + toString(hash.second); - - StoragePtr & res = table_function_results[key]; - - if (!res) + if (const auto * function = table_expression->as()) { - try + if (TableFunctionFactory::instance().isTableFunctionName(function->name)) { - TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); - if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) + auto hash = table_expression->getTreeHash(); + String key = toString(hash.first) + '_' + toString(hash.second); + StoragePtr & res = table_function_results[key]; + if (!res) { - const auto & insertion_table = getInsertionTable(); - if (!insertion_table.empty()) + TableFunctionPtr table_function_ptr = 
TableFunctionFactory::instance().get(table_expression, shared_from_this()); + if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) { - const auto & structure_hint - = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; - table_function_ptr->setStructureHint(structure_hint); + const auto & insertion_table = getInsertionTable(); + if (!insertion_table.empty()) + { + const auto & structure_hint + = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; + table_function_ptr->setStructureHint(structure_hint); + } + } + + res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); + + /// Since ITableFunction::parseArguments() may change table_expression, i.e.: + /// + /// remote('127.1', system.one) -> remote('127.1', 'system.one'), + /// + auto new_hash = table_expression->getTreeHash(); + if (hash != new_hash) + { + key = toString(new_hash.first) + '_' + toString(new_hash.second); + table_function_results[key] = res; } } - - res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); - - /// Since ITableFunction::parseArguments() may change table_expression, i.e.: - /// - /// remote('127.1', system.one) -> remote('127.1', 'system.one'), - /// - auto new_hash = table_expression->getTreeHash(); - if (hash != new_hash) - { - key = toString(new_hash.first) + '_' + toString(new_hash.second); - table_function_results[key] = res; - } - return res; - }catch (DB::Exception &table_function_exception) - { - if (table_function_exception.code() == ErrorCodes::UNKNOWN_FUNCTION) - { - if (auto ast_function = table_expression->as()) - { - try - { - res = DatabaseCatalog::instance().getTable({getCurrentDatabase(), ast_function->name}, getQueryContext()); - if (res.get()->isView() && res->as()->isParameterizedView()) - return res; - else - { - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Not a parameterized view {}", - ast_function->name); - } - } - catch (DB::Exception &view_exception) - { - if (view_exception.code() == ErrorCodes::UNKNOWN_TABLE) - throw Exception( - ErrorCodes::UNKNOWN_FUNCTION, - "Unknown table function {} OR Unknown parameterized view {}", - table_function_exception.message(), - view_exception.message()); - else - throw; - } - } - else - throw; - } - else - throw; } - + else if (DatabaseCatalog::instance().isTableExist({getCurrentDatabase(), function->name}, getQueryContext())) + { + StoragePtr res = DatabaseCatalog::instance().getTable({getCurrentDatabase(), function->name}, getQueryContext()); + if (res.get()->isView() && res->as()->isParameterizedView()) + return res; + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not a parameterized view `{}`", function->name); + } + } + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "Unknown table function or incorrect parameterized view: `{}`", function->name); } - - return res; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unable to fetch function from query"); } diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index d56cc47a34b..b49df1b1fe7 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1906,11 +1906,12 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( before_where, ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample); - if 
(!(query.allow_query_parameters && query.hasQueryParameters())) + bool has_query_parameters = query.allow_query_parameters && query.hasQueryParameters(); + if (!has_query_parameters) { auto & column_elem = before_where_sample.getByName(query.where()->getColumnName()); - /// If the filter column is a constant, record it. + /// If the filter column is a constant and not a query parameter, record it. if (column_elem.column) where_constant_filter_description = ConstantFilterDescription(*column_elem.column); } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 88df37fca34..298dfd03bc6 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -502,7 +502,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (view) { view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot); - view->replaceQueryParameters(query_ptr, getSelectQuery().getQueryParameterValues()); + view->replaceQueryParametersIfParametrizedView(query_ptr, getSelectQuery().getQueryParameterValues()); } syntax_analyzer_result = TreeRewriter(context).analyzeSelect( diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index 280904c9aa9..ae07d5f5ad7 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -128,7 +128,7 @@ void QueryNormalizer::visit(ASTQueryParameter & node, const ASTPtr & ast, Data & if (it_alias != data.aliases.end()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Same alias used multiple times {} ", backQuote(node.name)); - data.aliases[node.name] =ast; + data.aliases[node.name] = ast; } diff --git a/src/Interpreters/QueryParameterVisitor.cpp b/src/Interpreters/QueryParameterVisitor.cpp index 0c0f74d402e..491c05ac3d2 100644 --- a/src/Interpreters/QueryParameterVisitor.cpp +++ b/src/Interpreters/QueryParameterVisitor.cpp @@ -17,11 +17,11 @@ public: void visit(const ASTPtr & ast) { - for (const auto & child : ast->children) + if (const auto & query_parameter = ast->as()) + visitQueryParameter(*query_parameter); + else { - if (const auto & query_parameter = child->as()) - visitQueryParameter(*query_parameter); - else + for (const auto & child : ast->children) visit(child); } } @@ -48,4 +48,12 @@ NameSet analyzeReceiveQueryParams(const std::string & query) return query_params; } +NameSet analyzeReceiveQueryParams(const ASTPtr & ast) +{ + NameSet query_params; + QueryParameterVisitor(query_params).visit(ast); + return query_params; +} + + } diff --git a/src/Interpreters/QueryParameterVisitor.h b/src/Interpreters/QueryParameterVisitor.h index 531de2ddafa..6d9d49e1ed2 100644 --- a/src/Interpreters/QueryParameterVisitor.h +++ b/src/Interpreters/QueryParameterVisitor.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -10,4 +11,6 @@ namespace DB /// Find parameters in a query and collect them into set. 
NameSet analyzeReceiveQueryParams(const std::string & query); +NameSet analyzeReceiveQueryParams(const ASTPtr & ast); + } diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 7537628b3a5..65fe8e30c44 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -8,8 +8,8 @@ #include #include #include -#include #include +#include #include @@ -480,19 +480,9 @@ void ASTSelectQuery::setFinal() // NOLINT method can be made const bool ASTSelectQuery::hasQueryParameters() const { - std::queue queue; - queue.push(this->clone()); - - while (!queue.empty()) + if (!analyzeReceiveQueryParams(this->where()).empty()) { - auto ast = queue.front(); - queue.pop(); - - if (ast->as()) - return true; - - for (const auto & child : ast->children) - queue.push(child); + return true; } return false; } @@ -507,15 +497,14 @@ NameToNameMap ASTSelectQuery::getQueryParameterValues() const { auto ast = queue.front(); queue.pop(); - if (auto * expression_list = ast->as()) + if (const auto * expression_list = ast->as()) { if (expression_list->children.size() == 2) { - if (auto * identifier = expression_list->children[0]->as()) + if (const auto * identifier = expression_list->children[0]->as()) { - if (auto * literal = expression_list->children[1]->as()) + if (const auto * literal = expression_list->children[1]->as()) { - parameter_values[identifier->name()] = convertFieldToString(literal->value); } } diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index 76fe9582615..1cd59a0b571 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -4,8 +4,8 @@ #include #include #include +#include -#include #include namespace DB @@ -90,22 +90,9 @@ bool ASTSelectWithUnionQuery::hasNonDefaultUnionMode() const bool ASTSelectWithUnionQuery::hasQueryParameters() const { - std::queue queue; - queue.push(this->clone()); - - while (!queue.empty()) + if (!analyzeReceiveQueryParams(this->list_of_selects).empty()) { - auto current = queue.front(); - queue.pop(); - - if (auto * select = current->as()) - { - if (select->hasQueryParameters()) - return true; - } - - for (const auto & child : current->children) - queue.push(child); + return true; } return false; } diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index b0f6e66c213..9b22b4b4b98 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -84,7 +84,7 @@ private: class ParserUnionList : public IParserBase { public: - ParserUnionList(bool allow_query_parameters_=false) + explicit ParserUnionList(bool allow_query_parameters_ = false) : allow_query_parameters(allow_query_parameters_) { } diff --git a/src/Parsers/ParserSelectQuery.h b/src/Parsers/ParserSelectQuery.h index 708b051e046..ac79cc0637d 100644 --- a/src/Parsers/ParserSelectQuery.h +++ b/src/Parsers/ParserSelectQuery.h @@ -10,7 +10,7 @@ namespace DB class ParserSelectQuery : public IParserBase { public: - ParserSelectQuery(bool allow_query_parameters_=false) + explicit ParserSelectQuery(bool allow_query_parameters_ = false) : allow_query_parameters(allow_query_parameters_) { } diff --git a/src/Parsers/ParserUnionQueryElement.h b/src/Parsers/ParserUnionQueryElement.h index a3fd47c496b..ca372052306 100644 --- a/src/Parsers/ParserUnionQueryElement.h +++ b/src/Parsers/ParserUnionQueryElement.h @@ -10,10 +10,11 @@ namespace DB class ParserUnionQueryElement : public IParserBase { public: - ParserUnionQueryElement(bool 
allow_query_parameters_=false) + explicit ParserUnionQueryElement(bool allow_query_parameters_ = false) : allow_query_parameters(allow_query_parameters_) { } + bool allow_query_parameters; protected: diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 32d0a08777d..ec7c665e135 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -176,7 +176,7 @@ static ASTTableExpression * getFirstTableExpression(ASTSelectQuery & select_quer return select_element->table_expression->as(); } -void StorageView::replaceQueryParameters(ASTPtr & outer_query, const NameToNameMap & parameter_values) +void StorageView::replaceQueryParametersIfParametrizedView(ASTPtr & outer_query, const NameToNameMap & parameter_values) const { if (is_parameterized_view) { @@ -219,7 +219,11 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ for (auto & child : table_expression->children) if (child.get() == view_name.get()) child = view_query; - else if (child.get() && child->as() && child->as()->name == table_expression->table_function->as()->name) + else if (child.get() + && child->as() + && table_expression->table_function + && table_expression->table_function->as() + && child->as()->name == table_expression->table_function->as()->name) child = view_query; } diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 297847e83bf..1ed64c482e0 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -35,7 +35,7 @@ public: size_t max_block_size, unsigned num_streams) override; - void replaceQueryParameters(ASTPtr & outer_query, const NameToNameMap & parameter_values); + void replaceQueryParametersIfParametrizedView(ASTPtr & outer_query, const NameToNameMap & parameter_values) const; static void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot) { From a12d2fcf44c3ff5eff64527ed731f89b3e9ee308 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Tue, 27 Sep 2022 22:13:20 +0200 Subject: [PATCH 011/262] Updated ASTSelectQuery to fix FastTest fails for 40907 Parameterized views as table functions --- src/Parsers/ASTSelectQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 65fe8e30c44..f3bb094c41e 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -480,7 +480,7 @@ void ASTSelectQuery::setFinal() // NOLINT method can be made const bool ASTSelectQuery::hasQueryParameters() const { - if (!analyzeReceiveQueryParams(this->where()).empty()) + if (!analyzeReceiveQueryParams(this->clone()).empty()) { return true; } From 618f63d6c7ec4d92a742bd974c9a611a2fccce2f Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 28 Sep 2022 10:00:12 +0200 Subject: [PATCH 012/262] Updated executeTableFunctions in Context.cpp to check for table/view & fallback to function to fix test fails - 40907 Parameterized views as table functions --- src/Interpreters/Context.cpp | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 45a73e09909..136d2b1283f 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1129,7 +1129,17 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) { if (const auto * function = table_expression->as()) { - if (TableFunctionFactory::instance().isTableFunctionName(function->name)) + if 
(DatabaseCatalog::instance().isTableExist({getCurrentDatabase(), function->name}, getQueryContext())) + { + StoragePtr res = DatabaseCatalog::instance().getTable({getCurrentDatabase(), function->name}, getQueryContext()); + if (res.get()->isView() && res->as()->isParameterizedView()) + return res; + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not a parameterized view `{}`", function->name); + } + } + else { auto hash = table_expression->getTreeHash(); String key = toString(hash.first) + '_' + toString(hash.second); @@ -1163,17 +1173,6 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) } return res; } - else if (DatabaseCatalog::instance().isTableExist({getCurrentDatabase(), function->name}, getQueryContext())) - { - StoragePtr res = DatabaseCatalog::instance().getTable({getCurrentDatabase(), function->name}, getQueryContext()); - if (res.get()->isView() && res->as()->isParameterizedView()) - return res; - else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not a parameterized view `{}`", function->name); - } - } - throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "Unknown table function or incorrect parameterized view: `{}`", function->name); } throw Exception(ErrorCodes::LOGICAL_ERROR, "Unable to fetch function from query"); } From e6672832b9289c8fbe2a8133e5e84142002c159f Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 28 Sep 2022 10:17:04 +0200 Subject: [PATCH 013/262] Removed unused errorcodes in Context.cpp - 40907 Parameterized views as table functions --- src/Interpreters/Context.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 136d2b1283f..957e9a2bce1 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -139,7 +139,6 @@ namespace ErrorCodes extern const int INVALID_SETTING_VALUE; extern const int UNKNOWN_READ_METHOD; extern const int NOT_IMPLEMENTED; - extern const int UNKNOWN_FUNCTION; } From f78f846503f70cd7f81538c3e48baa16aa1a55ce Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 28 Sep 2022 13:23:11 +0200 Subject: [PATCH 014/262] Fixed issues executeTableFunctions in Context.cpp to fall back on TableFunction if not parameterized view & updated test - 40907 Parameterized views as table functions --- src/Interpreters/Context.cpp | 66 ++++++++++--------- .../0_stateless/02428_parameterized_view.sql | 2 +- 2 files changed, 35 insertions(+), 33 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 957e9a2bce1..2c6f27070e3 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1133,45 +1133,47 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) StoragePtr res = DatabaseCatalog::instance().getTable({getCurrentDatabase(), function->name}, getQueryContext()); if (res.get()->isView() && res->as()->isParameterizedView()) return res; - else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not a parameterized view `{}`", function->name); - } } - else + auto hash = table_expression->getTreeHash(); + String key = toString(hash.first) + '_' + toString(hash.second); + StoragePtr & res = table_function_results[key]; + if (!res) { - auto hash = table_expression->getTreeHash(); - String key = toString(hash.first) + '_' + toString(hash.second); - StoragePtr & res = table_function_results[key]; - if (!res) + TableFunctionPtr table_function_ptr; + try { - TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); 
- if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) + table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); + } + catch (Exception & e) + { + e.addMessage(" or incorrect parameterized view"); + throw; + } + if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) + { + const auto & insertion_table = getInsertionTable(); + if (!insertion_table.empty()) { - const auto & insertion_table = getInsertionTable(); - if (!insertion_table.empty()) - { - const auto & structure_hint - = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; - table_function_ptr->setStructureHint(structure_hint); - } - } - - res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); - - /// Since ITableFunction::parseArguments() may change table_expression, i.e.: - /// - /// remote('127.1', system.one) -> remote('127.1', 'system.one'), - /// - auto new_hash = table_expression->getTreeHash(); - if (hash != new_hash) - { - key = toString(new_hash.first) + '_' + toString(new_hash.second); - table_function_results[key] = res; + const auto & structure_hint + = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; + table_function_ptr->setStructureHint(structure_hint); } } - return res; + + res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); + + /// Since ITableFunction::parseArguments() may change table_expression, i.e.: + /// + /// remote('127.1', system.one) -> remote('127.1', 'system.one'), + /// + auto new_hash = table_expression->getTreeHash(); + if (hash != new_hash) + { + key = toString(new_hash.first) + '_' + toString(new_hash.second); + table_function_results[key] = res; + } } + return res; } throw Exception(ErrorCodes::LOGICAL_ERROR, "Unable to fetch function from query"); } diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index 0153ed95428..ff451f91f5b 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -13,7 +13,7 @@ SELECT Price FROM v1(price=20); SELECT Price FROM v123(price=20); -- { serverError UNKNOWN_FUNCTION } CREATE VIEW v10 AS SELECT * FROM Catalog WHERE Price=10; -SELECT Price FROM v10(price=10); -- { serverError BAD_ARGUMENTS } +SELECT Price FROM v10(price=10); -- { serverError UNKNOWN_FUNCTION } CREATE VIEW v2 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64}; From d94c3438ad1a0938d2604b9b71b999a9891de273 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 29 Sep 2022 09:17:11 +0200 Subject: [PATCH 015/262] Fixing build issues by including QueryParameterVisitor - 40907 Parameterized views as table functions --- src/Parsers/ASTSelectQuery.h | 1 + src/Parsers/ASTSelectWithUnionQuery.h | 1 + 2 files changed, 2 insertions(+) diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 8ece05808f7..19e3d2b814a 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index 8d93760426a..64cf5287211 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ 
b/src/Parsers/ASTSelectWithUnionQuery.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { From e5c0c6a1b6cd24f231971a097f46f1496b74beeb Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 3 Oct 2022 14:27:38 +0200 Subject: [PATCH 016/262] Addressed review comments - 40907 Parameterized views as table functions Implementation * Updated handling of has_query_parameters to avoid recalculation. * Fixed style comments * Updated formatImpl of ASTTableExpression to prioritise table_function before sub_query. Testing: * Added test for ATTACH, DETACH, INSERT INTO view and EXPLAIN SYNTAX to tests/queries/0_stateless/02428_parameterized_view.sql --- src/Parsers/ASTSelectQuery.cpp | 13 ++++---- src/Parsers/ASTSelectQuery.h | 5 ++- src/Parsers/ASTSelectWithUnionQuery.cpp | 32 ++++++++++++++++--- src/Parsers/ASTSelectWithUnionQuery.h | 6 ++-- src/Parsers/ASTTablesInSelectQuery.cpp | 10 +++--- src/Parsers/ExpressionListParsers.cpp | 1 + src/Parsers/ParserSelectQuery.h | 1 + src/Parsers/ParserSelectWithUnionQuery.cpp | 1 + src/Storages/StorageView.cpp | 4 +++ .../02428_parameterized_view.reference | 10 ++++++ .../0_stateless/02428_parameterized_view.sql | 9 +++++- 11 files changed, 72 insertions(+), 20 deletions(-) diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index f3bb094c41e..aa4ff96e050 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -478,13 +478,14 @@ void ASTSelectQuery::setFinal() // NOLINT method can be made const tables_element.table_expression->as().final = true; } -bool ASTSelectQuery::hasQueryParameters() const + +void ASTSelectQuery::setHasQueryParameters() { - if (!analyzeReceiveQueryParams(this->clone()).empty()) - { - return true; - } - return false; + if (!this->where()) + return; + + if (!analyzeReceiveQueryParams(this->where()).empty()) + has_query_parameters = true; } NameToNameMap ASTSelectQuery::getQueryParameterValues() const diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 19e3d2b814a..b3f29009df6 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -143,7 +143,10 @@ public: void setFinal(); QueryKind getQueryKind() const override { return QueryKind::Select; } - bool hasQueryParameters() const; + + bool has_query_parameters = false; + bool hasQueryParameters() const { return has_query_parameters; } + void setHasQueryParameters(); NameToNameMap getQueryParameterValues() const; protected: diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index 1cd59a0b571..50e929a6f46 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include @@ -88,13 +87,36 @@ bool ASTSelectWithUnionQuery::hasNonDefaultUnionMode() const || set_of_modes.contains(SelectUnionMode::EXCEPT_DISTINCT); } -bool ASTSelectWithUnionQuery::hasQueryParameters() const + +void ASTSelectWithUnionQuery::setHasQueryParameters() { - if (!analyzeReceiveQueryParams(this->list_of_selects).empty()) + if (!list_of_selects) + return; + + for (const auto & child : list_of_selects->children) { - return true; + if (auto * select_node = child->as()) + { + select_node->setHasQueryParameters(); + if (select_node->hasQueryParameters()) + { + has_query_parameters = true; + break; + } + } + } +} + +void ASTSelectWithUnionQuery::clearAllowQueryParameters() +{ + if (!list_of_selects) + return; + + for (const auto & child : list_of_selects->children) + { + if (auto * 
select_node = child->as()) + select_node->allow_query_parameters = false; } - return false; } } diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index 64cf5287211..6562bdc4c3d 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -2,7 +2,6 @@ #include #include -#include namespace DB { @@ -33,8 +32,11 @@ public: /// Consider any mode other than ALL as non-default. bool hasNonDefaultUnionMode() const; - bool hasQueryParameters() const; + bool has_query_parameters = false; + bool hasQueryParameters() const { return has_query_parameters; } + void setHasQueryParameters(); + void clearAllowQueryParameters(); }; diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index 3b7a3a342e6..3f687f76c86 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -112,16 +112,16 @@ void ASTTableExpression::formatImpl(const FormatSettings & settings, FormatState settings.ostr << " "; database_and_table_name->formatImpl(settings, state, frame); } - else if (table_function) - { - settings.ostr << " "; - table_function->formatImpl(settings, state, frame); - } else if (subquery) { settings.ostr << settings.nl_or_ws << indent_str; subquery->formatImpl(settings, state, frame); } + else if (table_function) + { + settings.ostr << " "; + table_function->formatImpl(settings, state, frame); + } if (final) { diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 59b5b8b98cb..a8f0f0d0d58 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -280,6 +280,7 @@ static bool modifyAST(ASTPtr ast, SubqueryFunctionType type) select_with_union_query->list_of_selects = std::make_shared(); select_with_union_query->list_of_selects->children.push_back(std::move(select_query)); select_with_union_query->children.push_back(select_with_union_query->list_of_selects); + select_with_union_query->setHasQueryParameters(); auto new_subquery = std::make_shared(); new_subquery->children.push_back(select_with_union_query); diff --git a/src/Parsers/ParserSelectQuery.h b/src/Parsers/ParserSelectQuery.h index ac79cc0637d..ea9f71f36e0 100644 --- a/src/Parsers/ParserSelectQuery.h +++ b/src/Parsers/ParserSelectQuery.h @@ -14,6 +14,7 @@ public: : allow_query_parameters(allow_query_parameters_) { } + bool allow_query_parameters; protected: diff --git a/src/Parsers/ParserSelectWithUnionQuery.cpp b/src/Parsers/ParserSelectWithUnionQuery.cpp index 39204ee457d..49f631a2881 100644 --- a/src/Parsers/ParserSelectWithUnionQuery.cpp +++ b/src/Parsers/ParserSelectWithUnionQuery.cpp @@ -36,6 +36,7 @@ bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & select_with_union_query->list_of_selects = list_node; select_with_union_query->children.push_back(select_with_union_query->list_of_selects); select_with_union_query->list_of_modes = parser.getUnionModes(); + select_with_union_query->setHasQueryParameters(); return true; } diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index ec7c665e135..2a82bf327e7 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -100,6 +100,10 @@ StorageView::StorageView( throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); SelectQueryDescription description; + //When storing the select_query clear allow_query_parameters from the select, so that when this view is used in 
select, + //the query parameters are expected to be substituted + query.select->clearAllowQueryParameters(); + description.inner_query = query.select->ptr(); is_parameterized_view = query.isParameterizedView(); storage_metadata.setSelectQuery(description); diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference index d9afe5ff69c..bf21cdb6308 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.reference +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -1,3 +1,13 @@ 20 +SELECT + Name, + Price, + Quantity +FROM +( + SELECT * + FROM default.Catalog + WHERE Price = _CAST(10, \'UInt64\') +) AS v1 50 10 diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index ff451f91f5b..fe7ec419b1a 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -10,11 +10,18 @@ INSERT INTO Catalog VALUES ('Paper', 20, 1); CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price={price:UInt64}; SELECT Price FROM v1(price=20); +DETACH TABLE v1; +ATTACH TABLE v1; + +EXPLAIN SYNTAX SELECT * from v1(price=10); + +INSERT INTO v1 VALUES ('Bag', 50, 2); -- { serverError NOT_IMPLEMENTED} + SELECT Price FROM v123(price=20); -- { serverError UNKNOWN_FUNCTION } CREATE VIEW v10 AS SELECT * FROM Catalog WHERE Price=10; -SELECT Price FROM v10(price=10); -- { serverError UNKNOWN_FUNCTION } +SELECT Price FROM v10(price=10); -- { serverError UNKNOWN_FUNCTION } CREATE VIEW v2 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64}; SELECT Price FROM v2(price=50,quantity=2); From 7033a56ff2bc7b9bc19b40066ffc2bba8a7a67ea Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 3 Oct 2022 16:42:44 +0200 Subject: [PATCH 017/262] Moved QueryParameterVisitor to Parsers & EXPLAIN SYNTAX test fix - 40907 Parameterized views as table functions --- src/Interpreters/ActionsVisitor.cpp | 2 +- src/Interpreters/ExpressionAnalyzer.cpp | 6 ++++++ src/Parsers/ASTSelectQuery.cpp | 5 ++++- src/Parsers/ASTSelectQuery.h | 1 - src/Parsers/ASTSelectWithUnionQuery.cpp | 2 ++ src/Parsers/ASTTablesInSelectQuery.cpp | 10 ++++++++++ .../QueryParameterVisitor.cpp | 2 +- src/{Interpreters => Parsers}/QueryParameterVisitor.h | 0 src/Server/HTTPHandler.cpp | 2 +- 9 files changed, 25 insertions(+), 5 deletions(-) rename src/{Interpreters => Parsers}/QueryParameterVisitor.cpp (96%) rename src/{Interpreters => Parsers}/QueryParameterVisitor.h (100%) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 148d8e4d30b..c694f6007fc 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -54,7 +54,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index b49df1b1fe7..4efb08c414f 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1286,6 +1286,8 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, getRootActions(select_query->where(), only_types, step.actions()); + //For creating parameterized view, query parameters are allowed in select + //As select will be stored without substituting query parameters, we don't want to evaluate the where expression if (select_query->allow_query_parameters && select_query->hasQueryParameters()) return 
true; @@ -1906,6 +1908,8 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( before_where, ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample); + //For creating parameterized view, query parameters are allowed in select + //As select will be stored without substituting query parameters, we don't want to evaluate the where expression bool has_query_parameters = query.allow_query_parameters && query.hasQueryParameters(); if (!has_query_parameters) { @@ -2075,6 +2079,8 @@ void ExpressionAnalysisResult::finalize( ssize_t & having_step_num, const ASTSelectQuery & query) { + //For creating parameterized view, query parameters are allowed in select + //As select will be stored without substituting query parameters, we don't want to evaluate the expressions/steps if (query.allow_query_parameters && query.hasQueryParameters()) return; diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index aa4ff96e050..c2ca04eaa13 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include @@ -41,6 +41,9 @@ ASTPtr ASTSelectQuery::clone() const for (const auto & child : children) res->children.push_back(child->clone()); + res->allow_query_parameters = allow_query_parameters; + res->has_query_parameters = has_query_parameters; + return res; } diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index b3f29009df6..dae718aa040 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -2,7 +2,6 @@ #include #include -#include namespace DB { diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index 50e929a6f46..792fffe5f12 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -23,6 +23,8 @@ ASTPtr ASTSelectWithUnionQuery::clone() const res->list_of_modes = list_of_modes; res->set_of_modes = set_of_modes; + res->has_query_parameters = has_query_parameters; + cloneOutputOptions(*res); return res; } diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index 3f687f76c86..85db26efcd2 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB @@ -112,6 +113,15 @@ void ASTTableExpression::formatImpl(const FormatSettings & settings, FormatState settings.ostr << " "; database_and_table_name->formatImpl(settings, state, frame); } + //In case of table function view, table_function is preferred over subquery for EXPLAIN SYNTAX + else if (table_function && table_function->as() && table_function->as()->name=="view") + { + settings.ostr << " "; + table_function->formatImpl(settings, state, frame); + + } + //For parameterized view, subquery is preferred over table_function for EXPLAIN SYNTAX + //we cannot remove the table function part, as its needed for query substitution else if (subquery) { settings.ostr << settings.nl_or_ws << indent_str; diff --git a/src/Interpreters/QueryParameterVisitor.cpp b/src/Parsers/QueryParameterVisitor.cpp similarity index 96% rename from src/Interpreters/QueryParameterVisitor.cpp rename to src/Parsers/QueryParameterVisitor.cpp index 491c05ac3d2..14750845034 100644 --- a/src/Interpreters/QueryParameterVisitor.cpp +++ b/src/Parsers/QueryParameterVisitor.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/src/Interpreters/QueryParameterVisitor.h 
b/src/Parsers/QueryParameterVisitor.h similarity index 100% rename from src/Interpreters/QueryParameterVisitor.h rename to src/Parsers/QueryParameterVisitor.h diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 8886a77c9b5..45d4bd824f2 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include From a0b1085f3be248800ccb3a850de3d27161f6949b Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Tue, 4 Oct 2022 17:32:48 +0200 Subject: [PATCH 018/262] Fixed issues with using database name in SELECT and added tests - 40907 Parameterized views as table functions --- src/Interpreters/Context.cpp | 22 ++++++++++++++++--- src/Parsers/ASTFunction.h | 3 +++ src/Parsers/ASTSelectWithUnionQuery.h | 3 ++- src/Parsers/ExpressionElementParsers.cpp | 10 +++++++-- .../02428_parameterized_view.reference | 2 ++ .../0_stateless/02428_parameterized_view.sql | 14 ++++++++++++ 6 files changed, 48 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 2c6f27070e3..9b371b7477d 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -96,6 +96,7 @@ #include #include #include +#include #if USE_ROCKSDB #include @@ -1126,11 +1127,26 @@ void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) { - if (const auto * function = table_expression->as()) + if (auto * function = table_expression->as()) { - if (DatabaseCatalog::instance().isTableExist({getCurrentDatabase(), function->name}, getQueryContext())) + String database_name = getCurrentDatabase(); + String table_name = function->name; + + if (function->has_database_name) { - StoragePtr res = DatabaseCatalog::instance().getTable({getCurrentDatabase(), function->name}, getQueryContext()); + std::vector parts; + splitInto<'.'>(parts, function->name); + + if (parts.size() == 2) + { + database_name = parts[0]; + table_name = parts[1]; + } + } + + if (DatabaseCatalog::instance().isTableExist({database_name, table_name}, getQueryContext())) + { + StoragePtr res = DatabaseCatalog::instance().getTable({database_name, table_name}, getQueryContext()); if (res.get()->isView() && res->as()->isParameterizedView()) return res; } diff --git a/src/Parsers/ASTFunction.h b/src/Parsers/ASTFunction.h index 6d5089f802e..3da7b4f9862 100644 --- a/src/Parsers/ASTFunction.h +++ b/src/Parsers/ASTFunction.h @@ -53,6 +53,9 @@ public: std::string getWindowDescription() const; + //This is used for parameterized view, to identify if name is 'db.view' + bool has_database_name = false; + protected: void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; void appendColumnNameImpl(WriteBuffer & ostr) const override; diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index 6562bdc4c3d..d2a2dff2c7b 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -36,7 +36,8 @@ public: bool hasQueryParameters() const { return has_query_parameters; } void setHasQueryParameters(); - void clearAllowQueryParameters(); + //clang-tidy wants it to be const, but it changes flags of children + void clearAllowQueryParameters();// NOLINT }; diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 1de9adb834e..8924e64e18e 100644 --- 
a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -822,7 +822,7 @@ namespace bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserIdentifier id_parser; + ParserCompoundIdentifier compound_id_parser; bool has_all = false; bool has_distinct = false; @@ -838,7 +838,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; } - if (!id_parser.parse(pos, identifier, expected)) + if (!compound_id_parser.parse(pos, identifier, expected)) return false; if (pos->type != TokenType::OpeningRoundBracket) @@ -1034,6 +1034,12 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) function_node->children.push_back(function_node->parameters); } + if (const auto *compound_identifier = identifier->as()) + { + if (!compound_identifier->isShort()) + function_node->has_database_name = true; + } + ParserKeyword filter("FILTER"); ParserKeyword over("OVER"); diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference index bf21cdb6308..8fc3b86ddc1 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.reference +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -1,4 +1,5 @@ 20 +20 SELECT Name, Price, @@ -11,3 +12,4 @@ FROM ) AS v1 50 10 +20 diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index fe7ec419b1a..dffe75c4c4e 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -9,6 +9,7 @@ INSERT INTO Catalog VALUES ('Paper', 20, 1); CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price={price:UInt64}; SELECT Price FROM v1(price=20); +SELECT Price FROM `v1`(price=20); DETACH TABLE v1; ATTACH TABLE v1; @@ -37,3 +38,16 @@ DROP TABLE v1; DROP TABLE v2; DROP TABLE v3; DROP TABLE Catalog; + +CREATE TABLE system.Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; + +INSERT INTO system.Catalog VALUES ('Pen', 10, 3); +INSERT INTO system.Catalog VALUES ('Book', 50, 2); +INSERT INTO system.Catalog VALUES ('Paper', 20, 1); + +CREATE VIEW system.v1 AS SELECT * FROM system.Catalog WHERE Price={price:UInt64}; +SELECT Price FROM system.v1(price=20); +SELECT Price FROM `system.v1`(price=20); -- { serverError UNKNOWN_FUNCTION } + +DROP TABLE system.v1; +DROP TABLE system.Catalog; \ No newline at end of file From 4f0f214e8424f1bb8028843a686477a0cea3f603 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 5 Oct 2022 13:55:30 +0200 Subject: [PATCH 019/262] Fixed test to check for tables and drop after test - 40907 Parameterized views as table functions --- .../0_stateless/02428_parameterized_view.sql | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index dffe75c4c4e..9dfa65a5cbd 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -1,5 +1,9 @@ -DROP TABLE IF EXISTS v1; DROP TABLE IF EXISTS Catalog; +DROP TABLE IF EXISTS v1; +DROP TABLE IF EXISTS v2; +DROP TABLE IF EXISTS v3; +DROP TABLE IF EXISTS system.Catalog; +DROP TABLE IF EXISTS system.v1; CREATE TABLE Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; @@ -34,11 +38,6 @@ SELECT Price FROM v3(price=10); CREATE VIEW v4 AS SELECT 
* FROM Catalog WHERE Price={price:UInt64} AND Quantity={price:UInt64}; -- {serverError BAD_ARGUMENTS} -DROP TABLE v1; -DROP TABLE v2; -DROP TABLE v3; -DROP TABLE Catalog; - CREATE TABLE system.Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; INSERT INTO system.Catalog VALUES ('Pen', 10, 3); @@ -49,5 +48,9 @@ CREATE VIEW system.v1 AS SELECT * FROM system.Catalog WHERE Price={price:UInt64} SELECT Price FROM system.v1(price=20); SELECT Price FROM `system.v1`(price=20); -- { serverError UNKNOWN_FUNCTION } -DROP TABLE system.v1; -DROP TABLE system.Catalog; \ No newline at end of file +DROP TABLE Catalog; +DROP TABLE v1; +DROP TABLE v2; +DROP TABLE v3; +DROP TABLE system.Catalog; +DROP TABLE system.v1; \ No newline at end of file From e256b32fd583f1406a70f4d43bfa9bdc323a5180 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 6 Oct 2022 08:55:15 +0200 Subject: [PATCH 020/262] Fixed clang-tidy non-const function issue - 40907 Parameterized views as table functions --- src/Parsers/ASTSelectWithUnionQuery.cpp | 2 +- src/Parsers/ASTSelectWithUnionQuery.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index 99c5bc5f933..739b50fc3a1 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -115,7 +115,7 @@ void ASTSelectWithUnionQuery::setHasQueryParameters() } } -void ASTSelectWithUnionQuery::clearAllowQueryParameters() +void ASTSelectWithUnionQuery::clearAllowQueryParameters() // NOLINT { if (!list_of_selects) return; diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index d2a2dff2c7b..b1c174fb3a6 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -37,7 +37,7 @@ public: void setHasQueryParameters(); //clang-tidy wants it to be const, but it changes flags of children - void clearAllowQueryParameters();// NOLINT + void clearAllowQueryParameters(); // NOLINT }; From e990c8fdb46398260a762ca68c607451e6b4effc Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 6 Oct 2022 13:22:06 +0200 Subject: [PATCH 021/262] Updated order of deletion of tables/views in test - 40907 Parameterized views as table functions --- tests/queries/0_stateless/02428_parameterized_view.sql | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index 9dfa65a5cbd..01b76ffc54d 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -48,9 +48,10 @@ CREATE VIEW system.v1 AS SELECT * FROM system.Catalog WHERE Price={price:UInt64} SELECT Price FROM system.v1(price=20); SELECT Price FROM `system.v1`(price=20); -- { serverError UNKNOWN_FUNCTION } -DROP TABLE Catalog; + DROP TABLE v1; DROP TABLE v2; DROP TABLE v3; -DROP TABLE system.Catalog; -DROP TABLE system.v1; \ No newline at end of file +DROP TABLE Catalog; +DROP TABLE system.v1; +DROP TABLE system.Catalog; \ No newline at end of file From 8aab336176f9b73f9ff4595c7c56fc8edf802072 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 6 Oct 2022 14:56:54 +0200 Subject: [PATCH 022/262] Updated order of deletion of tables/views in the beginning of test - 40907 Parameterized views as table functions --- tests/queries/0_stateless/02428_parameterized_view.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index 01b76ffc54d..fa689937234 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -1,9 +1,9 @@ -DROP TABLE IF EXISTS Catalog; DROP TABLE IF EXISTS v1; DROP TABLE IF EXISTS v2; DROP TABLE IF EXISTS v3; -DROP TABLE IF EXISTS system.Catalog; +DROP TABLE IF EXISTS Catalog; DROP TABLE IF EXISTS system.v1; +DROP TABLE IF EXISTS system.Catalog; CREATE TABLE Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; From 614fd4cf42ca77dc0329639cc4003e1e2ea2f242 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 17 Oct 2022 18:11:22 +0200 Subject: [PATCH 023/262] Added is_parameterized_view to Context and removed flag from ASTs, updated to use tryGetTable, addressed review comments - 40907 Parameterized views as table functions --- .../sql-reference/statements/create/view.md | 2 +- src/Interpreters/ActionsVisitor.cpp | 2 +- src/Interpreters/Context.cpp | 117 +++++++++--------- src/Interpreters/Context.h | 5 + src/Interpreters/ExpressionAnalyzer.cpp | 11 +- src/Interpreters/InterpreterCreateQuery.cpp | 1 + src/Interpreters/QueryNormalizer.cpp | 14 +-- src/Interpreters/QueryNormalizer.h | 8 +- src/Interpreters/TreeRewriter.cpp | 2 +- src/Parsers/ASTCreateQuery.h | 1 - src/Parsers/ASTSelectQuery.cpp | 1 - src/Parsers/ASTSelectQuery.h | 1 - src/Parsers/ASTSelectWithUnionQuery.cpp | 12 -- src/Parsers/ASTSelectWithUnionQuery.h | 3 - src/Parsers/ASTTablesInSelectQuery.cpp | 7 +- src/Parsers/ExpressionListParsers.cpp | 6 +- src/Parsers/ExpressionListParsers.h | 6 - src/Parsers/ParserCreateQuery.cpp | 3 - src/Parsers/ParserSelectQuery.cpp | 1 - src/Parsers/ParserSelectQuery.h | 8 -- src/Parsers/ParserSelectWithUnionQuery.cpp | 2 +- src/Parsers/ParserSelectWithUnionQuery.h | 3 - src/Parsers/ParserUnionQueryElement.cpp | 2 +- src/Parsers/ParserUnionQueryElement.h | 8 -- src/Storages/StorageView.cpp | 4 - 25 files changed, 90 insertions(+), 140 deletions(-) diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 70ac5629004..c4cf9da7022 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -42,7 +42,7 @@ This is similar to normal view but can be created with parameter instead of lite ``` sql CREATE VIEW view AS SELECT * FROM TABLE WHERE Column1={column1:datatype1} and Column2={column2:datatype2} ... ``` -The above creates a view for table which can be used as table function by substituting parameters as show below. +The above creates a view for table which can be used as table function by substituting parameters as shown below. ``` sql SELECT * FROM view(column1=value1, column2=value2 ...) 
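As a concrete illustration of the documentation hunk above, here is a minimal end-to-end sketch of the feature, reusing the `Catalog` table and `v1` view defined by the stateless test `02428_parameterized_view.sql` in this series; the names are the test's own and are shown only as an example, not as additional changes introduced by the patch:

``` sql
-- Minimal sketch of the parameterized-view flow added by this patch series.
-- Table and view names follow 02428_parameterized_view.sql; illustrative only.
CREATE TABLE Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory;
INSERT INTO Catalog VALUES ('Pen', 10, 3), ('Book', 50, 2), ('Paper', 20, 1);

-- The SELECT is stored with the {price:UInt64} parameter left unsubstituted.
CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price={price:UInt64};

-- The view is then used as a table function; the value is substituted at query time.
SELECT Price FROM v1(price=20);   -- returns 20, as in the test reference file
```

After substitution the query behaves like the equivalent plain `SELECT Price FROM Catalog WHERE Price = 20`.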
diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index c694f6007fc..5abf0e61c00 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -743,7 +743,7 @@ std::optional ActionsMatcher::getNameAndTypeFromAST(const ASTPt if (const auto * node = index.tryGetNode(child_column_name)) return NameAndTypePair(child_column_name, node->result_type); - if (!data.only_consts && analyzeReceiveQueryParams(ast).empty()) + if (!data.only_consts && data.getContext()->isParameterizedView() && analyzeReceiveQueryParams(ast).empty()) { throw Exception( "Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 679b3c155dc..9d466d45ef2 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1229,71 +1229,68 @@ void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) { - if (auto * function = table_expression->as()) + const ASTFunction * function = assert_cast(table_expression.get()); + String database_name = getCurrentDatabase(); + String table_name = function->name; + + if (function->has_database_name) { - String database_name = getCurrentDatabase(); - String table_name = function->name; + std::vector parts; + splitInto<'.'>(parts, function->name); - if (function->has_database_name) + if (parts.size() == 2) { - std::vector parts; - splitInto<'.'>(parts, function->name); - - if (parts.size() == 2) - { - database_name = parts[0]; - table_name = parts[1]; - } + database_name = parts[0]; + table_name = parts[1]; } - - if (DatabaseCatalog::instance().isTableExist({database_name, table_name}, getQueryContext())) - { - StoragePtr res = DatabaseCatalog::instance().getTable({database_name, table_name}, getQueryContext()); - if (res.get()->isView() && res->as()->isParameterizedView()) - return res; - } - auto hash = table_expression->getTreeHash(); - String key = toString(hash.first) + '_' + toString(hash.second); - StoragePtr & res = table_function_results[key]; - if (!res) - { - TableFunctionPtr table_function_ptr; - try - { - table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); - } - catch (Exception & e) - { - e.addMessage(" or incorrect parameterized view"); - throw; - } - if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) - { - const auto & insertion_table = getInsertionTable(); - if (!insertion_table.empty()) - { - const auto & structure_hint - = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; - table_function_ptr->setStructureHint(structure_hint); - } - } - - res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); - - /// Since ITableFunction::parseArguments() may change table_expression, i.e.: - /// - /// remote('127.1', system.one) -> remote('127.1', 'system.one'), - /// - auto new_hash = table_expression->getTreeHash(); - if (hash != new_hash) - { - key = toString(new_hash.first) + '_' + toString(new_hash.second); - table_function_results[key] = res; - } - } - return res; } - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unable to fetch function from query"); + + StoragePtr table = DatabaseCatalog::instance().tryGetTable({database_name, table_name}, getQueryContext()); + if 
(table) + { + if (table.get()->isView() && table->as()->isParameterizedView()) + return table; + } + auto hash = table_expression->getTreeHash(); + String key = toString(hash.first) + '_' + toString(hash.second); + StoragePtr & res = table_function_results[key]; + if (!res) + { + TableFunctionPtr table_function_ptr; + try + { + table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); + } + catch (Exception & e) + { + e.addMessage(" or incorrect parameterized view"); + throw; + } + if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) + { + const auto & insertion_table = getInsertionTable(); + if (!insertion_table.empty()) + { + const auto & structure_hint + = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; + table_function_ptr->setStructureHint(structure_hint); + } + } + + res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); + + /// Since ITableFunction::parseArguments() may change table_expression, i.e.: + /// + /// remote('127.1', system.one) -> remote('127.1', 'system.one'), + /// + auto new_hash = table_expression->getTreeHash(); + if (hash != new_hash) + { + key = toString(new_hash.first) + '_' + toString(new_hash.second); + table_function_results[key] = res; + } + } + return res; } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 635c571b173..c39c6fb8ee3 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -366,6 +366,8 @@ private: /// Temporary data for query execution accounting. TemporaryDataOnDiskScopePtr temp_data_on_disk; + + bool is_parameterized_view = false; public: /// Some counters for current query execution. /// Most of them are workarounds and should be removed in the future. 
@@ -940,6 +942,9 @@ public: bool applyDeletedMask() const { return apply_deleted_mask; } void setApplyDeletedMask(bool apply) { apply_deleted_mask = apply; } + bool isParameterizedView() const { return is_parameterized_view; } + void setIsParameterizedView(bool is_parameterized_view_) { is_parameterized_view = is_parameterized_view_; } + ActionLocksManagerPtr getActionLocksManager() const; enum class ApplicationType diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 4efb08c414f..ebfde738e5a 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1286,9 +1286,9 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, getRootActions(select_query->where(), only_types, step.actions()); - //For creating parameterized view, query parameters are allowed in select - //As select will be stored without substituting query parameters, we don't want to evaluate the where expression - if (select_query->allow_query_parameters && select_query->hasQueryParameters()) + /// For creating parameterized view, query parameters are allowed in select + /// As select will be stored without substituting query parameters, we don't want to evaluate the where expression + if (this->getContext()->isParameterizedView()) return true; auto where_column_name = select_query->where()->getColumnName(); @@ -1910,8 +1910,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( //For creating parameterized view, query parameters are allowed in select //As select will be stored without substituting query parameters, we don't want to evaluate the where expression - bool has_query_parameters = query.allow_query_parameters && query.hasQueryParameters(); - if (!has_query_parameters) + if (!context->isParameterizedView()) { auto & column_elem = before_where_sample.getByName(query.where()->getColumnName()); @@ -2081,7 +2080,7 @@ void ExpressionAnalysisResult::finalize( { //For creating parameterized view, query parameters are allowed in select //As select will be stored without substituting query parameters, we don't want to evaluate the expressions/steps - if (query.allow_query_parameters && query.hasQueryParameters()) + if (chain.getContext()->isParameterizedView()) return; if (prewhere_step_num >= 0) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 41c378babcd..4d810bb682d 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -714,6 +714,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti } else if (create.select) { + getContext()->setIsParameterizedView(create.isParameterizedView()); Block as_select_sample = InterpreterSelectWithUnionQuery::getSampleBlock(create.select->clone(), getContext()); properties.columns = ColumnsDescription(as_select_sample.getNamesAndTypesList()); } diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index ae07d5f5ad7..941896c24de 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -122,13 +122,13 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) } } -void QueryNormalizer::visit(ASTQueryParameter & node, const ASTPtr & ast, Data & data) +void QueryNormalizer::visit(ASTQueryParameter & node, Data & data) { - auto it_alias = data.aliases.find(node.name); - if (it_alias != data.aliases.end()) + auto it_alias = 
data.query_parameters.find(node.name); + if (it_alias != data.query_parameters.end()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Same alias used multiple times {} ", backQuote(node.name)); - data.aliases[node.name] = ast; + data.query_parameters.insert(node.name); } @@ -152,8 +152,6 @@ static bool needVisitChild(const ASTPtr & child) /// special visitChildren() for ASTSelectQuery void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr &, Data & data) { - data.allow_query_parameters = select.allow_query_parameters; - for (auto & child : select.children) { if (needVisitChild(child)) @@ -270,8 +268,8 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) visit(*node_select, ast, data); else if (auto * node_param = ast->as()) { - if (data.allow_query_parameters) - visit(*node_param, ast, data); + if (data.is_parameterized_view) + visit(*node_param, data); else throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); } diff --git a/src/Interpreters/QueryNormalizer.h b/src/Interpreters/QueryNormalizer.h index ffd2c46ca77..5268c8ac157 100644 --- a/src/Interpreters/QueryNormalizer.h +++ b/src/Interpreters/QueryNormalizer.h @@ -43,6 +43,7 @@ public: Aliases & aliases; const NameSet & source_columns_set; ExtractedSettings settings; + NameSet query_parameters; /// tmp data size_t level; @@ -53,15 +54,16 @@ public: /// It's Ok to have "c + 1 AS c" in queries, but not in table definition const bool allow_self_aliases; /// for constructs like "SELECT column + 1 AS column" - bool allow_query_parameters; + bool is_parameterized_view; - Data(Aliases & aliases_, const NameSet & source_columns_set_, bool ignore_alias_, ExtractedSettings && settings_, bool allow_self_aliases_) + Data(Aliases & aliases_, const NameSet & source_columns_set_, bool ignore_alias_, ExtractedSettings && settings_, bool allow_self_aliases_, bool is_parameterized_view_ = false) : aliases(aliases_) , source_columns_set(source_columns_set_) , settings(settings_) , level(0) , ignore_alias(ignore_alias_) , allow_self_aliases(allow_self_aliases_) + , is_parameterized_view(is_parameterized_view_) {} }; @@ -82,7 +84,7 @@ private: static void visit(ASTIdentifier &, ASTPtr &, Data &); static void visit(ASTTablesInSelectQueryElement &, const ASTPtr &, Data &); static void visit(ASTSelectQuery &, const ASTPtr &, Data &); - static void visit(ASTQueryParameter &, const ASTPtr &, Data &); + static void visit(ASTQueryParameter &, Data &); static void visitChildren(IAST * node, Data & data); }; diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 2f5bfd00938..e34ef89d511 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1479,7 +1479,7 @@ void TreeRewriter::normalize( FunctionNameNormalizer().visit(query.get()); /// Common subexpression elimination. Rewrite rules. 
- QueryNormalizer::Data normalizer_data(aliases, source_columns_set, ignore_alias, settings, allow_self_aliases); + QueryNormalizer::Data normalizer_data(aliases, source_columns_set, ignore_alias, settings, allow_self_aliases, context_->isParameterizedView()); QueryNormalizer(normalizer_data).visit(query); optimizeGroupingSets(query); diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index 2a6da778211..567376cffcb 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -24,7 +24,6 @@ public: IAST * sample_by = nullptr; IAST * ttl_table = nullptr; ASTSetQuery * settings = nullptr; - bool allow_query_parameters = false; String getID(char) const override { return "Storage definition"; } diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index c2ca04eaa13..5b2644e550b 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -41,7 +41,6 @@ ASTPtr ASTSelectQuery::clone() const for (const auto & child : children) res->children.push_back(child->clone()); - res->allow_query_parameters = allow_query_parameters; res->has_query_parameters = has_query_parameters; return res; diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index dae718aa040..20905d3e1ea 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -87,7 +87,6 @@ public: bool group_by_with_constant_keys = false; bool group_by_with_grouping_sets = false; bool limit_with_ties = false; - bool allow_query_parameters = false; ASTPtr & refSelect() { return getExpression(Expression::SELECT); } ASTPtr & refTables() { return getExpression(Expression::TABLES); } diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index 739b50fc3a1..d139ae42cae 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -115,16 +115,4 @@ void ASTSelectWithUnionQuery::setHasQueryParameters() } } -void ASTSelectWithUnionQuery::clearAllowQueryParameters() // NOLINT -{ - if (!list_of_selects) - return; - - for (const auto & child : list_of_selects->children) - { - if (auto * select_node = child->as()) - select_node->allow_query_parameters = false; - } -} - } diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index b1c174fb3a6..ef804616cbb 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -36,9 +36,6 @@ public: bool hasQueryParameters() const { return has_query_parameters; } void setHasQueryParameters(); - //clang-tidy wants it to be const, but it changes flags of children - void clearAllowQueryParameters(); // NOLINT - }; } diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index 85db26efcd2..9e889c796f7 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -113,15 +113,14 @@ void ASTTableExpression::formatImpl(const FormatSettings & settings, FormatState settings.ostr << " "; database_and_table_name->formatImpl(settings, state, frame); } - //In case of table function view, table_function is preferred over subquery for EXPLAIN SYNTAX + /// In case of table function view, table_function is preferred over subquery for EXPLAIN SYNTAX else if (table_function && table_function->as() && table_function->as()->name=="view") { settings.ostr << " "; table_function->formatImpl(settings, state, frame); - } - //For parameterized view, subquery is preferred over table_function for 
EXPLAIN SYNTAX - //we cannot remove the table function part, as its needed for query substitution + /// For parameterized view, subquery is preferred over table_function for EXPLAIN SYNTAX + /// we cannot remove the table function part, as its needed for query substitution else if (subquery) { settings.ostr << settings.nl_or_ws << indent_str; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 137c7c87cca..5307d4aec5c 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -62,7 +62,7 @@ bool ParserList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserUnionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserUnionQueryElement elem_parser(allow_query_parameters); + ParserUnionQueryElement elem_parser; ParserKeyword s_union_parser("UNION"); ParserKeyword s_all_parser("ALL"); ParserKeyword s_distinct_parser("DISTINCT"); @@ -1997,9 +1997,9 @@ std::unique_ptr getFunctionLayer(ASTPtr identifier, bool is_table_functio else { bool has_database_name = false; - if (const auto *compound_identifier = identifier->as()) + if (const auto *ast_identifier = identifier->as()) { - if (!compound_identifier->isShort()) + if (ast_identifier->compound()) has_database_name = true; } return std::make_unique(function_name, allow_function_parameters_, has_database_name); diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 5ab7b5e7857..653654e5a33 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -84,11 +84,6 @@ private: class ParserUnionList : public IParserBase { public: - explicit ParserUnionList(bool allow_query_parameters_ = false) - : allow_query_parameters(allow_query_parameters_) - { - } - template static bool parseUtil(Pos & pos, const ElemFunc & parse_element, const SepFunc & parse_separator) { @@ -113,7 +108,6 @@ public: } auto getUnionModes() const { return union_modes; } - bool allow_query_parameters; protected: const char * getName() const override { return "list of union elements"; } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 8d8e4352769..fc90f9ce3ed 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -1285,10 +1285,7 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec is_materialized_view = true; } else - { is_ordinary_view = true; - select_p.allow_query_parameters = true; - } if (!s_view.ignore(pos, expected)) return false; diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 61381573421..cf335270734 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -34,7 +34,6 @@ namespace ErrorCodes bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { auto select_query = std::make_shared(); - select_query->allow_query_parameters = allow_query_parameters; node = select_query; ParserKeyword s_select("SELECT"); diff --git a/src/Parsers/ParserSelectQuery.h b/src/Parsers/ParserSelectQuery.h index ea9f71f36e0..deac25df57d 100644 --- a/src/Parsers/ParserSelectQuery.h +++ b/src/Parsers/ParserSelectQuery.h @@ -9,14 +9,6 @@ namespace DB class ParserSelectQuery : public IParserBase { -public: - explicit ParserSelectQuery(bool allow_query_parameters_ = false) - : allow_query_parameters(allow_query_parameters_) - { - } - - bool allow_query_parameters; - protected: const char * getName() const override 
{ return "SELECT query"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/ParserSelectWithUnionQuery.cpp b/src/Parsers/ParserSelectWithUnionQuery.cpp index 49f631a2881..e046030bc38 100644 --- a/src/Parsers/ParserSelectWithUnionQuery.cpp +++ b/src/Parsers/ParserSelectWithUnionQuery.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr list_node; - ParserUnionList parser(allow_query_parameters); + ParserUnionList parser; if (!parser.parse(pos, list_node, expected)) return false; diff --git a/src/Parsers/ParserSelectWithUnionQuery.h b/src/Parsers/ParserSelectWithUnionQuery.h index 6edf8a8d60e..0bf2946e429 100644 --- a/src/Parsers/ParserSelectWithUnionQuery.h +++ b/src/Parsers/ParserSelectWithUnionQuery.h @@ -8,9 +8,6 @@ namespace DB class ParserSelectWithUnionQuery : public IParserBase { -public: - bool allow_query_parameters = false; - protected: const char * getName() const override { return "SELECT query, possibly with UNION"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/ParserUnionQueryElement.cpp b/src/Parsers/ParserUnionQueryElement.cpp index 0ddaa323404..efd022e6362 100644 --- a/src/Parsers/ParserUnionQueryElement.cpp +++ b/src/Parsers/ParserUnionQueryElement.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserUnionQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (!ParserSubquery().parse(pos, node, expected) && !ParserSelectQuery(allow_query_parameters).parse(pos, node, expected)) + if (!ParserSubquery().parse(pos, node, expected) && !ParserSelectQuery().parse(pos, node, expected)) return false; if (const auto * ast_subquery = node->as()) diff --git a/src/Parsers/ParserUnionQueryElement.h b/src/Parsers/ParserUnionQueryElement.h index ca372052306..6b63c62c85b 100644 --- a/src/Parsers/ParserUnionQueryElement.h +++ b/src/Parsers/ParserUnionQueryElement.h @@ -9,14 +9,6 @@ namespace DB class ParserUnionQueryElement : public IParserBase { -public: - explicit ParserUnionQueryElement(bool allow_query_parameters_ = false) - : allow_query_parameters(allow_query_parameters_) - { - } - - bool allow_query_parameters; - protected: const char * getName() const override { return "SELECT query, subquery, possibly with UNION"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 2a82bf327e7..ec7c665e135 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -100,10 +100,6 @@ StorageView::StorageView( throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); SelectQueryDescription description; - //When storing the select_query clear allow_query_parameters from the select, so that when this view is used in select, - //the query parameters are expected to be substituted - query.select->clearAllowQueryParameters(); - description.inner_query = query.select->ptr(); is_parameterized_view = query.isParameterizedView(); storage_metadata.setSelectQuery(description); From e4ac3d0e18f57e4a777aa1bce05cb1a4b034ab5f Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 19 Oct 2022 18:30:03 +0200 Subject: [PATCH 024/262] Added FunctionParameterValuesVisitor, renamed bool to is_create_parameterized_view, added checks for parameterized view and support & test to propagate query parameters - 40907 Parameterized views as table functions --- 
src/Interpreters/ActionsVisitor.cpp | 6 +- src/Interpreters/Context.h | 6 +- src/Interpreters/ExpressionAnalyzer.cpp | 6 +- src/Interpreters/InterpreterCreateQuery.cpp | 1 - src/Interpreters/InterpreterExplainQuery.cpp | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 8 +- src/Interpreters/QueryNormalizer.cpp | 4 +- src/Interpreters/QueryNormalizer.h | 6 +- src/Interpreters/TreeRewriter.cpp | 2 +- src/Interpreters/executeQuery.cpp | 5 +- src/Parsers/ASTSelectQuery.cpp | 32 -------- src/Parsers/ASTSelectQuery.h | 1 - .../FunctionParameterValuesVisitor.cpp | 75 +++++++++++++++++++ src/Parsers/FunctionParameterValuesVisitor.h | 14 ++++ src/Storages/SelectQueryInfo.h | 2 + src/Storages/StorageView.cpp | 7 +- src/Storages/StorageView.h | 6 +- .../02428_parameterized_view.reference | 1 + .../0_stateless/02428_parameterized_view.sql | 4 + 19 files changed, 131 insertions(+), 57 deletions(-) create mode 100644 src/Parsers/FunctionParameterValuesVisitor.cpp create mode 100644 src/Parsers/FunctionParameterValuesVisitor.h diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index b7510f41561..2a0bf4016e8 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -763,7 +763,11 @@ std::optional ActionsMatcher::getNameAndTypeFromAST(const ASTPt if (const auto * node = index.tryGetNode(child_column_name)) return NameAndTypePair(child_column_name, node->result_type); - if (!data.only_consts && data.getContext()->isParameterizedView() && analyzeReceiveQueryParams(ast).empty()) + /// For parameterized view, we allow query parameters in create which will be substituted by select queries + /// so these cannot be evaluated. But if its a parameterized view with sub part ast which does not contain query parameters + /// then it can be evaluated + /// Eg : CREATE VIEW v1 AS SELECT * FROM t1 WHERE Column1={c1:UInt64} AND Column2=3; - Column2=3 should get NameAndTypePair + if (!data.only_consts && (data.getContext()->isCreateParameterizedView() && analyzeReceiveQueryParams(ast).empty())) { throw Exception( "Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 9afe2da4384..70e875cb868 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -369,7 +369,7 @@ private: /// Temporary data for query execution accounting. TemporaryDataOnDiskScopePtr temp_data_on_disk; - bool is_parameterized_view = false; + bool is_create_parameterized_view = false; public: /// Some counters for current query execution. /// Most of them are workarounds and should be removed in the future. 
@@ -948,8 +948,8 @@ public: bool applyDeletedMask() const { return apply_deleted_mask; } void setApplyDeletedMask(bool apply) { apply_deleted_mask = apply; } - bool isParameterizedView() const { return is_parameterized_view; } - void setIsParameterizedView(bool is_parameterized_view_) { is_parameterized_view = is_parameterized_view_; } + bool isCreateParameterizedView() const { return is_create_parameterized_view; } + void setIsCreateParameterizedView(bool is_create_parameterized_view_) { is_create_parameterized_view = is_create_parameterized_view_; } ActionLocksManagerPtr getActionLocksManager() const; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index ebfde738e5a..53c2eb0bbe3 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1288,7 +1288,7 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, /// For creating parameterized view, query parameters are allowed in select /// As select will be stored without substituting query parameters, we don't want to evaluate the where expression - if (this->getContext()->isParameterizedView()) + if (this->getContext()->isCreateParameterizedView()) return true; auto where_column_name = select_query->where()->getColumnName(); @@ -1910,7 +1910,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( //For creating parameterized view, query parameters are allowed in select //As select will be stored without substituting query parameters, we don't want to evaluate the where expression - if (!context->isParameterizedView()) + if (!context->isCreateParameterizedView()) { auto & column_elem = before_where_sample.getByName(query.where()->getColumnName()); @@ -2080,7 +2080,7 @@ void ExpressionAnalysisResult::finalize( { //For creating parameterized view, query parameters are allowed in select //As select will be stored without substituting query parameters, we don't want to evaluate the expressions/steps - if (chain.getContext()->isParameterizedView()) + if (chain.getContext()->isCreateParameterizedView()) return; if (prewhere_step_num >= 0) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 32f57ebdc9a..e89aa2244fe 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -726,7 +726,6 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti } else if (create.select) { - getContext()->setIsParameterizedView(create.isParameterizedView()); Block as_select_sample = InterpreterSelectWithUnionQuery::getSampleBlock(create.select->clone(), getContext()); properties.columns = ColumnsDescription(as_select_sample.getNamesAndTypesList()); } diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 4799970b6a1..b2e61f04b4f 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -68,7 +68,7 @@ namespace if (query_info.view_query) { ASTPtr tmp; - StorageView::replaceWithSubquery(select, query_info.view_query->clone(), tmp); + StorageView::replaceWithSubquery(select, query_info.view_query->clone(), tmp, query_info.is_parameterized_view); } } }; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 9d96eb5c0a3..a5883a44201 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -88,7 +88,7 @@ 
#include #include #include - +#include namespace DB { @@ -501,8 +501,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( ASTPtr view_table; if (view) { - view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot); - view->replaceQueryParametersIfParametrizedView(query_ptr, getSelectQuery().getQueryParameterValues()); + NameToNameMap parameter_values = analyzeReceiveFunctionParamValues(query_ptr); + query_info.is_parameterized_view = view->isParameterizedView(); + view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot, view->isParameterizedView()); + view->replaceQueryParametersIfParametrizedView(query_ptr, parameter_values); } syntax_analyzer_result = TreeRewriter(context).analyzeSelect( diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index 941896c24de..19112a7c4c0 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -124,6 +124,8 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) void QueryNormalizer::visit(ASTQueryParameter & node, Data & data) { + /// This is used only for create parameterized view to check if same parameter name is used twice + /// Eg: CREATE VIEW v1 AS SELECT * FROM t1 WHERE Column1={c1:UInt64} AND Column2={c1:UInt64}; - c1 is used twice auto it_alias = data.query_parameters.find(node.name); if (it_alias != data.query_parameters.end()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Same alias used multiple times {} ", backQuote(node.name)); @@ -268,7 +270,7 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) visit(*node_select, ast, data); else if (auto * node_param = ast->as()) { - if (data.is_parameterized_view) + if (data.is_create_parameterized_view) visit(*node_param, data); else throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); diff --git a/src/Interpreters/QueryNormalizer.h b/src/Interpreters/QueryNormalizer.h index 5268c8ac157..5006d3ad83c 100644 --- a/src/Interpreters/QueryNormalizer.h +++ b/src/Interpreters/QueryNormalizer.h @@ -54,16 +54,16 @@ public: /// It's Ok to have "c + 1 AS c" in queries, but not in table definition const bool allow_self_aliases; /// for constructs like "SELECT column + 1 AS column" - bool is_parameterized_view; + bool is_create_parameterized_view; - Data(Aliases & aliases_, const NameSet & source_columns_set_, bool ignore_alias_, ExtractedSettings && settings_, bool allow_self_aliases_, bool is_parameterized_view_ = false) + Data(Aliases & aliases_, const NameSet & source_columns_set_, bool ignore_alias_, ExtractedSettings && settings_, bool allow_self_aliases_, bool is_create_parameterized_view_ = false) : aliases(aliases_) , source_columns_set(source_columns_set_) , settings(settings_) , level(0) , ignore_alias(ignore_alias_) , allow_self_aliases(allow_self_aliases_) - , is_parameterized_view(is_parameterized_view_) + , is_create_parameterized_view(is_create_parameterized_view_) {} }; diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 7ff065f2228..c58aa8f87a1 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1480,7 +1480,7 @@ void TreeRewriter::normalize( FunctionNameNormalizer().visit(query.get()); /// Common subexpression elimination. Rewrite rules. 
- QueryNormalizer::Data normalizer_data(aliases, source_columns_set, ignore_alias, settings, allow_self_aliases, context_->isParameterizedView()); + QueryNormalizer::Data normalizer_data(aliases, source_columns_set, ignore_alias, settings, allow_self_aliases, context_->isCreateParameterizedView()); QueryNormalizer(normalizer_data).visit(query); optimizeGroupingSets(query); diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index abca563de55..b73a8769301 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -408,8 +408,11 @@ static std::tuple executeQueryImpl( if (const auto * insert_query = ast->as(); insert_query && insert_query->data) query_end = insert_query->data; + if (const auto * create_query = ast->as()) + context->setIsCreateParameterizedView(create_query->isParameterizedView()); + /// Replace ASTQueryParameter with ASTLiteral for prepared statements. - if (context->hasQueryParameters()) + if (!context->isCreateParameterizedView() && context->hasQueryParameters()) { ReplaceQueryParameterVisitor visitor(context->getQueryParameters()); visitor.visit(ast); diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 5b2644e550b..913e582acf1 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -7,8 +7,6 @@ #include #include #include -#include -#include #include #include @@ -490,34 +488,4 @@ void ASTSelectQuery::setHasQueryParameters() has_query_parameters = true; } -NameToNameMap ASTSelectQuery::getQueryParameterValues() const -{ - NameToNameMap parameter_values; - std::queue queue; - queue.push(this->clone()); - - while (!queue.empty()) - { - auto ast = queue.front(); - queue.pop(); - if (const auto * expression_list = ast->as()) - { - if (expression_list->children.size() == 2) - { - if (const auto * identifier = expression_list->children[0]->as()) - { - if (const auto * literal = expression_list->children[1]->as()) - { - parameter_values[identifier->name()] = convertFieldToString(literal->value); - } - } - } - } - for (const auto & child : ast->children) - queue.push(child); - } - - return parameter_values; -} - } diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 20905d3e1ea..5ec56de891e 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -145,7 +145,6 @@ public: bool has_query_parameters = false; bool hasQueryParameters() const { return has_query_parameters; } void setHasQueryParameters(); - NameToNameMap getQueryParameterValues() const; protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/src/Parsers/FunctionParameterValuesVisitor.cpp b/src/Parsers/FunctionParameterValuesVisitor.cpp new file mode 100644 index 00000000000..93192084187 --- /dev/null +++ b/src/Parsers/FunctionParameterValuesVisitor.cpp @@ -0,0 +1,75 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +class FunctionParameterValuesVisitor +{ +public: + explicit FunctionParameterValuesVisitor(NameToNameMap & parameter_values_) + : parameter_values(parameter_values_) + { + } + + void visit(const ASTPtr & ast) + { + if (const auto * expression = ast->as()) + visitExpressionList(*expression); + for (const auto & child : ast->children) + visit(child); + } + +private: + NameToNameMap & parameter_values; + + void 
visitExpressionList(const ASTExpressionList & expression_list) + { + if (expression_list.children.size() == 2) + { + if (const auto * identifier = expression_list.children[0]->as()) + { + if (const auto * literal = expression_list.children[1]->as()) + { + parameter_values[identifier->name()] = convertFieldToString(literal->value); + } + else if (const auto * function = expression_list.children[1]->as()) + { + if (isFunctionCast(function)) + { + const auto * cast_expression = assert_cast(function->arguments.get()); + if (cast_expression->children.size() != 2) + throw Exception("Function CAST must have exactly two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + if (const auto * cast_literal = cast_expression->children[0]->as()) + { + parameter_values[identifier->name()] = convertFieldToString(cast_literal->value); + } + } + } + } + } + } +}; + +NameToNameMap analyzeReceiveFunctionParamValues(const ASTPtr & ast) +{ + NameToNameMap parameter_values; + FunctionParameterValuesVisitor(parameter_values).visit(ast); + return parameter_values; +} + + +} diff --git a/src/Parsers/FunctionParameterValuesVisitor.h b/src/Parsers/FunctionParameterValuesVisitor.h new file mode 100644 index 00000000000..02053fe4a3d --- /dev/null +++ b/src/Parsers/FunctionParameterValuesVisitor.h @@ -0,0 +1,14 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +/// Find parameters in a query paramater values and collect them into map. +NameToNameMap analyzeReceiveFunctionParamValues(const ASTPtr & ast); + +} diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index f2835ab4dbf..082cab3d49e 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -220,6 +220,8 @@ struct SelectQueryInfo Block minmax_count_projection_block; MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr; + bool is_parameterized_view = false; + InputOrderInfoPtr getInputOrderInfo() const { return input_order_info ? input_order_info : (projection ? projection->input_order_info : nullptr); diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index ec7c665e135..44d0cead4b3 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -185,13 +185,14 @@ void StorageView::replaceQueryParametersIfParametrizedView(ASTPtr & outer_query, } } -void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name) +void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name, bool parameterized_view) { ASTTableExpression * table_expression = getFirstTableExpression(outer_query); if (!table_expression->database_and_table_name) { - // If it's a view or merge table function, add a fake db.table name. + /// If it's a view or merge table function, add a fake db.table name. 
+ /// For parameterized view, the function name is the db.view name, so add the function name if (table_expression->table_function) { auto table_function_name = table_expression->table_function->as()->name; @@ -199,7 +200,7 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ table_expression->database_and_table_name = std::make_shared("__view"); else if (table_function_name == "merge") table_expression->database_and_table_name = std::make_shared("__merge"); - else + else if (parameterized_view) table_expression->database_and_table_name = std::make_shared(table_function_name); } diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 1ed64c482e0..1b08801af3b 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -37,12 +37,12 @@ public: void replaceQueryParametersIfParametrizedView(ASTPtr & outer_query, const NameToNameMap & parameter_values) const; - static void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot) + static void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot, const bool parameterized_view) { - replaceWithSubquery(select_query, metadata_snapshot->getSelectQuery().inner_query->clone(), view_name); + replaceWithSubquery(select_query, metadata_snapshot->getSelectQuery().inner_query->clone(), view_name, parameterized_view); } - static void replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name); + static void replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name, const bool parameterized_view); static ASTPtr restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name); protected: diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference index 8fc3b86ddc1..37120d2d1b9 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.reference +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -1,5 +1,6 @@ 20 20 +10 SELECT Name, Price, diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index fa689937234..e0a67e7690b 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -15,6 +15,10 @@ CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price={price:UInt64}; SELECT Price FROM v1(price=20); SELECT Price FROM `v1`(price=20); +set param_price=10; +SELECT Price FROM v1; -- { serverError UNKNOWN_QUERY_PARAMETER} +SELECT Price FROM v1(price={price:UInt64}); + DETACH TABLE v1; ATTACH TABLE v1; From 1dde95b6e7e314d049a3fc33484ba215d368f8bd Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 20 Oct 2022 10:31:41 +0200 Subject: [PATCH 025/262] Updated hasQueryParameters using mutable optional bool and typo comment fixed - 40907 Parameterized views as table functions --- src/Parsers/ASTSelectQuery.cpp | 15 ++++++++----- src/Parsers/ASTSelectQuery.h | 5 ++--- src/Parsers/ASTSelectWithUnionQuery.cpp | 23 ++++++++++---------- src/Parsers/ASTSelectWithUnionQuery.h | 5 ++--- src/Parsers/ExpressionListParsers.cpp | 1 - src/Parsers/FunctionParameterValuesVisitor.h | 2 +- src/Parsers/ParserSelectWithUnionQuery.cpp | 1 - 7 files changed, 26 insertions(+), 26 deletions(-) diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 913e582acf1..b5198e3df0d 
100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -478,14 +478,17 @@ void ASTSelectQuery::setFinal() // NOLINT method can be made const tables_element.table_expression->as().final = true; } - -void ASTSelectQuery::setHasQueryParameters() +bool ASTSelectQuery::hasQueryParameters() const { - if (!this->where()) - return; + if (!has_query_parameters.has_value()) + { + if (analyzeReceiveQueryParams(std::make_shared(*this)).empty()) + has_query_parameters = false; + else + has_query_parameters = true; + } - if (!analyzeReceiveQueryParams(this->where()).empty()) - has_query_parameters = true; + return has_query_parameters.value(); } } diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 5ec56de891e..5c64534f1c2 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -142,9 +142,8 @@ public: QueryKind getQueryKind() const override { return QueryKind::Select; } - bool has_query_parameters = false; - bool hasQueryParameters() const { return has_query_parameters; } - void setHasQueryParameters(); + mutable std::optional has_query_parameters; + bool hasQueryParameters() const; protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index d139ae42cae..b0030294727 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -95,24 +95,25 @@ bool ASTSelectWithUnionQuery::hasNonDefaultUnionMode() const || set_of_modes.contains(SelectUnionMode::EXCEPT_DISTINCT); } - -void ASTSelectWithUnionQuery::setHasQueryParameters() +bool ASTSelectWithUnionQuery::hasQueryParameters() const { - if (!list_of_selects) - return; - - for (const auto & child : list_of_selects->children) + if (!has_query_parameters.has_value()) { - if (auto * select_node = child->as()) + for (const auto & child : list_of_selects->children) { - select_node->setHasQueryParameters(); - if (select_node->hasQueryParameters()) + if (auto * select_node = child->as()) { - has_query_parameters = true; - break; + if (select_node->hasQueryParameters()) + { + has_query_parameters = true; + return has_query_parameters.value(); + } } } + has_query_parameters = false; } + + return has_query_parameters.value(); } } diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index ef804616cbb..334bb86932f 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -32,9 +32,8 @@ public: /// Consider any mode other than ALL as non-default. 
bool hasNonDefaultUnionMode() const; - bool has_query_parameters = false; - bool hasQueryParameters() const { return has_query_parameters; } - void setHasQueryParameters(); + mutable std::optional has_query_parameters; + bool hasQueryParameters() const; }; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 5307d4aec5c..1ae2973657d 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -221,7 +221,6 @@ static bool modifyAST(ASTPtr ast, SubqueryFunctionType type) select_with_union_query->list_of_selects = std::make_shared(); select_with_union_query->list_of_selects->children.push_back(std::move(select_query)); select_with_union_query->children.push_back(select_with_union_query->list_of_selects); - select_with_union_query->setHasQueryParameters(); auto new_subquery = std::make_shared(); new_subquery->children.push_back(select_with_union_query); diff --git a/src/Parsers/FunctionParameterValuesVisitor.h b/src/Parsers/FunctionParameterValuesVisitor.h index 02053fe4a3d..7fd594ddc84 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.h +++ b/src/Parsers/FunctionParameterValuesVisitor.h @@ -8,7 +8,7 @@ namespace DB { -/// Find parameters in a query paramater values and collect them into map. +/// Find parameters in a query parameter values and collect them into map. NameToNameMap analyzeReceiveFunctionParamValues(const ASTPtr & ast); } diff --git a/src/Parsers/ParserSelectWithUnionQuery.cpp b/src/Parsers/ParserSelectWithUnionQuery.cpp index e046030bc38..532a9e20735 100644 --- a/src/Parsers/ParserSelectWithUnionQuery.cpp +++ b/src/Parsers/ParserSelectWithUnionQuery.cpp @@ -36,7 +36,6 @@ bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & select_with_union_query->list_of_selects = list_node; select_with_union_query->children.push_back(select_with_union_query->list_of_selects); select_with_union_query->list_of_modes = parser.getUnionModes(); - select_with_union_query->setHasQueryParameters(); return true; } From d8fe1d2d6354d4b26e4c99c66d950a3f1b30a3f7 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 21 Oct 2022 14:45:28 +0200 Subject: [PATCH 026/262] Moved is_create_parameterized_view out of Context , and added into ActionsMatcher::Data and SelectQueryInfo - 40907 Parameterized views as table functions --- src/Interpreters/ActionsVisitor.cpp | 6 +++-- src/Interpreters/ActionsVisitor.h | 4 ++- src/Interpreters/Context.h | 5 +--- src/Interpreters/ExpressionAnalyzer.cpp | 26 +++++++++---------- src/Interpreters/ExpressionAnalyzer.h | 2 +- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- .../InterpreterSelectWithUnionQuery.cpp | 10 ++++++- .../InterpreterSelectWithUnionQuery.h | 3 ++- src/Interpreters/SelectQueryOptions.h | 8 ++++++ src/Interpreters/TreeRewriter.cpp | 12 +++++---- src/Interpreters/TreeRewriter.h | 5 ++-- src/Interpreters/executeQuery.cpp | 5 ++-- 13 files changed, 56 insertions(+), 34 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 2a0bf4016e8..4e18740f9b4 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -537,7 +537,8 @@ ActionsMatcher::Data::Data( bool only_consts_, bool create_source_for_in_, AggregationKeysInfo aggregation_keys_info_, - bool build_expression_with_window_functions_) + bool build_expression_with_window_functions_, + bool is_create_parameterized_view_) : WithContext(context_) , 
set_size_limit(set_size_limit_) , subquery_depth(subquery_depth_) @@ -551,6 +552,7 @@ ActionsMatcher::Data::Data( , actions_stack(std::move(actions_dag), context_) , aggregation_keys_info(aggregation_keys_info_) , build_expression_with_window_functions(build_expression_with_window_functions_) + , is_create_parameterized_view(is_create_parameterized_view_) , next_unique_suffix(actions_stack.getLastActions().getOutputs().size() + 1) { } @@ -767,7 +769,7 @@ std::optional ActionsMatcher::getNameAndTypeFromAST(const ASTPt /// so these cannot be evaluated. But if its a parameterized view with sub part ast which does not contain query parameters /// then it can be evaluated /// Eg : CREATE VIEW v1 AS SELECT * FROM t1 WHERE Column1={c1:UInt64} AND Column2=3; - Column2=3 should get NameAndTypePair - if (!data.only_consts && (data.getContext()->isCreateParameterizedView() && analyzeReceiveQueryParams(ast).empty())) + if (!data.only_consts && (data.is_create_parameterized_view && analyzeReceiveQueryParams(ast).empty())) { throw Exception( "Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index a27745d2cfa..97b4440ff88 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -134,6 +134,7 @@ public: ScopeStack actions_stack; AggregationKeysInfo aggregation_keys_info; bool build_expression_with_window_functions; + bool is_create_parameterized_view; /* * Remember the last unique column suffix to avoid quadratic behavior @@ -154,7 +155,8 @@ public: bool only_consts_, bool create_source_for_in_, AggregationKeysInfo aggregation_keys_info_, - bool build_expression_with_window_functions_ = false); + bool build_expression_with_window_functions_ = false, + bool is_create_parameterized_view = false); /// Does result of the calculation already exists in the block. bool hasColumn(const String & column_name) const; diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 70e875cb868..601ba26d51a 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -369,7 +369,7 @@ private: /// Temporary data for query execution accounting. TemporaryDataOnDiskScopePtr temp_data_on_disk; - bool is_create_parameterized_view = false; + public: /// Some counters for current query execution. /// Most of them are workarounds and should be removed in the future. 
@@ -948,9 +948,6 @@ public: bool applyDeletedMask() const { return apply_deleted_mask; } void setApplyDeletedMask(bool apply) { apply_deleted_mask = apply; } - bool isCreateParameterizedView() const { return is_create_parameterized_view; } - void setIsCreateParameterizedView(bool is_create_parameterized_view_) { is_create_parameterized_view = is_create_parameterized_view_; } - ActionLocksManagerPtr getActionLocksManager() const; enum class ApplicationType diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 53c2eb0bbe3..b13bcf6199d 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -537,7 +537,7 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) } -void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts) +void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts, bool is_create_parameterized_view) { LogAST log; ActionsVisitor::Data visitor_data( @@ -551,7 +551,9 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_ false /* no_makeset */, only_consts, !isRemoteStorage() /* create_source_for_in */, - getAggregationKeysInfo()); + getAggregationKeysInfo(), + false /* build_expression_with_window_functions */, + is_create_parameterized_view); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -1284,11 +1286,11 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(columns_after_join); - getRootActions(select_query->where(), only_types, step.actions()); + getRootActions(select_query->where(), only_types, step.actions(), false/*only_consts*/, query_options.is_create_parameterized_view); /// For creating parameterized view, query parameters are allowed in select /// As select will be stored without substituting query parameters, we don't want to evaluate the where expression - if (this->getContext()->isCreateParameterizedView()) + if (query_options.is_create_parameterized_view) return true; auto where_column_name = select_query->where()->getColumnName(); @@ -1824,7 +1826,10 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( chain.finalize(); - finalize(chain, prewhere_step_num, where_step_num, having_step_num, query); + /// For creating parameterized view, query parameters are allowed in select + /// As select will be stored without substituting query parameters, we don't want to evaluate the expressions/steps + if (!query_analyzer.query_options.is_create_parameterized_view) + finalize(chain, prewhere_step_num, where_step_num, having_step_num, query); chain.clear(); }; @@ -1908,9 +1913,9 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( before_where, ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample); - //For creating parameterized view, query parameters are allowed in select - //As select will be stored without substituting query parameters, we don't want to evaluate the where expression - if (!context->isCreateParameterizedView()) + /// For creating parameterized view, query parameters are allowed in select + /// As select will be stored without substituting query parameters, we don't want to evaluate the where expression + if (!query_analyzer.query_options.is_create_parameterized_view) { auto & column_elem = 
before_where_sample.getByName(query.where()->getColumnName()); @@ -2078,11 +2083,6 @@ void ExpressionAnalysisResult::finalize( ssize_t & having_step_num, const ASTSelectQuery & query) { - //For creating parameterized view, query parameters are allowed in select - //As select will be stored without substituting query parameters, we don't want to evaluate the expressions/steps - if (chain.getContext()->isCreateParameterizedView()) - return; - if (prewhere_step_num >= 0) { const ExpressionActionsChain::Step & step = *chain.steps.at(prewhere_step_num); diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index ddb41a00f84..286665bb05f 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -175,7 +175,7 @@ protected: ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool is_left) const; - void getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false); + void getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false, bool is_create_parameterized_view = false); /** Similar to getRootActions but do not make sets when analyzing IN functions. It's used in * analyzeAggregation which happens earlier than analyzing PREWHERE and WHERE. If we did, the diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index e89aa2244fe..a80b5949a79 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -726,7 +726,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti } else if (create.select) { - Block as_select_sample = InterpreterSelectWithUnionQuery::getSampleBlock(create.select->clone(), getContext()); + Block as_select_sample = InterpreterSelectWithUnionQuery::getSampleBlock(create.select->clone(), getContext(), false /* is_subquery */, create.isParameterizedView()); properties.columns = ColumnsDescription(as_select_sample.getNamesAndTypesList()); } else if (create.as_table_function) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index a5883a44201..6fd4e53d0f7 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2014,7 +2014,7 @@ void InterpreterSelectQuery::addPrewhereAliasActions() } auto syntax_result - = TreeRewriter(context).analyze(required_columns_all_expr, required_columns_after_prewhere, storage, storage_snapshot); + = TreeRewriter(context).analyze(required_columns_all_expr, required_columns_after_prewhere, storage, storage_snapshot, options.is_create_parameterized_view); alias_actions = ExpressionAnalyzer(required_columns_all_expr, syntax_result, context).getActionsDAG(true); /// The set of required columns could be added as a result of adding an action to calculate ALIAS. 
diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index a679b17a5bd..23caacfdfbd 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -248,12 +248,14 @@ InterpreterSelectWithUnionQuery::buildCurrentChildInterpreter(const ASTPtr & ast InterpreterSelectWithUnionQuery::~InterpreterSelectWithUnionQuery() = default; -Block InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_, ContextPtr context_, bool is_subquery) +Block InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_, ContextPtr context_, bool is_subquery, bool is_create_parameterized_view) { if (!context_->hasQueryContext()) { if (is_subquery) return InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().subquery().analyze()).getSampleBlock(); + else if (is_create_parameterized_view) + return InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().createParameterizedView().analyze()).getSampleBlock(); else return InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().analyze()).getSampleBlock(); } @@ -271,6 +273,12 @@ Block InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_, return cache[key] = InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().subquery().analyze()).getSampleBlock(); } + else if (is_create_parameterized_view) + { + return cache[key] + = InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().createParameterizedView().analyze()) + .getSampleBlock(); + } else { return cache[key] = InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().analyze()).getSampleBlock(); diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.h b/src/Interpreters/InterpreterSelectWithUnionQuery.h index ff763ec6490..a2040e0d2fc 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.h +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.h @@ -41,7 +41,8 @@ public: static Block getSampleBlock( const ASTPtr & query_ptr_, ContextPtr context_, - bool is_subquery = false); + bool is_subquery = false, + bool is_create_parameterized_view = false); void ignoreWithTotals() override; diff --git a/src/Interpreters/SelectQueryOptions.h b/src/Interpreters/SelectQueryOptions.h index 6b5a6a7f8eb..e6895ed243b 100644 --- a/src/Interpreters/SelectQueryOptions.h +++ b/src/Interpreters/SelectQueryOptions.h @@ -50,6 +50,7 @@ struct SelectQueryOptions bool with_all_cols = false; /// asterisk include materialized and aliased columns bool settings_limit_offset_done = false; bool is_explain = false; /// The value is true if it's explain statement. + bool is_create_parameterized_view = false; /// These two fields are used to evaluate shardNum() and shardCount() function when /// prefer_localhost_replica == 1 and local instance is selected. 
They are needed because local @@ -77,6 +78,13 @@ struct SelectQueryOptions return out; } + SelectQueryOptions createParameterizedView() const + { + SelectQueryOptions out = *this; + out.is_create_parameterized_view = true; + return out; + } + SelectQueryOptions & analyze(bool dry_run = true) { only_analyze = dry_run; diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index c58aa8f87a1..45df40ac6a0 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -1277,7 +1278,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( } } - normalize(query, result.aliases, all_source_columns_set, select_options.ignore_alias, settings, /* allow_self_aliases = */ true, getContext()); + normalize(query, result.aliases, all_source_columns_set, select_options.ignore_alias, settings, /* allow_self_aliases = */ true, getContext(), select_options.is_create_parameterized_view); /// Remove unneeded columns according to 'required_result_columns'. /// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside. @@ -1370,7 +1371,8 @@ TreeRewriterResultPtr TreeRewriter::analyze( const StorageSnapshotPtr & storage_snapshot, bool allow_aggregations, bool allow_self_aliases, - bool execute_scalar_subqueries) const + bool execute_scalar_subqueries, + bool is_create_parameterized_view) const { if (query->as()) throw Exception("Not select analyze for select asts.", ErrorCodes::LOGICAL_ERROR); @@ -1379,7 +1381,7 @@ TreeRewriterResultPtr TreeRewriter::analyze( TreeRewriterResult result(source_columns, storage, storage_snapshot, false); - normalize(query, result.aliases, result.source_columns_set, false, settings, allow_self_aliases, getContext()); + normalize(query, result.aliases, result.source_columns_set, false, settings, allow_self_aliases, getContext(), is_create_parameterized_view); /// Executing scalar subqueries. Column defaults could be a scalar subquery. executeScalarSubqueries(query, getContext(), 0, result.scalars, result.local_scalars, !execute_scalar_subqueries); @@ -1408,7 +1410,7 @@ TreeRewriterResultPtr TreeRewriter::analyze( } void TreeRewriter::normalize( - ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, const Settings & settings, bool allow_self_aliases, ContextPtr context_) + ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, const Settings & settings, bool allow_self_aliases, ContextPtr context_, bool is_create_parameterized_view) { if (!UserDefinedSQLFunctionFactory::instance().empty()) { @@ -1480,7 +1482,7 @@ void TreeRewriter::normalize( FunctionNameNormalizer().visit(query.get()); /// Common subexpression elimination. Rewrite rules. 
- QueryNormalizer::Data normalizer_data(aliases, source_columns_set, ignore_alias, settings, allow_self_aliases, context_->isCreateParameterizedView()); + QueryNormalizer::Data normalizer_data(aliases, source_columns_set, ignore_alias, settings, allow_self_aliases, is_create_parameterized_view); QueryNormalizer(normalizer_data).visit(query); optimizeGroupingSets(query); diff --git a/src/Interpreters/TreeRewriter.h b/src/Interpreters/TreeRewriter.h index 7954547c070..99408ca208b 100644 --- a/src/Interpreters/TreeRewriter.h +++ b/src/Interpreters/TreeRewriter.h @@ -119,7 +119,8 @@ public: const StorageSnapshotPtr & storage_snapshot = {}, bool allow_aggregations = false, bool allow_self_aliases = true, - bool execute_scalar_subqueries = true) const; + bool execute_scalar_subqueries = true, + bool is_create_parameterized_view = false) const; /// Analyze and rewrite select query TreeRewriterResultPtr analyzeSelect( @@ -131,7 +132,7 @@ public: std::shared_ptr table_join = {}) const; private: - static void normalize(ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, const Settings & settings, bool allow_self_aliases, ContextPtr context_); + static void normalize(ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, const Settings & settings, bool allow_self_aliases, ContextPtr context_, bool is_create_parameterized_view = false); }; } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index b73a8769301..466661296be 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -408,11 +408,12 @@ static std::tuple executeQueryImpl( if (const auto * insert_query = ast->as(); insert_query && insert_query->data) query_end = insert_query->data; + bool is_create_parameterized_view = false; if (const auto * create_query = ast->as()) - context->setIsCreateParameterizedView(create_query->isParameterizedView()); + is_create_parameterized_view = create_query->isParameterizedView(); /// Replace ASTQueryParameter with ASTLiteral for prepared statements. 
- if (!context->isCreateParameterizedView() && context->hasQueryParameters()) + if (!is_create_parameterized_view && context->hasQueryParameters()) { ReplaceQueryParameterVisitor visitor(context->getQueryParameters()); visitor.visit(ast); From f4483ed19ebfdf28fa3167c337a0edf962f694dd Mon Sep 17 00:00:00 2001 From: Wangyang Guo Date: Tue, 25 Oct 2022 09:39:56 +0800 Subject: [PATCH 027/262] TreeRewriter: use well defined TablesWithColumns --- src/Interpreters/TreeRewriter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index da12dccd8d8..eb713019306 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1203,7 +1203,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( ASTPtr & query, TreeRewriterResult && result, const SelectQueryOptions & select_options, - const std::vector & tables_with_columns, + const TablesWithColumns & tables_with_columns, const Names & required_result_columns, std::shared_ptr table_join) const { From b08961be8a310cdf0fde253343006dd270411c04 Mon Sep 17 00:00:00 2001 From: Wangyang Guo Date: Tue, 25 Oct 2022 14:44:09 +0800 Subject: [PATCH 028/262] LogicalExpressionsOptimizer: optimze for LowCardinality --- .../LogicalExpressionsOptimizer.cpp | 37 +++++++++++++++++-- .../LogicalExpressionsOptimizer.h | 7 +++- src/Interpreters/TreeRewriter.cpp | 2 +- 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/LogicalExpressionsOptimizer.cpp b/src/Interpreters/LogicalExpressionsOptimizer.cpp index 9e30cac2e19..bd24e13b129 100644 --- a/src/Interpreters/LogicalExpressionsOptimizer.cpp +++ b/src/Interpreters/LogicalExpressionsOptimizer.cpp @@ -1,13 +1,17 @@ #include +#include +#include #include #include #include #include +#include #include #include +#include #include @@ -32,8 +36,9 @@ bool LogicalExpressionsOptimizer::OrWithExpression::operator<(const OrWithExpres return std::tie(this->or_function, this->expression) < std::tie(rhs.or_function, rhs.expression); } -LogicalExpressionsOptimizer::LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, UInt64 optimize_min_equality_disjunction_chain_length) - : select_query(select_query_), settings(optimize_min_equality_disjunction_chain_length) +LogicalExpressionsOptimizer::LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, + const TablesWithColumns & tables_with_columns_, UInt64 optimize_min_equality_disjunction_chain_length) + : select_query(select_query_), tables_with_columns(tables_with_columns_), settings(optimize_min_equality_disjunction_chain_length) { } @@ -196,13 +201,39 @@ inline ASTs & getFunctionOperands(const ASTFunction * or_function) } +bool LogicalExpressionsOptimizer::isLowCardinalityEqualityChain(const std::vector & functions) const +{ + if (functions.size() > 1) + { + /// Check if identifier is LowCardinality type + auto & first_operands = getFunctionOperands(functions[0]); + const auto * identifier = first_operands[0]->as(); + if (identifier) + { + auto pos = IdentifierSemantic::getMembership(*identifier); + if (!pos) + pos = IdentifierSemantic::chooseTableColumnMatch(*identifier, tables_with_columns, true); + if (pos) + { + if (auto data_type_and_name = tables_with_columns[*pos].columns.tryGetByName(identifier->shortName())) + { + if (typeid_cast(data_type_and_name->type.get())) + return true; + } + } + } + } + return false; +} + bool LogicalExpressionsOptimizer::mayOptimizeDisjunctiveEqualityChain(const DisjunctiveEqualityChain & chain) const { 
const auto & equalities = chain.second; const auto & equality_functions = equalities.functions; /// We eliminate too short chains. - if (equality_functions.size() < settings.optimize_min_equality_disjunction_chain_length) + if (equality_functions.size() < settings.optimize_min_equality_disjunction_chain_length && + !isLowCardinalityEqualityChain(equality_functions)) return false; /// We check that the right-hand sides of all equalities have the same type. diff --git a/src/Interpreters/LogicalExpressionsOptimizer.h b/src/Interpreters/LogicalExpressionsOptimizer.h index 4991d31f8b1..a8a0d186394 100644 --- a/src/Interpreters/LogicalExpressionsOptimizer.h +++ b/src/Interpreters/LogicalExpressionsOptimizer.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -36,7 +37,7 @@ class LogicalExpressionsOptimizer final public: /// Constructor. Accepts the root of the query DAG. - LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, UInt64 optimize_min_equality_disjunction_chain_length); + LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, const TablesWithColumns & tables_with_columns_, UInt64 optimize_min_equality_disjunction_chain_length); /** Replace all rather long homogeneous OR-chains expr = x1 OR ... OR expr = xN * on the expressions `expr` IN (x1, ..., xN). @@ -79,6 +80,9 @@ private: */ bool mayOptimizeDisjunctiveEqualityChain(const DisjunctiveEqualityChain & chain) const; + /// Check if is LowCardinality OR chain + bool isLowCardinalityEqualityChain(const std::vector & functions) const; + /// Insert the IN expression into the OR chain. static void addInExpression(const DisjunctiveEqualityChain & chain); @@ -96,6 +100,7 @@ private: using ColumnToPosition = std::unordered_map; ASTSelectQuery * select_query; + const TablesWithColumns & tables_with_columns; const ExtractedSettings settings; /// Information about the OR-chains inside the query. DisjunctiveEqualityChainsMap disjunctive_equality_chains_map; diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index eb713019306..c61ba9c3286 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1246,7 +1246,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns); /// Optimizes logical expressions. - LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform(); + LogicalExpressionsOptimizer(select_query, tables_with_columns, settings.optimize_min_equality_disjunction_chain_length.value).perform(); NameSet all_source_columns_set = source_columns_set; if (table_join) From ad387ece4375516d4cfd9a65ac7a3a59141f22df Mon Sep 17 00:00:00 2001 From: Wangyang Guo Date: Wed, 2 Nov 2022 14:21:16 +0800 Subject: [PATCH 029/262] LogicalExpressionsOptimizer: add comments --- src/Interpreters/LogicalExpressionsOptimizer.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/LogicalExpressionsOptimizer.cpp b/src/Interpreters/LogicalExpressionsOptimizer.cpp index bd24e13b129..cd00d77c031 100644 --- a/src/Interpreters/LogicalExpressionsOptimizer.cpp +++ b/src/Interpreters/LogicalExpressionsOptimizer.cpp @@ -231,7 +231,9 @@ bool LogicalExpressionsOptimizer::mayOptimizeDisjunctiveEqualityChain(const Disj const auto & equalities = chain.second; const auto & equality_functions = equalities.functions; - /// We eliminate too short chains. 
+ /// For LowCardinality column, the dict is usually smaller and the index is relatively large. + /// In most cases, merging OR-chain as IN is better than converting each LowCardinality into full column individually. + /// For non-LowCardinality, we need to eliminate too short chains. if (equality_functions.size() < settings.optimize_min_equality_disjunction_chain_length && !isLowCardinalityEqualityChain(equality_functions)) return false; From 11bc712c8e0c9b8711b71d389d003a7b00bd6e3c Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 7 Nov 2022 07:12:55 +0000 Subject: [PATCH 030/262] add retries on ConnectionError --- .../test_case.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py b/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py index 44df1c369cf..ed4bc74e64f 100644 --- a/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py +++ b/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py @@ -35,12 +35,23 @@ def start_cluster(): cluster.shutdown() +def requests_get(url, attempts=10, sleep=0.5): + attempt = 0 + while True: + attempt += 1 + try: + return requests.get(url) + except requests.exceptions.ConnectionError as e: + if attempt >= attempts: + raise + time.sleep(sleep) + + def test_request_to_node_with_interserver_listen_host(start_cluster): - time.sleep(5) # waiting for interserver listener to start - response_interserver = requests.get( + response_interserver = requests_get( f"http://{INTERSERVER_LISTEN_HOST}:{INTERSERVER_HTTP_PORT}" ) - response_client = requests.get( + response_client = requests_get( f"http://{node_without_interserver_listen_host.ip_address}:8123" ) assert response_interserver.status_code == 200 @@ -49,7 +60,7 @@ def test_request_to_node_with_interserver_listen_host(start_cluster): def test_request_to_node_without_interserver_listen_host(start_cluster): - response = requests.get( + response = requests_get( f"http://{node_without_interserver_listen_host.ip_address}:{INTERSERVER_HTTP_PORT}" ) assert response.status_code == 200 From 3e5f1a9178d023d518fb7b1cd79f52ae1520e280 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 7 Nov 2022 15:22:45 +0100 Subject: [PATCH 031/262] Added is_parameterized_view in ASTFunction and fixed review comments - 40907 Parameterized views as table functions --- src/Interpreters/ActionsVisitor.cpp | 6 ++-- src/Interpreters/Context.cpp | 5 +++- src/Interpreters/QueryNormalizer.cpp | 2 +- src/Parsers/ASTFunction.h | 6 +++- src/Parsers/ASTSelectQuery.cpp | 7 +---- src/Parsers/ASTSelectQuery.h | 7 +++-- src/Parsers/ASTSelectWithUnionQuery.h | 7 ++++- src/Parsers/ASTTablesInSelectQuery.cpp | 10 +------ src/Parsers/ExpressionListParsers.cpp | 10 +------ .../FunctionParameterValuesVisitor.cpp | 30 +++++++++---------- .../02428_parameterized_view.reference | 1 + .../0_stateless/02428_parameterized_view.sql | 3 ++ 12 files changed, 46 insertions(+), 48 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 4e18740f9b4..d3dd89086e5 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -765,9 +765,9 @@ std::optional ActionsMatcher::getNameAndTypeFromAST(const ASTPt if (const auto * node = index.tryGetNode(child_column_name)) return NameAndTypePair(child_column_name, node->result_type); - /// For parameterized view, we allow query parameters in create which will be substituted by 
select queries - /// so these cannot be evaluated. But if its a parameterized view with sub part ast which does not contain query parameters - /// then it can be evaluated + /// For parameterized view, we allow query parameters in CREATE which will be substituted by SELECT queries + /// so these cannot be evaluated at this point. But if it's a parameterized view with sub part ast which does + /// not contain query parameters then it can be evaluated /// Eg : CREATE VIEW v1 AS SELECT * FROM t1 WHERE Column1={c1:UInt64} AND Column2=3; - Column2=3 should get NameAndTypePair if (!data.only_consts && (data.is_create_parameterized_view && analyzeReceiveQueryParams(ast).empty())) { diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 2617eb7ffa3..63e7af896c2 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1232,7 +1232,7 @@ void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) { - const ASTFunction * function = assert_cast(table_expression.get()); + ASTFunction * function = assert_cast(table_expression.get()); String database_name = getCurrentDatabase(); String table_name = function->name; @@ -1252,7 +1252,10 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) if (table) { if (table.get()->isView() && table->as()->isParameterizedView()) + { + function->is_parameterized_view = true; return table; + } } auto hash = table_expression->getTreeHash(); String key = toString(hash.first) + '_' + toString(hash.second); diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index 19112a7c4c0..76c526d7b12 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -128,7 +128,7 @@ void QueryNormalizer::visit(ASTQueryParameter & node, Data & data) /// Eg: CREATE VIEW v1 AS SELECT * FROM t1 WHERE Column1={c1:UInt64} AND Column2={c1:UInt64}; - c1 is used twice auto it_alias = data.query_parameters.find(node.name); if (it_alias != data.query_parameters.end()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Same alias used multiple times {} ", backQuote(node.name)); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Same alias used multiple times {}", backQuote(node.name)); data.query_parameters.insert(node.name); } diff --git a/src/Parsers/ASTFunction.h b/src/Parsers/ASTFunction.h index 4e8874a8839..b658249b42a 100644 --- a/src/Parsers/ASTFunction.h +++ b/src/Parsers/ASTFunction.h @@ -26,6 +26,10 @@ public: bool is_lambda_function = false; + /// This field is updated in executeTableFunction if its a parameterized_view + /// and used in ASTTablesInSelectQuery::FormatImpl for EXPLAIN SYNTAX of SELECT parameterized view + bool is_parameterized_view = false; + // We have to make these fields ASTPtr because this is what the visitors // expect. Some of them take const ASTPtr & (makes no sense), and some // take ASTPtr & and modify it. 
I don't understand how the latter is @@ -55,7 +59,7 @@ public: std::string getWindowDescription() const; - //This is used for parameterized view, to identify if name is 'db.view' + /// This is used for parameterized view, to identify if name is 'db.view' bool has_database_name = false; protected: diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index b5198e3df0d..1f93333628d 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -9,8 +9,6 @@ #include #include -#include - namespace DB { @@ -482,10 +480,7 @@ bool ASTSelectQuery::hasQueryParameters() const { if (!has_query_parameters.has_value()) { - if (analyzeReceiveQueryParams(std::make_shared(*this)).empty()) - has_query_parameters = false; - else - has_query_parameters = true; + has_query_parameters = !analyzeReceiveQueryParams(std::make_shared(*this)).empty(); } return has_query_parameters.value(); diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 5c64534f1c2..881cc954ad9 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -141,8 +141,6 @@ public: void setFinal(); QueryKind getQueryKind() const override { return QueryKind::Select; } - - mutable std::optional has_query_parameters; bool hasQueryParameters() const; protected: @@ -151,6 +149,11 @@ protected: private: std::unordered_map positions; + /// This variable is optional as we want to set it on the first call to hasQueryParameters + /// and return the same variable on future calls to hasQueryParameters + /// its mutable as we set it in const function + mutable std::optional has_query_parameters; + ASTPtr & getExpression(Expression expr); }; diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index 334bb86932f..a775e217308 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -32,9 +32,14 @@ public: /// Consider any mode other than ALL as non-default. 
bool hasNonDefaultUnionMode() const; - mutable std::optional has_query_parameters; bool hasQueryParameters() const; +private: + /// This variable is optional as we want to set it on the first call to hasQueryParameters + /// and return the same variable on future calls to hasQueryParameters + /// its mutable as we set it in const function + mutable std::optional has_query_parameters; + }; } diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index 9e889c796f7..ed70f961c4d 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -113,24 +113,16 @@ void ASTTableExpression::formatImpl(const FormatSettings & settings, FormatState settings.ostr << " "; database_and_table_name->formatImpl(settings, state, frame); } - /// In case of table function view, table_function is preferred over subquery for EXPLAIN SYNTAX - else if (table_function && table_function->as() && table_function->as()->name=="view") + else if (table_function && !table_function->as()->is_parameterized_view) { settings.ostr << " "; table_function->formatImpl(settings, state, frame); } - /// For parameterized view, subquery is preferred over table_function for EXPLAIN SYNTAX - /// we cannot remove the table function part, as its needed for query substitution else if (subquery) { settings.ostr << settings.nl_or_ws << indent_str; subquery->formatImpl(settings, state, frame); } - else if (table_function) - { - settings.ostr << " "; - table_function->formatImpl(settings, state, frame); - } if (final) { diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 3ee95600808..48a310542a4 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -2034,15 +2034,7 @@ std::unique_ptr getFunctionLayer(ASTPtr identifier, bool is_table_functio else if (function_name_lowercase == "grouping") return std::make_unique(function_name_lowercase, allow_function_parameters_); else - { - bool has_database_name = false; - if (const auto *ast_identifier = identifier->as()) - { - if (ast_identifier->compound()) - has_database_name = true; - } - return std::make_unique(function_name, allow_function_parameters_, has_database_name); - } + return std::make_unique(function_name, allow_function_parameters_, identifier->as()->compound()); } diff --git a/src/Parsers/FunctionParameterValuesVisitor.cpp b/src/Parsers/FunctionParameterValuesVisitor.cpp index 93192084187..3cb0fe630f8 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.cpp +++ b/src/Parsers/FunctionParameterValuesVisitor.cpp @@ -38,25 +38,25 @@ private: void visitExpressionList(const ASTExpressionList & expression_list) { - if (expression_list.children.size() == 2) + if (expression_list.children.size() != 2) + return; + + if (const auto * identifier = expression_list.children[0]->as()) { - if (const auto * identifier = expression_list.children[0]->as()) + if (const auto * literal = expression_list.children[1]->as()) { - if (const auto * literal = expression_list.children[1]->as()) + parameter_values[identifier->name()] = convertFieldToString(literal->value); + } + else if (const auto * function = expression_list.children[1]->as()) + { + if (isFunctionCast(function)) { - parameter_values[identifier->name()] = convertFieldToString(literal->value); - } - else if (const auto * function = expression_list.children[1]->as()) - { - if (isFunctionCast(function)) + const auto * cast_expression = assert_cast(function->arguments.get()); + if 
(cast_expression->children.size() != 2) + throw Exception("Function CAST must have exactly two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + if (const auto * cast_literal = cast_expression->children[0]->as()) { - const auto * cast_expression = assert_cast(function->arguments.get()); - if (cast_expression->children.size() != 2) - throw Exception("Function CAST must have exactly two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - if (const auto * cast_literal = cast_expression->children[0]->as()) - { - parameter_values[identifier->name()] = convertFieldToString(cast_literal->value); - } + parameter_values[identifier->name()] = convertFieldToString(cast_literal->value); } } } diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference index 37120d2d1b9..c79c29bbb28 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.reference +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -1,6 +1,7 @@ 20 20 10 +50 SELECT Name, Price, diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index e0a67e7690b..6b08ecb5674 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -19,6 +19,9 @@ set param_price=10; SELECT Price FROM v1; -- { serverError UNKNOWN_QUERY_PARAMETER} SELECT Price FROM v1(price={price:UInt64}); +set param_limit=1; +SELECT Price FROM v1(price=50) LIMIT {limit:UInt64}; + DETACH TABLE v1; ATTACH TABLE v1; From a2b83d359244ec1a9dd9b11ebe7c839c60a139ff Mon Sep 17 00:00:00 2001 From: Wangyang Guo Date: Tue, 8 Nov 2022 15:05:25 +0800 Subject: [PATCH 032/262] add query test: 02477_logical_expressions_optimizer_low_cardinality --- ...cal_expressions_optimizer_low_cardinality.reference | 6 ++++++ ...7_logical_expressions_optimizer_low_cardinality.sql | 10 ++++++++++ 2 files changed, 16 insertions(+) create mode 100644 tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference create mode 100644 tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.sql diff --git a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference new file mode 100644 index 00000000000..dcfcac737c3 --- /dev/null +++ b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference @@ -0,0 +1,6 @@ +SELECT a +FROM t_logical_expressions_optimizer_low_cardinality +WHERE a IN (\'x\', \'y\') +SELECT a +FROM t_logical_expressions_optimizer_low_cardinality +WHERE (b = 0) OR (b = 1) diff --git a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.sql b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.sql new file mode 100644 index 00000000000..be355a05675 --- /dev/null +++ b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS t_logical_expressions_optimizer_low_cardinality; +set optimize_min_equality_disjunction_chain_length=3; +CREATE TABLE t_logical_expressions_optimizer_low_cardinality (a LowCardinality(String), b UInt32) ENGINE = Memory; + +-- LowCardinality case, ignore optimize_min_equality_disjunction_chain_length limit, optimzer applied +EXPLAIN SYNTAX SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE a = 'x' OR a 
= 'y'; +-- Non-LowCardinality case, optimizer not applied for short chains +EXPLAIN SYNTAX SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE b = 0 OR b = 1; + +DROP TABLE t_logical_expressions_optimizer_low_cardinality; From 887779e8d8bfcf528a5c492cf13fadea6ebe0306 Mon Sep 17 00:00:00 2001 From: Wangyang Guo Date: Tue, 8 Nov 2022 17:19:18 +0800 Subject: [PATCH 033/262] Add perftest: low_cardinality_query --- tests/performance/low_cardinality_query.xml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 tests/performance/low_cardinality_query.xml diff --git a/tests/performance/low_cardinality_query.xml b/tests/performance/low_cardinality_query.xml new file mode 100644 index 00000000000..989c674b443 --- /dev/null +++ b/tests/performance/low_cardinality_query.xml @@ -0,0 +1,13 @@ + + DROP TABLE IF EXISTS test_lc_query + + CREATE TABLE test_lc_query (x UInt64, lc LowCardinality(Nullable(String))) ENGINE = MergeTree order by x + + + INSERT INTO test_lc_query SELECT number, toString(number % 100) FROM numbers(1e7) + + SELECT count() FROM test_lc_query WHERE lc = '12' OR lc = '34' + SELECT count() FROM test_lc_query WHERE lc = '12' OR lc = '34' OR lc = '56' + + DROP TABLE IF EXISTS test_lc_query + From 779b9197e331996ea3c46c328226aa6706c4dfbc Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Tue, 8 Nov 2022 11:03:40 +0100 Subject: [PATCH 034/262] Moved is_create_parameterized_view check inside finalize function and refactor check for isParmaeterizedView when replacing subquery - 40907 Parameterized views as table functions --- src/Interpreters/ExpressionAnalyzer.cpp | 15 ++++++++------- src/Interpreters/ExpressionAnalyzer.h | 3 ++- src/Interpreters/InterpreterSelectQuery.cpp | 7 +++++-- src/Storages/StorageView.cpp | 7 ++----- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index b13bcf6199d..2d2cb5fd939 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1826,10 +1826,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( chain.finalize(); - /// For creating parameterized view, query parameters are allowed in select - /// As select will be stored without substituting query parameters, we don't want to evaluate the expressions/steps - if (!query_analyzer.query_options.is_create_parameterized_view) - finalize(chain, prewhere_step_num, where_step_num, having_step_num, query); + finalize(chain, prewhere_step_num, where_step_num, having_step_num, query, query_analyzer.query_options.is_create_parameterized_view); chain.clear(); }; @@ -1915,7 +1912,8 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( /// For creating parameterized view, query parameters are allowed in select /// As select will be stored without substituting query parameters, we don't want to evaluate the where expression - if (!query_analyzer.query_options.is_create_parameterized_view) + const bool can_evaluate_filter_column = !query_analyzer.query_options.is_create_parameterized_view; + if (can_evaluate_filter_column) { auto & column_elem = before_where_sample.getByName(query.where()->getColumnName()); @@ -2081,7 +2079,8 @@ void ExpressionAnalysisResult::finalize( ssize_t & prewhere_step_num, ssize_t & where_step_num, ssize_t & having_step_num, - const ASTSelectQuery & query) + const ASTSelectQuery & query, + bool is_create_parameterized_view) { if (prewhere_step_num >= 0) { @@ -2101,7 +2100,9 @@ void ExpressionAnalysisResult::finalize( 
prewhere_step_num = -1; } - if (where_step_num >= 0) + /// For creating parameterized view, query parameters are allowed in select + /// As select will be stored without substituting query parameters, we don't want to evaluate the expressions/steps + if (where_step_num >= 0 && !is_create_parameterized_view) { where_column_name = query.where()->getColumnName(); remove_where_filter = chain.steps.at(where_step_num)->required_output.find(where_column_name)->second; diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 286665bb05f..4b1e4dd18f5 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -293,7 +293,8 @@ struct ExpressionAnalysisResult ssize_t & prewhere_step_num, ssize_t & where_step_num, ssize_t & having_step_num, - const ASTSelectQuery & query); + const ASTSelectQuery & query, + bool is_create_parameterized_view); }; /// SelectQuery specific ExpressionAnalyzer part. diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 35daf55423a..73059cf0684 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -502,10 +502,13 @@ InterpreterSelectQuery::InterpreterSelectQuery( ASTPtr view_table; if (view) { - NameToNameMap parameter_values = analyzeReceiveFunctionParamValues(query_ptr); query_info.is_parameterized_view = view->isParameterizedView(); + NameToNameMap parameter_values; + if (view->isParameterizedView()) + parameter_values = analyzeReceiveFunctionParamValues(query_ptr); view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot, view->isParameterizedView()); - view->replaceQueryParametersIfParametrizedView(query_ptr, parameter_values); + if (view->isParameterizedView()) + view->replaceQueryParametersIfParametrizedView(query_ptr, parameter_values); } syntax_analyzer_result = TreeRewriter(context).analyzeSelect( diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index bb3e818d63e..8afbe7d461f 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -178,11 +178,8 @@ static ASTTableExpression * getFirstTableExpression(ASTSelectQuery & select_quer void StorageView::replaceQueryParametersIfParametrizedView(ASTPtr & outer_query, const NameToNameMap & parameter_values) const { - if (is_parameterized_view) - { - ReplaceQueryParameterVisitor visitor(parameter_values); - visitor.visit(outer_query); - } + ReplaceQueryParameterVisitor visitor(parameter_values); + visitor.visit(outer_query); } void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name, bool parameterized_view) From 2ef30d434302ba4679d1983cec930f0c215c3d1b Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 11 Nov 2022 19:39:18 +0100 Subject: [PATCH 035/262] Fixed clang-tidy build issue by making replaceQueryParametersIfParametrizedView static - 40907 Parameterized views as table functions --- src/Storages/StorageView.cpp | 2 +- src/Storages/StorageView.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 8afbe7d461f..8a2787625fb 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -176,7 +176,7 @@ static ASTTableExpression * getFirstTableExpression(ASTSelectQuery & select_quer return select_element->table_expression->as(); } -void StorageView::replaceQueryParametersIfParametrizedView(ASTPtr & outer_query, const 
NameToNameMap & parameter_values) const +void StorageView::replaceQueryParametersIfParametrizedView(ASTPtr & outer_query, const NameToNameMap & parameter_values) { ReplaceQueryParameterVisitor visitor(parameter_values); visitor.visit(outer_query); diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 0db4295d90c..cd88005a207 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -35,7 +35,7 @@ public: size_t max_block_size, size_t num_streams) override; - void replaceQueryParametersIfParametrizedView(ASTPtr & outer_query, const NameToNameMap & parameter_values) const; + static void replaceQueryParametersIfParametrizedView(ASTPtr & outer_query, const NameToNameMap & parameter_values); static void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot, const bool parameterized_view) { From 95abe5d0274694d2627e8af39758cfba6a8655c8 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 16 Nov 2022 12:27:08 +0100 Subject: [PATCH 036/262] Updated to support query parameters in HAVING clause and addressed review comments - 40907 Parameterized views as table functions --- src/Interpreters/Context.cpp | 10 +++++++--- src/Interpreters/Context.h | 2 -- src/Interpreters/ExpressionAnalyzer.cpp | 9 +++++++-- src/Interpreters/InterpreterSelectQuery.cpp | 8 +++++--- src/Parsers/ASTFunction.h | 4 ++-- src/Parsers/ASTTablesInSelectQuery.cpp | 2 +- src/Parsers/ExpressionListParsers.cpp | 8 ++++---- src/Parsers/FunctionParameterValuesVisitor.cpp | 2 +- src/Parsers/FunctionParameterValuesVisitor.h | 2 +- .../0_stateless/02428_parameterized_view.reference | 1 + tests/queries/0_stateless/02428_parameterized_view.sql | 5 +++++ 11 files changed, 34 insertions(+), 19 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index e0fa4e13abb..c0537a252d8 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -147,6 +147,7 @@ namespace ErrorCodes extern const int INVALID_SETTING_VALUE; extern const int UNKNOWN_READ_METHOD; extern const int NOT_IMPLEMENTED; + extern const int UNKNOWN_FUNCTION; } @@ -1238,7 +1239,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) String database_name = getCurrentDatabase(); String table_name = function->name; - if (function->has_database_name) + if (function->is_compound_name) { std::vector parts; splitInto<'.'>(parts, function->name); @@ -1255,7 +1256,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) { if (table.get()->isView() && table->as()->isParameterizedView()) { - function->is_parameterized_view = true; + function->prefer_subquery_to_function_formatting = true; return table; } } @@ -1271,7 +1272,10 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) } catch (Exception & e) { - e.addMessage(" or incorrect parameterized view"); + if (e.code() == ErrorCodes::UNKNOWN_FUNCTION) + { + e.addMessage(" or incorrect parameterized view"); + } throw; } if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 1ce2bd914f6..0eaec215588 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -371,8 +371,6 @@ private: /// Temporary data for query execution accounting. TemporaryDataOnDiskScopePtr temp_data_on_disk; - - public: /// Some counters for current query execution. 
/// Most of them are workarounds and should be removed in the future. diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index eb7d799ec0a..d096a0701d6 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -72,6 +72,7 @@ #include #include #include +#include namespace DB { @@ -1485,6 +1486,10 @@ bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns); getRootActionsForHaving(select_query->having(), only_types, step.actions()); + + if (query_options.is_create_parameterized_view && !analyzeReceiveQueryParams(select_query->having()).empty()) + return true; + step.addRequiredOutput(select_query->having()->getColumnName()); return true; @@ -2095,14 +2100,14 @@ void ExpressionAnalysisResult::finalize( /// For creating parameterized view, query parameters are allowed in select /// As select will be stored without substituting query parameters, we don't want to evaluate the expressions/steps - if (where_step_num >= 0 && !is_create_parameterized_view) + if (where_step_num >= 0 && !(is_create_parameterized_view && !analyzeReceiveQueryParams(query.where()).empty())) { where_column_name = query.where()->getColumnName(); remove_where_filter = chain.steps.at(where_step_num)->required_output.find(where_column_name)->second; where_step_num = -1; } - if (having_step_num >= 0) + if (having_step_num >= 0 && !(is_create_parameterized_view && !analyzeReceiveQueryParams(query.having()).empty())) { having_column_name = query.having()->getColumnName(); remove_having_filter = chain.steps.at(having_step_num)->required_output.find(having_column_name)->second; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 688d82ae8a4..47b7ba7602a 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -506,11 +506,13 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (view) { query_info.is_parameterized_view = view->isParameterizedView(); + /// We need to fetch the parameters set for SELECT parameterized view before the query is replaced. + /// ad after query is replaced, we use these parameters to substitute in the parameterized view query NameToNameMap parameter_values; - if (view->isParameterizedView()) - parameter_values = analyzeReceiveFunctionParamValues(query_ptr); + if (query_info.is_parameterized_view ) + parameter_values = analyzeFunctionParamValues(query_ptr); view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot, view->isParameterizedView()); - if (view->isParameterizedView()) + if (query_info.is_parameterized_view ) view->replaceQueryParametersIfParametrizedView(query_ptr, parameter_values); } diff --git a/src/Parsers/ASTFunction.h b/src/Parsers/ASTFunction.h index b658249b42a..ca182a4fb11 100644 --- a/src/Parsers/ASTFunction.h +++ b/src/Parsers/ASTFunction.h @@ -28,7 +28,7 @@ public: /// This field is updated in executeTableFunction if its a parameterized_view /// and used in ASTTablesInSelectQuery::FormatImpl for EXPLAIN SYNTAX of SELECT parameterized view - bool is_parameterized_view = false; + bool prefer_subquery_to_function_formatting = false; // We have to make these fields ASTPtr because this is what the visitors // expect. 
Some of them take const ASTPtr & (makes no sense), and some @@ -60,7 +60,7 @@ public: std::string getWindowDescription() const; /// This is used for parameterized view, to identify if name is 'db.view' - bool has_database_name = false; + bool is_compound_name = false; protected: void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index ed70f961c4d..75c0ef26c07 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -113,7 +113,7 @@ void ASTTableExpression::formatImpl(const FormatSettings & settings, FormatState settings.ostr << " "; database_and_table_name->formatImpl(settings, state, frame); } - else if (table_function && !table_function->as()->is_parameterized_view) + else if (table_function && !(table_function->as()->prefer_subquery_to_function_formatting && subquery)) { settings.ostr << " "; table_function->formatImpl(settings, state, frame); diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 7b2ece9c0c9..ee212a1993b 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -830,8 +830,8 @@ public: class FunctionLayer : public Layer { public: - explicit FunctionLayer(String function_name_, bool allow_function_parameters_ = true, bool has_database_name_ = false) - : function_name(function_name_), allow_function_parameters(allow_function_parameters_), has_database_name(has_database_name_){} + explicit FunctionLayer(String function_name_, bool allow_function_parameters_ = true, bool is_compound_name_ = false) + : function_name(function_name_), allow_function_parameters(allow_function_parameters_), is_compound_name(is_compound_name_){} bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { @@ -972,7 +972,7 @@ public: function_name += "Distinct"; auto function_node = makeASTFunction(function_name, std::move(elements)); - function_node->has_database_name = has_database_name; + function_node->is_compound_name = is_compound_name; if (parameters) { @@ -1027,7 +1027,7 @@ private: ASTPtr parameters; bool allow_function_parameters; - bool has_database_name; + bool is_compound_name; }; /// Layer for priority brackets and tuple function diff --git a/src/Parsers/FunctionParameterValuesVisitor.cpp b/src/Parsers/FunctionParameterValuesVisitor.cpp index 3cb0fe630f8..cb187b2a56a 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.cpp +++ b/src/Parsers/FunctionParameterValuesVisitor.cpp @@ -64,7 +64,7 @@ private: } }; -NameToNameMap analyzeReceiveFunctionParamValues(const ASTPtr & ast) +NameToNameMap analyzeFunctionParamValues(const ASTPtr & ast) { NameToNameMap parameter_values; FunctionParameterValuesVisitor(parameter_values).visit(ast); diff --git a/src/Parsers/FunctionParameterValuesVisitor.h b/src/Parsers/FunctionParameterValuesVisitor.h index 7fd594ddc84..f87257fc979 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.h +++ b/src/Parsers/FunctionParameterValuesVisitor.h @@ -9,6 +9,6 @@ namespace DB { /// Find parameters in a query parameter values and collect them into map. 
-NameToNameMap analyzeReceiveFunctionParamValues(const ASTPtr & ast); +NameToNameMap analyzeFunctionParamValues(const ASTPtr & ast); } diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference index c79c29bbb28..9ea5e464b8e 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.reference +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -15,3 +15,4 @@ FROM 50 10 20 +30 diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index 6b08ecb5674..1ac35f42d4c 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -55,6 +55,11 @@ CREATE VIEW system.v1 AS SELECT * FROM system.Catalog WHERE Price={price:UInt64} SELECT Price FROM system.v1(price=20); SELECT Price FROM `system.v1`(price=20); -- { serverError UNKNOWN_FUNCTION } +INSERT INTO Catalog VALUES ('Book2', 30, 8); +INSERT INTO Catalog VALUES ('Book3', 30, 8); + +CREATE VIEW v5 AS SELECT Price FROM Catalog WHERE {price:UInt64} HAVING Quantity in (SELECT {quantity:UInt64}) LIMIT {limit:UInt64}; +SELECT Price FROM v5(price=30, quantity=8,limit=1); DROP TABLE v1; DROP TABLE v2; From 6d35bc06c54d855367943ed37b52af0f2f42fb27 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 16 Nov 2022 13:01:02 +0100 Subject: [PATCH 037/262] Fixed style check fail in InterpreterSelectQuery.cpp - 40907 Parameterized views as table functions --- src/Interpreters/InterpreterSelectQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 47b7ba7602a..060d07a9763 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -509,10 +509,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// We need to fetch the parameters set for SELECT parameterized view before the query is replaced. 
/// ad after query is replaced, we use these parameters to substitute in the parameterized view query NameToNameMap parameter_values; - if (query_info.is_parameterized_view ) + if (query_info.is_parameterized_view) parameter_values = analyzeFunctionParamValues(query_ptr); view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot, view->isParameterizedView()); - if (query_info.is_parameterized_view ) + if (query_info.is_parameterized_view) view->replaceQueryParametersIfParametrizedView(query_ptr, parameter_values); } From 93eaf553f40e0c87cbc205565efc1f7496de14c4 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 16 Nov 2022 15:33:42 +0100 Subject: [PATCH 038/262] Fixed parameterized_view test - 40907 Parameterized views as table functions --- tests/queries/0_stateless/02428_parameterized_view.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index 1ac35f42d4c..934ddd18d49 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -15,12 +15,12 @@ CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price={price:UInt64}; SELECT Price FROM v1(price=20); SELECT Price FROM `v1`(price=20); -set param_price=10; +set param_p=10; SELECT Price FROM v1; -- { serverError UNKNOWN_QUERY_PARAMETER} -SELECT Price FROM v1(price={price:UInt64}); +SELECT Price FROM v1(price={p:UInt64}); -set param_limit=1; -SELECT Price FROM v1(price=50) LIMIT {limit:UInt64}; +set param_l=1; +SELECT Price FROM v1(price=50) LIMIT ({l:UInt64}); DETACH TABLE v1; ATTACH TABLE v1; From 40adaeef92eaa9519f0d125f2cc1d070488d8238 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 18 Nov 2022 16:15:37 +0000 Subject: [PATCH 039/262] Fix aggregate functions optimization --- ...egateFunctionsArithmericOperationsPass.cpp | 12 +-- ...optimize_aggregation_arithmetics.reference | 81 +++++++++++++++++++ ...02481_optimize_aggregation_arithmetics.sql | 26 ++++++ 3 files changed, 114 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/02481_optimize_aggregation_arithmetics.reference create mode 100644 tests/queries/0_stateless/02481_optimize_aggregation_arithmetics.sql diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index dcf386b2988..7420321d3ee 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -116,8 +116,9 @@ public: if (!function_name_if_constant_is_negative.empty() && left_argument_constant_value_literal < zeroField(left_argument_constant_value_literal)) { - resolveAggregateFunctionNode(*aggregate_function_node, function_name_if_constant_is_negative); + lower_function_name = function_name_if_constant_is_negative; } + resolveAggregateFunctionNode(*aggregate_function_node, inner_function_arguments_nodes[1], lower_function_name); auto inner_function = aggregate_function_arguments_nodes[0]; auto inner_function_right_argument = std::move(inner_function_arguments_nodes[1]); @@ -132,8 +133,9 @@ public: if (!function_name_if_constant_is_negative.empty() && right_argument_constant_value_literal < zeroField(right_argument_constant_value_literal)) { - resolveAggregateFunctionNode(*aggregate_function_node, function_name_if_constant_is_negative); + lower_function_name = 
function_name_if_constant_is_negative; } + resolveAggregateFunctionNode(*aggregate_function_node, inner_function_arguments_nodes[0], function_name_if_constant_is_negative); auto inner_function = aggregate_function_arguments_nodes[0]; auto inner_function_left_argument = std::move(inner_function_arguments_nodes[0]); @@ -144,16 +146,16 @@ public: } private: - static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const String & aggregate_function_name) + static inline void resolveAggregateFunctionNode(FunctionNode & function_node, QueryTreeNodePtr & argument, const String & aggregate_function_name) { - auto function_result_type = function_node.getResultType(); auto function_aggregate_function = function_node.getAggregateFunction(); AggregateFunctionProperties properties; auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, - function_aggregate_function->getArgumentTypes(), + { argument->getResultType() }, function_aggregate_function->getParameters(), properties); + auto function_result_type = aggregate_function->getReturnType(); function_node.resolveAsAggregateFunction(std::move(aggregate_function), std::move(function_result_type)); } diff --git a/tests/queries/0_stateless/02481_optimize_aggregation_arithmetics.reference b/tests/queries/0_stateless/02481_optimize_aggregation_arithmetics.reference new file mode 100644 index 00000000000..1cc6fc5d4b1 --- /dev/null +++ b/tests/queries/0_stateless/02481_optimize_aggregation_arithmetics.reference @@ -0,0 +1,81 @@ +-- { echoOn } +EXPLAIN actions=1 + ( + SELECT round(avg(log(2) * number), 6) AS k + FROM numbers(10000000) + GROUP BY number % 3, number % 2 + ) +SETTINGS allow_experimental_analyzer=1; +Expression ((Project names + Projection)) +Actions: INPUT : 0 -> avg(number_0) Float64 : 0 + COLUMN Const(Float64) -> 0.6931471805599453_Float64 Float64 : 1 + COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 2 + FUNCTION multiply(0.6931471805599453_Float64 :: 1, avg(number_0) :: 0) -> multiply(0.6931471805599453_Float64, avg(number_0)) Float64 : 3 + FUNCTION round(multiply(0.6931471805599453_Float64, avg(number_0)) :: 3, 6_UInt8 :: 2) -> round(multiply(0.6931471805599453_Float64, avg(number_0)), 6_UInt8) Float64 : 0 + ALIAS round(multiply(0.6931471805599453_Float64, avg(number_0)), 6_UInt8) :: 0 -> k Float64 : 2 +Positions: 2 + Aggregating + Keys: modulo(number_0, 3_UInt8), modulo(number_0, 2_UInt8) + Aggregates: + avg(number_0) + Function: avg(UInt64) → Float64 + Arguments: number_0 + Expression ((Before GROUP BY + Change column names to column identifiers)) + Actions: INPUT : 0 -> number UInt64 : 0 + COLUMN Const(UInt8) -> 3_UInt8 UInt8 : 1 + COLUMN Const(UInt8) -> 2_UInt8 UInt8 : 2 + ALIAS number :: 0 -> number_0 UInt64 : 3 + FUNCTION modulo(number_0 : 3, 3_UInt8 :: 1) -> modulo(number_0, 3_UInt8) UInt8 : 0 + FUNCTION modulo(number_0 : 3, 2_UInt8 :: 2) -> modulo(number_0, 2_UInt8) UInt8 : 1 + Positions: 0 1 3 + ReadFromStorage (SystemNumbers) +EXPLAIN actions=1 + ( + SELECT round(log(2) * avg(number), 6) AS k + FROM numbers(10000000) + GROUP BY number % 3, number % 2 + ) +SETTINGS allow_experimental_analyzer=1; +Expression ((Project names + Projection)) +Actions: INPUT : 0 -> avg(number_0) Float64 : 0 + COLUMN Const(Float64) -> 0.6931471805599453_Float64 Float64 : 1 + COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 2 + FUNCTION multiply(0.6931471805599453_Float64 :: 1, avg(number_0) :: 0) -> multiply(0.6931471805599453_Float64, avg(number_0)) Float64 : 3 + FUNCTION round(multiply(0.6931471805599453_Float64, 
avg(number_0)) :: 3, 6_UInt8 :: 2) -> round(multiply(0.6931471805599453_Float64, avg(number_0)), 6_UInt8) Float64 : 0 + ALIAS round(multiply(0.6931471805599453_Float64, avg(number_0)), 6_UInt8) :: 0 -> k Float64 : 2 +Positions: 2 + Aggregating + Keys: modulo(number_0, 3_UInt8), modulo(number_0, 2_UInt8) + Aggregates: + avg(number_0) + Function: avg(UInt64) → Float64 + Arguments: number_0 + Expression ((Before GROUP BY + Change column names to column identifiers)) + Actions: INPUT : 0 -> number UInt64 : 0 + COLUMN Const(UInt8) -> 3_UInt8 UInt8 : 1 + COLUMN Const(UInt8) -> 2_UInt8 UInt8 : 2 + ALIAS number :: 0 -> number_0 UInt64 : 3 + FUNCTION modulo(number_0 : 3, 3_UInt8 :: 1) -> modulo(number_0, 3_UInt8) UInt8 : 0 + FUNCTION modulo(number_0 : 3, 2_UInt8 :: 2) -> modulo(number_0, 2_UInt8) UInt8 : 1 + Positions: 0 1 3 + ReadFromStorage (SystemNumbers) +SELECT round(avg(log(2) * number), 6) AS k +FROM numbers(10000000) +GROUP BY number % 3, number % 2 +SETTINGS allow_experimental_analyzer=1; +3465734.516505 +3465735.209653 +3465735.9028 +3465736.595947 +3465735.209653 +3465735.9028 +SELECT round(log(2) * avg(number), 6) AS k +FROM numbers(10000000) +GROUP BY number % 3, number % 2 +SETTINGS allow_experimental_analyzer=0; +3465734.516505 +3465735.209653 +3465735.9028 +3465736.595947 +3465735.209653 +3465735.9028 diff --git a/tests/queries/0_stateless/02481_optimize_aggregation_arithmetics.sql b/tests/queries/0_stateless/02481_optimize_aggregation_arithmetics.sql new file mode 100644 index 00000000000..5fec5a79813 --- /dev/null +++ b/tests/queries/0_stateless/02481_optimize_aggregation_arithmetics.sql @@ -0,0 +1,26 @@ +-- { echoOn } +EXPLAIN actions=1 + ( + SELECT round(avg(log(2) * number), 6) AS k + FROM numbers(10000000) + GROUP BY number % 3, number % 2 + ) +SETTINGS allow_experimental_analyzer=1; + +EXPLAIN actions=1 + ( + SELECT round(log(2) * avg(number), 6) AS k + FROM numbers(10000000) + GROUP BY number % 3, number % 2 + ) +SETTINGS allow_experimental_analyzer=1; + +SELECT round(avg(log(2) * number), 6) AS k +FROM numbers(10000000) +GROUP BY number % 3, number % 2 +SETTINGS allow_experimental_analyzer=1; + +SELECT round(log(2) * avg(number), 6) AS k +FROM numbers(10000000) +GROUP BY number % 3, number % 2 +SETTINGS allow_experimental_analyzer=0; From 0545e241512791b8d4dc218cf4ff6dc8883acf52 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 28 Nov 2022 19:05:01 +0100 Subject: [PATCH 040/262] Updated to support query parameters in different parts of query (SELECT, ORDERBY, GROUP BY), updated the approach in WHERE & HAVING, added tests for the same - 40907 Parameterized views as table functions --- src/Interpreters/ActionsVisitor.cpp | 24 +++--- src/Interpreters/ActionsVisitor.h | 4 +- src/Interpreters/ExpressionAnalyzer.cpp | 44 ++++------- src/Interpreters/ExpressionAnalyzer.h | 5 +- src/Interpreters/InterpreterSelectQuery.cpp | 19 ++++- src/Interpreters/QueryNormalizer.cpp | 15 +--- src/Interpreters/QueryNormalizer.h | 1 - .../TranslateQualifiedNamesVisitor.cpp | 15 +++- .../TranslateQualifiedNamesVisitor.h | 4 +- src/Interpreters/TreeRewriter.cpp | 30 +++++++- src/Interpreters/TreeRewriter.h | 5 +- src/Parsers/ASTFunction.cpp | 2 + src/Storages/StorageSnapshot.cpp | 23 ++++-- src/Storages/StorageSnapshot.h | 2 +- src/Storages/StorageView.cpp | 4 +- src/Storages/StorageView.h | 8 +- .../02428_parameterized_view.reference | 6 +- .../0_stateless/02428_parameterized_view.sql | 74 +++++++++++-------- 18 files changed, 170 insertions(+), 115 deletions(-) diff --git 
a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index d3dd89086e5..7e4fa5d168a 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -537,8 +538,7 @@ ActionsMatcher::Data::Data( bool only_consts_, bool create_source_for_in_, AggregationKeysInfo aggregation_keys_info_, - bool build_expression_with_window_functions_, - bool is_create_parameterized_view_) + bool build_expression_with_window_functions_) : WithContext(context_) , set_size_limit(set_size_limit_) , subquery_depth(subquery_depth_) @@ -552,7 +552,6 @@ ActionsMatcher::Data::Data( , actions_stack(std::move(actions_dag), context_) , aggregation_keys_info(aggregation_keys_info_) , build_expression_with_window_functions(build_expression_with_window_functions_) - , is_create_parameterized_view(is_create_parameterized_view_) , next_unique_suffix(actions_stack.getLastActions().getOutputs().size() + 1) { } @@ -765,16 +764,11 @@ std::optional ActionsMatcher::getNameAndTypeFromAST(const ASTPt if (const auto * node = index.tryGetNode(child_column_name)) return NameAndTypePair(child_column_name, node->result_type); - /// For parameterized view, we allow query parameters in CREATE which will be substituted by SELECT queries - /// so these cannot be evaluated at this point. But if it's a parameterized view with sub part ast which does - /// not contain query parameters then it can be evaluated - /// Eg : CREATE VIEW v1 AS SELECT * FROM t1 WHERE Column1={c1:UInt64} AND Column2=3; - Column2=3 should get NameAndTypePair - if (!data.only_consts && (data.is_create_parameterized_view && analyzeReceiveQueryParams(ast).empty())) - { + if (!data.only_consts) throw Exception( "Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), ErrorCodes::UNKNOWN_IDENTIFIER); - } + return {}; } @@ -1130,6 +1124,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & const auto * function = child->as(); const auto * identifier = child->as(); + const auto * query_parameter = child->as(); if (function && function->name == "lambda") { /// If the argument is a lambda expression, just remember its approximate type. @@ -1210,6 +1205,15 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & argument_types.push_back(column.type); argument_names.push_back(column.name); } + else if (query_parameter) + { + const auto data_type = DataTypeFactory::instance().get(query_parameter->type); + ColumnWithTypeAndName column(data_type,query_parameter->getColumnName()); + data.addColumn(column); + + argument_types.push_back(data_type); + argument_names.push_back(query_parameter->name); + } else { /// If the argument is not a lambda expression, call it recursively and find out its type. 
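Note on the ActionsVisitor hunk above: the new `ASTQueryParameter` branch lets the analyzer treat a query parameter that appears as a function argument as a column of its declared type, so the CREATE of a parameterized view is no longer limited to a bare `WHERE col = {p:Type}` comparison. A minimal SQL sketch of the kind of definition this commit is aiming at (view, column and parameter names here are illustrative, not taken from the patch):

```sql
CREATE TABLE Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory;

-- {base:UInt64} appears as an argument of the function plus(), which is exactly the
-- case the ASTQueryParameter branch above handles while building actions at CREATE time:
-- only the declared type is known, not the value.
CREATE VIEW pv_offset AS
    SELECT Name, Price
    FROM Catalog
    WHERE Price >= {base:UInt64} + 10;

SELECT Price FROM pv_offset(base=10);
```
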
diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index d02cc3e9f6d..fea013fd075 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -134,7 +134,6 @@ public: ScopeStack actions_stack; AggregationKeysInfo aggregation_keys_info; bool build_expression_with_window_functions; - bool is_create_parameterized_view; /* * Remember the last unique column suffix to avoid quadratic behavior @@ -155,8 +154,7 @@ public: bool only_consts_, bool create_source_for_in_, AggregationKeysInfo aggregation_keys_info_, - bool build_expression_with_window_functions_ = false, - bool is_create_parameterized_view = false); + bool build_expression_with_window_functions_ = false); /// Does result of the calculation already exists in the block. bool hasColumn(const String & column_name) const; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index d096a0701d6..808637d3171 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -538,7 +538,7 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) } -void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts, bool is_create_parameterized_view) +void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts) { LogAST log; ActionsVisitor::Data visitor_data( @@ -553,8 +553,7 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_ only_consts, !isRemoteStorage() /* create_source_for_in */, getAggregationKeysInfo(), - false /* build_expression_with_window_functions */, - is_create_parameterized_view); + false /* build_expression_with_window_functions */); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -1287,12 +1286,7 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(columns_after_join); - getRootActions(select_query->where(), only_types, step.actions(), false/*only_consts*/, query_options.is_create_parameterized_view); - - /// For creating parameterized view, query parameters are allowed in select - /// As select will be stored without substituting query parameters, we don't want to evaluate the where expression - if (query_options.is_create_parameterized_view) - return true; + getRootActions(select_query->where(), only_types, step.actions(), false/*only_consts*/); auto where_column_name = select_query->where()->getColumnName(); step.addRequiredOutput(where_column_name); @@ -1487,9 +1481,6 @@ bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, getRootActionsForHaving(select_query->having(), only_types, step.actions()); - if (query_options.is_create_parameterized_view && !analyzeReceiveQueryParams(select_query->having()).empty()) - return true; - step.addRequiredOutput(select_query->having()->getColumnName()); return true; @@ -1501,7 +1492,7 @@ void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns); - getRootActions(select_query->select(), only_types, step.actions()); + getRootActions(select_query->select(), only_types, step.actions(), false /*only_consts*/); for (const auto & child : select_query->select()->children) appendSelectSkipWindowExpressions(step, child); @@ 
-1831,7 +1822,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( chain.finalize(); - finalize(chain, prewhere_step_num, where_step_num, having_step_num, query, query_analyzer.query_options.is_create_parameterized_view); + finalize(chain, prewhere_step_num, where_step_num, having_step_num, query); chain.clear(); }; @@ -1915,17 +1906,11 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( before_where, ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample); - /// For creating parameterized view, query parameters are allowed in select - /// As select will be stored without substituting query parameters, we don't want to evaluate the where expression - const bool can_evaluate_filter_column = !query_analyzer.query_options.is_create_parameterized_view; - if (can_evaluate_filter_column) - { - auto & column_elem - = before_where_sample.getByName(query.where()->getColumnName()); - /// If the filter column is a constant and not a query parameter, record it. - if (column_elem.column) - where_constant_filter_description = ConstantFilterDescription(*column_elem.column); - } + auto & column_elem + = before_where_sample.getByName(query.where()->getColumnName()); + /// If the filter column is a constant and not a query parameter, record it. + if (column_elem.column) + where_constant_filter_description = ConstantFilterDescription(*column_elem.column); } } chain.addStep(); @@ -2077,8 +2062,7 @@ void ExpressionAnalysisResult::finalize( ssize_t & prewhere_step_num, ssize_t & where_step_num, ssize_t & having_step_num, - const ASTSelectQuery & query, - bool is_create_parameterized_view) + const ASTSelectQuery & query) { if (prewhere_step_num >= 0) { @@ -2098,16 +2082,14 @@ void ExpressionAnalysisResult::finalize( prewhere_step_num = -1; } - /// For creating parameterized view, query parameters are allowed in select - /// As select will be stored without substituting query parameters, we don't want to evaluate the expressions/steps - if (where_step_num >= 0 && !(is_create_parameterized_view && !analyzeReceiveQueryParams(query.where()).empty())) + if (where_step_num >= 0) { where_column_name = query.where()->getColumnName(); remove_where_filter = chain.steps.at(where_step_num)->required_output.find(where_column_name)->second; where_step_num = -1; } - if (having_step_num >= 0 && !(is_create_parameterized_view && !analyzeReceiveQueryParams(query.having()).empty())) + if (having_step_num >= 0) { having_column_name = query.having()->getColumnName(); remove_having_filter = chain.steps.at(having_step_num)->required_output.find(having_column_name)->second; diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 4b1e4dd18f5..ddb41a00f84 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -175,7 +175,7 @@ protected: ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool is_left) const; - void getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false, bool is_create_parameterized_view = false); + void getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false); /** Similar to getRootActions but do not make sets when analyzing IN functions. It's used in * analyzeAggregation which happens earlier than analyzing PREWHERE and WHERE. 
If we did, the @@ -293,8 +293,7 @@ struct ExpressionAnalysisResult ssize_t & prewhere_step_num, ssize_t & where_step_num, ssize_t & having_step_num, - const ASTSelectQuery & query, - bool is_create_parameterized_view); + const ASTSelectQuery & query); }; /// SelectQuery specific ExpressionAnalyzer part. diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 060d07a9763..56a7e3d6996 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -503,23 +503,34 @@ InterpreterSelectQuery::InterpreterSelectQuery( { /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it. ASTPtr view_table; + NameToNameMap parameter_values; if (view) { query_info.is_parameterized_view = view->isParameterizedView(); /// We need to fetch the parameters set for SELECT parameterized view before the query is replaced. /// ad after query is replaced, we use these parameters to substitute in the parameterized view query - NameToNameMap parameter_values; if (query_info.is_parameterized_view) + { parameter_values = analyzeFunctionParamValues(query_ptr); + view->setParameterValues(parameter_values); + } view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot, view->isParameterizedView()); if (query_info.is_parameterized_view) - view->replaceQueryParametersIfParametrizedView(query_ptr, parameter_values); + { + view->replaceQueryParametersIfParametrizedView(query_ptr); + } + } syntax_analyzer_result = TreeRewriter(context).analyzeSelect( query_ptr, TreeRewriterResult(source_header.getNamesAndTypesList(), storage, storage_snapshot), - options, joined_tables.tablesWithColumns(), required_result_column_names, table_join); + options, + joined_tables.tablesWithColumns(), + required_result_column_names, + table_join, + query_info.is_parameterized_view, + parameter_values); query_info.syntax_analyzer_result = syntax_analyzer_result; context->setDistributed(syntax_analyzer_result->is_remote_storage); @@ -646,7 +657,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( query_info.filter_asts.push_back(query_info.additional_filter_ast); } - source_header = storage_snapshot->getSampleBlockForColumns(required_columns); + source_header = storage_snapshot->getSampleBlockForColumns(required_columns, parameter_values); } /// Calculate structure of the result. 
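Note on the InterpreterSelectQuery hunk above: the name-to-value map is collected from the table-function-style call via `analyzeFunctionParamValues` and handed to the `StorageView` before `replaceWithSubquery`, and only then are the values substituted into the outer query; the same map is later passed to `getSampleBlockForColumns`. In SQL terms, following the shape of the EXPLAIN SYNTAX reference in the test, the rewrite is roughly (exact formatting may differ):

```sql
-- What the user writes: the parameterized view is called like a table function.
SELECT Price FROM pv1(price=10);

-- Roughly what the query looks like after the view is replaced with a subquery and
-- the collected parameter value is substituted as a typed constant.
SELECT Price FROM
(
    SELECT * FROM Catalog WHERE Price = _CAST(10, 'UInt64')
) AS pv1;
```
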
diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index edb90187941..921d004af94 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -130,17 +130,6 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) } } -void QueryNormalizer::visit(ASTQueryParameter & node, Data & data) -{ - /// This is used only for create parameterized view to check if same parameter name is used twice - /// Eg: CREATE VIEW v1 AS SELECT * FROM t1 WHERE Column1={c1:UInt64} AND Column2={c1:UInt64}; - c1 is used twice - auto it_alias = data.query_parameters.find(node.name); - if (it_alias != data.query_parameters.end()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Same alias used multiple times {}", backQuote(node.name)); - - data.query_parameters.insert(node.name); -} - void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr &, Data & data) { @@ -278,9 +267,7 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) visit(*node_select, ast, data); else if (auto * node_param = ast->as()) { - if (data.is_create_parameterized_view) - visit(*node_param, data); - else + if (!data.is_create_parameterized_view) throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); } else if (auto * node_function = ast->as()) diff --git a/src/Interpreters/QueryNormalizer.h b/src/Interpreters/QueryNormalizer.h index 5006d3ad83c..90c70dd71e6 100644 --- a/src/Interpreters/QueryNormalizer.h +++ b/src/Interpreters/QueryNormalizer.h @@ -84,7 +84,6 @@ private: static void visit(ASTIdentifier &, ASTPtr &, Data &); static void visit(ASTTablesInSelectQueryElement &, const ASTPtr &, Data &); static void visit(ASTSelectQuery &, const ASTPtr &, Data &); - static void visit(ASTQueryParameter &, Data &); static void visitChildren(IAST * node, Data & data); }; diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index bc862ed7b38..20e5b034619 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -249,7 +249,20 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt for (const auto & column : *cols) { if (first_table || !data.join_using_columns.contains(column.name)) - addIdentifier(columns, table.table, column.name); + { + std::string column_name = column.name; + std::string::size_type pos = 0u; + for (auto parameter : data.parameter_values) + { + if ((pos = column_name.find(parameter.first)) != std::string::npos) + { + String parameter_name("_CAST(" + parameter.second + ", '" + column.type->getName() + "')"); + column_name.replace(pos,parameter.first.size(),parameter_name); + break; + } + } + addIdentifier(columns, table.table, column_name); + } } } first_table = false; diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.h b/src/Interpreters/TranslateQualifiedNamesVisitor.h index 73e45fc7ea0..777c6241d19 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -28,11 +28,13 @@ public: const TablesWithColumns & tables; std::unordered_set join_using_columns; bool has_columns; + NameToNameMap parameter_values; - Data(const NameSet & source_columns_, const TablesWithColumns & tables_, bool has_columns_ = true) + Data(const NameSet & source_columns_, const TablesWithColumns & tables_, bool has_columns_ = true, NameToNameMap 
parameter_values_ = {}) : source_columns(source_columns_) , tables(tables_) , has_columns(has_columns_) + , parameter_values(parameter_values_) {} bool hasColumn(const String & name) const { return source_columns.count(name); } diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index fa101a84c58..80b25f7352b 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -330,10 +330,10 @@ using ExistsExpressionVisitor = InDepthNodeVisitor & tables_with_columns, const Names & required_result_columns, - std::shared_ptr table_join) const + std::shared_ptr table_join, + bool is_parameterized_view, + const NameToNameMap parameter_values) const { auto * select_query = query->as(); if (!select_query) @@ -1244,7 +1246,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( result.analyzed_join->setColumnsFromJoinedTable(std::move(columns_from_joined_table), source_columns_set, right_table.table.getQualifiedNamePrefix()); } - translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns); + translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns, parameter_values); /// Optimizes logical expressions. LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform(); @@ -1311,7 +1313,27 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( result.aggregates = getAggregates(query, *select_query); result.window_function_asts = getWindowFunctions(query, *select_query); result.expressions_with_window_function = getExpressionsWithWindowFunctions(query); + + if (is_parameterized_view) + { + for (auto & column : result.source_columns) + { + std::string column_name = column.name; + std::string::size_type pos = 0u; + for (auto & parameter : parameter_values) + { + if ((pos = column_name.find(parameter.first)) != std::string::npos) + { + String parameter_name("_CAST(" + parameter.second + ", '" + column.type->getName() + "')"); + column.name.replace(pos,parameter.first.size(),parameter_name); + break; + } + } + } + } + result.collectUsedColumns(query, true, settings.query_plan_optimize_primary_key); + result.required_source_columns_before_expanding_alias_columns = result.required_source_columns.getNames(); /// rewrite filters for select query, must go after getArrayJoinedColumns diff --git a/src/Interpreters/TreeRewriter.h b/src/Interpreters/TreeRewriter.h index 99408ca208b..b60afc6c7fc 100644 --- a/src/Interpreters/TreeRewriter.h +++ b/src/Interpreters/TreeRewriter.h @@ -90,6 +90,7 @@ struct TreeRewriterResult void collectSourceColumns(bool add_special); void collectUsedColumns(const ASTPtr & query, bool is_select, bool visit_index_hint); Names requiredSourceColumns() const { return required_source_columns.getNames(); } + Names sourceColumns() const { return source_columns.getNames(); } const Names & requiredSourceColumnsForAccessCheck() const { return required_source_columns_before_expanding_alias_columns; } NameSet getArrayJoinSourceNameSet() const; const Scalars & getScalars() const { return scalars; } @@ -129,7 +130,9 @@ public: const SelectQueryOptions & select_options = {}, const std::vector & tables_with_columns = {}, const Names & required_result_columns = {}, - std::shared_ptr table_join = {}) const; + std::shared_ptr table_join = {}, + bool is_parameterized_view = false, + const NameToNameMap parameter_values = {}) const; private: static void normalize(ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, 
const Settings & settings, bool allow_self_aliases, ContextPtr context_, bool is_create_parameterized_view = false); diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 63dc9f6b3ac..53c40089924 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -127,6 +127,8 @@ ASTPtr ASTFunction::clone() const res->children.push_back(res->window_definition); } + res->prefer_subquery_to_function_formatting = prefer_subquery_to_function_formatting; + return res; } diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 48851f0974d..8dfb7b288e3 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -112,21 +112,34 @@ NameAndTypePair StorageSnapshot::getColumn(const GetColumnsOptions & options, co return *column; } -Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names) const +Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names,const NameToNameMap & parameter_values) const { Block res; + const auto & columns = getMetadataForQuery()->getColumns(); for (const auto & name : column_names) { - auto column = columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, name); - auto object_column = object_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, name); + std::string column_name = name; + std::string substituted_column_name = name; + std::string::size_type pos = 0u; + for (auto parameter : parameter_values) + { + if ((pos = substituted_column_name.find("_CAST(" + parameter.second)) != std::string::npos) + { + substituted_column_name = substituted_column_name.substr(0,pos) + parameter.first + ")"; + break; + } + } + + auto column = columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, substituted_column_name); + auto object_column = object_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, substituted_column_name); if (column && !object_column) { - res.insert({column->type->createColumn(), column->type, column->name}); + res.insert({column->type->createColumn(), column->type, column_name}); } else if (object_column) { - res.insert({object_column->type->createColumn(), object_column->type, object_column->name}); + res.insert({object_column->type->createColumn(), object_column->type, column_name}); } else if (auto it = virtual_columns.find(name); it != virtual_columns.end()) { diff --git a/src/Storages/StorageSnapshot.h b/src/Storages/StorageSnapshot.h index badf0d3a1e8..723b30e49e6 100644 --- a/src/Storages/StorageSnapshot.h +++ b/src/Storages/StorageSnapshot.h @@ -66,7 +66,7 @@ struct StorageSnapshot NameAndTypePair getColumn(const GetColumnsOptions & options, const String & column_name) const; /// Block with ordinary + materialized + aliases + virtuals + subcolumns. - Block getSampleBlockForColumns(const Names & column_names) const; + Block getSampleBlockForColumns(const Names & column_names, const NameToNameMap & parameter_values = {}) const; ColumnsDescription getDescriptionForColumns(const Names & column_names) const; diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 8a2787625fb..d9e79607ce4 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -140,7 +140,7 @@ void StorageView::read( query_plan.addStep(std::move(materializing)); /// And also convert to expected structure. 
- const auto & expected_header = storage_snapshot->getSampleBlockForColumns(column_names); + const auto & expected_header = storage_snapshot->getSampleBlockForColumns(column_names,parameter_values); const auto & header = query_plan.getCurrentDataStream().header; const auto * select_with_union = current_inner_query->as(); @@ -176,7 +176,7 @@ static ASTTableExpression * getFirstTableExpression(ASTSelectQuery & select_quer return select_element->table_expression->as(); } -void StorageView::replaceQueryParametersIfParametrizedView(ASTPtr & outer_query, const NameToNameMap & parameter_values) +void StorageView::replaceQueryParametersIfParametrizedView(ASTPtr & outer_query) { ReplaceQueryParameterVisitor visitor(parameter_values); visitor.visit(outer_query); diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index cd88005a207..e913e98901f 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -35,7 +35,7 @@ public: size_t max_block_size, size_t num_streams) override; - static void replaceQueryParametersIfParametrizedView(ASTPtr & outer_query, const NameToNameMap & parameter_values); + void replaceQueryParametersIfParametrizedView(ASTPtr & outer_query); static void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot, const bool parameterized_view) { @@ -45,8 +45,14 @@ public: static void replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name, const bool parameterized_view); static ASTPtr restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name); + void setParameterValues (NameToNameMap parameter_values_) + { + parameter_values = parameter_values_; + } + protected: bool is_parameterized_view; + NameToNameMap parameter_values; }; } diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference index 9ea5e464b8e..9ec1cb0efd2 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.reference +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -11,8 +11,12 @@ FROM SELECT * FROM default.Catalog WHERE Price = _CAST(10, \'UInt64\') -) AS v1 +) AS pv1 50 10 20 30 +20 +30 +40 +60 diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index 934ddd18d49..9d55dba970a 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -1,8 +1,12 @@ -DROP TABLE IF EXISTS v1; -DROP TABLE IF EXISTS v2; -DROP TABLE IF EXISTS v3; +DROP VIEW IF EXISTS pv1; +DROP VIEW IF EXISTS pv2; +DROP VIEW IF EXISTS pv3; +DROP VIEW IF EXISTS pv4; +DROP VIEW IF EXISTS pv5; +DROP VIEW IF EXISTS pv6; +DROP VIEW IF EXISTS v1; DROP TABLE IF EXISTS Catalog; -DROP TABLE IF EXISTS system.v1; +DROP TABLE IF EXISTS system.pv1; DROP TABLE IF EXISTS system.Catalog; CREATE TABLE Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; @@ -11,39 +15,39 @@ INSERT INTO Catalog VALUES ('Pen', 10, 3); INSERT INTO Catalog VALUES ('Book', 50, 2); INSERT INTO Catalog VALUES ('Paper', 20, 1); -CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price={price:UInt64}; -SELECT Price FROM v1(price=20); -SELECT Price FROM `v1`(price=20); +CREATE VIEW pv1 AS SELECT * FROM Catalog WHERE Price={price:UInt64}; +SELECT Price FROM pv1(price=20); +SELECT Price FROM `pv1`(price=20); set param_p=10; -SELECT Price FROM v1; -- { serverError UNKNOWN_QUERY_PARAMETER} -SELECT Price FROM 
v1(price={p:UInt64}); +SELECT Price FROM pv1; -- { serverError UNKNOWN_QUERY_PARAMETER} +SELECT Price FROM pv1(price={p:UInt64}); set param_l=1; -SELECT Price FROM v1(price=50) LIMIT ({l:UInt64}); +SELECT Price FROM pv1(price=50) LIMIT ({l:UInt64}); -DETACH TABLE v1; -ATTACH TABLE v1; +DETACH TABLE pv1; +ATTACH TABLE pv1; -EXPLAIN SYNTAX SELECT * from v1(price=10); +EXPLAIN SYNTAX SELECT * from pv1(price=10); -INSERT INTO v1 VALUES ('Bag', 50, 2); -- { serverError NOT_IMPLEMENTED} +INSERT INTO pv1 VALUES ('Bag', 50, 2); -- { serverError NOT_IMPLEMENTED} -SELECT Price FROM v123(price=20); -- { serverError UNKNOWN_FUNCTION } +SELECT Price FROM pv123(price=20); -- { serverError UNKNOWN_FUNCTION } -CREATE VIEW v10 AS SELECT * FROM Catalog WHERE Price=10; +CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price=10; -SELECT Price FROM v10(price=10); -- { serverError UNKNOWN_FUNCTION } +SELECT Price FROM v1(price=10); -- { serverError UNKNOWN_FUNCTION } -CREATE VIEW v2 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64}; -SELECT Price FROM v2(price=50,quantity=2); +CREATE VIEW pv2 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64}; +SELECT Price FROM pv2(price=50,quantity=2); -SELECT Price FROM v2(price=50); -- { serverError UNKNOWN_QUERY_PARAMETER} +SELECT Price FROM pv2(price=50); -- { serverError UNKNOWN_QUERY_PARAMETER} -CREATE VIEW v3 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity=3; -SELECT Price FROM v3(price=10); +CREATE VIEW pv3 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity=3; +SELECT Price FROM pv3(price=10); -CREATE VIEW v4 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={price:UInt64}; -- {serverError BAD_ARGUMENTS} +CREATE VIEW pv4 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={price:UInt64}; -- {serverError DUPLICATE_COLUMN} CREATE TABLE system.Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; @@ -51,19 +55,25 @@ INSERT INTO system.Catalog VALUES ('Pen', 10, 3); INSERT INTO system.Catalog VALUES ('Book', 50, 2); INSERT INTO system.Catalog VALUES ('Paper', 20, 1); -CREATE VIEW system.v1 AS SELECT * FROM system.Catalog WHERE Price={price:UInt64}; -SELECT Price FROM system.v1(price=20); -SELECT Price FROM `system.v1`(price=20); -- { serverError UNKNOWN_FUNCTION } +CREATE VIEW system.pv1 AS SELECT * FROM system.Catalog WHERE Price={price:UInt64}; +SELECT Price FROM system.pv1(price=20); +SELECT Price FROM `system.pv1`(price=20); -- { serverError UNKNOWN_FUNCTION } INSERT INTO Catalog VALUES ('Book2', 30, 8); INSERT INTO Catalog VALUES ('Book3', 30, 8); -CREATE VIEW v5 AS SELECT Price FROM Catalog WHERE {price:UInt64} HAVING Quantity in (SELECT {quantity:UInt64}) LIMIT {limit:UInt64}; -SELECT Price FROM v5(price=30, quantity=8,limit=1); +CREATE VIEW pv5 AS SELECT Price FROM Catalog WHERE {price:UInt64} HAVING Quantity in (SELECT {quantity:UInt64}) LIMIT {limit:UInt64}; +SELECT Price FROM pv5(price=30, quantity=8,limit=1); -DROP TABLE v1; -DROP TABLE v2; -DROP TABLE v3; +CREATE VIEW pv6 AS SELECT Price+{price:UInt64} FROM Catalog GROUP BY Price+{price:UInt64} ORDER BY Price+{price:UInt64}; +SELECT * FROM pv6(price=10); + +DROP VIEW pv1; +DROP VIEW pv2; +DROP VIEW pv3; +DROP VIEW pv5; +DROP VIEW pv6; +DROP VIEW v1; DROP TABLE Catalog; -DROP TABLE system.v1; +DROP TABLE system.pv1; DROP TABLE system.Catalog; \ No newline at end of file From bc7a76a48602c0b6b9aa99e2d50543b6ca0fa2f3 Mon Sep 17 00:00:00 2001 From: pufit Date: Sun, 4 Dec 2022 
17:27:28 -0500 Subject: [PATCH 041/262] Added mmap for StorageFile --- src/Storages/StorageFile.cpp | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 6e032a47943..6baf079275a 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -14,6 +14,8 @@ #include #include +#include +#include #include #include #include @@ -194,10 +196,18 @@ std::unique_ptr createReadBuffer( if (0 != fstat(table_fd, &file_stat)) throwFromErrno("Cannot stat table file descriptor, inside " + storage_name, ErrorCodes::CANNOT_STAT); - if (S_ISREG(file_stat.st_mode)) - nested_buffer = std::make_unique(table_fd); - else - nested_buffer = std::make_unique(table_fd); + try + { + nested_buffer = std::make_unique(table_fd, 0); + } + catch (const ErrnoException &) + { + /// Fallback if mmap is not supported. + if (S_ISREG(file_stat.st_mode)) + nested_buffer = std::make_unique(table_fd); + else + nested_buffer = std::make_unique(table_fd); + } method = chooseCompressionMethod("", compression_method); } @@ -207,10 +217,18 @@ std::unique_ptr createReadBuffer( if (0 != stat(current_path.c_str(), &file_stat)) throwFromErrno("Cannot stat file " + current_path, ErrorCodes::CANNOT_STAT); - if (S_ISREG(file_stat.st_mode)) - nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); - else - nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); + try + { + nested_buffer = std::make_unique(current_path, 0); + } + catch (const ErrnoException &) + { + /// Fallback if mmap is not supported. + if (S_ISREG(file_stat.st_mode)) + nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); + else + nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); + } method = chooseCompressionMethod(current_path, compression_method); } From 084e465d84cb2ba4c0773044e2a14bf4e68aca04 Mon Sep 17 00:00:00 2001 From: pufit Date: Sun, 4 Dec 2022 23:39:23 -0500 Subject: [PATCH 042/262] Use mmap only on regular files. --- src/Storages/StorageFile.cpp | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 6baf079275a..9ef1039c666 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -196,18 +196,10 @@ std::unique_ptr createReadBuffer( if (0 != fstat(table_fd, &file_stat)) throwFromErrno("Cannot stat table file descriptor, inside " + storage_name, ErrorCodes::CANNOT_STAT); - try - { + if (S_ISREG(file_stat.st_mode)) nested_buffer = std::make_unique(table_fd, 0); - } - catch (const ErrnoException &) - { - /// Fallback if mmap is not supported. - if (S_ISREG(file_stat.st_mode)) - nested_buffer = std::make_unique(table_fd); - else - nested_buffer = std::make_unique(table_fd); - } + else + nested_buffer = std::make_unique(table_fd); method = chooseCompressionMethod("", compression_method); } @@ -217,18 +209,10 @@ std::unique_ptr createReadBuffer( if (0 != stat(current_path.c_str(), &file_stat)) throwFromErrno("Cannot stat file " + current_path, ErrorCodes::CANNOT_STAT); - try - { + if (S_ISREG(file_stat.st_mode)) nested_buffer = std::make_unique(current_path, 0); - } - catch (const ErrnoException &) - { - /// Fallback if mmap is not supported. 
- if (S_ISREG(file_stat.st_mode)) - nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); - else - nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); - } + else + nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); method = chooseCompressionMethod(current_path, compression_method); } From e93c0776b6ffd2770cde82b654ab3039e84580f4 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Tue, 6 Dec 2022 11:14:12 +0000 Subject: [PATCH 043/262] Set keytab in environment variable --- programs/server/Server.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 965717d74b9..36b1d86ea53 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -735,6 +735,13 @@ try } #endif + String principal_keytab = config().getString("kerberos.keytab", ""); + if (!principal_keytab.empty() && std::filesystem::exists(principal_keytab)) + { + setenv("KRB5_CLIENT_KTNAME", principal_keytab.c_str(), true /* overwrite */); // NOLINT + setenv("KRB5_KTNAME", principal_keytab.c_str(), true /* overwrite */); // NOLINT + } + registerFunctions(); registerAggregateFunctions(); registerTableFunctions(); From 540f890291005dba4500765906b5098326c30014 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 7 Dec 2022 11:04:15 +0000 Subject: [PATCH 044/262] Make system.replicas parallel --- src/Core/Settings.h | 2 ++ src/Storages/System/StorageSystemReplicas.cpp | 32 +++++++++++++++---- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 91647a5f165..4be91ce311f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -155,6 +155,8 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) \ M(Bool, allow_experimental_parallel_reading_from_replicas, false, "If true, ClickHouse will send a SELECT query to all replicas of a table. It will work for any kind on MergeTree table.", 0) \ \ + M(UInt64, system_replicas_fetch_threads, 16, "The maximum number of threads to fetch data for system.replicas table.", 0) \ + \ M(Bool, skip_unavailable_shards, false, "If true, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. 
Shard is marked as unavailable when none of the replicas can be reached.", 0) \ \ M(UInt64, parallel_distributed_insert_select, 0, "Process distributed INSERT SELECT query in the same cluster on local tables on every shard; if set to 1 - SELECT is executed on each shard; if set to 2 - SELECT and INSERT are executed on each shard", 0) \ diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 0f7877a6e41..a6afb6eff0d 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -151,14 +151,32 @@ Pipe StorageSystemReplicas::read( MutableColumns res_columns = storage_snapshot->metadata->getSampleBlock().cloneEmptyColumns(); - for (size_t i = 0, size = col_database->size(); i < size; ++i) - { - StorageReplicatedMergeTree::Status status; - dynamic_cast( - *replicated_tables - [(*col_database)[i].safeGet()] - [(*col_table)[i].safeGet()]).getStatus(status, with_zk_fields); + auto settings = context->getSettingsRef(); + size_t thread_pool_size = settings.system_replicas_fetch_threads; + if (settings.max_threads != 0) + thread_pool_size = std::min(thread_pool_size, static_cast(settings.max_threads)); + + ThreadPool thread_pool(thread_pool_size); + + size_t tables_size = col_database->size(); + std::vector statuses(tables_size); + + for (size_t i = 0; i < tables_size; ++i) + { + thread_pool.scheduleOrThrowOnError([i, &statuses, &replicated_tables, &col_database, &col_table, &with_zk_fields] + { + dynamic_cast( + *replicated_tables + [(*col_database)[i].safeGet()] + [(*col_table)[i].safeGet()]).getStatus(statuses[i], with_zk_fields); + }); + } + + thread_pool.wait(); + + for (const auto & status: statuses) + { size_t col_num = 3; res_columns[col_num++]->insert(status.is_leader); res_columns[col_num++]->insert(status.can_become_leader); From 9b46baa17dc66f6df2f7f0279071f1607b13c510 Mon Sep 17 00:00:00 2001 From: pufit Date: Wed, 7 Dec 2022 22:31:32 -0500 Subject: [PATCH 045/262] Rewrite `StorageFile` buffer creation with `createReadBufferFromFileBase`. Add file descriptor support for `createReadBufferFromFileBase`. Fix file_size overflow in `createReadBufferFromFileBase`. Fix `MMapReadBufferFromFileWithCache` file_size definition. 
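
The hunks that follow derive the read buffer's size estimate from the file itself (stat() on a path, fstat() on an already-open descriptor) instead of trusting an optional file_size value. A minimal standalone sketch of that probing idea, in plain POSIX with illustrative names rather than the real ClickHouse helpers:

```cpp
// Illustrative sketch only: derive an estimated read size either from a path
// or from an already-open file descriptor, the way the patch uses stat()/fstat().
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <cstdio>
#include <optional>
#include <stdexcept>
#include <string>

// Returns the size reported by the kernel, or std::nullopt for non-regular
// files (pipes, character devices), where st_size is not meaningful.
std::optional<size_t> probeSize(const std::string & path, int fd = -1)
{
    struct stat st{};
    int rc = (fd >= 0) ? ::fstat(fd, &st) : ::stat(path.c_str(), &st);
    if (rc != 0)
        throw std::runtime_error("cannot stat " + (fd >= 0 ? std::string("fd") : path));
    if (!S_ISREG(st.st_mode))
        return std::nullopt;
    return static_cast<size_t>(st.st_size);
}

int main()
{
    int fd = ::open("/etc/hostname", O_RDONLY); // any readable regular file works here
    if (fd < 0)
        return 1;
    if (auto size = probeSize("", fd))
        std::printf("regular file, %zu bytes\n", *size);
    else
        std::printf("not a regular file, size unknown\n");
    ::close(fd);
    return 0;
}
```

Non-regular files report no usable st_size, which is why the change keeps a non-mmap path for them rather than trusting the probed value.
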
--- programs/local/LocalServer.cpp | 6 ++ src/Disks/IO/createReadBufferFromFileBase.cpp | 82 ++++++++++++++++--- src/Disks/IO/createReadBufferFromFileBase.h | 18 ++++ src/IO/MMapReadBufferFromFileWithCache.cpp | 1 + src/Storages/StorageFile.cpp | 31 ++----- ...97_storage_file_reader_selection.reference | 6 ++ .../02497_storage_file_reader_selection.sh | 29 +++++++ 7 files changed, 136 insertions(+), 37 deletions(-) create mode 100644 tests/queries/0_stateless/02497_storage_file_reader_selection.reference create mode 100755 tests/queries/0_stateless/02497_storage_file_reader_selection.sh diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 33d11091660..6f75c404174 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -540,6 +540,12 @@ void LocalServer::processConfig() global_context->makeGlobalContext(); global_context->setApplicationType(Context::ApplicationType::LOCAL); + if (!global_context->getSettingsRef().isChanged("local_filesystem_read_method")) + global_context->setSetting("local_filesystem_read_method", Field{"mmap"}); + + if (!global_context->getSettingsRef().isChanged("min_bytes_to_use_mmap_io")) + global_context->setSetting("min_bytes_to_use_mmap_io", Field{1}); + tryInitPath(); Poco::Logger * log = &logger(); diff --git a/src/Disks/IO/createReadBufferFromFileBase.cpp b/src/Disks/IO/createReadBufferFromFileBase.cpp index b274786f162..73506e19d11 100644 --- a/src/Disks/IO/createReadBufferFromFileBase.cpp +++ b/src/Disks/IO/createReadBufferFromFileBase.cpp @@ -23,22 +23,37 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int CANNOT_STAT; } -std::unique_ptr createReadBufferFromFileBase( +std::unique_ptr createReadBufferFromFileOrFileDescriptorBase( const std::string & filename, const ReadSettings & settings, std::optional read_hint, std::optional file_size, int flags, char * existing_memory, - size_t alignment) + size_t alignment, + bool read_from_fd, + int fd) { if (file_size.has_value() && !*file_size) return std::make_unique(); - size_t estimated_size = 0; + struct stat file_stat{}; + if (read_from_fd) + { + if (0 != fstat(fd, &file_stat)) + throwFromErrno("Cannot stat file descriptor", ErrorCodes::CANNOT_STAT); + } + else + { + if (0 != stat(filename.c_str(), &file_stat)) + throwFromErrno("Cannot stat file " + filename, ErrorCodes::CANNOT_STAT); + } + + size_t estimated_size = file_stat.st_size; if (read_hint.has_value()) estimated_size = *read_hint; else if (file_size.has_value()) @@ -48,17 +63,18 @@ std::unique_ptr createReadBufferFromFileBase( && settings.local_fs_method == LocalFSReadMethod::mmap && settings.mmap_threshold && settings.mmap_cache - && estimated_size >= settings.mmap_threshold) + && estimated_size >= settings.mmap_threshold + && S_ISREG(file_stat.st_mode)) { try { - auto res = std::make_unique(*settings.mmap_cache, filename, 0, file_size.value_or(-1)); + auto res = std::make_unique(*settings.mmap_cache, filename, 0, estimated_size); ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMap); return res; } catch (const ErrnoException &) { - /// Fallback if mmap is not supported (example: pipe). + /// Fallback if mmap is not supported. ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMapFailed); } } @@ -67,13 +83,21 @@ std::unique_ptr createReadBufferFromFileBase( { std::unique_ptr res; - if (settings.local_fs_method == LocalFSReadMethod::read) + /// Pread works only with regular files, so we explicitly fallback to read in other cases. 
+ if (settings.local_fs_method == LocalFSReadMethod::read || !S_ISREG(file_stat.st_mode)) { - res = std::make_unique(filename, buffer_size, actual_flags, existing_memory, alignment, file_size); + if (read_from_fd) + res = std::make_unique(fd, buffer_size, existing_memory, alignment, file_size); + else + res = std::make_unique(filename, buffer_size, actual_flags, existing_memory, alignment, file_size); } else if (settings.local_fs_method == LocalFSReadMethod::pread || settings.local_fs_method == LocalFSReadMethod::mmap) { - res = std::make_unique(filename, buffer_size, actual_flags, existing_memory, alignment, file_size); + if (read_from_fd) + res = std::make_unique(fd, buffer_size, existing_memory, alignment, file_size); + else + res = std::make_unique( + filename, buffer_size, actual_flags, existing_memory, alignment, file_size); } else if (settings.local_fs_method == LocalFSReadMethod::pread_fake_async) { @@ -82,8 +106,13 @@ std::unique_ptr createReadBufferFromFileBase( throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); auto & reader = context->getThreadPoolReader(Context::FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER); - res = std::make_unique( - reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size); + + if (read_from_fd) + res = std::make_unique( + reader, settings.priority, fd, buffer_size, existing_memory, alignment, file_size); + else + res = std::make_unique( + reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size); } else if (settings.local_fs_method == LocalFSReadMethod::pread_threadpool) { @@ -92,8 +121,13 @@ std::unique_ptr createReadBufferFromFileBase( throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); auto & reader = context->getThreadPoolReader(Context::FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER); - res = std::make_unique( - reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size); + + if (read_from_fd) + res = std::make_unique( + reader, settings.priority, fd, buffer_size, existing_memory, alignment, file_size); + else + res = std::make_unique( + reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size); } else throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown read method"); @@ -169,4 +203,26 @@ std::unique_ptr createReadBufferFromFileBase( return create(buffer_size, flags); } +std::unique_ptr createReadBufferFromFileBase( + const std::string & filename, + const ReadSettings & settings, + std::optional read_hint, + std::optional file_size, + int flags_, + char * existing_memory, + size_t alignment) +{ + return createReadBufferFromFileOrFileDescriptorBase(filename, settings, read_hint, file_size, flags_, existing_memory, alignment); +} + +std::unique_ptr createReadBufferFromFileDescriptorBase( + int fd, + const ReadSettings & settings, + std::optional read_hint, + std::optional file_size, + char * existing_memory , + size_t alignment) +{ + return createReadBufferFromFileOrFileDescriptorBase({}, settings, read_hint, file_size, -1, existing_memory, alignment, true, fd); +} } diff --git a/src/Disks/IO/createReadBufferFromFileBase.h b/src/Disks/IO/createReadBufferFromFileBase.h index c2e2040587b..542ea423462 100644 --- a/src/Disks/IO/createReadBufferFromFileBase.h +++ b/src/Disks/IO/createReadBufferFromFileBase.h @@ -14,6 +14,17 @@ namespace DB * @param read_hint - the number of bytes to read hint * @param file_size - size of 
file */ +std::unique_ptr createReadBufferFromFileOrFileDescriptorBase( + const std::string & filename, + const ReadSettings & settings, + std::optional read_hint = {}, + std::optional file_size = {}, + int flags_ = -1, + char * existing_memory = nullptr, + size_t alignment = 0, + bool read_from_fd = false, + int fd = 0); + std::unique_ptr createReadBufferFromFileBase( const std::string & filename, const ReadSettings & settings, @@ -23,4 +34,11 @@ std::unique_ptr createReadBufferFromFileBase( char * existing_memory = nullptr, size_t alignment = 0); +std::unique_ptr createReadBufferFromFileDescriptorBase( + int fd, + const ReadSettings & settings, + std::optional read_hint = {}, + std::optional file_size = {}, + char * existing_memory = nullptr, + size_t alignment = 0); } diff --git a/src/IO/MMapReadBufferFromFileWithCache.cpp b/src/IO/MMapReadBufferFromFileWithCache.cpp index 503a58b65b9..d13cf5db2f7 100644 --- a/src/IO/MMapReadBufferFromFileWithCache.cpp +++ b/src/IO/MMapReadBufferFromFileWithCache.cpp @@ -18,6 +18,7 @@ void MMapReadBufferFromFileWithCache::init() size_t page_size = static_cast(::getPageSize()); ReadBuffer::padded = (length % page_size) > 0 && (length % page_size) <= (page_size - (PADDING_FOR_SIMD - 1)); + ReadBufferFromFileBase::file_size = length; } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 9ef1039c666..8cc9f646406 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -39,6 +39,8 @@ #include #include +#include + #include #include @@ -180,7 +182,6 @@ void checkCreationIsAllowed( std::unique_ptr createReadBuffer( const String & current_path, bool use_table_fd, - const String & storage_name, int table_fd, const String & compression_method, ContextPtr context) @@ -188,32 +189,14 @@ std::unique_ptr createReadBuffer( std::unique_ptr nested_buffer; CompressionMethod method; - struct stat file_stat{}; - if (use_table_fd) { - /// Check if file descriptor allows random reads (and reading it twice). - if (0 != fstat(table_fd, &file_stat)) - throwFromErrno("Cannot stat table file descriptor, inside " + storage_name, ErrorCodes::CANNOT_STAT); - - if (S_ISREG(file_stat.st_mode)) - nested_buffer = std::make_unique(table_fd, 0); - else - nested_buffer = std::make_unique(table_fd); - + nested_buffer = createReadBufferFromFileDescriptorBase(table_fd, context->getReadSettings()); method = chooseCompressionMethod("", compression_method); } else { - /// Check if file descriptor allows random reads (and reading it twice). - if (0 != stat(current_path.c_str(), &file_stat)) - throwFromErrno("Cannot stat file " + current_path, ErrorCodes::CANNOT_STAT); - - if (S_ISREG(file_stat.st_mode)) - nested_buffer = std::make_unique(current_path, 0); - else - nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); - + nested_buffer = createReadBufferFromFileBase(current_path, context->getReadSettings()); method = chooseCompressionMethod(current_path, compression_method); } @@ -284,7 +267,7 @@ ColumnsDescription StorageFile::getTableStructureFromFileDescriptor(ContextPtr c { /// We will use PeekableReadBuffer to create a checkpoint, so we need a place /// where we can store the original read buffer. 
- read_buffer_from_fd = createReadBuffer("", true, getName(), table_fd, compression_method, context); + read_buffer_from_fd = createReadBuffer("", true, table_fd, compression_method, context); auto read_buf = std::make_unique(*read_buffer_from_fd); read_buf->setCheckpoint(); return read_buf; @@ -333,7 +316,7 @@ ColumnsDescription StorageFile::getTableStructureFromFile( if (it == paths.end()) return nullptr; - return createReadBuffer(*it++, false, "File", -1, compression_method, context); + return createReadBuffer(*it++, false, -1, compression_method, context); }; ColumnsDescription columns; @@ -550,7 +533,7 @@ public: } if (!read_buf) - read_buf = createReadBuffer(current_path, storage->use_table_fd, storage->getName(), storage->table_fd, storage->compression_method, context); + read_buf = createReadBuffer(current_path, storage->use_table_fd, storage->table_fd, storage->compression_method, context); auto format = context->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size, storage->format_settings); diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.reference b/tests/queries/0_stateless/02497_storage_file_reader_selection.reference new file mode 100644 index 00000000000..0977ae4d888 --- /dev/null +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.reference @@ -0,0 +1,6 @@ +key\nfoo\nbar +1 +0 +key\nfoo\nbar +0 +1 diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh new file mode 100755 index 00000000000..9e07050536e --- /dev/null +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +DATA_FILE=$USER_FILES_PATH/test_02497_storage_file_reader.data +echo 'key\nfoo\nbar' > $DATA_FILE + +QUERY_ID=$RANDOM +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', 'TSV', 's String')" \ + --query_id $QUERY_ID \ + --local_filesystem_read_method=mmap \ + --min_bytes_to_use_mmap_io=1 \ + +$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" +$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferMMap']) FROM system.query_log WHERE query_id='$QUERY_ID'" +$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferOrdinary']) FROM system.query_log WHERE query_id='$QUERY_ID'" + +QUERY_ID=$RANDOM +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', 'TSV', 's String')" \ + --query_id $QUERY_ID \ + --local_filesystem_read_method=pread + +$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" +$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferMMap']) FROM system.query_log WHERE query_id='$QUERY_ID'" +$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferOrdinary']) FROM system.query_log WHERE query_id='$QUERY_ID'" From 9cc4868b37c318d8f61005114135817cf242e2c2 Mon Sep 17 00:00:00 2001 From: pufit Date: Wed, 7 Dec 2022 23:09:19 -0500 Subject: [PATCH 046/262] Try to fix codestyle error. 
--- .../queries/0_stateless/02497_storage_file_reader_selection.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh index 9e07050536e..b7c60aac03c 100755 --- a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') DATA_FILE=$USER_FILES_PATH/test_02497_storage_file_reader.data -echo 'key\nfoo\nbar' > $DATA_FILE +echo -e 'key\nfoo\nbar' > $DATA_FILE QUERY_ID=$RANDOM $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', 'TSV', 's String')" \ From 76401ad0b9b029a934a3a8d861ef3a59061cfd97 Mon Sep 17 00:00:00 2001 From: pufit Date: Wed, 7 Dec 2022 23:17:10 -0500 Subject: [PATCH 047/262] Test and codestyle fix. --- src/Storages/StorageFile.cpp | 1 - .../02497_storage_file_reader_selection.reference | 8 ++++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 8cc9f646406..9fd40600eba 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -69,7 +69,6 @@ namespace ErrorCodes extern const int FILE_DOESNT_EXIST; extern const int TIMEOUT_EXCEEDED; extern const int INCOMPATIBLE_COLUMNS; - extern const int CANNOT_STAT; extern const int LOGICAL_ERROR; extern const int CANNOT_APPEND_TO_FILE; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.reference b/tests/queries/0_stateless/02497_storage_file_reader_selection.reference index 0977ae4d888..8da37e4219c 100644 --- a/tests/queries/0_stateless/02497_storage_file_reader_selection.reference +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.reference @@ -1,6 +1,10 @@ -key\nfoo\nbar +key +foo +bar 1 0 -key\nfoo\nbar +key +foo +bar 0 1 From 2d87cc1a6c9d549393cb0ec38c38a89688b31d2f Mon Sep 17 00:00:00 2001 From: pufit Date: Thu, 8 Dec 2022 18:02:29 -0500 Subject: [PATCH 048/262] Add `storage_file_read_method` setting. 
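
The commit below wires a plain string setting into the storage layer and resolves it to a read-method enum (the patch itself does this with magic_enum::enum_cast and raises UNKNOWN_READ_METHOD for unrecognised values). A dependency-free sketch of that resolution step, with made-up names:

```cpp
// Illustrative sketch only: resolve a user-facing setting string into an enum,
// rejecting unknown values instead of silently falling back to a default.
#include <iostream>
#include <optional>
#include <string>
#include <unordered_map>

enum class ReadMethod { Read, PRead, MMap, PReadThreadpool };

std::optional<ReadMethod> parseReadMethod(const std::string & name)
{
    static const std::unordered_map<std::string, ReadMethod> known = {
        {"read", ReadMethod::Read},
        {"pread", ReadMethod::PRead},
        {"mmap", ReadMethod::MMap},
        {"pread_threadpool", ReadMethod::PReadThreadpool},
    };
    auto it = known.find(name);
    if (it == known.end())
        return std::nullopt;
    return it->second;
}

int main()
{
    for (const std::string & candidate : {"mmap", "pread", "mmmap"})
    {
        if (auto method = parseReadMethod(candidate))
            std::cout << candidate << " -> enum value " << static_cast<int>(*method) << '\n';
        else
            std::cout << candidate << " -> unknown read method, would throw\n";
    }
    return 0;
}
```
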
--- programs/local/LocalServer.cpp | 6 ------ src/Core/Settings.h | 1 + src/Disks/IO/createReadBufferFromFileBase.cpp | 10 ++++++++-- src/Storages/StorageFile.cpp | 15 ++++++++++++-- ...2103_tsv_csv_custom_null_representation.sh | 20 +++++++++---------- .../0_stateless/02130_parse_quoted_null.sh | 18 ++++++++--------- .../02497_storage_file_reader_selection.sh | 6 ++---- 7 files changed, 43 insertions(+), 33 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 6f75c404174..33d11091660 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -540,12 +540,6 @@ void LocalServer::processConfig() global_context->makeGlobalContext(); global_context->setApplicationType(Context::ApplicationType::LOCAL); - if (!global_context->getSettingsRef().isChanged("local_filesystem_read_method")) - global_context->setSetting("local_filesystem_read_method", Field{"mmap"}); - - if (!global_context->getSettingsRef().isChanged("min_bytes_to_use_mmap_io")) - global_context->setSetting("min_bytes_to_use_mmap_io", Field{1}); - tryInitPath(); Poco::Logger * log = &logger(); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index df5e9685173..c8c6eb61a73 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -591,6 +591,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. 
Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ + M(String, storage_file_read_method, "mmap", "Method of reading data from storage file, one of: read, pread, mmap, pread_threadpool.", 0)\ M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, pread_threadpool.", 0) \ M(String, remote_filesystem_read_method, "threadpool", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \ M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \ diff --git a/src/Disks/IO/createReadBufferFromFileBase.cpp b/src/Disks/IO/createReadBufferFromFileBase.cpp index 73506e19d11..911c677300f 100644 --- a/src/Disks/IO/createReadBufferFromFileBase.cpp +++ b/src/Disks/IO/createReadBufferFromFileBase.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -62,13 +63,18 @@ std::unique_ptr createReadBufferFromFileOrFileDescriptor if (!existing_memory && settings.local_fs_method == LocalFSReadMethod::mmap && settings.mmap_threshold - && settings.mmap_cache && estimated_size >= settings.mmap_threshold && S_ISREG(file_stat.st_mode)) { try { - auto res = std::make_unique(*settings.mmap_cache, filename, 0, estimated_size); + std::unique_ptr res; + + if (settings.mmap_cache) + res = std::make_unique(*settings.mmap_cache, filename, 0, estimated_size); + else + res = std::make_unique(filename, 0, estimated_size); + ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMap); return res; } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 9fd40600eba..f4fc78d5e98 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -72,6 +72,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int CANNOT_APPEND_TO_FILE; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; + extern const int UNKNOWN_READ_METHOD; } namespace @@ -188,14 +189,24 @@ std::unique_ptr createReadBuffer( std::unique_ptr nested_buffer; CompressionMethod method; + auto read_method = context->getSettingsRef().storage_file_read_method.value; + auto read_settings = context->getReadSettings(); + read_settings.mmap_threshold = 1; + read_settings.mmap_cache = nullptr; /// Turn off mmap cache for Storage File + + if (auto opt_method = magic_enum::enum_cast(read_method)) + read_settings.local_fs_method = *opt_method; + else + throwFromErrno("Unknown read method " + read_method, ErrorCodes::UNKNOWN_READ_METHOD); + if (use_table_fd) { - nested_buffer = createReadBufferFromFileDescriptorBase(table_fd, context->getReadSettings()); + nested_buffer = createReadBufferFromFileDescriptorBase(table_fd, read_settings); method = chooseCompressionMethod("", compression_method); } else { - nested_buffer = createReadBufferFromFileBase(current_path, context->getReadSettings()); + nested_buffer = createReadBufferFromFileBase(current_path, read_settings); method = chooseCompressionMethod(current_path, compression_method); } diff --git a/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh b/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh index 4162e046ca4..b28c56f9266 100755 --- a/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh +++ 
b/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh @@ -97,37 +97,37 @@ echo 'Corner cases' echo 'TSV' echo -e "Some text\tCustomNull" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0" echo -e "Some text\tCustomNull Some text" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0" echo -e "Some text\t123NNN" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(Int32)') settings max_read_buffer_size=14, format_tsv_null_representation='123NN', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(Int32)') settings storage_file_read_method='pread', max_read_buffer_size=14, format_tsv_null_representation='123NN', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' echo -e "Some text\tNU\tLL" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings max_read_buffer_size=13, format_tsv_null_representation='NU\tL', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=13, format_tsv_null_representation='NU\tL', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' echo 'CSV' echo -e "Some text,CustomNull" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0" echo -e "Some text,CustomNull Some text" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0" echo -e "Some text,123NNN" > $DATA_FILE -$CLICKHOUSE_CLIENT -q 
"SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(Int32)') settings max_read_buffer_size=14, format_csv_null_representation='123NN', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(Int32)') settings storage_file_read_method='pread', max_read_buffer_size=14, format_csv_null_representation='123NN', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' echo -e "Some text,NU,LL" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings max_read_buffer_size=13, format_csv_null_representation='NU,L', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=13, format_csv_null_representation='NU,L', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' echo 'Large custom NULL' $CLICKHOUSE_CLIENT -q "select '0000000000Custom NULL representation0000000000' FROM numbers(10)" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation0000000000'" -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation000000000'" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS storage_file_read_method='pread', max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation0000000000'" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS storage_file_read_method='pread', max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation000000000'" rm $DATA_FILE diff --git a/tests/queries/0_stateless/02130_parse_quoted_null.sh b/tests/queries/0_stateless/02130_parse_quoted_null.sh index 9cb6cb73e6c..2da62f9a4ff 100755 --- a/tests/queries/0_stateless/02130_parse_quoted_null.sh +++ b/tests/queries/0_stateless/02130_parse_quoted_null.sh @@ -24,31 +24,31 @@ echo -e "42.42\t3" > $DATA_FILE $CLICKHOUSE_CLIENT -q "$SELECT_QUERY" echo -e "null\t4" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=1 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=1 --storage_file_read_method=pread echo -e "null\t5" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=2 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=2 --storage_file_read_method=pread echo -e "null\t6" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=3 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=3 --storage_file_read_method=pread echo -e "null\t7" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 --storage_file_read_method=pread echo -e "nan\t8" > 
$DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=1 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=1 --storage_file_read_method=pread echo -e "nan\t9" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=2 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=2 --storage_file_read_method=pread echo -e "nan\t10" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=3 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=3 --storage_file_read_method=pread echo -e "nan\t11" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 --storage_file_read_method=pread echo -e "42\tnan" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 --storage_file_read_method=pread 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' $CLICKHOUSE_CLIENT -q "select * from test_02130 order by y" $CLICKHOUSE_CLIENT -q "drop table test_02130" diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh index b7c60aac03c..4d9336bc1a0 100755 --- a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh @@ -11,9 +11,7 @@ echo -e 'key\nfoo\nbar' > $DATA_FILE QUERY_ID=$RANDOM $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', 'TSV', 's String')" \ - --query_id $QUERY_ID \ - --local_filesystem_read_method=mmap \ - --min_bytes_to_use_mmap_io=1 \ + --query_id $QUERY_ID $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" $CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferMMap']) FROM system.query_log WHERE query_id='$QUERY_ID'" @@ -22,7 +20,7 @@ $CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferOrdinary']) FR QUERY_ID=$RANDOM $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', 'TSV', 's String')" \ --query_id $QUERY_ID \ - --local_filesystem_read_method=pread + --storage_file_read_method=pread $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" $CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferMMap']) FROM system.query_log WHERE query_id='$QUERY_ID'" From 165e39085bbbe7e3d04e304f11520275ceacd0c2 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 9 Dec 2022 12:25:55 +0000 Subject: [PATCH 049/262] Set keytab by call of krb5_gss_register_acceptor_identity() --- programs/server/Server.cpp | 7 ------- src/Access/ExternalAuthenticators.cpp | 7 ++++++- src/Access/GSSAcceptor.h | 1 + 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 36b1d86ea53..965717d74b9 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -735,13 +735,6 @@ try } #endif - String principal_keytab = config().getString("kerberos.keytab", ""); - if (!principal_keytab.empty() && std::filesystem::exists(principal_keytab)) - { - setenv("KRB5_CLIENT_KTNAME", principal_keytab.c_str(), true /* overwrite */); // NOLINT - setenv("KRB5_KTNAME", principal_keytab.c_str(), true /* overwrite */); // NOLINT - } - registerFunctions(); registerAggregateFunctions(); registerTableFunctions(); diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index 
e1c598f26f5..e709c40de46 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -9,7 +9,7 @@ #include #include - +#include namespace DB { @@ -223,6 +223,11 @@ void parseKerberosParams(GSSAcceptorContext::Params & params, const Poco::Util:: params.realm = config.getString("kerberos.realm", ""); params.principal = config.getString("kerberos.principal", ""); + + String keytab = config.getString("kerberos.keytab", ""); + if (!keytab.empty() && std::filesystem::exists(keytab)) + if (krb5_gss_register_acceptor_identity(keytab.c_str())) + throw Exception("Invalid keytab file is specified", ErrorCodes::BAD_ARGUMENTS); } } diff --git a/src/Access/GSSAcceptor.h b/src/Access/GSSAcceptor.h index d2c55b1290c..c2930201a93 100644 --- a/src/Access/GSSAcceptor.h +++ b/src/Access/GSSAcceptor.h @@ -9,6 +9,7 @@ #if USE_KRB5 # include # include +# include # define MAYBE_NORETURN #else # define MAYBE_NORETURN [[noreturn]] From 9c70b13702d0f3f8e9e615fbf8e30d326efa4f39 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 9 Dec 2022 12:33:05 +0000 Subject: [PATCH 050/262] Add #if USE_KRB5 --- src/Access/ExternalAuthenticators.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index e709c40de46..6dcbd7e10b1 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -224,10 +224,12 @@ void parseKerberosParams(GSSAcceptorContext::Params & params, const Poco::Util:: params.realm = config.getString("kerberos.realm", ""); params.principal = config.getString("kerberos.principal", ""); +#if USE_KRB5 String keytab = config.getString("kerberos.keytab", ""); if (!keytab.empty() && std::filesystem::exists(keytab)) if (krb5_gss_register_acceptor_identity(keytab.c_str())) throw Exception("Invalid keytab file is specified", ErrorCodes::BAD_ARGUMENTS); +#endif } } From e38a93c45a947e596a00d4bc03a5bde913c39676 Mon Sep 17 00:00:00 2001 From: pufit Date: Sat, 10 Dec 2022 22:26:07 -0500 Subject: [PATCH 051/262] Fix UB, fix test. --- src/Core/Settings.h | 2 +- src/IO/ReadBufferFromFileBase.cpp | 15 +++++++++++++++ src/IO/ReadBufferFromFileBase.h | 3 +++ src/IO/ReadBufferFromFileDescriptor.cpp | 15 --------------- src/IO/ReadBufferFromFileDescriptor.h | 2 -- src/Storages/StorageFile.cpp | 2 +- .../02497_storage_file_reader_selection.reference | 2 -- .../02497_storage_file_reader_selection.sh | 2 -- 8 files changed, 20 insertions(+), 23 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c8c6eb61a73..a9f4280a0d1 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -591,7 +591,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. 
Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ - M(String, storage_file_read_method, "mmap", "Method of reading data from storage file, one of: read, pread, mmap, pread_threadpool.", 0)\ + M(String, storage_file_read_method, "mmap", "Method of reading data from storage file, one of: read, pread, mmap, pread_threadpool.", 0) \ M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, pread_threadpool.", 0) \ M(String, remote_filesystem_read_method, "threadpool", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \ M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \ diff --git a/src/IO/ReadBufferFromFileBase.cpp b/src/IO/ReadBufferFromFileBase.cpp index 1152804b770..7ea16d679bc 100644 --- a/src/IO/ReadBufferFromFileBase.cpp +++ b/src/IO/ReadBufferFromFileBase.cpp @@ -1,4 +1,6 @@ +#include #include +#include namespace DB { @@ -31,4 +33,17 @@ size_t ReadBufferFromFileBase::getFileSize() throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size for read buffer"); } +void ReadBufferFromFileBase::setProgressCallback(ContextPtr context) +{ + auto file_progress_callback = context->getFileProgressCallback(); + + if (!file_progress_callback) + return; + + setProfileCallback([file_progress_callback](const ProfileInfo & progress) + { + file_progress_callback(FileProgress(progress.bytes_read, 0)); + }); +} + } diff --git a/src/IO/ReadBufferFromFileBase.h b/src/IO/ReadBufferFromFileBase.h index d28be034eb5..cc4a131b10b 100644 --- a/src/IO/ReadBufferFromFileBase.h +++ b/src/IO/ReadBufferFromFileBase.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -51,6 +52,8 @@ public: size_t getFileSize() override; + void setProgressCallback(ContextPtr context); + protected: std::optional file_size; ProfileCallback profile_callback; diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp index cb4b6ca5f3e..b0e3a1ac7cd 100644 --- a/src/IO/ReadBufferFromFileDescriptor.cpp +++ b/src/IO/ReadBufferFromFileDescriptor.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -254,18 +253,4 @@ size_t ReadBufferFromFileDescriptor::getFileSize() return getSizeFromFileDescriptor(fd, getFileName()); } - -void ReadBufferFromFileDescriptor::setProgressCallback(ContextPtr context) -{ - auto file_progress_callback = context->getFileProgressCallback(); - - if (!file_progress_callback) - return; - - setProfileCallback([file_progress_callback](const ProfileInfo & progress) - { - file_progress_callback(FileProgress(progress.bytes_read, 0)); - }); -} - } diff --git a/src/IO/ReadBufferFromFileDescriptor.h b/src/IO/ReadBufferFromFileDescriptor.h index 6edda460bac..71ea1a1c358 100644 --- a/src/IO/ReadBufferFromFileDescriptor.h +++ b/src/IO/ReadBufferFromFileDescriptor.h @@ -62,8 +62,6 @@ public: size_t getFileSize() override; - void setProgressCallback(ContextPtr context); - private: /// Assuming file descriptor supports 'select', check that we have data to read or wait until timeout. 
bool poll(size_t timeout_microseconds) const; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index f4fc78d5e98..6154cdb73ca 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -214,7 +214,7 @@ std::unique_ptr createReadBuffer( if (context->getApplicationType() == Context::ApplicationType::LOCAL || context->getApplicationType() == Context::ApplicationType::CLIENT) { - auto & in = static_cast(*nested_buffer); + auto & in = static_cast(*nested_buffer); in.setProgressCallback(context); } diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.reference b/tests/queries/0_stateless/02497_storage_file_reader_selection.reference index 8da37e4219c..39e2f2f6f5e 100644 --- a/tests/queries/0_stateless/02497_storage_file_reader_selection.reference +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.reference @@ -2,9 +2,7 @@ key foo bar 1 -0 key foo bar 0 -1 diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh index 4d9336bc1a0..0e27146103e 100755 --- a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh @@ -15,7 +15,6 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" $CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferMMap']) FROM system.query_log WHERE query_id='$QUERY_ID'" -$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferOrdinary']) FROM system.query_log WHERE query_id='$QUERY_ID'" QUERY_ID=$RANDOM $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', 'TSV', 's String')" \ @@ -24,4 +23,3 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" $CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferMMap']) FROM system.query_log WHERE query_id='$QUERY_ID'" -$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferOrdinary']) FROM system.query_log WHERE query_id='$QUERY_ID'" From 1d6e77a29a1e36e755f15d5c720fd1690f7d63a2 Mon Sep 17 00:00:00 2001 From: pufit Date: Sun, 11 Dec 2022 16:15:41 -0500 Subject: [PATCH 052/262] Move reader selection logic back to `StorageFile`. 
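
The selection logic being moved back into StorageFile boils down to: use mmap for regular files, and fall back to an ordinary read() path for pipes and other non-regular inputs. A self-contained POSIX sketch of that pattern, with hypothetical names in place of the real read-buffer classes:

```cpp
// Illustrative sketch only: prefer mmap for regular files and fall back to
// buffered read() for anything else, mirroring the selection kept in the storage layer.
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <cstdio>
#include <vector>

// Reads the whole input into memory; the strategy depends on st_mode.
std::vector<char> readAll(int fd)
{
    struct stat st{};
    if (::fstat(fd, &st) != 0)
        return {};

    if (S_ISREG(st.st_mode) && st.st_size > 0)
    {
        // Regular file: map it and copy out (a real reader would keep the mapping alive).
        void * addr = ::mmap(nullptr, static_cast<size_t>(st.st_size), PROT_READ, MAP_PRIVATE, fd, 0);
        if (addr != MAP_FAILED)
        {
            std::vector<char> data(static_cast<const char *>(addr),
                                   static_cast<const char *>(addr) + st.st_size);
            ::munmap(addr, static_cast<size_t>(st.st_size));
            return data;
        }
        // If mmap is refused, fall through to the read() path below.
    }

    // Non-regular file (or mmap failure): plain buffered read().
    std::vector<char> data;
    char buf[4096];
    ssize_t n;
    while ((n = ::read(fd, buf, sizeof(buf))) > 0)
        data.insert(data.end(), buf, buf + n);
    return data;
}

int main()
{
    int fd = ::open("/etc/hostname", O_RDONLY); // any readable file or pipe works here
    if (fd < 0)
        return 1;
    auto data = readAll(fd);
    std::printf("read %zu bytes\n", data.size());
    ::close(fd);
    return 0;
}
```
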
--- src/Disks/IO/createReadBufferFromFileBase.cpp | 91 ++++--------------- src/Disks/IO/createReadBufferFromFileBase.h | 18 ---- src/Storages/StorageFile.cpp | 84 ++++++++++++++--- 3 files changed, 86 insertions(+), 107 deletions(-) diff --git a/src/Disks/IO/createReadBufferFromFileBase.cpp b/src/Disks/IO/createReadBufferFromFileBase.cpp index 911c677300f..fca05787959 100644 --- a/src/Disks/IO/createReadBufferFromFileBase.cpp +++ b/src/Disks/IO/createReadBufferFromFileBase.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include @@ -24,37 +23,22 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int CANNOT_STAT; } -std::unique_ptr createReadBufferFromFileOrFileDescriptorBase( +std::unique_ptr createReadBufferFromFileBase( const std::string & filename, const ReadSettings & settings, std::optional read_hint, std::optional file_size, int flags, char * existing_memory, - size_t alignment, - bool read_from_fd, - int fd) + size_t alignment) { if (file_size.has_value() && !*file_size) return std::make_unique(); - struct stat file_stat{}; - if (read_from_fd) - { - if (0 != fstat(fd, &file_stat)) - throwFromErrno("Cannot stat file descriptor", ErrorCodes::CANNOT_STAT); - } - else - { - if (0 != stat(filename.c_str(), &file_stat)) - throwFromErrno("Cannot stat file " + filename, ErrorCodes::CANNOT_STAT); - } - - size_t estimated_size = file_stat.st_size; + size_t estimated_size = 0; if (read_hint.has_value()) estimated_size = *read_hint; else if (file_size.has_value()) @@ -63,24 +47,23 @@ std::unique_ptr createReadBufferFromFileOrFileDescriptor if (!existing_memory && settings.local_fs_method == LocalFSReadMethod::mmap && settings.mmap_threshold - && estimated_size >= settings.mmap_threshold - && S_ISREG(file_stat.st_mode)) + && settings.mmap_cache + && estimated_size >= settings.mmap_threshold) { try { - std::unique_ptr res; - - if (settings.mmap_cache) - res = std::make_unique(*settings.mmap_cache, filename, 0, estimated_size); + std::unique_ptr res; + if (file_size) + res = std::make_unique(*settings.mmap_cache, filename, 0, *file_size); else - res = std::make_unique(filename, 0, estimated_size); + res = std::make_unique(*settings.mmap_cache, filename, 0, *file_size); ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMap); return res; } catch (const ErrnoException &) { - /// Fallback if mmap is not supported. + /// Fallback if mmap is not supported (example: pipe). ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMapFailed); } } @@ -89,21 +72,13 @@ std::unique_ptr createReadBufferFromFileOrFileDescriptor { std::unique_ptr res; - /// Pread works only with regular files, so we explicitly fallback to read in other cases. 
- if (settings.local_fs_method == LocalFSReadMethod::read || !S_ISREG(file_stat.st_mode)) + if (settings.local_fs_method == LocalFSReadMethod::read) { - if (read_from_fd) - res = std::make_unique(fd, buffer_size, existing_memory, alignment, file_size); - else - res = std::make_unique(filename, buffer_size, actual_flags, existing_memory, alignment, file_size); + res = std::make_unique(filename, buffer_size, actual_flags, existing_memory, alignment, file_size); } else if (settings.local_fs_method == LocalFSReadMethod::pread || settings.local_fs_method == LocalFSReadMethod::mmap) { - if (read_from_fd) - res = std::make_unique(fd, buffer_size, existing_memory, alignment, file_size); - else - res = std::make_unique( - filename, buffer_size, actual_flags, existing_memory, alignment, file_size); + res = std::make_unique(filename, buffer_size, actual_flags, existing_memory, alignment, file_size); } else if (settings.local_fs_method == LocalFSReadMethod::pread_fake_async) { @@ -112,13 +87,8 @@ std::unique_ptr createReadBufferFromFileOrFileDescriptor throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); auto & reader = context->getThreadPoolReader(Context::FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER); - - if (read_from_fd) - res = std::make_unique( - reader, settings.priority, fd, buffer_size, existing_memory, alignment, file_size); - else - res = std::make_unique( - reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size); + res = std::make_unique( + reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size); } else if (settings.local_fs_method == LocalFSReadMethod::pread_threadpool) { @@ -127,13 +97,8 @@ std::unique_ptr createReadBufferFromFileOrFileDescriptor throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); auto & reader = context->getThreadPoolReader(Context::FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER); - - if (read_from_fd) - res = std::make_unique( - reader, settings.priority, fd, buffer_size, existing_memory, alignment, file_size); - else - res = std::make_unique( - reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size); + res = std::make_unique( + reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size); } else throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown read method"); @@ -209,26 +174,4 @@ std::unique_ptr createReadBufferFromFileOrFileDescriptor return create(buffer_size, flags); } -std::unique_ptr createReadBufferFromFileBase( - const std::string & filename, - const ReadSettings & settings, - std::optional read_hint, - std::optional file_size, - int flags_, - char * existing_memory, - size_t alignment) -{ - return createReadBufferFromFileOrFileDescriptorBase(filename, settings, read_hint, file_size, flags_, existing_memory, alignment); -} - -std::unique_ptr createReadBufferFromFileDescriptorBase( - int fd, - const ReadSettings & settings, - std::optional read_hint, - std::optional file_size, - char * existing_memory , - size_t alignment) -{ - return createReadBufferFromFileOrFileDescriptorBase({}, settings, read_hint, file_size, -1, existing_memory, alignment, true, fd); -} } diff --git a/src/Disks/IO/createReadBufferFromFileBase.h b/src/Disks/IO/createReadBufferFromFileBase.h index 542ea423462..c2e2040587b 100644 --- a/src/Disks/IO/createReadBufferFromFileBase.h +++ b/src/Disks/IO/createReadBufferFromFileBase.h @@ -14,17 +14,6 @@ 
namespace DB * @param read_hint - the number of bytes to read hint * @param file_size - size of file */ -std::unique_ptr createReadBufferFromFileOrFileDescriptorBase( - const std::string & filename, - const ReadSettings & settings, - std::optional read_hint = {}, - std::optional file_size = {}, - int flags_ = -1, - char * existing_memory = nullptr, - size_t alignment = 0, - bool read_from_fd = false, - int fd = 0); - std::unique_ptr createReadBufferFromFileBase( const std::string & filename, const ReadSettings & settings, @@ -34,11 +23,4 @@ std::unique_ptr createReadBufferFromFileBase( char * existing_memory = nullptr, size_t alignment = 0); -std::unique_ptr createReadBufferFromFileDescriptorBase( - int fd, - const ReadSettings & settings, - std::optional read_hint = {}, - std::optional file_size = {}, - char * existing_memory = nullptr, - size_t alignment = 0); } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 6154cdb73ca..1c61370a392 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -38,8 +38,7 @@ #include #include #include - -#include +#include #include #include @@ -51,6 +50,13 @@ #include +namespace ProfileEvents +{ + extern const Event CreatedReadBufferOrdinary; + extern const Event CreatedReadBufferMMap; + extern const Event CreatedReadBufferMMapFailed; +} + namespace fs = std::filesystem; namespace DB @@ -69,6 +75,7 @@ namespace ErrorCodes extern const int FILE_DOESNT_EXIST; extern const int TIMEOUT_EXCEEDED; extern const int INCOMPATIBLE_COLUMNS; + extern const int CANNOT_STAT; extern const int LOGICAL_ERROR; extern const int CANNOT_APPEND_TO_FILE; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; @@ -182,6 +189,7 @@ void checkCreationIsAllowed( std::unique_ptr createReadBuffer( const String & current_path, bool use_table_fd, + const String & storage_name, int table_fd, const String & compression_method, ContextPtr context) @@ -189,27 +197,73 @@ std::unique_ptr createReadBuffer( std::unique_ptr nested_buffer; CompressionMethod method; - auto read_method = context->getSettingsRef().storage_file_read_method.value; - auto read_settings = context->getReadSettings(); - read_settings.mmap_threshold = 1; - read_settings.mmap_cache = nullptr; /// Turn off mmap cache for Storage File - - if (auto opt_method = magic_enum::enum_cast(read_method)) - read_settings.local_fs_method = *opt_method; + auto read_method_string = context->getSettingsRef().storage_file_read_method.value; + LocalFSReadMethod read_method; + if (auto opt_method = magic_enum::enum_cast(read_method_string)) + read_method = *opt_method; else - throwFromErrno("Unknown read method " + read_method, ErrorCodes::UNKNOWN_READ_METHOD); + throwFromErrno("Unknown read method " + read_method_string, ErrorCodes::UNKNOWN_READ_METHOD); + + struct stat file_stat{}; if (use_table_fd) { - nested_buffer = createReadBufferFromFileDescriptorBase(table_fd, read_settings); + /// Check if file descriptor allows random reads (and reading it twice). + if (0 != fstat(table_fd, &file_stat)) + throwFromErrno("Cannot stat table file descriptor, inside " + storage_name, ErrorCodes::CANNOT_STAT); + method = chooseCompressionMethod("", compression_method); } else { - nested_buffer = createReadBufferFromFileBase(current_path, read_settings); + /// Check if file descriptor allows random reads (and reading it twice). 
+ if (0 != stat(current_path.c_str(), &file_stat)) + throwFromErrno("Cannot stat file " + current_path, ErrorCodes::CANNOT_STAT); + method = chooseCompressionMethod(current_path, compression_method); } + + bool mmap_failed = false; + if (S_ISREG(file_stat.st_mode) && read_method == LocalFSReadMethod::mmap) + { + try + { + if (use_table_fd) + nested_buffer = std::make_unique(table_fd, 0); + else + nested_buffer = std::make_unique(current_path, 0); + + ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMap); + } + catch (const ErrnoException &) + { + /// Fallback if mmap is not supported. + ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMapFailed); + mmap_failed = true; + } + } + + if (S_ISREG(file_stat.st_mode) && (read_method == LocalFSReadMethod::pread || mmap_failed)) + { + if (use_table_fd) + nested_buffer = std::make_unique(table_fd); + else + nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); + + ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); + } + else + { + if (use_table_fd) + nested_buffer = std::make_unique(table_fd); + else + nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); + + ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); + } + + /// For clickhouse-local and clickhouse-client add progress callback to display progress bar. if (context->getApplicationType() == Context::ApplicationType::LOCAL || context->getApplicationType() == Context::ApplicationType::CLIENT) @@ -277,7 +331,7 @@ ColumnsDescription StorageFile::getTableStructureFromFileDescriptor(ContextPtr c { /// We will use PeekableReadBuffer to create a checkpoint, so we need a place /// where we can store the original read buffer. - read_buffer_from_fd = createReadBuffer("", true, table_fd, compression_method, context); + read_buffer_from_fd = createReadBuffer("", true, getName(), table_fd, compression_method, context); auto read_buf = std::make_unique(*read_buffer_from_fd); read_buf->setCheckpoint(); return read_buf; @@ -326,7 +380,7 @@ ColumnsDescription StorageFile::getTableStructureFromFile( if (it == paths.end()) return nullptr; - return createReadBuffer(*it++, false, -1, compression_method, context); + return createReadBuffer(*it++, false, "File", -1, compression_method, context); }; ColumnsDescription columns; @@ -543,7 +597,7 @@ public: } if (!read_buf) - read_buf = createReadBuffer(current_path, storage->use_table_fd, storage->table_fd, storage->compression_method, context); + read_buf = createReadBuffer(current_path, storage->use_table_fd, storage->getName(), storage->table_fd, storage->compression_method, context); auto format = context->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size, storage->format_settings); From 6979dc9f2f9fa031108d8758bbcb85f1d5661013 Mon Sep 17 00:00:00 2001 From: pufit Date: Sun, 11 Dec 2022 17:36:30 -0500 Subject: [PATCH 053/262] dummy fix, additional test --- src/Storages/StorageFile.cpp | 2 +- .../0_stateless/02497_storage_file_reader_selection.reference | 2 ++ .../queries/0_stateless/02497_storage_file_reader_selection.sh | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 1c61370a392..c425cf4e686 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -253,7 +253,7 @@ std::unique_ptr createReadBuffer( ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); } - else + else if 
(mmap_failed) { if (use_table_fd) nested_buffer = std::make_unique(table_fd); diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.reference b/tests/queries/0_stateless/02497_storage_file_reader_selection.reference index 39e2f2f6f5e..8da37e4219c 100644 --- a/tests/queries/0_stateless/02497_storage_file_reader_selection.reference +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.reference @@ -2,7 +2,9 @@ key foo bar 1 +0 key foo bar 0 +1 diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh index 0e27146103e..4d9336bc1a0 100755 --- a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh @@ -15,6 +15,7 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" $CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferMMap']) FROM system.query_log WHERE query_id='$QUERY_ID'" +$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferOrdinary']) FROM system.query_log WHERE query_id='$QUERY_ID'" QUERY_ID=$RANDOM $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', 'TSV', 's String')" \ @@ -23,3 +24,4 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" $CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferMMap']) FROM system.query_log WHERE query_id='$QUERY_ID'" +$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferOrdinary']) FROM system.query_log WHERE query_id='$QUERY_ID'" From 5c52f26823c0cadcec874ba66458ce592a22bb90 Mon Sep 17 00:00:00 2001 From: pufit Date: Mon, 12 Dec 2022 00:39:08 -0500 Subject: [PATCH 054/262] ya fix. --- src/Storages/StorageFile.cpp | 105 +++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 48 deletions(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index c425cf4e686..1cee3e32e87 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -186,6 +186,62 @@ void checkCreationIsAllowed( } } +std::unique_ptr selectReadBuffer( + const String & current_path, + bool use_table_fd, + int table_fd, + const struct stat & file_stat, + ContextPtr context) +{ + auto read_method_string = context->getSettingsRef().storage_file_read_method.value; + LocalFSReadMethod read_method; + if (auto opt_method = magic_enum::enum_cast(read_method_string)) + read_method = *opt_method; + else + throwFromErrno("Unknown read method " + read_method_string, ErrorCodes::UNKNOWN_READ_METHOD); + + if (S_ISREG(file_stat.st_mode) && read_method == LocalFSReadMethod::mmap) + { + try + { + std::unique_ptr res; + if (use_table_fd) + res = std::make_unique(table_fd, 0); + else + res = std::make_unique(current_path, 0); + + ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMap); + return res; + } + catch (const ErrnoException &) + { + /// Fallback if mmap is not supported. 
+ ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMapFailed); + } + } + + std::unique_ptr res; + if (S_ISREG(file_stat.st_mode) && (read_method == LocalFSReadMethod::pread || read_method == LocalFSReadMethod::mmap)) + { + if (use_table_fd) + res = std::make_unique(table_fd); + else + res = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); + + ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); + } + else + { + if (use_table_fd) + res = std::make_unique(table_fd); + else + res = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); + + ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); + } + return res; +} + std::unique_ptr createReadBuffer( const String & current_path, bool use_table_fd, @@ -194,16 +250,8 @@ std::unique_ptr createReadBuffer( const String & compression_method, ContextPtr context) { - std::unique_ptr nested_buffer; CompressionMethod method; - auto read_method_string = context->getSettingsRef().storage_file_read_method.value; - LocalFSReadMethod read_method; - if (auto opt_method = magic_enum::enum_cast(read_method_string)) - read_method = *opt_method; - else - throwFromErrno("Unknown read method " + read_method_string, ErrorCodes::UNKNOWN_READ_METHOD); - struct stat file_stat{}; if (use_table_fd) @@ -223,46 +271,7 @@ std::unique_ptr createReadBuffer( method = chooseCompressionMethod(current_path, compression_method); } - - bool mmap_failed = false; - if (S_ISREG(file_stat.st_mode) && read_method == LocalFSReadMethod::mmap) - { - try - { - if (use_table_fd) - nested_buffer = std::make_unique(table_fd, 0); - else - nested_buffer = std::make_unique(current_path, 0); - - ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMap); - } - catch (const ErrnoException &) - { - /// Fallback if mmap is not supported. - ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMapFailed); - mmap_failed = true; - } - } - - if (S_ISREG(file_stat.st_mode) && (read_method == LocalFSReadMethod::pread || mmap_failed)) - { - if (use_table_fd) - nested_buffer = std::make_unique(table_fd); - else - nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); - - ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); - } - else if (mmap_failed) - { - if (use_table_fd) - nested_buffer = std::make_unique(table_fd); - else - nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); - - ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); - } - + std::unique_ptr nested_buffer = selectReadBuffer(current_path, use_table_fd, table_fd, file_stat, context); /// For clickhouse-local and clickhouse-client add progress callback to display progress bar. 
if (context->getApplicationType() == Context::ApplicationType::LOCAL From fef57b4e3b615c8ec80ab4c116a2afd9c8b70f91 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 14 Dec 2022 02:12:08 +0000 Subject: [PATCH 055/262] Better thread pool --- src/Core/Settings.h | 2 -- src/Storages/System/StorageSystemReplicas.cpp | 27 +++++++++++++++---- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4be91ce311f..91647a5f165 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -155,8 +155,6 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) \ M(Bool, allow_experimental_parallel_reading_from_replicas, false, "If true, ClickHouse will send a SELECT query to all replicas of a table. It will work for any kind on MergeTree table.", 0) \ \ - M(UInt64, system_replicas_fetch_threads, 16, "The maximum number of threads to fetch data for system.replicas table.", 0) \ - \ M(Bool, skip_unavailable_shards, false, "If true, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. Shard is marked as unavailable when none of the replicas can be reached.", 0) \ \ M(UInt64, parallel_distributed_insert_select, 0, "Process distributed INSERT SELECT query in the same cluster on local tables on every shard; if set to 1 - SELECT is executed on each shard; if set to 2 - SELECT and INSERT are executed on each shard", 0) \ diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index a6afb6eff0d..d36de9afe12 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB @@ -151,30 +152,46 @@ Pipe StorageSystemReplicas::read( MutableColumns res_columns = storage_snapshot->metadata->getSampleBlock().cloneEmptyColumns(); - auto settings = context->getSettingsRef(); - size_t thread_pool_size = settings.system_replicas_fetch_threads; + size_t tables_size = col_database->size(); + size_t thread_pool_size = std::min(tables_size, static_cast(getNumberOfPhysicalCPUCores())); + auto settings = context->getSettingsRef(); if (settings.max_threads != 0) thread_pool_size = std::min(thread_pool_size, static_cast(settings.max_threads)); ThreadPool thread_pool(thread_pool_size); + std::atomic error_flag = false; + Exception exception; - size_t tables_size = col_database->size(); std::vector statuses(tables_size); for (size_t i = 0; i < tables_size; ++i) { - thread_pool.scheduleOrThrowOnError([i, &statuses, &replicated_tables, &col_database, &col_table, &with_zk_fields] + thread_pool.scheduleOrThrowOnError([&, i=i] { - dynamic_cast( + try + { + dynamic_cast( *replicated_tables [(*col_database)[i].safeGet()] [(*col_table)[i].safeGet()]).getStatus(statuses[i], with_zk_fields); + } + catch (...) 
+ { + tryLogCurrentException("system.replicas", "Failed to fetch system.replicas data"); + + /// We capture one of the exceptions to be thrown later + if (!error_flag.exchange(true)) + exception = Exception(getCurrentExceptionCode(), getCurrentExceptionMessage(false)); + } }); } thread_pool.wait(); + if (error_flag) + throw exception; + for (const auto & status: statuses) { size_t col_num = 3; From b2f3b236d8e36fd3c2b2e050df69300c47fa67c4 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 14 Dec 2022 12:23:08 +0000 Subject: [PATCH 056/262] Slightly better thread pool --- src/Storages/System/StorageSystemReplicas.cpp | 31 +++++++------------ 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index d36de9afe12..51e60c779b8 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -153,45 +153,36 @@ Pipe StorageSystemReplicas::read( MutableColumns res_columns = storage_snapshot->metadata->getSampleBlock().cloneEmptyColumns(); size_t tables_size = col_database->size(); - size_t thread_pool_size = std::min(tables_size, static_cast(getNumberOfPhysicalCPUCores())); + std::vector statuses(tables_size); + size_t thread_pool_size = std::min(tables_size, static_cast(getNumberOfPhysicalCPUCores())); auto settings = context->getSettingsRef(); if (settings.max_threads != 0) thread_pool_size = std::min(thread_pool_size, static_cast(settings.max_threads)); ThreadPool thread_pool(thread_pool_size); - std::atomic error_flag = false; - Exception exception; - - std::vector statuses(tables_size); for (size_t i = 0; i < tables_size; ++i) { - thread_pool.scheduleOrThrowOnError([&, i=i] + try { - try + thread_pool.scheduleOrThrowOnError([&, i=i] { dynamic_cast( *replicated_tables [(*col_database)[i].safeGet()] [(*col_table)[i].safeGet()]).getStatus(statuses[i], with_zk_fields); - } - catch (...) - { - tryLogCurrentException("system.replicas", "Failed to fetch system.replicas data"); - - /// We capture one of the exceptions to be thrown later - if (!error_flag.exchange(true)) - exception = Exception(getCurrentExceptionCode(), getCurrentExceptionMessage(false)); - } - }); + }); + } + catch (...) + { + thread_pool.wait(); + throw; + } } thread_pool.wait(); - if (error_flag) - throw exception; - for (const auto & status: statuses) { size_t col_num = 3; From 2aff560218bd64ae7753c5bb8d776016740282a9 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 14 Dec 2022 12:50:12 +0000 Subject: [PATCH 057/262] Even better thread pool --- src/Storages/System/StorageSystemReplicas.cpp | 20 ++++++------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 51e60c779b8..7d0970620bc 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -164,21 +164,13 @@ Pipe StorageSystemReplicas::read( for (size_t i = 0; i < tables_size; ++i) { - try + thread_pool.scheduleOrThrowOnError([&, i=i] { - thread_pool.scheduleOrThrowOnError([&, i=i] - { - dynamic_cast( - *replicated_tables - [(*col_database)[i].safeGet()] - [(*col_table)[i].safeGet()]).getStatus(statuses[i], with_zk_fields); - }); - } - catch (...) 
- { - thread_pool.wait(); - throw; - } + dynamic_cast( + *replicated_tables + [(*col_database)[i].safeGet()] + [(*col_table)[i].safeGet()]).getStatus(statuses[i], with_zk_fields); + }); } thread_pool.wait(); From e2ced517dd8baf81f44cc5900d0443ac3aaf8790 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Wed, 14 Dec 2022 13:39:23 +0000 Subject: [PATCH 058/262] Add integration test test_kerberos_auth --- .../compose/docker_compose_kerberos_kdc.yml | 11 ++ tests/integration/helpers/cluster.py | 65 ++++++++- .../test_kerberos_auth/__init__.py | 0 .../clickhouse_path/EMPTY_DIR | 0 .../test_kerberos_auth/configs/kerberos.xml | 6 + .../test_kerberos_auth/configs/users.xml | 19 +++ .../kerberos_image_config.sh | 126 ++++++++++++++++++ .../test_kerberos_auth/secrets/krb.conf | 22 +++ tests/integration/test_kerberos_auth/test.py | 63 +++++++++ 9 files changed, 310 insertions(+), 2 deletions(-) create mode 100644 docker/test/integration/runner/compose/docker_compose_kerberos_kdc.yml create mode 100644 tests/integration/test_kerberos_auth/__init__.py create mode 100644 tests/integration/test_kerberos_auth/clickhouse_path/EMPTY_DIR create mode 100644 tests/integration/test_kerberos_auth/configs/kerberos.xml create mode 100644 tests/integration/test_kerberos_auth/configs/users.xml create mode 100644 tests/integration/test_kerberos_auth/kerberos_image_config.sh create mode 100644 tests/integration/test_kerberos_auth/secrets/krb.conf create mode 100644 tests/integration/test_kerberos_auth/test.py diff --git a/docker/test/integration/runner/compose/docker_compose_kerberos_kdc.yml b/docker/test/integration/runner/compose/docker_compose_kerberos_kdc.yml new file mode 100644 index 00000000000..e06b1c71bb7 --- /dev/null +++ b/docker/test/integration/runner/compose/docker_compose_kerberos_kdc.yml @@ -0,0 +1,11 @@ +version: '2.3' + +services: + kerberoskdc: + image: clickhouse/kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest} + hostname: kerberoskdc + volumes: + - ${KERBEROS_AUTH_DIR}/secrets:/tmp/keytab + - ${KERBEROS_AUTH_DIR}/../../kerberos_image_config.sh:/config.sh + - /dev/urandom:/dev/random + ports: [88, 749] diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 95d405266ae..7557e8412d2 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -388,6 +388,7 @@ class ClickHouseCluster: self.with_postgres_cluster = False self.with_kafka = False self.with_kerberized_kafka = False + self.with_kerberos_kdc = False self.with_rabbitmq = False self.with_nats = False self.with_odbc_drivers = False @@ -455,6 +456,12 @@ class ClickHouseCluster: self.kerberized_kafka_host ) + # available when with_kerberos_kdc == True + self.kerberos_kdc_host = "kerberoskdc" + self.keberos_docker_id = self.get_instance_docker_id( + self.kerberos_kdc_host + ) + # available when with_mongo == True self.mongo_host = "mongo1" self.mongo_port = get_free_port() @@ -1059,6 +1066,31 @@ class ClickHouseCluster: ] return self.base_kerberized_kafka_cmd + + def setup_kerberos_cmd( + self, instance, env_variables, docker_compose_yml_dir + ): + self.with_kerberos_kdc = True + env_variables["KERBEROS_AUTH_DIR"] = instance.path + "/" + env_variables["KERBEROS_KDC_HOST"] = self.kerberos_kdc_host + self.base_cmd.extend( + [ + "--file", + p.join(docker_compose_yml_dir, "docker_compose_kerberos_kdc.yml"), + ] + ) + self.base_kerberos_cmd = [ + "docker-compose", + "--env-file", + instance.env_file, + "--project-name", + self.project_name, + "--file", + 
p.join(docker_compose_yml_dir, "docker_compose_kerberos_kdc.yml"), + ] + return self.base_kerberos_cmd + + def setup_redis_cmd(self, instance, env_variables, docker_compose_yml_dir): self.with_redis = True env_variables["REDIS_HOST"] = self.redis_host @@ -1329,6 +1361,7 @@ class ClickHouseCluster: with_mysql_cluster=False, with_kafka=False, with_kerberized_kafka=False, + with_kerberos_kdc=False, with_rabbitmq=False, with_nats=False, clickhouse_path_dir=None, @@ -1420,6 +1453,7 @@ class ClickHouseCluster: with_mysql_cluster=with_mysql_cluster, with_kafka=with_kafka, with_kerberized_kafka=with_kerberized_kafka, + with_kerberos_kdc=with_kerberos_kdc, with_rabbitmq=with_rabbitmq, with_nats=with_nats, with_nginx=with_nginx, @@ -1554,6 +1588,13 @@ class ClickHouseCluster: ) ) + if with_kerberos_kdc and not self.with_kerberos_kdc: + cmds.append( + self.setup_kerberos_cmd( + instance, env_variables, docker_compose_yml_dir + ) + ) + if with_rabbitmq and not self.with_rabbitmq: cmds.append( self.setup_rabbitmq_cmd(instance, env_variables, docker_compose_yml_dir) @@ -2124,6 +2165,11 @@ class ClickHouseCluster: logging.debug("Waiting for Kafka to start up") time.sleep(1) + def wait_kerberos_kdc_is_available(self, kafka_docker_id): + logging.debug("Waiting for Kerberos KDC to start up") + # temp code: sleep 50 seconds + time.sleep(50) + def wait_hdfs_to_start(self, timeout=300, check_marker=False): start = time.time() while time.time() - start < timeout: @@ -2473,6 +2519,16 @@ class ClickHouseCluster: self.kerberized_kafka_docker_id, self.kerberized_kafka_port, 100 ) + if self.with_kerberos_kdc and self.base_kerberos_cmd: + logging.debug("Setup Kerberos KDC") + run_and_check( + self.base_kerberos_cmd + + common_opts + + ["--renew-anon-volumes"] + ) + self.up_called = True + self.wait_kerberos_kdc_is_available(self.keberos_docker_id) + if self.with_rabbitmq and self.base_rabbitmq_cmd: logging.debug("Setup RabbitMQ") os.makedirs(self.rabbitmq_logs_dir) @@ -2872,6 +2928,7 @@ class ClickHouseInstance: with_mysql_cluster, with_kafka, with_kerberized_kafka, + with_kerberos_kdc, with_rabbitmq, with_nats, with_nginx, @@ -2955,6 +3012,7 @@ class ClickHouseInstance: self.with_postgres_cluster = with_postgres_cluster self.with_kafka = with_kafka self.with_kerberized_kafka = with_kerberized_kafka + self.with_kerberos_kdc = with_kerberos_kdc self.with_rabbitmq = with_rabbitmq self.with_nats = with_nats self.with_nginx = with_nginx @@ -2988,7 +3046,7 @@ class ClickHouseInstance: else: self.odbc_ini_path = "" - if with_kerberized_kafka or with_kerberized_hdfs: + if with_kerberized_kafka or with_kerberized_hdfs or with_kerberos_kdc: self.keytab_path = ( "- " + os.path.dirname(self.docker_compose_path) @@ -3906,7 +3964,7 @@ class ClickHouseInstance: if self.with_zookeeper: shutil.copy(self.zookeeper_config_path, conf_d_dir) - if self.with_kerberized_kafka or self.with_kerberized_hdfs: + if self.with_kerberized_kafka or self.with_kerberized_hdfs or self.with_kerberos_kdc: shutil.copytree( self.kerberos_secrets_dir, p.abspath(p.join(self.path, "secrets")) ) @@ -3978,6 +4036,9 @@ class ClickHouseInstance: if self.with_kerberized_kafka: depends_on.append("kerberized_kafka1") + if self.with_kerberos_kdc: + depends_on.append("kerberoskdc") + if self.with_kerberized_hdfs: depends_on.append("kerberizedhdfs1") diff --git a/tests/integration/test_kerberos_auth/__init__.py b/tests/integration/test_kerberos_auth/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git 
a/tests/integration/test_kerberos_auth/clickhouse_path/EMPTY_DIR b/tests/integration/test_kerberos_auth/clickhouse_path/EMPTY_DIR new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_kerberos_auth/configs/kerberos.xml b/tests/integration/test_kerberos_auth/configs/kerberos.xml new file mode 100644 index 00000000000..5b6be45e78e --- /dev/null +++ b/tests/integration/test_kerberos_auth/configs/kerberos.xml @@ -0,0 +1,6 @@ + + + TEST.CLICKHOUSE.TECH + /tmp/keytab/clickhouse.keytab + + diff --git a/tests/integration/test_kerberos_auth/configs/users.xml b/tests/integration/test_kerberos_auth/configs/users.xml new file mode 100644 index 00000000000..33d658e6335 --- /dev/null +++ b/tests/integration/test_kerberos_auth/configs/users.xml @@ -0,0 +1,19 @@ + + + + + + + + + TEST.CLICKHOUSE.TECH + + 1 + + ::/0 + + default + default + + + diff --git a/tests/integration/test_kerberos_auth/kerberos_image_config.sh b/tests/integration/test_kerberos_auth/kerberos_image_config.sh new file mode 100644 index 00000000000..c99ce1def9f --- /dev/null +++ b/tests/integration/test_kerberos_auth/kerberos_image_config.sh @@ -0,0 +1,126 @@ +#!/bin/bash + + +set -x # trace + +: "${REALM:=TEST.CLICKHOUSE.TECH}" +: "${DOMAIN_REALM:=test.clickhouse.com}" +: "${KERB_MASTER_KEY:=masterkey}" +: "${KERB_ADMIN_USER:=admin}" +: "${KERB_ADMIN_PASS:=admin}" + +create_config() { + : "${KDC_ADDRESS:=$(hostname -f)}" + + cat>/etc/krb5.conf</var/kerberos/krb5kdc/kdc.conf< /var/kerberos/krb5kdc/kadm5.acl +} + +create_keytabs() { + rm /tmp/keytab/*.keytab + + kadmin.local -q "addprinc -randkey kuser@${REALM}" + kadmin.local -q "ktadd -norandkey -k /tmp/keytab/kuser.keytab kuser@${REALM}" + + kadmin.local -q "addprinc -randkey HTTP/instance@${REALM}" + kadmin.local -q "ktadd -norandkey -k /tmp/keytab/clickhouse.keytab HTTP/instance@${REALM}" + + chmod g+r /tmp/keytab/kuser.keytab + chmod g+r /tmp/keytab/clickhouse.keytab +} + +main() { + + if [ ! -f /kerberos_initialized ]; then + create_config + create_db + create_admin_user + start_kdc + + touch /kerberos_initialized + fi + + if [ ! 
-f /var/kerberos/krb5kdc/principal ]; then + while true; do sleep 1000; done + else + start_kdc + create_keytabs + tail -F /var/log/kerberos/krb5kdc.log + fi + +} + +[[ "$0" == "${BASH_SOURCE[0]}" ]] && main "$@" diff --git a/tests/integration/test_kerberos_auth/secrets/krb.conf b/tests/integration/test_kerberos_auth/secrets/krb.conf new file mode 100644 index 00000000000..87520f65b2d --- /dev/null +++ b/tests/integration/test_kerberos_auth/secrets/krb.conf @@ -0,0 +1,22 @@ +[logging] + default = FILE:/var/log/kerberos/krb5libs.log + kdc = FILE:/var/log/kerberos/krb5kdc.log + admin_server = FILE:/var/log/kerberos/kadmind.log + +[libdefaults] + default_realm = TEST.CLICKHOUSE.TECH + dns_lookup_realm = false + dns_lookup_kdc = false + ticket_lifetime = 15s + renew_lifetime = 15s + forwardable = true + +[realms] + TEST.CLICKHOUSE.TECH = { + kdc = kerberoskdc + admin_server = kerberoskdc + } + +[domain_realm] + .TEST.CLICKHOUSE.TECH = TEST.CLICKHOUSE.TECH + TEST.CLICKHOUSE.TECH = TEST.CLICKHOUSE.TECH diff --git a/tests/integration/test_kerberos_auth/test.py b/tests/integration/test_kerberos_auth/test.py new file mode 100644 index 00000000000..f4c30a69c9f --- /dev/null +++ b/tests/integration/test_kerberos_auth/test.py @@ -0,0 +1,63 @@ +import os.path as p +import random +import threading +import time +import pytest +import logging + +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV +from helpers.client import QueryRuntimeException + +import json +import subprocess + +import socket + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "instance", + main_configs=["configs/kerberos.xml"], + user_configs=["configs/users.xml"], + with_kerberos_kdc=True, + clickhouse_path_dir="clickhouse_path", +) + + +# Fixtures + + +@pytest.fixture(scope="module") +def kerberos_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def kerberos_setup_teardown(): + yield # run test + + +# Tests + + +def test_kerberos_auth_with_keytab(kerberos_cluster): + logging.debug("kerberos test") + instance.exec_in_container( + ["bash", "-c", "kinit -V -k -t /tmp/keytab/kuser.keytab kuser"] + ) + assert ( + instance.exec_in_container( + ["bash", "-c", "echo 'select currentUser()' | curl -vvv --negotiate -u : http://{}:8123/ --data-binary @-".format(instance.hostname)] + ) + == "kuser\n" + ) + + +if __name__ == "__main__": + cluster.start() + input("Cluster created, press any key to destroy...") + cluster.shutdown() From 5491aa9627de96c89147229031546edf9c2d7ce9 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 15 Dec 2022 11:50:23 +0000 Subject: [PATCH 059/262] Implement wait_kerberos_kdc_is_available --- tests/integration/helpers/cluster.py | 36 +++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 7557e8412d2..ba5d02095f3 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -206,6 +206,22 @@ def check_kafka_is_available(kafka_id, kafka_port): p.communicate() return p.returncode == 0 +def check_kerberos_kdc_is_available(kerberos_kdc_id): + p = subprocess.Popen( + ( + "docker", + "exec", + "-i", + kerberos_kdc_id, + "/etc/rc.d/init.d/krb5kdc", + "status", + ), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + p.communicate() + return p.returncode == 0 + def check_rabbitmq_is_available(rabbitmq_id): p = subprocess.Popen( @@ -458,7 +474,7 
@@ class ClickHouseCluster: # available when with_kerberos_kdc == True self.kerberos_kdc_host = "kerberoskdc" - self.keberos_docker_id = self.get_instance_docker_id( + self.keberos_kdc_docker_id = self.get_instance_docker_id( self.kerberos_kdc_host ) @@ -2165,10 +2181,18 @@ class ClickHouseCluster: logging.debug("Waiting for Kafka to start up") time.sleep(1) - def wait_kerberos_kdc_is_available(self, kafka_docker_id): - logging.debug("Waiting for Kerberos KDC to start up") - # temp code: sleep 50 seconds - time.sleep(50) + def wait_kerberos_kdc_is_available(self, kerberos_kdc_docker_id, max_retries=50): + retries = 0 + while True: + if check_kerberos_kdc_is_available(kerberos_kdc_docker_id): + break + else: + retries += 1 + if retries > max_retries: + raise Exception("Kerberos KDC is not available") + logging.debug("Waiting for Kerberos KDC to start up") + time.sleep(1) + def wait_hdfs_to_start(self, timeout=300, check_marker=False): start = time.time() @@ -2527,7 +2551,7 @@ class ClickHouseCluster: + ["--renew-anon-volumes"] ) self.up_called = True - self.wait_kerberos_kdc_is_available(self.keberos_docker_id) + self.wait_kerberos_kdc_is_available(self.keberos_kdc_docker_id) if self.with_rabbitmq and self.base_rabbitmq_cmd: logging.debug("Setup RabbitMQ") From b7df68476246dfd83ca27aa591b673fe2e59b36c Mon Sep 17 00:00:00 2001 From: pufit Date: Thu, 15 Dec 2022 18:08:19 -0500 Subject: [PATCH 060/262] Enum settings, fix else branch. --- src/Core/Settings.h | 2 +- src/Core/SettingsEnums.cpp | 5 +++++ src/Core/SettingsEnums.h | 3 +++ src/Disks/IO/createReadBufferFromFileBase.cpp | 2 +- src/Storages/StorageFile.cpp | 7 +------ 5 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index a9f4280a0d1..f0cb1be73eb 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -591,7 +591,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. 
Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ - M(String, storage_file_read_method, "mmap", "Method of reading data from storage file, one of: read, pread, mmap, pread_threadpool.", 0) \ + M(StorageFileReadMethod, storage_file_read_method, "mmap", "Method of reading data from storage file, one of: read, pread, mmap", 0) \ M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, pread_threadpool.", 0) \ M(String, remote_filesystem_read_method, "threadpool", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \ M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 632587106a1..26fcff1d410 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -162,4 +162,9 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS, IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS, {{"clickhouse", Dialect::clickhouse}, {"kusto", Dialect::kusto}}) + +IMPLEMENT_SETTING_ENUM(StorageFileReadMethod, ErrorCodes::BAD_ARGUMENTS, + {{"mmap", LocalFSReadMethod::mmap}, + {"pread", LocalFSReadMethod::pread}, + {"read", LocalFSReadMethod::read}}) } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 97c4275c4d2..236bc7e9b10 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -191,4 +192,6 @@ enum class Dialect }; DECLARE_SETTING_ENUM(Dialect) + +DECLARE_SETTING_ENUM_WITH_RENAME(StorageFileReadMethod, LocalFSReadMethod) } diff --git a/src/Disks/IO/createReadBufferFromFileBase.cpp b/src/Disks/IO/createReadBufferFromFileBase.cpp index fca05787959..96947ddb242 100644 --- a/src/Disks/IO/createReadBufferFromFileBase.cpp +++ b/src/Disks/IO/createReadBufferFromFileBase.cpp @@ -56,7 +56,7 @@ std::unique_ptr createReadBufferFromFileBase( if (file_size) res = std::make_unique(*settings.mmap_cache, filename, 0, *file_size); else - res = std::make_unique(*settings.mmap_cache, filename, 0, *file_size); + res = std::make_unique(*settings.mmap_cache, filename, 0); ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMap); return res; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 1cee3e32e87..e4eb912bffe 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -193,12 +193,7 @@ std::unique_ptr selectReadBuffer( const struct stat & file_stat, ContextPtr context) { - auto read_method_string = context->getSettingsRef().storage_file_read_method.value; - LocalFSReadMethod read_method; - if (auto opt_method = magic_enum::enum_cast(read_method_string)) - read_method = *opt_method; - else - throwFromErrno("Unknown read method " + read_method_string, ErrorCodes::UNKNOWN_READ_METHOD); + auto read_method = context->getSettingsRef().storage_file_read_method; if (S_ISREG(file_stat.st_mode) && read_method == LocalFSReadMethod::mmap) { From 775c700c2428519fb1ce7f151d5b83229494ebfe Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 16 Dec 2022 08:20:01 +0000 Subject: [PATCH 061/262] Add second test for the case when there is no keytab --- .../configs/kerberos_with_keytab.xml | 6 +++++ ...rberos.xml => 
kerberos_without_keytab.xml} | 1 - .../kerberos_image_config.sh | 10 ++++--- tests/integration/test_kerberos_auth/test.py | 27 ++++++++++++++----- 4 files changed, 33 insertions(+), 11 deletions(-) create mode 100644 tests/integration/test_kerberos_auth/configs/kerberos_with_keytab.xml rename tests/integration/test_kerberos_auth/configs/{kerberos.xml => kerberos_without_keytab.xml} (64%) diff --git a/tests/integration/test_kerberos_auth/configs/kerberos_with_keytab.xml b/tests/integration/test_kerberos_auth/configs/kerberos_with_keytab.xml new file mode 100644 index 00000000000..7f4e17438a6 --- /dev/null +++ b/tests/integration/test_kerberos_auth/configs/kerberos_with_keytab.xml @@ -0,0 +1,6 @@ + + + TEST.CLICKHOUSE.TECH + /tmp/keytab/clickhouse1.keytab + + diff --git a/tests/integration/test_kerberos_auth/configs/kerberos.xml b/tests/integration/test_kerberos_auth/configs/kerberos_without_keytab.xml similarity index 64% rename from tests/integration/test_kerberos_auth/configs/kerberos.xml rename to tests/integration/test_kerberos_auth/configs/kerberos_without_keytab.xml index 5b6be45e78e..f01ceea1eb1 100644 --- a/tests/integration/test_kerberos_auth/configs/kerberos.xml +++ b/tests/integration/test_kerberos_auth/configs/kerberos_without_keytab.xml @@ -1,6 +1,5 @@ TEST.CLICKHOUSE.TECH - /tmp/keytab/clickhouse.keytab diff --git a/tests/integration/test_kerberos_auth/kerberos_image_config.sh b/tests/integration/test_kerberos_auth/kerberos_image_config.sh index c99ce1def9f..90bbc49f2bf 100644 --- a/tests/integration/test_kerberos_auth/kerberos_image_config.sh +++ b/tests/integration/test_kerberos_auth/kerberos_image_config.sh @@ -95,11 +95,15 @@ create_keytabs() { kadmin.local -q "addprinc -randkey kuser@${REALM}" kadmin.local -q "ktadd -norandkey -k /tmp/keytab/kuser.keytab kuser@${REALM}" - kadmin.local -q "addprinc -randkey HTTP/instance@${REALM}" - kadmin.local -q "ktadd -norandkey -k /tmp/keytab/clickhouse.keytab HTTP/instance@${REALM}" + kadmin.local -q "addprinc -randkey HTTP/instance1@${REALM}" + kadmin.local -q "ktadd -norandkey -k /tmp/keytab/clickhouse1.keytab HTTP/instance1@${REALM}" + + kadmin.local -q "addprinc -randkey HTTP/instance2@${REALM}" + kadmin.local -q "ktadd -norandkey -k /tmp/keytab/clickhouse2.keytab HTTP/instance2@${REALM}" chmod g+r /tmp/keytab/kuser.keytab - chmod g+r /tmp/keytab/clickhouse.keytab + chmod g+r /tmp/keytab/clickhouse1.keytab + chmod g+r /tmp/keytab/clickhouse2.keytab } main() { diff --git a/tests/integration/test_kerberos_auth/test.py b/tests/integration/test_kerberos_auth/test.py index f4c30a69c9f..cdc4bdf78e5 100644 --- a/tests/integration/test_kerberos_auth/test.py +++ b/tests/integration/test_kerberos_auth/test.py @@ -15,9 +15,16 @@ import subprocess import socket cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance( - "instance", - main_configs=["configs/kerberos.xml"], +instance1 = cluster.add_instance( + "instance1", + main_configs=["configs/kerberos_with_keytab.xml"], + user_configs=["configs/users.xml"], + with_kerberos_kdc=True, + clickhouse_path_dir="clickhouse_path", +) +instance2 = cluster.add_instance( + "instance2", + main_configs=["configs/kerberos_without_keytab.xml"], user_configs=["configs/users.xml"], with_kerberos_kdc=True, clickhouse_path_dir="clickhouse_path", @@ -43,9 +50,7 @@ def kerberos_setup_teardown(): # Tests - -def test_kerberos_auth_with_keytab(kerberos_cluster): - logging.debug("kerberos test") +def make_auth(instance, user): instance.exec_in_container( ["bash", "-c", "kinit -V -k -t 
/tmp/keytab/kuser.keytab kuser"] ) @@ -53,10 +58,18 @@ def test_kerberos_auth_with_keytab(kerberos_cluster): instance.exec_in_container( ["bash", "-c", "echo 'select currentUser()' | curl -vvv --negotiate -u : http://{}:8123/ --data-binary @-".format(instance.hostname)] ) - == "kuser\n" + == user + "\n" ) + +def test_kerberos_auth_with_keytab(kerberos_cluster): + make_auth(instance1, "kuser") + +def test_kerberos_auth_without_keytab(kerberos_cluster): + make_auth(instance2, "default") + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") From dd8df3347b46c84ebd5a4b03e66c58e4dba7eeff Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 16 Dec 2022 11:11:19 +0100 Subject: [PATCH 062/262] Addressed style review comments and removed bool variables assignment from Clone of ASTs - 40907 Parameterized views as table functions --- src/Interpreters/ExpressionAnalyzer.cpp | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Interpreters/TranslateQualifiedNamesVisitor.cpp | 4 ++-- src/Interpreters/TranslateQualifiedNamesVisitor.h | 2 +- src/Interpreters/TreeRewriter.cpp | 2 +- src/Interpreters/TreeRewriter.h | 1 - src/Parsers/ASTFunction.cpp | 2 -- src/Parsers/ASTSelectQuery.cpp | 2 -- src/Parsers/ASTSelectWithUnionQuery.cpp | 2 -- 9 files changed, 6 insertions(+), 13 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 9a1bf92a28d..f94d7c6682d 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1952,7 +1952,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( auto & column_elem = before_where_sample.getByName(query.where()->getColumnName()); - /// If the filter column is a constant and not a query parameter, record it. + /// If the filter column is a constant, record it. if (column_elem.column) where_constant_filter_description = ConstantFilterDescription(*column_elem.column); } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 00b66d4402a..104bfd3d037 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -510,7 +510,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( { query_info.is_parameterized_view = view->isParameterizedView(); /// We need to fetch the parameters set for SELECT parameterized view before the query is replaced. 
- /// ad after query is replaced, we use these parameters to substitute in the parameterized view query + /// and after query is replaced, we use these parameters to substitute in the parameterized view query if (query_info.is_parameterized_view) { parameter_values = analyzeFunctionParamValues(query_ptr); diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 20e5b034619..1596cb90a14 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -252,12 +252,12 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt { std::string column_name = column.name; std::string::size_type pos = 0u; - for (auto parameter : data.parameter_values) + for (const auto & parameter : data.parameter_values) { if ((pos = column_name.find(parameter.first)) != std::string::npos) { String parameter_name("_CAST(" + parameter.second + ", '" + column.type->getName() + "')"); - column_name.replace(pos,parameter.first.size(),parameter_name); + column_name.replace(pos, parameter.first.size(), parameter_name); break; } } diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.h b/src/Interpreters/TranslateQualifiedNamesVisitor.h index 777c6241d19..7fc95a19d1b 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -30,7 +30,7 @@ public: bool has_columns; NameToNameMap parameter_values; - Data(const NameSet & source_columns_, const TablesWithColumns & tables_, bool has_columns_ = true, NameToNameMap parameter_values_ = {}) + Data(const NameSet & source_columns_, const TablesWithColumns & tables_, bool has_columns_ = true, const NameToNameMap & parameter_values_ = {}) : source_columns(source_columns_) , tables(tables_) , has_columns(has_columns_) diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 611e53ddaf2..13593f9711b 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -362,7 +362,7 @@ using ReplacePositionalArgumentsVisitor = InDepthNodeVisitorchildren.push_back(res->window_definition); } - res->prefer_subquery_to_function_formatting = prefer_subquery_to_function_formatting; - return res; } diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 5bbd6161052..d2e55efadb1 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -37,8 +37,6 @@ ASTPtr ASTSelectQuery::clone() const for (const auto & child : children) res->children.push_back(child->clone()); - res->has_query_parameters = has_query_parameters; - return res; } diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index b0030294727..9550752b1f3 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -23,8 +23,6 @@ ASTPtr ASTSelectWithUnionQuery::clone() const res->list_of_modes = list_of_modes; res->set_of_modes = set_of_modes; - res->has_query_parameters = has_query_parameters; - cloneOutputOptions(*res); return res; } From 22c2956a067fe7013f6c499d7fde943298be2b79 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 16 Dec 2022 12:19:11 +0100 Subject: [PATCH 063/262] Updated comment on fetching parameter values from query and fixed style comment- 40907 Parameterized views as table functions --- src/Interpreters/InterpreterSelectQuery.cpp | 3 +++ src/Interpreters/TreeRewriter.cpp | 2 +- 2 files changed, 4 
insertions(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 104bfd3d037..9714596dce8 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -510,6 +510,9 @@ InterpreterSelectQuery::InterpreterSelectQuery( { query_info.is_parameterized_view = view->isParameterizedView(); /// We need to fetch the parameters set for SELECT parameterized view before the query is replaced. + /// replaceWithSubquery replaces the function child and adds the subquery in its place. + /// the parameters are children of function child, if function is replaced the parameters are also gone from tree + /// So we need to get the parameters before they are removed from the tree /// and after query is replaced, we use these parameters to substitute in the parameterized view query if (query_info.is_parameterized_view) { diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 13593f9711b..ab3189f39d2 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1409,7 +1409,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( if ((pos = column_name.find(parameter.first)) != std::string::npos) { String parameter_name("_CAST(" + parameter.second + ", '" + column.type->getName() + "')"); - column.name.replace(pos,parameter.first.size(),parameter_name); + column.name.replace(pos, parameter.first.size(), parameter_name); break; } } From 2d942af7b4a0fd644b727c27adb9bc90514c02b6 Mon Sep 17 00:00:00 2001 From: pufit Date: Fri, 16 Dec 2022 11:55:50 -0500 Subject: [PATCH 064/262] Fix codestyle, fix test. --- src/Core/Settings.h | 2 +- src/Storages/StorageFile.cpp | 1 - tests/queries/0_stateless/02473_infile_progress.py | 6 +++++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index e28d01d886e..4c14fea742b 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -592,7 +592,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. 
Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ - M(StorageFileReadMethod, storage_file_read_method, "mmap", "Method of reading data from storage file, one of: read, pread, mmap", 0) \ + M(StorageFileReadMethod, storage_file_read_method, "mmap", "Method of reading data from storage file, one of: read, pread, mmap.", 0) \ M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, pread_threadpool.", 0) \ M(String, remote_filesystem_read_method, "threadpool", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \ M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \ diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index e4eb912bffe..0140a6110f9 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -79,7 +79,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int CANNOT_APPEND_TO_FILE; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int UNKNOWN_READ_METHOD; } namespace diff --git a/tests/queries/0_stateless/02473_infile_progress.py b/tests/queries/0_stateless/02473_infile_progress.py index 6c1c32822d3..053175031d2 100755 --- a/tests/queries/0_stateless/02473_infile_progress.py +++ b/tests/queries/0_stateless/02473_infile_progress.py @@ -14,7 +14,11 @@ log = None # uncomment the line below for debugging # log=sys.stdout -with client(name="client>", log=log) as client1: +with client( + name="client>", + log=log, + command=os.environ.get("CLICKHOUSE_BINARY", "clickhouse") + " client --storage_file_read_method=pread" +) as client1: filename = os.environ["CLICKHOUSE_TMP"] + "/infile_progress.tsv" client1.expect(prompt) From 34e669b8b5505d9858f093c210438072b7486ad9 Mon Sep 17 00:00:00 2001 From: pufit Date: Fri, 16 Dec 2022 12:03:55 -0500 Subject: [PATCH 065/262] Fix test codestyle. 
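
For context: the previous commit pins the reader in this test by passing --storage_file_read_method=pread to the client. The same setting should also be controllable per session, using the values named in its description (read, pread, mmap); a hypothetical session along these lines, reusing the data file from the 02497 test above:

``` sql
-- Illustrative only: force the ordinary pread reader instead of mmap
-- for reads done through the file() table function / File engine.
SET storage_file_read_method = 'pread';
SELECT count() FROM file('test_02497_storage_file_reader.data', 'TSV', 's String');
```
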
--- tests/queries/0_stateless/02473_infile_progress.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02473_infile_progress.py b/tests/queries/0_stateless/02473_infile_progress.py index 053175031d2..28ad2c8413a 100755 --- a/tests/queries/0_stateless/02473_infile_progress.py +++ b/tests/queries/0_stateless/02473_infile_progress.py @@ -15,9 +15,10 @@ log = None # log=sys.stdout with client( - name="client>", - log=log, - command=os.environ.get("CLICKHOUSE_BINARY", "clickhouse") + " client --storage_file_read_method=pread" + name="client>", + log=log, + command=os.environ.get("CLICKHOUSE_BINARY", "clickhouse") + + " client --storage_file_read_method=pread" ) as client1: filename = os.environ["CLICKHOUSE_TMP"] + "/infile_progress.tsv" From 64c0c6ae994e1ecc40614500263a773acc8fdf02 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Mon, 19 Dec 2022 11:28:49 +0000 Subject: [PATCH 066/262] Refactor test_kerberos_auth test --- .../compose/docker_compose_kerberos_kdc.yml | 4 +- tests/integration/helpers/cluster.py | 57 +++++++++---------- tests/integration/test_kerberos_auth/test.py | 48 ++++++++++------ 3 files changed, 61 insertions(+), 48 deletions(-) diff --git a/docker/test/integration/runner/compose/docker_compose_kerberos_kdc.yml b/docker/test/integration/runner/compose/docker_compose_kerberos_kdc.yml index e06b1c71bb7..3ce9a6df1fb 100644 --- a/docker/test/integration/runner/compose/docker_compose_kerberos_kdc.yml +++ b/docker/test/integration/runner/compose/docker_compose_kerberos_kdc.yml @@ -5,7 +5,7 @@ services: image: clickhouse/kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest} hostname: kerberoskdc volumes: - - ${KERBEROS_AUTH_DIR}/secrets:/tmp/keytab - - ${KERBEROS_AUTH_DIR}/../../kerberos_image_config.sh:/config.sh + - ${KERBEROS_KDC_DIR}/secrets:/tmp/keytab + - ${KERBEROS_KDC_DIR}/../kerberos_image_config.sh:/config.sh - /dev/urandom:/dev/random ports: [88, 749] diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index ba5d02095f3..5c93f64e57a 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -206,6 +206,7 @@ def check_kafka_is_available(kafka_id, kafka_port): p.communicate() return p.returncode == 0 + def check_kerberos_kdc_is_available(kerberos_kdc_id): p = subprocess.Popen( ( @@ -387,6 +388,7 @@ class ClickHouseCluster: self.base_mysql_cmd = [] self.base_kafka_cmd = [] self.base_kerberized_kafka_cmd = [] + self.base_kerberos_kdc_cmd = [] self.base_rabbitmq_cmd = [] self.base_nats_cmd = [] self.base_cassandra_cmd = [] @@ -474,9 +476,7 @@ class ClickHouseCluster: # available when with_kerberos_kdc == True self.kerberos_kdc_host = "kerberoskdc" - self.keberos_kdc_docker_id = self.get_instance_docker_id( - self.kerberos_kdc_host - ) + self.keberos_kdc_docker_id = self.get_instance_docker_id(self.kerberos_kdc_host) # available when with_mongo == True self.mongo_host = "mongo1" @@ -1082,12 +1082,9 @@ class ClickHouseCluster: ] return self.base_kerberized_kafka_cmd - - def setup_kerberos_cmd( - self, instance, env_variables, docker_compose_yml_dir - ): + def setup_kerberos_cmd(self, instance, env_variables, docker_compose_yml_dir): self.with_kerberos_kdc = True - env_variables["KERBEROS_AUTH_DIR"] = instance.path + "/" + env_variables["KERBEROS_KDC_DIR"] = self.instances_dir + "/" env_variables["KERBEROS_KDC_HOST"] = self.kerberos_kdc_host self.base_cmd.extend( [ @@ -1095,7 +1092,7 @@ class ClickHouseCluster: p.join(docker_compose_yml_dir, 
"docker_compose_kerberos_kdc.yml"), ] ) - self.base_kerberos_cmd = [ + self.base_kerberos_kdc_cmd = [ "docker-compose", "--env-file", instance.env_file, @@ -1104,8 +1101,7 @@ class ClickHouseCluster: "--file", p.join(docker_compose_yml_dir, "docker_compose_kerberos_kdc.yml"), ] - return self.base_kerberos_cmd - + return self.base_kerberos_kdc_cmd def setup_redis_cmd(self, instance, env_variables, docker_compose_yml_dir): self.with_redis = True @@ -1606,9 +1602,7 @@ class ClickHouseCluster: if with_kerberos_kdc and not self.with_kerberos_kdc: cmds.append( - self.setup_kerberos_cmd( - instance, env_variables, docker_compose_yml_dir - ) + self.setup_kerberos_cmd(instance, env_variables, docker_compose_yml_dir) ) if with_rabbitmq and not self.with_rabbitmq: @@ -2193,7 +2187,6 @@ class ClickHouseCluster: logging.debug("Waiting for Kerberos KDC to start up") time.sleep(1) - def wait_hdfs_to_start(self, timeout=300, check_marker=False): start = time.time() while time.time() - start < timeout: @@ -2543,12 +2536,10 @@ class ClickHouseCluster: self.kerberized_kafka_docker_id, self.kerberized_kafka_port, 100 ) - if self.with_kerberos_kdc and self.base_kerberos_cmd: + if self.with_kerberos_kdc and self.base_kerberos_kdc_cmd: logging.debug("Setup Kerberos KDC") run_and_check( - self.base_kerberos_cmd - + common_opts - + ["--renew-anon-volumes"] + self.base_kerberos_kdc_cmd + common_opts + ["--renew-anon-volumes"] ) self.up_called = True self.wait_kerberos_kdc_is_available(self.keberos_kdc_docker_id) @@ -3071,15 +3062,13 @@ class ClickHouseInstance: self.odbc_ini_path = "" if with_kerberized_kafka or with_kerberized_hdfs or with_kerberos_kdc: - self.keytab_path = ( - "- " - + os.path.dirname(self.docker_compose_path) - + "/secrets:/tmp/keytab" - ) + if with_kerberos_kdc: + base_secrets_dir = self.cluster.instances_dir + else: + base_secrets_dir = os.path.dirname(self.docker_compose_path) + self.keytab_path = "- " + base_secrets_dir + "/secrets:/tmp/keytab" self.krb5_conf = ( - "- " - + os.path.dirname(self.docker_compose_path) - + "/secrets/krb.conf:/etc/krb5.conf:ro" + "- " + base_secrets_dir + "/secrets/krb.conf:/etc/krb5.conf:ro" ) else: self.keytab_path = "" @@ -3988,9 +3977,19 @@ class ClickHouseInstance: if self.with_zookeeper: shutil.copy(self.zookeeper_config_path, conf_d_dir) - if self.with_kerberized_kafka or self.with_kerberized_hdfs or self.with_kerberos_kdc: + if ( + self.with_kerberized_kafka + or self.with_kerberized_hdfs + or self.with_kerberos_kdc + ): + if self.with_kerberos_kdc: + base_secrets_dir = self.cluster.instances_dir + else: + base_secrets_dir = self.path shutil.copytree( - self.kerberos_secrets_dir, p.abspath(p.join(self.path, "secrets")) + self.kerberos_secrets_dir, + p.abspath(p.join(base_secrets_dir, "secrets")), + dirs_exist_ok=True, ) if self.with_coredns: diff --git a/tests/integration/test_kerberos_auth/test.py b/tests/integration/test_kerberos_auth/test.py index cdc4bdf78e5..3ab9bc0ef12 100644 --- a/tests/integration/test_kerberos_auth/test.py +++ b/tests/integration/test_kerberos_auth/test.py @@ -43,31 +43,45 @@ def kerberos_cluster(): cluster.shutdown() -@pytest.fixture(autouse=True) -def kerberos_setup_teardown(): - yield # run test - - # Tests -def make_auth(instance, user): - instance.exec_in_container( - ["bash", "-c", "kinit -V -k -t /tmp/keytab/kuser.keytab kuser"] - ) - assert ( - instance.exec_in_container( - ["bash", "-c", "echo 'select currentUser()' | curl -vvv --negotiate -u : http://{}:8123/ --data-binary @-".format(instance.hostname)] - ) - == user + 
"\n" - ) +def exec_kinit(instance): + instance.exec_in_container( + ["bash", "-c", "kinit -k -t /tmp/keytab/kuser.keytab kuser"] + ) def test_kerberos_auth_with_keytab(kerberos_cluster): - make_auth(instance1, "kuser") + exec_kinit(instance1) + assert ( + instance1.exec_in_container( + [ + "bash", + "-c", + "echo 'select currentUser()' | curl -vvv --negotiate -u : http://{}:8123/ --data-binary @-".format( + instance1.hostname + ), + ] + ) + == "kuser\n" + ) + def test_kerberos_auth_without_keytab(kerberos_cluster): - make_auth(instance2, "default") + exec_kinit(instance2) + assert ( + "DB::Exception: : Authentication failed: password is incorrect or there is no user with such name." + in instance2.exec_in_container( + [ + "bash", + "-c", + "echo 'select currentUser()' | curl -vvv --negotiate -u : http://{}:8123/ --data-binary @-".format( + instance2.hostname + ), + ] + ) + ) if __name__ == "__main__": From c26c257c57b50cc9f48e9a0b318adbb24990d920 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Mon, 19 Dec 2022 11:39:30 +0000 Subject: [PATCH 067/262] Refactor test.py --- tests/integration/test_kerberos_auth/test.py | 36 +++++++------------- 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/tests/integration/test_kerberos_auth/test.py b/tests/integration/test_kerberos_auth/test.py index 3ab9bc0ef12..64df0278c3e 100644 --- a/tests/integration/test_kerberos_auth/test.py +++ b/tests/integration/test_kerberos_auth/test.py @@ -46,41 +46,29 @@ def kerberos_cluster(): # Tests -def exec_kinit(instance): +def make_auth(instance): instance.exec_in_container( ["bash", "-c", "kinit -k -t /tmp/keytab/kuser.keytab kuser"] ) + return instance.exec_in_container( + [ + "bash", + "-c", + "echo 'select currentUser()' | curl -vvv --negotiate -u : http://{}:8123/ --data-binary @-".format( + instance.hostname + ), + ] + ) def test_kerberos_auth_with_keytab(kerberos_cluster): - exec_kinit(instance1) - assert ( - instance1.exec_in_container( - [ - "bash", - "-c", - "echo 'select currentUser()' | curl -vvv --negotiate -u : http://{}:8123/ --data-binary @-".format( - instance1.hostname - ), - ] - ) - == "kuser\n" - ) + assert make_auth(instance1) == "kuser\n" def test_kerberos_auth_without_keytab(kerberos_cluster): - exec_kinit(instance2) assert ( "DB::Exception: : Authentication failed: password is incorrect or there is no user with such name." 
- in instance2.exec_in_container( - [ - "bash", - "-c", - "echo 'select currentUser()' | curl -vvv --negotiate -u : http://{}:8123/ --data-binary @-".format( - instance2.hostname - ), - ] - ) + in make_auth(instance2) ) From 8a692432de0e6c1712818f8b073fbe0297e281e8 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Mon, 19 Dec 2022 11:48:10 +0000 Subject: [PATCH 068/262] Remove unneeded headers from test.py --- .../test_kerberos_auth/clickhouse_path/EMPTY_DIR | 0 tests/integration/test_kerberos_auth/test.py | 15 --------------- 2 files changed, 15 deletions(-) delete mode 100644 tests/integration/test_kerberos_auth/clickhouse_path/EMPTY_DIR diff --git a/tests/integration/test_kerberos_auth/clickhouse_path/EMPTY_DIR b/tests/integration/test_kerberos_auth/clickhouse_path/EMPTY_DIR deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/test_kerberos_auth/test.py b/tests/integration/test_kerberos_auth/test.py index 64df0278c3e..df6233e0cbb 100644 --- a/tests/integration/test_kerberos_auth/test.py +++ b/tests/integration/test_kerberos_auth/test.py @@ -1,18 +1,5 @@ -import os.path as p -import random -import threading -import time import pytest -import logging - from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV -from helpers.client import QueryRuntimeException - -import json -import subprocess - -import socket cluster = ClickHouseCluster(__file__) instance1 = cluster.add_instance( @@ -20,14 +7,12 @@ instance1 = cluster.add_instance( main_configs=["configs/kerberos_with_keytab.xml"], user_configs=["configs/users.xml"], with_kerberos_kdc=True, - clickhouse_path_dir="clickhouse_path", ) instance2 = cluster.add_instance( "instance2", main_configs=["configs/kerberos_without_keytab.xml"], user_configs=["configs/users.xml"], with_kerberos_kdc=True, - clickhouse_path_dir="clickhouse_path", ) From 5001cf9fa21121f6a90201b780b9676ca1fe5f95 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 19 Dec 2022 15:05:38 +0100 Subject: [PATCH 069/262] Updated to get column data type based not function , not the query parameter type , added test for the same - 40907 Parameterized views as table functions --- src/Interpreters/InterpreterSelectQuery.cpp | 5 +++- .../TranslateQualifiedNamesVisitor.cpp | 10 +++++-- .../TranslateQualifiedNamesVisitor.h | 4 ++- src/Interpreters/TreeRewriter.cpp | 20 ++++++++----- src/Interpreters/TreeRewriter.h | 3 +- src/Parsers/QueryParameterVisitor.cpp | 28 ++++++++++++++----- src/Parsers/QueryParameterVisitor.h | 2 ++ src/Storages/StorageSnapshot.cpp | 4 +-- src/Storages/StorageView.cpp | 2 ++ src/Storages/StorageView.h | 6 ++++ .../02428_parameterized_view.reference | 5 ++++ .../0_stateless/02428_parameterized_view.sql | 5 ++++ 12 files changed, 72 insertions(+), 22 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 9714596dce8..8381fee22b1 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -506,6 +506,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it. 
ASTPtr view_table; NameToNameMap parameter_values; + NameToNameMap parameter_types; if (view) { query_info.is_parameterized_view = view->isParameterizedView(); @@ -518,6 +519,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( { parameter_values = analyzeFunctionParamValues(query_ptr); view->setParameterValues(parameter_values); + parameter_types = view->getParameterValues(); } view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot, view->isParameterizedView()); if (query_info.is_parameterized_view) @@ -535,7 +537,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( required_result_column_names, table_join, query_info.is_parameterized_view, - parameter_values); + parameter_values, + parameter_types); query_info.syntax_analyzer_result = syntax_analyzer_result; context->setDistributed(syntax_analyzer_result->is_remote_storage); diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 1596cb90a14..ff97eccab58 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -256,9 +256,13 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt { if ((pos = column_name.find(parameter.first)) != std::string::npos) { - String parameter_name("_CAST(" + parameter.second + ", '" + column.type->getName() + "')"); - column_name.replace(pos, parameter.first.size(), parameter_name); - break; + auto parameter_datatype_iterator = data.parameter_types.find(parameter.first); + if (parameter_datatype_iterator != data.parameter_types.end()) + { + String parameter_name("_CAST(" + parameter.second + ", '" + parameter_datatype_iterator->second + "')"); + column_name.replace(pos, parameter.first.size(), parameter_name); + break; + } } } addIdentifier(columns, table.table, column_name); diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.h b/src/Interpreters/TranslateQualifiedNamesVisitor.h index 7fc95a19d1b..6c804ad6c90 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -29,12 +29,14 @@ public: std::unordered_set join_using_columns; bool has_columns; NameToNameMap parameter_values; + NameToNameMap parameter_types; - Data(const NameSet & source_columns_, const TablesWithColumns & tables_, bool has_columns_ = true, const NameToNameMap & parameter_values_ = {}) + Data(const NameSet & source_columns_, const TablesWithColumns & tables_, bool has_columns_ = true, const NameToNameMap & parameter_values_ = {}, const NameToNameMap & parameter_types_ = {}) : source_columns(source_columns_) , tables(tables_) , has_columns(has_columns_) , parameter_values(parameter_values_) + , parameter_types(parameter_types_) {} bool hasColumn(const String & name) const { return source_columns.count(name); } diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index ab3189f39d2..3c7367b869e 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -362,10 +362,11 @@ using ReplacePositionalArgumentsVisitor = InDepthNodeVisitor table_join, bool is_parameterized_view, - const NameToNameMap parameter_values) const + const NameToNameMap parameter_values, + const NameToNameMap parameter_types) const { auto * select_query = query->as(); if (!select_query) @@ -1343,7 +1345,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( result.analyzed_join->setColumnsFromJoinedTable(std::move(columns_from_joined_table), source_columns_set, 
right_table.table.getQualifiedNamePrefix()); } - translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns, parameter_values); + translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns, parameter_values, parameter_types); /// Optimizes logical expressions. LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform(); @@ -1408,9 +1410,13 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( { if ((pos = column_name.find(parameter.first)) != std::string::npos) { - String parameter_name("_CAST(" + parameter.second + ", '" + column.type->getName() + "')"); - column.name.replace(pos, parameter.first.size(), parameter_name); - break; + auto parameter_datatype_iterator = parameter_types.find(parameter.first); + if (parameter_datatype_iterator != parameter_types.end()) + { + String parameter_name("_CAST(" + parameter.second + ", '" + parameter_datatype_iterator->second + "')"); + column.name.replace(pos, parameter.first.size(), parameter_name); + break; + } } } } diff --git a/src/Interpreters/TreeRewriter.h b/src/Interpreters/TreeRewriter.h index 1f400588c6e..b94043b8983 100644 --- a/src/Interpreters/TreeRewriter.h +++ b/src/Interpreters/TreeRewriter.h @@ -131,7 +131,8 @@ public: const Names & required_result_columns = {}, std::shared_ptr table_join = {}, bool is_parameterized_view = false, - const NameToNameMap parameter_values = {}) const; + const NameToNameMap parameter_values = {}, + const NameToNameMap parameter_types = {}) const; private: static void normalize(ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, const Settings & settings, bool allow_self_aliases, ContextPtr context_, bool is_create_parameterized_view = false); diff --git a/src/Parsers/QueryParameterVisitor.cpp b/src/Parsers/QueryParameterVisitor.cpp index 14750845034..1282c12cce6 100644 --- a/src/Parsers/QueryParameterVisitor.cpp +++ b/src/Parsers/QueryParameterVisitor.cpp @@ -10,8 +10,8 @@ namespace DB class QueryParameterVisitor { public: - explicit QueryParameterVisitor(NameSet & parameters_name) - : query_parameters(parameters_name) + explicit QueryParameterVisitor(NameToNameMap & parameters) + : query_parameters(parameters) { } @@ -27,30 +27,44 @@ public: } private: - NameSet & query_parameters; + NameToNameMap & query_parameters; void visitQueryParameter(const ASTQueryParameter & query_parameter) { - query_parameters.insert(query_parameter.name); + query_parameters[query_parameter.name]= query_parameter.type; } }; NameSet analyzeReceiveQueryParams(const std::string & query) { - NameSet query_params; + NameToNameMap query_params; const char * query_begin = query.data(); const char * query_end = query.data() + query.size(); ParserQuery parser(query_end); ASTPtr extract_query_ast = parseQuery(parser, query_begin, query_end, "analyzeReceiveQueryParams", 0, 0); QueryParameterVisitor(query_params).visit(extract_query_ast); - return query_params; + + NameSet query_param_names; + for (const auto & query_param : query_params) + query_param_names.insert(query_param.first); + return query_param_names; } NameSet analyzeReceiveQueryParams(const ASTPtr & ast) { - NameSet query_params; + NameToNameMap query_params; + QueryParameterVisitor(query_params).visit(ast); + NameSet query_param_names; + for (const auto & query_param : query_params) + query_param_names.insert(query_param.first); + return query_param_names; +} + +NameToNameMap analyzeReceiveQueryParamsWithType(const ASTPtr & ast) 
+{ + NameToNameMap query_params; QueryParameterVisitor(query_params).visit(ast); return query_params; } diff --git a/src/Parsers/QueryParameterVisitor.h b/src/Parsers/QueryParameterVisitor.h index 6d9d49e1ed2..40b2fa6978f 100644 --- a/src/Parsers/QueryParameterVisitor.h +++ b/src/Parsers/QueryParameterVisitor.h @@ -13,4 +13,6 @@ NameSet analyzeReceiveQueryParams(const std::string & query); NameSet analyzeReceiveQueryParams(const ASTPtr & ast); +NameToNameMap analyzeReceiveQueryParamsWithType(const ASTPtr & ast); + } diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 8dfb7b288e3..b88e07d93b8 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -119,10 +119,10 @@ Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names,const const auto & columns = getMetadataForQuery()->getColumns(); for (const auto & name : column_names) { - std::string column_name = name; + const std::string & column_name = name; std::string substituted_column_name = name; std::string::size_type pos = 0u; - for (auto parameter : parameter_values) + for (const auto & parameter : parameter_values) { if ((pos = substituted_column_name.find("_CAST(" + parameter.second)) != std::string::npos) { diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index d9e79607ce4..f9ab6b0bc40 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -24,6 +24,7 @@ #include #include +#include namespace DB { @@ -102,6 +103,7 @@ StorageView::StorageView( description.inner_query = query.select->ptr(); is_parameterized_view = query.isParameterizedView(); + parameter_types = analyzeReceiveQueryParamsWithType(description.inner_query); storage_metadata.setSelectQuery(description); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index e913e98901f..756106a95d1 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -50,9 +50,15 @@ public: parameter_values = parameter_values_; } + NameToNameMap getParameterValues() const + { + return parameter_types; + } + protected: bool is_parameterized_view; NameToNameMap parameter_values; + NameToNameMap parameter_types; }; } diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference index 9ec1cb0efd2..38355617601 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.reference +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -20,3 +20,8 @@ FROM 30 40 60 +1 +2 +3 +3 +5 diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index 9d55dba970a..d2118cd1279 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -4,6 +4,7 @@ DROP VIEW IF EXISTS pv3; DROP VIEW IF EXISTS pv4; DROP VIEW IF EXISTS pv5; DROP VIEW IF EXISTS pv6; +DROP VIEW IF EXISTS pv7; DROP VIEW IF EXISTS v1; DROP TABLE IF EXISTS Catalog; DROP TABLE IF EXISTS system.pv1; @@ -68,11 +69,15 @@ SELECT Price FROM pv5(price=30, quantity=8,limit=1); CREATE VIEW pv6 AS SELECT Price+{price:UInt64} FROM Catalog GROUP BY Price+{price:UInt64} ORDER BY Price+{price:UInt64}; SELECT * FROM pv6(price=10); +CREATE VIEW pv7 AS SELECT Price/{price:UInt64} FROM Catalog ORDER BY Price; +SELECT * FROM pv7(price=10); + DROP VIEW pv1; DROP VIEW pv2; DROP VIEW pv3; DROP VIEW pv5; DROP VIEW pv6; +DROP VIEW pv7; DROP VIEW v1; 
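To make the `pv7` case above concrete: with this commit the placeholder is substituted using the type declared in the parameter itself (`{price:UInt64}`) rather than the type of the underlying column, so the call is expanded roughly as sketched below. This is an illustrative rewrite, not output produced by the patch:

``` sql
-- SELECT * FROM pv7(price=10) is rewritten approximately into:
SELECT Price / _CAST(10, 'UInt64')
FROM Catalog
ORDER BY Price ASC
```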
DROP TABLE Catalog; DROP TABLE system.pv1; From 616efdd3364dd45b00d7ea807748f4b44cc1604e Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Mon, 19 Dec 2022 15:28:15 +0000 Subject: [PATCH 070/262] Move keytab init into GSSAcceptorContext::initHandles() --- src/Access/ExternalAuthenticators.cpp | 9 +-------- src/Access/GSSAcceptor.cpp | 10 ++++++++++ src/Access/GSSAcceptor.h | 1 + 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index 6dcbd7e10b1..8709b3af2d5 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -9,7 +9,6 @@ #include #include -#include namespace DB { @@ -223,13 +222,7 @@ void parseKerberosParams(GSSAcceptorContext::Params & params, const Poco::Util:: params.realm = config.getString("kerberos.realm", ""); params.principal = config.getString("kerberos.principal", ""); - -#if USE_KRB5 - String keytab = config.getString("kerberos.keytab", ""); - if (!keytab.empty() && std::filesystem::exists(keytab)) - if (krb5_gss_register_acceptor_identity(keytab.c_str())) - throw Exception("Invalid keytab file is specified", ErrorCodes::BAD_ARGUMENTS); -#endif + params.keytab = config.getString("kerberos.keytab", ""); } } diff --git a/src/Access/GSSAcceptor.cpp b/src/Access/GSSAcceptor.cpp index 02fa3f8e1d3..b107293ce39 100644 --- a/src/Access/GSSAcceptor.cpp +++ b/src/Access/GSSAcceptor.cpp @@ -6,6 +6,7 @@ #include #include +#include namespace DB @@ -261,6 +262,15 @@ void GSSAcceptorContext::initHandles() resetHandles(); + if (!params.keytab.empty()) + { + if (!std::filesystem::exists(params.keytab)) + throw Exception("Keytab file not found", ErrorCodes::BAD_ARGUMENTS); + + if (krb5_gss_register_acceptor_identity(params.keytab.c_str())) + throw Exception("Invalid keytab file is specified", ErrorCodes::BAD_ARGUMENTS); + } + if (!params.principal.empty()) { if (!params.realm.empty()) diff --git a/src/Access/GSSAcceptor.h b/src/Access/GSSAcceptor.h index c2930201a93..ba448ae474e 100644 --- a/src/Access/GSSAcceptor.h +++ b/src/Access/GSSAcceptor.h @@ -29,6 +29,7 @@ public: String mechanism = "1.2.840.113554.1.2.2"; // OID: krb5 String principal; String realm; + String keytab; }; explicit GSSAcceptorContext(const Params & params_); From cdc3912743bea2022aa9e1b6d482d1685af38c6f Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 20 Dec 2022 22:44:27 +0100 Subject: [PATCH 071/262] fix incorrect usages of getPartName() --- .../BackupCoordinationReplicatedTables.cpp | 4 +-- src/Storages/MergeTree/ActiveDataPartSet.cpp | 4 +-- src/Storages/MergeTree/DropPartsRanges.cpp | 2 +- .../MergeTree/FutureMergedMutatedPart.cpp | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 26 +++++++++---------- .../MergeTree/MergeTreeDataWriter.cpp | 2 +- .../MergeTree/MergeTreeDeduplicationLog.cpp | 4 +-- src/Storages/MergeTree/MergeTreePartInfo.cpp | 20 +++++++++++++- src/Storages/MergeTree/MergeTreePartInfo.h | 4 ++- src/Storages/MergeTree/MergeTreeSink.cpp | 2 +- .../PartMovesBetweenShardsOrchestrator.cpp | 2 +- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 15 ++++++----- 14 files changed, 57 insertions(+), 34 deletions(-) diff --git a/src/Backups/BackupCoordinationReplicatedTables.cpp b/src/Backups/BackupCoordinationReplicatedTables.cpp index 910719b5365..27977445641 100644 --- a/src/Backups/BackupCoordinationReplicatedTables.cpp +++ 
b/src/Backups/BackupCoordinationReplicatedTables.cpp @@ -78,9 +78,9 @@ public: throw Exception( ErrorCodes::CANNOT_BACKUP_TABLE, "Intersected parts detected: {} on replica {} and {} on replica {}", - part.info.getPartName(), + part.info.getPartNameForLogs(), *part.replica_name, - new_part_info.getPartName(), + new_part_info.getPartNameForLogs(), *replica_name); } ++last_it; diff --git a/src/Storages/MergeTree/ActiveDataPartSet.cpp b/src/Storages/MergeTree/ActiveDataPartSet.cpp index 13976f6ec45..b438f18c1b8 100644 --- a/src/Storages/MergeTree/ActiveDataPartSet.cpp +++ b/src/Storages/MergeTree/ActiveDataPartSet.cpp @@ -47,7 +47,7 @@ bool ActiveDataPartSet::addImpl(const MergeTreePartInfo & part_info, const Strin if (!part_info.contains(it->first)) { if (!part_info.isDisjoint(it->first)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects previous part {}. It is a bug or a result of manual intervention in the ZooKeeper data.", name, it->first.getPartName()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects previous part {}. It is a bug or a result of manual intervention in the ZooKeeper data.", name, it->first.getPartNameForLogs()); ++it; break; } @@ -70,7 +70,7 @@ bool ActiveDataPartSet::addImpl(const MergeTreePartInfo & part_info, const Strin } if (it != part_info_to_name.end() && !part_info.isDisjoint(it->first)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects part {}. It is a bug or a result of manual intervention in the ZooKeeper data.", name, it->first.getPartName()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects part {}. It is a bug or a result of manual intervention in the ZooKeeper data.", name, it->first.getPartNameForLogs()); part_info_to_name.emplace(part_info, name); return true; diff --git a/src/Storages/MergeTree/DropPartsRanges.cpp b/src/Storages/MergeTree/DropPartsRanges.cpp index d467a7cac3d..bc4f20a3471 100644 --- a/src/Storages/MergeTree/DropPartsRanges.cpp +++ b/src/Storages/MergeTree/DropPartsRanges.cpp @@ -19,7 +19,7 @@ bool DropPartsRanges::isAffectedByDropRange(const std::string & new_part_name, s { if (!drop_range.isDisjoint(entry_info)) { - postpone_reason = fmt::format("Has DROP RANGE affecting entry {} producing part {}. Will postpone it's execution.", drop_range.getPartName(), new_part_name); + postpone_reason = fmt::format("Has DROP RANGE affecting entry {} producing part {}. 
Will postpone it's execution.", drop_range.getPartNameForLogs(), new_part_name); return true; } } diff --git a/src/Storages/MergeTree/FutureMergedMutatedPart.cpp b/src/Storages/MergeTree/FutureMergedMutatedPart.cpp index 019b24f6916..ffd444b7135 100644 --- a/src/Storages/MergeTree/FutureMergedMutatedPart.cpp +++ b/src/Storages/MergeTree/FutureMergedMutatedPart.cpp @@ -81,7 +81,7 @@ void FutureMergedMutatedPart::assign(MergeTreeData::DataPartsVector parts_, Merg name = part_info.getPartNameV0(min_date, max_date); } else - name = part_info.getPartName(); + name = part_info.getPartNameV1(); } void FutureMergedMutatedPart::updatePath(const MergeTreeData & storage, const IReservation * reservation) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 02a7a2ae641..3ed5dc4a710 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -375,7 +375,7 @@ String IMergeTreeDataPart::getNewName(const MergeTreePartInfo & new_part_info) c return new_part_info.getPartNameV0(min_date, max_date); } else - return new_part_info.getPartName(); + return new_part_info.getPartNameV1(); } std::optional IMergeTreeDataPart::getColumnPosition(const String & column_name) const diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index b92fada821c..214f0fc2e77 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2034,7 +2034,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t MergeTreePartInfo range_info = part->info; range_info.level = static_cast(range_info.max_block - range_info.min_block); range_info.mutation = 0; - independent_ranges_set.addImpl(range_info, range_info.getPartName()); + independent_ranges_set.addImpl(range_info, range_info.getPartNameV1()); } auto independent_ranges_infos = independent_ranges_set.getPartInfos(); @@ -2050,7 +2050,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t parts_in_range.push_back(part); sum_of_ranges += parts_in_range.size(); - LOG_TRACE(log, "Scheduling removal of {} parts in blocks range {}", parts_in_range.size(), range.getPartName()); + LOG_TRACE(log, "Scheduling removal of {} parts in blocks range {}", parts_in_range.size(), range.getPartNameForLogs()); pool.scheduleOrThrowOnError([&part_names_mutex, part_names_succeed, thread_group = CurrentThread::getGroup(), batch = std::move(parts_in_range)] { @@ -3335,7 +3335,7 @@ MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromW DataPartsVector parts_to_remove; if (drop_range.min_block > drop_range.max_block) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid drop range: {}", drop_range.getPartName()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid drop range: {}", drop_range.getPartNameForLogs()); auto partition_range = getVisibleDataPartsVectorInPartition(txn, drop_range.partition_id, &lock); @@ -3367,7 +3367,7 @@ MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromW bool is_covered_by_min_max_block = part->info.min_block <= drop_range.min_block && part->info.max_block >= drop_range.max_block && part->info.getMutationVersion() >= drop_range.getMutationVersion(); if (is_covered_by_min_max_block) { - LOG_INFO(log, "Skipping drop range for part {} because covering part {} already exists", drop_range.getPartName(), part->name); + LOG_INFO(log, "Skipping drop range for part {} because 
covering part {} already exists", drop_range.getPartNameForLogs(), part->name); return {}; } } @@ -3378,7 +3378,7 @@ MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromW { /// Intersect left border throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected merged part {} intersecting drop range {}", - part->name, drop_range.getPartName()); + part->name, drop_range.getPartNameForLogs()); } continue; @@ -3392,7 +3392,7 @@ MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromW { /// Intersect right border throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected merged part {} intersecting drop range {}", - part->name, drop_range.getPartName()); + part->name, drop_range.getPartNameForLogs()); } parts_to_remove.emplace_back(part); @@ -4150,8 +4150,8 @@ void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String & { auto part_info = MergeTreePartInfo::fromPartName(partition_id, format_version); parts.push_back(getActiveContainingPart(part_info)); - if (!parts.back() || parts.back()->name != part_info.getPartName()) - throw Exception("Part " + partition_id + " is not exists or not active", ErrorCodes::NO_SUCH_DATA_PART); + if (!parts.back() || parts.back()->name != part_info.getPartNameAndCheckFormat(format_version)) + throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Part {} is not exists or not active", partition_id); } else parts = getVisibleDataPartsVectorInPartition(local_context, partition_id); @@ -4192,18 +4192,18 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String { auto part_info = MergeTreePartInfo::fromPartName(partition_id, format_version); parts.emplace_back(getActiveContainingPart(part_info)); - if (!parts.back() || parts.back()->name != part_info.getPartName()) - throw Exception("Part " + partition_id + " is not exists or not active", ErrorCodes::NO_SUCH_DATA_PART); + if (!parts.back() || parts.back()->name != part_info.getPartNameAndCheckFormat(format_version)) + throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Part {} is not exists or not active", partition_id); } else parts = getVisibleDataPartsVectorInPartition(local_context, partition_id); auto volume = getStoragePolicy()->getVolumeByName(name); if (!volume) - throw Exception("Volume " + name + " does not exists on policy " + getStoragePolicy()->getName(), ErrorCodes::UNKNOWN_DISK); + throw Exception(ErrorCodes::UNKNOWN_DISK, "Volume {} does not exists on policy {}", name, getStoragePolicy()->getName()); if (parts.empty()) - throw Exception("Nothing to move (check that the partition exists).", ErrorCodes::NO_SUCH_DATA_PART); + throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Nothing to move (check that the partition exists)."); std::erase_if(parts, [&](auto part_ptr) { @@ -4570,7 +4570,7 @@ void MergeTreeData::restorePartsFromBackup(RestorerFromBackup & restorer, const void MergeTreeData::restorePartFromBackup(std::shared_ptr restored_parts_holder, const MergeTreePartInfo & part_info, const String & part_path_in_backup) const { - String part_name = part_info.getPartName(); + String part_name = part_info.getPartNameAndCheckFormat(format_version); auto backup = restored_parts_holder->getBackup(); UInt64 total_size_of_part = 0; diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index c50c01ea356..7d664e69703 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -362,7 +362,7 @@ MergeTreeDataWriter::TemporaryPart 
MergeTreeDataWriter::writeTempPart( part_name = new_part_info.getPartNameV0(min_date, max_date); } else - part_name = new_part_info.getPartName(); + part_name = new_part_info.getPartNameV1(); String part_dir = TMP_PREFIX + part_name; temp_part.temporary_directory_lock = data.getTemporaryPartDirectoryHolder(part_dir); diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index d0f4d8b3604..b843ce6a078 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -232,7 +232,7 @@ std::pair MergeTreeDeduplicationLog::addPart(const std: /// Create new record MergeTreeDeduplicationLogRecord record; record.operation = MergeTreeDeduplicationOp::ADD; - record.part_name = part_info.getPartName(); + record.part_name = part_info.getPartNameAndCheckFormat(format_version); record.block_id = block_id; /// Write it to disk writeRecord(record, *current_writer); @@ -269,7 +269,7 @@ void MergeTreeDeduplicationLog::dropPart(const MergeTreePartInfo & drop_part_inf /// Create drop record MergeTreeDeduplicationLogRecord record; record.operation = MergeTreeDeduplicationOp::DROP; - record.part_name = part_info.getPartName(); + record.part_name = part_info.getPartNameAndCheckFormat(format_version); record.block_id = itr->key; /// Write it to disk writeRecord(record, *current_writer); diff --git a/src/Storages/MergeTree/MergeTreePartInfo.cpp b/src/Storages/MergeTree/MergeTreePartInfo.cpp index 8c518e4d17f..038bf26ca91 100644 --- a/src/Storages/MergeTree/MergeTreePartInfo.cpp +++ b/src/Storages/MergeTree/MergeTreePartInfo.cpp @@ -167,7 +167,25 @@ bool MergeTreePartInfo::contains(const String & outer_part_name, const String & } -String MergeTreePartInfo::getPartName() const +String MergeTreePartInfo::getPartNameAndCheckFormat(MergeTreeDataFormatVersion format_version) const +{ + if (format_version == MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) + getPartNameV1(); + + /// We cannot just call getPartNameV0 because it requires extra arguments, but at least we can warn about it. + chassert(false); /// Catch it in CI. Feel free to remove this line. + throw Exception(ErrorCodes::BAD_DATA_PART_NAME, "Trying to get part name in new format for old format version." 
+ "Either some new feature is incompatible with deprecated *MergeTree definition syntax or it's a bug."); +} + + +String MergeTreePartInfo::getPartNameForLogs() const +{ + /// We don't care about format version here + return getPartNameV1(); +} + +String MergeTreePartInfo::getPartNameV1() const { WriteBufferFromOwnString wb; diff --git a/src/Storages/MergeTree/MergeTreePartInfo.h b/src/Storages/MergeTree/MergeTreePartInfo.h index 60c7e4e8822..cad851fb882 100644 --- a/src/Storages/MergeTree/MergeTreePartInfo.h +++ b/src/Storages/MergeTree/MergeTreePartInfo.h @@ -103,7 +103,9 @@ struct MergeTreePartInfo return level == MergeTreePartInfo::MAX_LEVEL || level == another_max_level; } - String getPartName() const; + String getPartNameAndCheckFormat(MergeTreeDataFormatVersion format_version) const; + String getPartNameForLogs() const; + String getPartNameV1() const; String getPartNameV0(DayNum left_date, DayNum right_date) const; UInt64 getBlocksCount() const { diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 0de71e94ea8..99f6b1855e4 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -156,7 +156,7 @@ void MergeTreeSink::finishDelayedChunk() if (!res.second) { ProfileEvents::increment(ProfileEvents::DuplicatedInsertedBlocks); - LOG_INFO(storage.log, "Block with ID {} already exists as part {}; ignoring it", block_id, res.first.getPartName()); + LOG_INFO(storage.log, "Block with ID {} already exists as part {}; ignoring it", block_id, res.first.getPartNameForLogs()); continue; } } diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp index d5f35ea1b3c..560d9f17a07 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp @@ -473,7 +473,7 @@ PartMovesBetweenShardsOrchestrator::Entry PartMovesBetweenShardsOrchestrator::st log_entry.log_entry_id = attach_log_entry_barrier_path; log_entry.part_checksum = part->checksums.getTotalChecksumHex(); log_entry.create_time = std::time(nullptr); - log_entry.new_part_name = part_info.getPartName(); + log_entry.new_part_name = part_info.getPartNameAndCheckFormat(storage.format_version); ops.emplace_back(zkutil::makeCreateRequest(attach_log_entry_barrier_path, log_entry.toString(), -1)); ops.emplace_back(zkutil::makeSetRequest(entry.to_shard + "/log", "", -1)); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 9ff022d5d57..d6d74228d68 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1504,7 +1504,7 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( entry.znode_name, entry.typeToString(), entry.new_part_name, - info.getPartName()); + info.getPartNameForLogs()); LOG_TRACE(log, fmt::runtime(out_postpone_reason)); return false; } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 8c422b52a3c..79a8253a9de 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1484,8 +1484,11 @@ String StorageReplicatedMergeTree::getChecksumsForZooKeeper(const MergeTreeDataP MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::attachPartHelperFoundValidPart(const LogEntry& entry) const { + if (format_version != 
MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) + return {}; + const MergeTreePartInfo actual_part_info = MergeTreePartInfo::fromPartName(entry.new_part_name, format_version); - const String part_new_name = actual_part_info.getPartName(); + const String part_new_name = actual_part_info.getPartNameV1(); for (const DiskPtr & disk : getStoragePolicy()->getDisks()) { @@ -1496,7 +1499,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::attachPartHelperFo if (!part_info || part_info->partition_id != actual_part_info.partition_id) continue; - const String part_old_name = part_info->getPartName(); + const String part_old_name = part_info->getPartNameV1(); const VolumePtr volume = std::make_shared("volume_" + part_old_name, disk); @@ -3849,7 +3852,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) bool StorageReplicatedMergeTree::partIsInsertingWithParallelQuorum(const MergeTreePartInfo & part_info) const { auto zookeeper = getZooKeeper(); - return zookeeper->exists(fs::path(zookeeper_path) / "quorum" / "parallel" / part_info.getPartName()); + return zookeeper->exists(fs::path(zookeeper_path) / "quorum" / "parallel" / part_info.getPartNameAndCheckFormat(format_version)); } @@ -3871,7 +3874,7 @@ bool StorageReplicatedMergeTree::partIsLastQuorumPart(const MergeTreePartInfo & if (partition_it == parts_with_quorum.added_parts.end()) return false; - return partition_it->second == part_info.getPartName(); + return partition_it->second == part_info.getPartNameAndCheckFormat(format_version); } @@ -5048,7 +5051,7 @@ String getPartNamePossiblyFake(MergeTreeDataFormatVersion format_version, const return part_info.getPartNameV0(left_date, right_date); } - return part_info.getPartName(); + return part_info.getPartNameV1(); } bool StorageReplicatedMergeTree::getFakePartCoveringAllPartsInPartition(const String & partition_id, MergeTreePartInfo & part_info, @@ -7540,7 +7543,7 @@ void StorageReplicatedMergeTree::enqueuePartForCheck(const String & part_name, t if (queue.hasDropRange(MergeTreePartInfo::fromPartName(part_name, format_version), &covering_drop_range)) { LOG_WARNING(log, "Do not enqueue part {} for check because it's covered by DROP_RANGE {} and going to be removed", - part_name, covering_drop_range.getPartName()); + part_name, covering_drop_range.getPartNameForLogs()); return; } part_check_thread.enqueuePart(part_name, delay_to_check_seconds); From b9d0d25eb21d451aa9c4d37d113163c4bd43a8b1 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 21 Dec 2022 02:11:25 +0000 Subject: [PATCH 072/262] Better StorageReplicatedMergeTree::getStatus() --- src/Storages/StorageReplicatedMergeTree.cpp | 28 +++++++++++++++------ 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index b6e7864ac80..151f1002ea6 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5665,7 +5665,24 @@ void StorageReplicatedMergeTree::getStatus(Status & res, bool with_zk_fields) { try { - auto log_entries = zookeeper->getChildren(fs::path(zookeeper_path) / "log"); + std::vector paths; + paths.push_back(fs::path(zookeeper_path) / "log"); + paths.push_back(fs::path(zookeeper_path) / "replicas"); + + auto children_result = zookeeper->getChildren(paths); + const auto & log_entries = children_result[0].names; + const auto & all_replicas = children_result[1].names; + + paths.clear(); + paths.push_back(fs::path(replica_path) 
/ "log_pointer"); + for (const String & replica : all_replicas) + paths.push_back(fs::path(zookeeper_path) / "replicas" / replica / "is_active"); + + auto get_result = zookeeper->tryGet(paths); + const auto & log_pointer_str = get_result[0].data; + + if (get_result[0].error == Coordination::Error::ZNONODE) + throw zkutil::KeeperException(get_result[0].error); if (!log_entries.empty()) { @@ -5673,17 +5690,14 @@ void StorageReplicatedMergeTree::getStatus(Status & res, bool with_zk_fields) res.log_max_index = parse(last_log_entry.substr(strlen("log-"))); } - String log_pointer_str = zookeeper->get(fs::path(replica_path) / "log_pointer"); res.log_pointer = log_pointer_str.empty() ? 0 : parse(log_pointer_str); - - auto all_replicas = zookeeper->getChildren(fs::path(zookeeper_path) / "replicas"); res.total_replicas = all_replicas.size(); - for (const String & replica : all_replicas) + for (size_t i = 0, size = all_replicas.size(); i < size; ++i) { - bool is_replica_active = zookeeper->exists(fs::path(zookeeper_path) / "replicas" / replica / "is_active"); + bool is_replica_active = get_result[i + 1].error != Coordination::Error::ZNONODE; res.active_replicas += static_cast(is_replica_active); - res.replica_is_active.emplace(replica, is_replica_active); + res.replica_is_active.emplace(all_replicas[i], is_replica_active); } } catch (const Coordination::Exception &) From 2d9f3c242bb391aa1e062acaa900a2ff29bc9f0a Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 22 Dec 2022 08:07:16 +0000 Subject: [PATCH 073/262] Add test_bad_path_to_keytab test --- src/Access/GSSAcceptor.cpp | 2 +- .../configs/kerberos_bad_path_to_keytab.xml | 6 ++++++ .../test_kerberos_auth/kerberos_image_config.sh | 4 ++++ tests/integration/test_kerberos_auth/test.py | 15 +++++++++++++++ 4 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_kerberos_auth/configs/kerberos_bad_path_to_keytab.xml diff --git a/src/Access/GSSAcceptor.cpp b/src/Access/GSSAcceptor.cpp index b107293ce39..998e5219bbb 100644 --- a/src/Access/GSSAcceptor.cpp +++ b/src/Access/GSSAcceptor.cpp @@ -268,7 +268,7 @@ void GSSAcceptorContext::initHandles() throw Exception("Keytab file not found", ErrorCodes::BAD_ARGUMENTS); if (krb5_gss_register_acceptor_identity(params.keytab.c_str())) - throw Exception("Invalid keytab file is specified", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Failed to register keytab file", ErrorCodes::BAD_ARGUMENTS); } if (!params.principal.empty()) diff --git a/tests/integration/test_kerberos_auth/configs/kerberos_bad_path_to_keytab.xml b/tests/integration/test_kerberos_auth/configs/kerberos_bad_path_to_keytab.xml new file mode 100644 index 00000000000..5b6be45e78e --- /dev/null +++ b/tests/integration/test_kerberos_auth/configs/kerberos_bad_path_to_keytab.xml @@ -0,0 +1,6 @@ + + + TEST.CLICKHOUSE.TECH + /tmp/keytab/clickhouse.keytab + + diff --git a/tests/integration/test_kerberos_auth/kerberos_image_config.sh b/tests/integration/test_kerberos_auth/kerberos_image_config.sh index 90bbc49f2bf..9ee5f3490fe 100644 --- a/tests/integration/test_kerberos_auth/kerberos_image_config.sh +++ b/tests/integration/test_kerberos_auth/kerberos_image_config.sh @@ -101,9 +101,13 @@ create_keytabs() { kadmin.local -q "addprinc -randkey HTTP/instance2@${REALM}" kadmin.local -q "ktadd -norandkey -k /tmp/keytab/clickhouse2.keytab HTTP/instance2@${REALM}" + kadmin.local -q "addprinc -randkey HTTP/instance3@${REALM}" + kadmin.local -q "ktadd -norandkey -k /tmp/keytab/clickhouse3.keytab HTTP/instance3@${REALM}" + chmod 
g+r /tmp/keytab/kuser.keytab chmod g+r /tmp/keytab/clickhouse1.keytab chmod g+r /tmp/keytab/clickhouse2.keytab + chmod g+r /tmp/keytab/clickhouse3.keytab } main() { diff --git a/tests/integration/test_kerberos_auth/test.py b/tests/integration/test_kerberos_auth/test.py index df6233e0cbb..89735d1c8fd 100644 --- a/tests/integration/test_kerberos_auth/test.py +++ b/tests/integration/test_kerberos_auth/test.py @@ -14,6 +14,12 @@ instance2 = cluster.add_instance( user_configs=["configs/users.xml"], with_kerberos_kdc=True, ) +instance3 = cluster.add_instance( + "instance3", + main_configs=["configs/kerberos_bad_path_to_keytab.xml"], + user_configs=["configs/users.xml"], + with_kerberos_kdc=True, +) # Fixtures @@ -57,6 +63,15 @@ def test_kerberos_auth_without_keytab(kerberos_cluster): ) +def test_bad_path_to_keytab(kerberos_cluster): + + assert ( + "DB::Exception: : Authentication failed: password is incorrect or there is no user with such name." + in make_auth(instance3) + ) + assert instance3.contains_in_log("Keytab file not found") + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") From a6f860f24e416b4d452ffeea9e67d8ddf7b60158 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 23 Dec 2022 17:57:17 +0100 Subject: [PATCH 074/262] Fixed review comments and updated FunctionParameterValuesVisitor to use visitFunction - 40907 Parameterized views as table functions --- src/Interpreters/InterpreterSelectQuery.cpp | 5 +++-- .../FunctionParameterValuesVisitor.cpp | 19 ++++++++++++------- src/Storages/StorageSnapshot.cpp | 13 +++++++------ 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 8381fee22b1..d16eb7e2bac 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -510,9 +510,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (view) { query_info.is_parameterized_view = view->isParameterizedView(); - /// We need to fetch the parameters set for SELECT parameterized view before the query is replaced. + /// We need to fetch the parameters set for SELECT ... FROM parameterized_view() before the query is replaced. /// replaceWithSubquery replaces the function child and adds the subquery in its place. 
- /// the parameters are children of function child, if function is replaced the parameters are also gone from tree + /// the parameters are children of function child, if function (which corresponds to parametrised view and has + /// parameters in its arguments: `parametrised_view()`) is replaced the parameters are also gone from tree /// So we need to get the parameters before they are removed from the tree /// and after query is replaced, we use these parameters to substitute in the parameterized view query if (query_info.is_parameterized_view) diff --git a/src/Parsers/FunctionParameterValuesVisitor.cpp b/src/Parsers/FunctionParameterValuesVisitor.cpp index cb187b2a56a..31ba7ac4f86 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.cpp +++ b/src/Parsers/FunctionParameterValuesVisitor.cpp @@ -27,8 +27,8 @@ public: void visit(const ASTPtr & ast) { - if (const auto * expression = ast->as()) - visitExpressionList(*expression); + if (const auto * function = ast->as()) + visitFunction(*function); for (const auto & child : ast->children) visit(child); } @@ -36,18 +36,23 @@ public: private: NameToNameMap & parameter_values; - void visitExpressionList(const ASTExpressionList & expression_list) + void visitFunction(const ASTFunction & parameter_function) { - if (expression_list.children.size() != 2) + if (parameter_function.name != "equals" && parameter_function.children.size() != 1) return; - if (const auto * identifier = expression_list.children[0]->as()) + const auto * expression_list = parameter_function.children[0]->as(); + + if (expression_list && expression_list->children.size() != 2) + return; + + if (const auto * identifier = expression_list->children[0]->as()) { - if (const auto * literal = expression_list.children[1]->as()) + if (const auto * literal = expression_list->children[1]->as()) { parameter_values[identifier->name()] = convertFieldToString(literal->value); } - else if (const auto * function = expression_list.children[1]->as()) + else if (const auto * function = expression_list->children[1]->as()) { if (isFunctionCast(function)) { diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index b88e07d93b8..00f5160ae11 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -117,10 +117,11 @@ Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names,const Block res; const auto & columns = getMetadataForQuery()->getColumns(); - for (const auto & name : column_names) + for (const auto & column_name : column_names) { - const std::string & column_name = name; - std::string substituted_column_name = name; + /// substituted_column_name is used for parameterized view (which are created using query parameters + /// and SELECT is used with substitution of these query parameters ) + std::string substituted_column_name = column_name; std::string::size_type pos = 0u; for (const auto & parameter : parameter_values) { @@ -141,17 +142,17 @@ Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names,const { res.insert({object_column->type->createColumn(), object_column->type, column_name}); } - else if (auto it = virtual_columns.find(name); it != virtual_columns.end()) + else if (auto it = virtual_columns.find(column_name); it != virtual_columns.end()) { /// Virtual columns must be appended after ordinary, because user can /// override them. 
const auto & type = it->second; - res.insert({type->createColumn(), type, name}); + res.insert({type->createColumn(), type, column_name}); } else { throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, - "Column {} not found in table {}", backQuote(name), storage.getStorageID().getNameForLogs()); + "Column {} not found in table {}", backQuote(substituted_column_name), storage.getStorageID().getNameForLogs()); } } return res; From a70e3c20fa81276318cc550ce787e3d55a2fae79 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Mon, 26 Dec 2022 11:56:45 +0000 Subject: [PATCH 075/262] Make calls of test from Ubuntu client --- .../kerberos_image_config.sh | 4 ++++ .../test_kerberos_auth/secrets/krb.conf | 4 ++-- tests/integration/test_kerberos_auth/test.py | 21 +++++++++++++------ 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_kerberos_auth/kerberos_image_config.sh b/tests/integration/test_kerberos_auth/kerberos_image_config.sh index 9ee5f3490fe..18f57ef2585 100644 --- a/tests/integration/test_kerberos_auth/kerberos_image_config.sh +++ b/tests/integration/test_kerberos_auth/kerberos_image_config.sh @@ -104,10 +104,14 @@ create_keytabs() { kadmin.local -q "addprinc -randkey HTTP/instance3@${REALM}" kadmin.local -q "ktadd -norandkey -k /tmp/keytab/clickhouse3.keytab HTTP/instance3@${REALM}" + kadmin.local -q "addprinc -randkey HTTP/client@${REALM}" + kadmin.local -q "ktadd -norandkey -k /tmp/keytab/client.keytab HTTP/client@${REALM}" + chmod g+r /tmp/keytab/kuser.keytab chmod g+r /tmp/keytab/clickhouse1.keytab chmod g+r /tmp/keytab/clickhouse2.keytab chmod g+r /tmp/keytab/clickhouse3.keytab + chmod g+r /tmp/keytab/client.keytab } main() { diff --git a/tests/integration/test_kerberos_auth/secrets/krb.conf b/tests/integration/test_kerberos_auth/secrets/krb.conf index 87520f65b2d..88431d68554 100644 --- a/tests/integration/test_kerberos_auth/secrets/krb.conf +++ b/tests/integration/test_kerberos_auth/secrets/krb.conf @@ -18,5 +18,5 @@ } [domain_realm] - .TEST.CLICKHOUSE.TECH = TEST.CLICKHOUSE.TECH - TEST.CLICKHOUSE.TECH = TEST.CLICKHOUSE.TECH + .test.clickhouse.com = TEST.CLICKHOUSE.TECH + test.clickhouse.com = TEST.CLICKHOUSE.TECH diff --git a/tests/integration/test_kerberos_auth/test.py b/tests/integration/test_kerberos_auth/test.py index 89735d1c8fd..b024f4b59ef 100644 --- a/tests/integration/test_kerberos_auth/test.py +++ b/tests/integration/test_kerberos_auth/test.py @@ -20,6 +20,12 @@ instance3 = cluster.add_instance( user_configs=["configs/users.xml"], with_kerberos_kdc=True, ) +client = cluster.add_instance( + "client", + main_configs=["configs/kerberos_without_keytab.xml"], + user_configs=["configs/users.xml"], + with_kerberos_kdc=True, +) # Fixtures @@ -38,16 +44,20 @@ def kerberos_cluster(): def make_auth(instance): - instance.exec_in_container( + instance_ip = cluster.get_instance_ip(instance.name) + + client.exec_in_container( + (["bash", "-c", f"echo '{instance_ip} {instance.hostname}' >> /etc/hosts"]) + ) + + client.exec_in_container( ["bash", "-c", "kinit -k -t /tmp/keytab/kuser.keytab kuser"] ) - return instance.exec_in_container( + return client.exec_in_container( [ "bash", "-c", - "echo 'select currentUser()' | curl -vvv --negotiate -u : http://{}:8123/ --data-binary @-".format( - instance.hostname - ), + f"echo 'select currentUser()' | curl -vvv --negotiate -u : http://{instance.hostname}:8123/ --data-binary @-", ] ) @@ -64,7 +74,6 @@ def test_kerberos_auth_without_keytab(kerberos_cluster): def test_bad_path_to_keytab(kerberos_cluster): - assert ( 
"DB::Exception: : Authentication failed: password is incorrect or there is no user with such name." in make_auth(instance3) From 3cdc9b3f81df1749cd96347aba4df2dd7d33e779 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Mon, 26 Dec 2022 11:59:37 +0000 Subject: [PATCH 076/262] Remove -vvv from kinit call in tests --- tests/integration/test_kerberos_auth/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_kerberos_auth/test.py b/tests/integration/test_kerberos_auth/test.py index b024f4b59ef..37625dcef85 100644 --- a/tests/integration/test_kerberos_auth/test.py +++ b/tests/integration/test_kerberos_auth/test.py @@ -57,7 +57,7 @@ def make_auth(instance): [ "bash", "-c", - f"echo 'select currentUser()' | curl -vvv --negotiate -u : http://{instance.hostname}:8123/ --data-binary @-", + f"echo 'select currentUser()' | curl --negotiate -u : http://{instance.hostname}:8123/ --data-binary @-", ] ) From 5a7257069b0b50b1c914b6c0155371b0b239a450 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Mon, 26 Dec 2022 12:22:29 +0000 Subject: [PATCH 077/262] Updated docs --- docs/en/operations/external-authenticators/kerberos.md | 4 +++- docs/ru/operations/external-authenticators/kerberos.md | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/en/operations/external-authenticators/kerberos.md b/docs/en/operations/external-authenticators/kerberos.md index c1360e880ad..95944e96194 100644 --- a/docs/en/operations/external-authenticators/kerberos.md +++ b/docs/en/operations/external-authenticators/kerberos.md @@ -22,10 +22,12 @@ To enable Kerberos, one should include `kerberos` section in `config.xml`. This - `principal` - canonical service principal name that will be acquired and used when accepting security contexts. - This parameter is optional, if omitted, the default principal will be used. - - `realm` - a realm, that will be used to restrict authentication to only those requests whose initiator's realm matches it. - This parameter is optional, if omitted, no additional filtering by realm will be applied. +- `keytab` - path to service keytab file. + - This parameter is optional, if omitted, path to service keytab file must be set in `KRB5_KTNAME` environment variable. + Example (goes into `config.xml`): ```xml diff --git a/docs/ru/operations/external-authenticators/kerberos.md b/docs/ru/operations/external-authenticators/kerberos.md index 865ea639c89..4641f15cb56 100644 --- a/docs/ru/operations/external-authenticators/kerberos.md +++ b/docs/ru/operations/external-authenticators/kerberos.md @@ -22,6 +22,9 @@ ClickHouse предоставляет возможность аутентифи - `realm` — обеспечивает фильтрацию по реалм (realm). Пользователям, чей реалм не совпадает с указанным, будет отказано в аутентификации. - Это опциональный параметр, при его отсутствии фильтр по реалм применяться не будет. +- `keytab` — задаёт путь к файлу keytab. + - Это опциональный параметр, при его отсутствии путь к файлу keytab должен быть задан в переменной окружения `KRB5_KTNAME`. 
+ Примеры, как должен выглядеть файл `config.xml`: ```xml From 465cc36526156b19e6bb497ebccece6f3145eda8 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Mon, 26 Dec 2022 13:17:22 +0000 Subject: [PATCH 078/262] Update Exception messages in test.py --- tests/integration/test_kerberos_auth/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_kerberos_auth/test.py b/tests/integration/test_kerberos_auth/test.py index 37625dcef85..3a183ad86a0 100644 --- a/tests/integration/test_kerberos_auth/test.py +++ b/tests/integration/test_kerberos_auth/test.py @@ -68,14 +68,14 @@ def test_kerberos_auth_with_keytab(kerberos_cluster): def test_kerberos_auth_without_keytab(kerberos_cluster): assert ( - "DB::Exception: : Authentication failed: password is incorrect or there is no user with such name." + "DB::Exception: : Authentication failed: password is incorrect, or there is no user with such name." in make_auth(instance2) ) def test_bad_path_to_keytab(kerberos_cluster): assert ( - "DB::Exception: : Authentication failed: password is incorrect or there is no user with such name." + "DB::Exception: : Authentication failed: password is incorrect, or there is no user with such name." in make_auth(instance3) ) assert instance3.contains_in_log("Keytab file not found") From f6ed1eaada8ef53b82d7bf42042a647cabd6b475 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 27 Dec 2022 16:41:26 +0100 Subject: [PATCH 079/262] Fix check black --- tests/queries/0_stateless/02473_infile_progress.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02473_infile_progress.py b/tests/queries/0_stateless/02473_infile_progress.py index 28ad2c8413a..842acf2b697 100755 --- a/tests/queries/0_stateless/02473_infile_progress.py +++ b/tests/queries/0_stateless/02473_infile_progress.py @@ -18,7 +18,7 @@ with client( name="client>", log=log, command=os.environ.get("CLICKHOUSE_BINARY", "clickhouse") - + " client --storage_file_read_method=pread" + + " client --storage_file_read_method=pread", ) as client1: filename = os.environ["CLICKHOUSE_TMP"] + "/infile_progress.tsv" From 1ce69371fb5da17528ed4655e76841b9004caea4 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 28 Dec 2022 21:46:08 +0000 Subject: [PATCH 080/262] Infer UInt64 in case of Int64 overflow --- src/DataTypes/transformTypesRecursively.cpp | 4 +- src/DataTypes/transformTypesRecursively.h | 2 +- src/Formats/JSONUtils.cpp | 4 + src/Formats/SchemaInferenceUtils.cpp | 146 +++++++++++++++--- ...uint64_in_case_of_int64_overflow.reference | 12 ++ ..._infer_uint64_in_case_of_int64_overflow.sh | 18 +++ 6 files changed, 160 insertions(+), 26 deletions(-) create mode 100644 tests/queries/0_stateless/02517_infer_uint64_in_case_of_int64_overflow.reference create mode 100755 tests/queries/0_stateless/02517_infer_uint64_in_case_of_int64_overflow.sh diff --git a/src/DataTypes/transformTypesRecursively.cpp b/src/DataTypes/transformTypesRecursively.cpp index da3af0beee7..05f82a08abe 100644 --- a/src/DataTypes/transformTypesRecursively.cpp +++ b/src/DataTypes/transformTypesRecursively.cpp @@ -8,7 +8,7 @@ namespace DB { -void transformTypesRecursively(DataTypes & types, std::function transform_simple_types, std::function transform_complex_types) +void transformTypesRecursively(DataTypes & types, std::function transform_simple_types, std::function transform_complex_types) { TypeIndexesSet type_indexes; for (const auto & type : types) @@ -156,7 +156,7 @@ void 
transformTypesRecursively(DataTypes & types, std::function callback) { DataTypes types = {type}; - transformTypesRecursively(types, [callback](auto & data_types, const TypeIndexesSet &){ callback(data_types[0]); }, {}); + transformTypesRecursively(types, [callback](auto & data_types, TypeIndexesSet &){ callback(data_types[0]); }, {}); } } diff --git a/src/DataTypes/transformTypesRecursively.h b/src/DataTypes/transformTypesRecursively.h index 2cf8664f920..f9c776b4205 100644 --- a/src/DataTypes/transformTypesRecursively.h +++ b/src/DataTypes/transformTypesRecursively.h @@ -12,7 +12,7 @@ namespace DB /// If not all types are the same complex type (Array/Map/Tuple), this function won't be called to nested types. /// Function transform_simple_types will be applied to resulting simple types after all recursive calls. /// Function transform_complex_types will be applied to complex types (Array/Map/Tuple) after recursive call to their nested types. -void transformTypesRecursively(DataTypes & types, std::function transform_simple_types, std::function transform_complex_types); +void transformTypesRecursively(DataTypes & types, std::function transform_simple_types, std::function transform_complex_types); void callOnNestedSimpleTypes(DataTypePtr & type, std::function callback); diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index 16f275ed6b8..384619dba1d 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -131,6 +131,7 @@ namespace JSONUtils { skipWhitespaceIfAny(in); assertChar('{', in); + skipWhitespaceIfAny(in); bool first = true; NamesAndTypesList names_and_types; String field; @@ -144,6 +145,7 @@ namespace JSONUtils auto name = readFieldName(in); auto type = tryInferDataTypeForSingleJSONField(in, settings, inference_info); names_and_types.emplace_back(name, type); + skipWhitespaceIfAny(in); } if (in.eof()) @@ -157,6 +159,7 @@ namespace JSONUtils { skipWhitespaceIfAny(in); assertChar('[', in); + skipWhitespaceIfAny(in); bool first = true; DataTypes types; String field; @@ -168,6 +171,7 @@ namespace JSONUtils first = false; auto type = tryInferDataTypeForSingleJSONField(in, settings, inference_info); types.push_back(std::move(type)); + skipWhitespaceIfAny(in); } if (in.eof()) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 9d40ac98964..3d00f67884d 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -44,9 +44,16 @@ namespace return true; } + void updateTypeIndexes(DataTypes & data_types, TypeIndexesSet & type_indexes) + { + type_indexes.clear(); + for (const auto & type : data_types) + type_indexes.insert(type->getTypeId()); + } + /// If we have both Nothing and non Nothing types, convert all Nothing types to the first non Nothing. /// For example if we have types [Nothing, String, Nothing] we change it to [String, String, String] - void transformNothingSimpleTypes(DataTypes & data_types, const TypeIndexesSet & type_indexes) + void transformNothingSimpleTypes(DataTypes & data_types, TypeIndexesSet & type_indexes) { /// Check if we have both Nothing and non Nothing types. if (!type_indexes.contains(TypeIndex::Nothing) || type_indexes.size() <= 1) @@ -67,12 +74,32 @@ namespace if (isNothing(type)) type = not_nothing_type; } + + type_indexes.erase(TypeIndex::Nothing); + } + + /// If we have both Int64 and UInt64, convert all Int64 to UInt64, + /// because UInt64 is inferred only in case of Int64 overflow. 
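For illustration of the merge rule described in the comment above, here is a hedged sketch of the user-visible behaviour. The expected results are copied from the new 02517 test; the `format` table function is an assumption of this sketch (the test itself pipes the data through clickhouse-local).

``` sql
-- 18446744073709551615 does not fit into Int64, so UInt64 is inferred instead of
-- falling back to Float64 or String.
DESC format(JSONEachRow, '{"number" : 18446744073709551615}');
-- expected: number  Nullable(UInt64)

-- If other values of the same column were inferred as Int64, transformIntegers()
-- promotes them all to UInt64, so the merged type stays an integer type.
DESC format(JSONEachRow, '{"number" : [18446744073709551615, 10, 11]}');
-- expected: number  Array(Nullable(UInt64))
```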
+ void transformIntegers(DataTypes & data_types, TypeIndexesSet & type_indexes) + { + if (!type_indexes.contains(TypeIndex::Int64) || !type_indexes.contains(TypeIndex::UInt64)) + return; + + for (auto & type : data_types) + { + if (WhichDataType(type).isInt64()) + type = std::make_shared(); + } + + type_indexes.erase(TypeIndex::Int64); } /// If we have both Int64 and Float64 types, convert all Int64 to Float64. - void transformIntegersAndFloatsToFloats(DataTypes & data_types, const TypeIndexesSet & type_indexes) + void transformIntegersAndFloatsToFloats(DataTypes & data_types, TypeIndexesSet & type_indexes) { - if (!type_indexes.contains(TypeIndex::Int64) || !type_indexes.contains(TypeIndex::Float64)) + bool have_floats = type_indexes.contains(TypeIndex::Float64); + bool have_integers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64); + if (!have_integers || !have_floats) return; for (auto & type : data_types) @@ -80,11 +107,14 @@ namespace if (isInteger(type)) type = std::make_shared(); } + + type_indexes.erase(TypeIndex::Int64); + type_indexes.erase(TypeIndex::UInt64); } /// If we have only Date and DateTime types, convert Date to DateTime, /// otherwise, convert all Date and DateTime to String. - void transformDatesAndDateTimes(DataTypes & data_types, const TypeIndexesSet & type_indexes) + void transformDatesAndDateTimes(DataTypes & data_types, TypeIndexesSet & type_indexes) { bool have_dates = type_indexes.contains(TypeIndex::Date); bool have_datetimes = type_indexes.contains(TypeIndex::DateTime64); @@ -98,6 +128,8 @@ namespace type = std::make_shared(); } + type_indexes.erase(TypeIndex::Date); + type_indexes.erase(TypeIndex::DateTime); return; } @@ -108,16 +140,18 @@ namespace if (isDate(type)) type = std::make_shared(9); } + + type_indexes.erase(TypeIndex::Date); } } - /// If we have numbers (Int64/Float64) and String types and numbers were parsed from String, + /// If we have numbers (Int64/UInt64/Float64) and String types and numbers were parsed from String, /// convert all numbers to String. void transformJSONNumbersBackToString( - DataTypes & data_types, const FormatSettings & settings, const TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info) + DataTypes & data_types, const FormatSettings & settings, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info) { bool have_strings = type_indexes.contains(TypeIndex::String); - bool have_numbers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::Float64); + bool have_numbers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64) || type_indexes.contains(TypeIndex::Float64); if (!have_strings || !have_numbers) return; @@ -128,36 +162,43 @@ namespace || json_info->numbers_parsed_from_json_strings.contains(type.get()))) type = std::make_shared(); } + + updateTypeIndexes(data_types, type_indexes); } - /// If we have both Bool and number (Int64/Float64) types, - /// convert all Bool to Int64/Float64. - void transformBoolsAndNumbersToNumbers(DataTypes & data_types, const TypeIndexesSet & type_indexes) + /// If we have both Bool and number (Int64/UInt64/Float64) types, + /// convert all Bool to Int64/UInt64/Float64. 
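A minimal follow-up sketch, under the same assumptions as above, for the unsigned branch added to this transform: when a Bool is mixed with an integer that only fits into UInt64, the Bool is promoted to UInt64 rather than Int64 (the expected type is taken from the 02517 test).

``` sql
DESC format(JSONEachRow, '{"number" : [18446744073709551615, true, 11]}');
-- expected: number  Array(Nullable(UInt64))
```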
+ void transformBoolsAndNumbersToNumbers(DataTypes & data_types, TypeIndexesSet & type_indexes) { bool have_floats = type_indexes.contains(TypeIndex::Float64); - bool have_integers = type_indexes.contains(TypeIndex::Int64); + bool have_signed_integers = type_indexes.contains(TypeIndex::Int64); + bool have_unsigned_integers = type_indexes.contains(TypeIndex::UInt64); bool have_bools = type_indexes.contains(TypeIndex::UInt8); /// Check if we have both Bool and Integer/Float. - if (!have_bools || (!have_integers && !have_floats)) + if (!have_bools || (!have_signed_integers && !have_unsigned_integers && !have_floats)) return; for (auto & type : data_types) { if (isBool(type)) { - if (have_integers) + if (have_signed_integers) type = std::make_shared(); + else if (have_unsigned_integers) + type = std::make_shared(); else type = std::make_shared(); } } + + type_indexes.erase(TypeIndex::UInt8); } /// If we have type Nothing/Nullable(Nothing) and some other non Nothing types, /// convert all Nothing/Nullable(Nothing) types to the first non Nothing. /// For example, when we have [Nothing, Array(Int64)] it will convert it to [Array(Int64), Array(Int64)] /// (it can happen when transforming complex nested types like [Array(Nothing), Array(Array(Int64))]) - void transformNothingComplexTypes(DataTypes & data_types) + void transformNothingComplexTypes(DataTypes & data_types, TypeIndexesSet & type_indexes) { bool have_nothing = false; DataTypePtr not_nothing_type = nullptr; @@ -177,10 +218,12 @@ namespace if (isNothing(removeNullable(type))) type = not_nothing_type; } + + updateTypeIndexes(data_types, type_indexes); } /// If we have both Nullable and non Nullable types, make all types Nullable - void transformNullableTypes(DataTypes & data_types, const TypeIndexesSet & type_indexes) + void transformNullableTypes(DataTypes & data_types, TypeIndexesSet & type_indexes) { if (!type_indexes.contains(TypeIndex::Nullable)) return; @@ -190,6 +233,8 @@ namespace if (type->canBeInsideNullable()) type = makeNullable(type); } + + updateTypeIndexes(data_types, type_indexes); } /// If we have Tuple with the same nested types like Tuple(Int64, Int64), @@ -197,11 +242,12 @@ namespace /// For example when we had type Tuple(Int64, Nullable(Nothing)) and we /// transformed it to Tuple(Nullable(Int64), Nullable(Int64)) we will /// also transform it to Array(Nullable(Int64)) - void transformTuplesWithEqualNestedTypesToArrays(DataTypes & data_types, const TypeIndexesSet & type_indexes) + void transformTuplesWithEqualNestedTypesToArrays(DataTypes & data_types, TypeIndexesSet & type_indexes) { if (!type_indexes.contains(TypeIndex::Tuple)) return; + bool remove_tuple_index = true; for (auto & type : data_types) { if (isTuple(type)) @@ -209,8 +255,13 @@ namespace const auto * tuple_type = assert_cast(type.get()); if (checkIfTypesAreEqual(tuple_type->getElements())) type = std::make_shared(tuple_type->getElements().back()); + else + remove_tuple_index = false; } } + + if (remove_tuple_index) + type_indexes.erase(TypeIndex::Tuple); } template @@ -221,7 +272,7 @@ namespace /// For example, if we have [Tuple(Nullable(Nothing), String), Array(Date), Tuple(Date, String)] /// it will convert them all to Array(String) void transformJSONTuplesAndArraysToArrays( - DataTypes & data_types, const FormatSettings & settings, const TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info) + DataTypes & data_types, const FormatSettings & settings, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info) { if 
(!type_indexes.contains(TypeIndex::Tuple)) return; @@ -266,12 +317,14 @@ namespace if (isArray(type) || isTuple(type)) type = std::make_shared(nested_types.back()); } + + type_indexes.erase(TypeIndex::Tuple); } } /// If we have Map and Object(JSON) types, convert all Map types to Object(JSON). /// If we have Map types with different value types, convert all Map types to Object(JSON) - void transformMapsAndObjectsToObjects(DataTypes & data_types, const TypeIndexesSet & type_indexes) + void transformMapsAndObjectsToObjects(DataTypes & data_types, TypeIndexesSet & type_indexes) { if (!type_indexes.contains(TypeIndex::Map)) return; @@ -298,9 +351,11 @@ namespace if (isMap(type)) type = std::make_shared("json", true); } + + type_indexes.erase(TypeIndex::Map); } - void transformMapsObjectsAndStringsToStrings(DataTypes & data_types, const TypeIndexesSet & type_indexes) + void transformMapsObjectsAndStringsToStrings(DataTypes & data_types, TypeIndexesSet & type_indexes) { bool have_maps = type_indexes.contains(TypeIndex::Map); bool have_objects = type_indexes.contains(TypeIndex::Object); @@ -315,19 +370,26 @@ namespace if (isMap(type) || isObject(type)) type = std::make_shared(); } + + type_indexes.erase(TypeIndex::Map); + type_indexes.erase(TypeIndex::Object); } template void transformInferredTypesIfNeededImpl(DataTypes & types, const FormatSettings & settings, JSONInferenceInfo * json_info) { - auto transform_simple_types = [&](DataTypes & data_types, const TypeIndexesSet & type_indexes) + auto transform_simple_types = [&](DataTypes & data_types, TypeIndexesSet & type_indexes) { /// Remove all Nothing type if possible. transformNothingSimpleTypes(data_types, type_indexes); - /// Transform integers to floats if needed. if (settings.try_infer_integers) + { + /// Transform Int64 to UInt64 if needed. + transformIntegers(data_types, type_indexes); + /// Transform integers to floats if needed. transformIntegersAndFloatsToFloats(data_types, type_indexes); + } /// Transform Date to DateTime or both to String if needed. if (settings.try_infer_dates || settings.try_infer_datetimes) @@ -347,14 +409,14 @@ namespace transformBoolsAndNumbersToNumbers(data_types, type_indexes); }; - auto transform_complex_types = [&](DataTypes & data_types, const TypeIndexesSet & type_indexes) + auto transform_complex_types = [&](DataTypes & data_types, TypeIndexesSet & type_indexes) { /// Make types Nullable if needed. transformNullableTypes(data_types, type_indexes); /// If we have type Nothing, it means that we had empty Array/Map while inference. /// If there is at least one non Nothing type, change all Nothing types to it. - transformNothingComplexTypes(data_types); + transformNothingComplexTypes(data_types, type_indexes); if constexpr (!is_json) return; @@ -571,10 +633,28 @@ namespace char * int_end = buf.position(); /// We cam safely get back to the start of the number, because we read from a string and we didn't reach eof. buf.position() = number_start; + + bool read_uint = false; + char * uint_end = nullptr; + /// In case of Int64 overflow we can try to infer UInt64. + if (!read_int) + { + UInt64 tmp_uint; + read_uint = tryReadIntText(tmp_uint, buf); + /// If we reached eof, it cannot be float (it requires no less data than integer) + if (buf.eof()) + return read_uint ? 
std::make_shared() : nullptr; + + uint_end = buf.position(); + buf.position() = number_start; + } + if (tryReadFloatText(tmp_float, buf)) { if (read_int && buf.position() == int_end) return std::make_shared(); + if (read_uint && buf.position() == uint_end) + return std::make_shared(); return std::make_shared(); } @@ -590,6 +670,19 @@ namespace bool read_int = tryReadIntText(tmp_int, peekable_buf); auto * int_end = peekable_buf.position(); peekable_buf.rollbackToCheckpoint(true); + + bool read_uint = false; + char * uint_end = nullptr; + /// In case of Int64 overflow we can try to infer UInt64. + if (!read_int) + { + PeekableReadBufferCheckpoint new_checkpoint(peekable_buf); + UInt64 tmp_uint; + read_uint = tryReadIntText(tmp_uint, peekable_buf); + uint_end = peekable_buf.position(); + peekable_buf.rollbackToCheckpoint(true); + } + if (tryReadFloatText(tmp_float, peekable_buf)) { /// Float parsing reads no fewer bytes than integer parsing, @@ -597,6 +690,8 @@ namespace /// If it's the same, then it's integer. if (read_int && peekable_buf.position() == int_end) return std::make_shared(); + if (read_uint && peekable_buf.position() == uint_end) + return std::make_shared(); return std::make_shared(); } } @@ -874,6 +969,11 @@ DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSetting Int64 tmp_int; if (tryReadIntText(tmp_int, buf) && buf.eof()) return std::make_shared(); + + /// In case of Int64 overflow, try to infer UInt64 + UInt64 tmp_uint; + if (tryReadIntText(tmp_uint, buf) && buf.eof()) + return std::make_shared(); } /// We cam safely get back to the start of buffer, because we read from a string and we didn't reach eof. diff --git a/tests/queries/0_stateless/02517_infer_uint64_in_case_of_int64_overflow.reference b/tests/queries/0_stateless/02517_infer_uint64_in_case_of_int64_overflow.reference new file mode 100644 index 00000000000..96a50d75eee --- /dev/null +++ b/tests/queries/0_stateless/02517_infer_uint64_in_case_of_int64_overflow.reference @@ -0,0 +1,12 @@ +c1 Nullable(UInt64) +c1 Array(Nullable(UInt64)) +c1 Nullable(UInt64) +c1 Nullable(UInt64) +c1 Array(Nullable(UInt64)) +c1 Nullable(UInt64) +number Nullable(UInt64) +number Array(Nullable(UInt64)) +number Array(Nullable(UInt64)) +number Nullable(UInt64) +number Nullable(UInt64) +number Nullable(UInt64) diff --git a/tests/queries/0_stateless/02517_infer_uint64_in_case_of_int64_overflow.sh b/tests/queries/0_stateless/02517_infer_uint64_in_case_of_int64_overflow.sh new file mode 100755 index 00000000000..4019d2b7a78 --- /dev/null +++ b/tests/queries/0_stateless/02517_infer_uint64_in_case_of_int64_overflow.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +echo -ne "18446744073709551615" | $CLICKHOUSE_LOCAL --table=test --input-format=CSV -q "desc test"; +echo -ne '"[18446744073709551615, 10, 11]"' | $CLICKHOUSE_LOCAL --table=test --input-format=CSV -q "desc test"; +echo -ne "18446744073709551615\n10\n11" | $CLICKHOUSE_LOCAL --table=test --input-format=CSV -q "desc test"; +echo -ne "18446744073709551615" | $CLICKHOUSE_LOCAL --table=test --input-format=TSV -q "desc test"; +echo -ne "[18446744073709551615, 10, 11]" | $CLICKHOUSE_LOCAL --table=test --input-format=TSV -q "desc test"; +echo -ne "18446744073709551615\n10\n11" | $CLICKHOUSE_LOCAL --table=test --input-format=TSV -q "desc test"; +echo -ne '{"number" : 18446744073709551615}' | $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test"; +echo -ne '{"number" : [18446744073709551615, 10, 11]}'| $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test"; +echo -ne '{"number" : [18446744073709551615, true, 11]}'| $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test"; +echo -ne '{"number" : 18446744073709551615}, {"number" : 10}, {"number" : 11}' | $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test"; +echo -ne '{"number" : 18446744073709551615}, {"number" : false}, {"number" : 11}' | $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test"; +echo -ne '{"number" : "18446744073709551615"}' | $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test"; From 18214c85654223b3d3913717718a4f06ff071a87 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 2 Jan 2023 15:05:43 +0100 Subject: [PATCH 081/262] Updated test to use custom database - 40907 Parameterized views as table functions --- .../0_stateless/02428_parameterized_view.sql | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index d2118cd1279..b2d4f99a5f1 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -50,15 +50,17 @@ SELECT Price FROM pv3(price=10); CREATE VIEW pv4 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={price:UInt64}; -- {serverError DUPLICATE_COLUMN} -CREATE TABLE system.Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; +CREATE DATABASE test_02428; -INSERT INTO system.Catalog VALUES ('Pen', 10, 3); -INSERT INTO system.Catalog VALUES ('Book', 50, 2); -INSERT INTO system.Catalog VALUES ('Paper', 20, 1); +CREATE TABLE test_02428.Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; -CREATE VIEW system.pv1 AS SELECT * FROM system.Catalog WHERE Price={price:UInt64}; -SELECT Price FROM system.pv1(price=20); -SELECT Price FROM `system.pv1`(price=20); -- { serverError UNKNOWN_FUNCTION } +INSERT INTO test_02428.Catalog VALUES ('Pen', 10, 3); +INSERT INTO test_02428.Catalog VALUES ('Book', 50, 2); +INSERT INTO test_02428.Catalog VALUES ('Paper', 20, 1); + +CREATE VIEW test_02428.pv1 AS SELECT * FROM test_02428.Catalog WHERE Price={price:UInt64}; +SELECT Price FROM test_02428.pv1(price=20); +SELECT Price FROM `test_02428.pv1`(price=20); -- { serverError UNKNOWN_FUNCTION } INSERT INTO Catalog VALUES ('Book2', 30, 8); INSERT INTO Catalog VALUES ('Book3', 30, 8); @@ -80,5 +82,6 @@ DROP VIEW pv6; DROP VIEW pv7; DROP VIEW v1; DROP TABLE Catalog; -DROP TABLE system.pv1; -DROP TABLE system.Catalog; \ No newline at end of file +DROP 
TABLE test_02428.pv1; +DROP TABLE test_02428.Catalog; +DROP DATABASE test_02428; \ No newline at end of file From 73fecae5ffed84ceca78f902e8f6967447b3863a Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 2 Jan 2023 15:31:07 +0000 Subject: [PATCH 082/262] Fix comments --- src/Formats/JSONUtils.cpp | 4 ++-- src/Formats/SchemaInferenceUtils.cpp | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index 384619dba1d..574759b0c07 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -138,7 +138,7 @@ namespace JSONUtils while (!in.eof() && *in.position() != '}') { if (!first) - skipComma(in); + assertChar(',', in); else first = false; @@ -166,7 +166,7 @@ namespace JSONUtils while (!in.eof() && *in.position() != ']') { if (!first) - skipComma(in); + assertChar(',', in); else first = false; auto type = tryInferDataTypeForSingleJSONField(in, settings, inference_info); diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 4bcbae1e9ea..77ef2e8f27a 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -104,7 +104,8 @@ namespace for (auto & type : data_types) { - if (isInteger(type)) + WhichDataType which(type); + if (which.isFloat64() || which.isInt64() || which.isUInt64()) type = std::make_shared(); } @@ -631,7 +632,7 @@ namespace return read_int ? std::make_shared() : nullptr; char * int_end = buf.position(); - /// We cam safely get back to the start of the number, because we read from a string and we didn't reach eof. + /// We can safely get back to the start of the number, because we read from a string and we didn't reach eof. buf.position() = number_start; bool read_uint = false; From f6deea1365037b3e091a07941746538805b493a1 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 2 Jan 2023 16:51:11 +0100 Subject: [PATCH 083/262] Try fix build --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4c14fea742b..86d86669db8 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -592,7 +592,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. 
Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ - M(StorageFileReadMethod, storage_file_read_method, "mmap", "Method of reading data from storage file, one of: read, pread, mmap.", 0) \ + M(StorageFileReadMethod, storage_file_read_method, LocalFSReadMethod::mmap, "Method of reading data from storage file, one of: read, pread, mmap.", 0) \ M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, pread_threadpool.", 0) \ M(String, remote_filesystem_read_method, "threadpool", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \ M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \ From 6ae0ffe8d2da413c4ab7e71de3690c14bbe87d17 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 4 Nov 2022 17:12:14 +0000 Subject: [PATCH 084/262] Implement optimize_redundant_functions_in_order_by --- ...ptimizeRedundantFunctionsInOrderByPass.cpp | 121 ++++++++++++++++++ .../OptimizeRedundantFunctionsInOrderByPass.h | 23 ++++ src/Analyzer/QueryTreePassManager.cpp | 5 +- 3 files changed, 148 insertions(+), 1 deletion(-) create mode 100644 src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp create mode 100644 src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp new file mode 100644 index 00000000000..1359bd2616d --- /dev/null +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp @@ -0,0 +1,121 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace +{ + +class OptimizeRedundantFunctionsInOrderByVisitor : public InDepthQueryTreeVisitor +{ + + struct RedundancyVerdict + { + bool redundant = true; + bool done = false; + }; + + static constexpr RedundancyVerdict makeNonRedundant() noexcept { return { .redundant = false, .done = true }; } + + std::unordered_set existing_keys; + + RedundancyVerdict isRedundantExpression(FunctionNode * function) + { + if (function->getArguments().getNodes().empty()) + return makeNonRedundant(); + + if (function->getFunction()->isDeterministicInScopeOfQuery()) + return makeNonRedundant(); + + // TODO: handle constants here + for (auto & arg : function->getArguments().getNodes()) + { + switch (arg->getNodeType()) + { + case QueryTreeNodeType::FUNCTION: + { + auto subresult = isRedundantExpression(arg->as()); + if (subresult.done) + return subresult; + break; + } + case QueryTreeNodeType::COLUMN: + { + auto * column = arg->as(); + if (!existing_keys.contains(column->getColumnName())) + return makeNonRedundant(); + break; + } + default: + return makeNonRedundant(); + } + } + + return {}; + } + +public: + bool needChildVisit(QueryTreeNodePtr & node, QueryTreeNodePtr & /*parent*/) + { + if (node->as()) + return false; + return true; + } + + void visitImpl(QueryTreeNodePtr & node) + { + auto * query = node->as(); + if (!query) + return; + + if (!query->hasOrderBy()) + return; + + auto & order_by = query->getOrderBy(); + for (auto & elem : order_by.getNodes()) + { + auto * order_by_elem = elem->as(); + if (order_by_elem->withFill()) + return; + } + + QueryTreeNodes new_order_by; + 
new_order_by.reserve(order_by.getNodes().size()); + + for (auto & elem : order_by.getNodes()) + { + auto * order_by_elem = elem->as(); + if (auto * expr = order_by_elem->getExpression()->as()) + { + if (isRedundantExpression(expr).redundant) + continue; + } + else + { + auto * column = elem->as(); + existing_keys.insert(column->getColumnName()); + } + + new_order_by.push_back(elem); + } + existing_keys.clear(); + + if (new_order_by.size() < order_by.getNodes().size()) + order_by.getNodes() = std::move(new_order_by); + } +}; + +} + +void OptimizeRedundantFunctionsInOrderByPass::run(QueryTreeNodePtr query_tree_node, ContextPtr /*context*/) +{ + OptimizeRedundantFunctionsInOrderByVisitor().visit(query_tree_node); +} + +} diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h new file mode 100644 index 00000000000..609a6360d27 --- /dev/null +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h @@ -0,0 +1,23 @@ +#pragma once + +#include + +namespace DB +{ + +/** If ORDER BY has argument x followed by f(x) transforms it to ORDER BY x. + * Optimize ORDER BY x, y, f(x), g(x, y), f(h(x)), t(f(x), g(x)) into ORDER BY x, y + * in case if f(), g(), h(), t() are deterministic (in scope of query). + * Don't optimize ORDER BY f(x), g(x), x even if f(x) is bijection for x or g(x). + */ +class OptimizeRedundantFunctionsInOrderByPass final : public IQueryTreePass +{ +public: + String getName() override { return "OptimizeRedundantFunctionsInOrderBy"; } + + String getDescription() override { return "If ORDER BY has argument x followed by f(x) transforms it to ORDER BY x."; } + + void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; +}; + +} diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index 06a1fec4698..dd14fc269f9 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -91,7 +92,6 @@ public: * TODO: Support setting optimize_move_functions_out_of_any. * TODO: Support setting optimize_aggregators_of_group_by_keys. * TODO: Support setting optimize_duplicate_order_by_and_distinct. - * TODO: Support setting optimize_redundant_functions_in_order_by. * TODO: Support setting optimize_monotonous_functions_in_order_by. * TODO: Support settings.optimize_or_like_chain. * TODO: Add optimizations based on function semantics. Example: SELECT * FROM test_table WHERE id != id. (id is not nullable column). 
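To make the effect of the new pass concrete, a short sketch follows; the queries mirror the cases exercised by the existing 01323 test, and the described simplification follows the pass comment above (deterministic functions of keys that already appear earlier in ORDER BY are removed, while a leading function key is kept).

``` sql
SET optimize_redundant_functions_in_order_by = 1;

-- exp(x) is deterministic and x already precedes it, so the sort is reduced to ORDER BY x.
SELECT groupArray(x) FROM (SELECT number AS x FROM numbers(3) ORDER BY x, exp(x));

-- The function key comes first here, so nothing is removed: ORDER BY exp(x), x stays as written.
SELECT groupArray(x) FROM (SELECT number AS x FROM numbers(3) ORDER BY exp(x), x);
```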
@@ -203,6 +203,9 @@ void addQueryTreePasses(QueryTreePassManager & manager) if (settings.optimize_if_chain_to_multiif) manager.addPass(std::make_unique()); + if (settings.optimize_redundant_functions_in_order_by) + manager.addPass(std::make_unique()); + manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); From 6e8191367c7d21a7f439d6be416173829a5bae29 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 4 Nov 2022 19:04:16 +0000 Subject: [PATCH 085/262] Fixup --- .../Passes/OptimizeRedundantFunctionsInOrderByPass.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp index 1359bd2616d..0337b5239d7 100644 --- a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp @@ -96,9 +96,8 @@ public: if (isRedundantExpression(expr).redundant) continue; } - else + else if (auto * column = elem->as()) { - auto * column = elem->as(); existing_keys.insert(column->getColumnName()); } From 0ecf6164ac77992fe83b434397076bd3858ad1df Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 7 Nov 2022 15:00:18 +0000 Subject: [PATCH 086/262] Add tests --- ...ptimizeRedundantFunctionsInOrderByPass.cpp | 8 +- ..._redundant_functions_in_order_by.reference | 191 ++++++++++++++++++ .../01323_redundant_functions_in_order_by.sql | 12 ++ 3 files changed, 207 insertions(+), 4 deletions(-) diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp index 0337b5239d7..140129a9fb2 100644 --- a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp @@ -30,7 +30,7 @@ class OptimizeRedundantFunctionsInOrderByVisitor : public InDepthQueryTreeVisito if (function->getArguments().getNodes().empty()) return makeNonRedundant(); - if (function->getFunction()->isDeterministicInScopeOfQuery()) + if (!function->getFunction()->isDeterministicInScopeOfQuery()) return makeNonRedundant(); // TODO: handle constants here @@ -90,13 +90,13 @@ public: for (auto & elem : order_by.getNodes()) { - auto * order_by_elem = elem->as(); - if (auto * expr = order_by_elem->getExpression()->as()) + auto & order_by_expr = elem->as()->getExpression(); + if (auto * expr = order_by_expr->as()) { if (isRedundantExpression(expr).redundant) continue; } - else if (auto * column = elem->as()) + else if (auto * column = order_by_expr->as()) { existing_keys.insert(column->getColumnName()); } diff --git a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference index b32ad433730..88703af7def 100644 --- a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference +++ b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference @@ -1,6 +1,15 @@ [0,1,2] [0,1,2] [0,1,2] +[0,1,2] +[0,1,2] +[0,1,2] +0 0 0 0 +0 1 1 1 +2 2 2 2 +3 3 3 3 +4 0 0 +5 0 0 0 0 0 0 0 1 1 1 2 2 2 2 @@ -15,6 +24,14 @@ 1 1 2 2 3 3 +0 0 +1 1 +2 2 +3 3 +0 0 +1 1 +2 2 +3 3 SELECT groupArray(x) FROM ( @@ -22,6 +39,32 @@ FROM FROM numbers(3) ORDER BY x ASC ) +QUERY id: 0 + PROJECTION COLUMNS + groupArray(x) Array(UInt64) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: groupArray, function_type: aggregate, result_type: Array(UInt64) + ARGUMENTS + LIST id: 3, nodes: 1 
+ COLUMN id: 4, column_name: x, result_type: UInt64, source_id: 5 + JOIN TREE + QUERY id: 5, is_subquery: 1 + PROJECTION COLUMNS + x UInt64 + PROJECTION + LIST id: 6, nodes: 1 + COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 + JOIN TREE + TABLE_FUNCTION id: 8, table_function_name: numbers + ARGUMENTS + LIST id: 9, nodes: 1 + CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8 + ORDER BY + LIST id: 11, nodes: 1 + SORT id: 12, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 SELECT groupArray(x) FROM ( @@ -29,6 +72,32 @@ FROM FROM numbers(3) ORDER BY x ASC ) +QUERY id: 0 + PROJECTION COLUMNS + groupArray(x) Array(UInt64) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: groupArray, function_type: aggregate, result_type: Array(UInt64) + ARGUMENTS + LIST id: 3, nodes: 1 + COLUMN id: 4, column_name: x, result_type: UInt64, source_id: 5 + JOIN TREE + QUERY id: 5, is_subquery: 1 + PROJECTION COLUMNS + x UInt64 + PROJECTION + LIST id: 6, nodes: 1 + COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 + JOIN TREE + TABLE_FUNCTION id: 8, table_function_name: numbers + ARGUMENTS + LIST id: 9, nodes: 1 + CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8 + ORDER BY + LIST id: 11, nodes: 1 + SORT id: 12, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 SELECT groupArray(x) FROM ( @@ -38,6 +107,38 @@ FROM exp(x) ASC, x ASC ) +QUERY id: 0 + PROJECTION COLUMNS + groupArray(x) Array(UInt64) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: groupArray, function_type: aggregate, result_type: Array(UInt64) + ARGUMENTS + LIST id: 3, nodes: 1 + COLUMN id: 4, column_name: x, result_type: UInt64, source_id: 5 + JOIN TREE + QUERY id: 5, is_subquery: 1 + PROJECTION COLUMNS + x UInt64 + PROJECTION + LIST id: 6, nodes: 1 + COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 + JOIN TREE + TABLE_FUNCTION id: 8, table_function_name: numbers + ARGUMENTS + LIST id: 9, nodes: 1 + CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8 + ORDER BY + LIST id: 11, nodes: 2 + SORT id: 12, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 13, function_name: exp, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 + SORT id: 15, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 SELECT key, a, @@ -52,6 +153,53 @@ ALL FULL OUTER JOIN test AS t USING (key) ORDER BY key ASC, t.key ASC +QUERY id: 0 + PROJECTION COLUMNS + key UInt64 + a UInt8 + b String + c Float64 + PROJECTION + LIST id: 1, nodes: 4 + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: a, result_type: UInt8, source_id: 5 + COLUMN id: 6, column_name: b, result_type: String, source_id: 5 + COLUMN id: 7, column_name: c, result_type: Float64, source_id: 5 + JOIN TREE + JOIN id: 8, strictness: ALL, kind: FULL + LEFT TABLE EXPRESSION + QUERY id: 3, alias: s, is_subquery: 1 + PROJECTION COLUMNS + key UInt64 + PROJECTION + LIST id: 9, nodes: 1 + FUNCTION id: 10, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + COLUMN id: 12, column_name: number, result_type: UInt64, source_id: 13 + CONSTANT id: 14, 
constant_value: UInt64_2, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 13, table_function_name: numbers + ARGUMENTS + LIST id: 15, nodes: 1 + CONSTANT id: 16, constant_value: UInt64_4, constant_value_type: UInt8 + RIGHT TABLE EXPRESSION + TABLE id: 5, alias: t, table_name: default.test + JOIN EXPRESSION + LIST id: 17, nodes: 1 + COLUMN id: 18, column_name: key, result_type: UInt64, source_id: 8 + EXPRESSION + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: key, result_type: UInt64, source_id: 3 + COLUMN id: 21, column_name: key, result_type: UInt64, source_id: 5 + ORDER BY + LIST id: 22, nodes: 2 + SORT id: 23, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 24, column_name: key, result_type: UInt64, source_id: 3 + SORT id: 25, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 26, column_name: key, result_type: UInt64, source_id: 5 SELECT key, a @@ -59,6 +207,24 @@ FROM test ORDER BY key ASC, a ASC +QUERY id: 0 + PROJECTION COLUMNS + key UInt64 + a UInt8 + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: a, result_type: UInt8, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.test + ORDER BY + LIST id: 5, nodes: 2 + SORT id: 6, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + SORT id: 7, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 4, column_name: a, result_type: UInt8, source_id: 3 SELECT key, a @@ -66,6 +232,31 @@ FROM test ORDER BY key ASC, exp(key + a) ASC +QUERY id: 0 + PROJECTION COLUMNS + key UInt64 + a UInt8 + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: a, result_type: UInt8, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.test + ORDER BY + LIST id: 5, nodes: 2 + SORT id: 6, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + SORT id: 7, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 8, function_name: exp, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 9, nodes: 1 + FUNCTION id: 10, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: a, result_type: UInt8, source_id: 3 [0,1,2] [0,1,2] [0,1,2] diff --git a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql index c810567f73a..9e87b5e1da4 100644 --- a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql +++ b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql @@ -6,17 +6,29 @@ INSERT INTO test SELECT number, number, toString(number), number from numbers(4) set optimize_redundant_functions_in_order_by = 1; SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(x)); +SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(x)) SETTINGS allow_experimental_analyzer=1; SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(exp(x))); +SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(exp(x))) SETTINGS allow_experimental_analyzer=1; SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY exp(x), x); +SELECT groupArray(x) from 
(SELECT number as x FROM numbers(3) ORDER BY exp(x), x) SETTINGS allow_experimental_analyzer=1; SELECT * FROM (SELECT number + 2 AS key FROM numbers(4)) s FULL JOIN test t USING(key) ORDER BY s.key, t.key; +SELECT * FROM (SELECT number + 2 AS key FROM numbers(4)) s FULL JOIN test t USING(key) ORDER BY s.key, t.key SETTINGS allow_experimental_analyzer=1; SELECT key, a FROM test ORDER BY key, a, exp(key + a); +SELECT key, a FROM test ORDER BY key, a, exp(key + a) SETTINGS allow_experimental_analyzer=1; SELECT key, a FROM test ORDER BY key, exp(key + a); +SELECT key, a FROM test ORDER BY key, exp(key + a) SETTINGS allow_experimental_analyzer=1; EXPLAIN SYNTAX SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(x)); +EXPLAIN QUERY TREE run_passes=1 SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(x)); EXPLAIN SYNTAX SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(exp(x))); +EXPLAIN QUERY TREE run_passes=1 SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(exp(x))); EXPLAIN SYNTAX SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY exp(x), x); +EXPLAIN QUERY TREE run_passes=1 SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY exp(x), x); EXPLAIN SYNTAX SELECT * FROM (SELECT number + 2 AS key FROM numbers(4)) s FULL JOIN test t USING(key) ORDER BY s.key, t.key; +EXPLAIN QUERY TREE run_passes=1 SELECT * FROM (SELECT number + 2 AS key FROM numbers(4)) s FULL JOIN test t USING(key) ORDER BY s.key, t.key; EXPLAIN SYNTAX SELECT key, a FROM test ORDER BY key, a, exp(key + a); +EXPLAIN QUERY TREE run_passes=1 SELECT key, a FROM test ORDER BY key, a, exp(key + a); EXPLAIN SYNTAX SELECT key, a FROM test ORDER BY key, exp(key + a); +EXPLAIN QUERY TREE run_passes=1 SELECT key, a FROM test ORDER BY key, exp(key + a); set optimize_redundant_functions_in_order_by = 0; From 0a42d698aca0ac9b7e9ae97a2074d164a4af1c22 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 2 Jan 2023 18:49:29 +0000 Subject: [PATCH 087/262] Fixes after review --- ...ptimizeRedundantFunctionsInOrderByPass.cpp | 17 ++++----- ..._redundant_functions_in_order_by.reference | 36 +++++++++++++++---- .../01323_redundant_functions_in_order_by.sql | 1 + 3 files changed, 39 insertions(+), 15 deletions(-) diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp index 140129a9fb2..aaa777de13c 100644 --- a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -23,14 +24,14 @@ class OptimizeRedundantFunctionsInOrderByVisitor : public InDepthQueryTreeVisito static constexpr RedundancyVerdict makeNonRedundant() noexcept { return { .redundant = false, .done = true }; } - std::unordered_set existing_keys; + std::unordered_set existing_keys; RedundancyVerdict isRedundantExpression(FunctionNode * function) { if (function->getArguments().getNodes().empty()) return makeNonRedundant(); - - if (!function->getFunction()->isDeterministicInScopeOfQuery()) + const auto & function_base = function->getFunction(); + if (!function_base || !function_base->isDeterministicInScopeOfQuery()) return makeNonRedundant(); // TODO: handle constants here @@ -85,8 +86,8 @@ public: return; } - QueryTreeNodes new_order_by; - new_order_by.reserve(order_by.getNodes().size()); + QueryTreeNodes new_order_by_nodes; + 
new_order_by_nodes.reserve(order_by.getNodes().size()); for (auto & elem : order_by.getNodes()) { @@ -101,12 +102,12 @@ public: existing_keys.insert(column->getColumnName()); } - new_order_by.push_back(elem); + new_order_by_nodes.push_back(elem); } existing_keys.clear(); - if (new_order_by.size() < order_by.getNodes().size()) - order_by.getNodes() = std::move(new_order_by); + if (new_order_by_nodes.size() < order_by.getNodes().size()) + order_by.getNodes() = std::move(new_order_by_nodes); } }; diff --git a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference index 88703af7def..ae160ed35d6 100644 --- a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference +++ b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference @@ -190,16 +190,16 @@ QUERY id: 0 COLUMN id: 18, column_name: key, result_type: UInt64, source_id: 8 EXPRESSION LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: key, result_type: UInt64, source_id: 3 - COLUMN id: 21, column_name: key, result_type: UInt64, source_id: 5 + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + COLUMN id: 20, column_name: key, result_type: UInt64, source_id: 5 ORDER BY - LIST id: 22, nodes: 2 - SORT id: 23, sort_direction: ASCENDING, with_fill: 0 + LIST id: 21, nodes: 2 + SORT id: 22, sort_direction: ASCENDING, with_fill: 0 EXPRESSION - COLUMN id: 24, column_name: key, result_type: UInt64, source_id: 3 - SORT id: 25, sort_direction: ASCENDING, with_fill: 0 + COLUMN id: 23, column_name: key, result_type: UInt64, source_id: 3 + SORT id: 24, sort_direction: ASCENDING, with_fill: 0 EXPRESSION - COLUMN id: 26, column_name: key, result_type: UInt64, source_id: 5 + COLUMN id: 25, column_name: key, result_type: UInt64, source_id: 5 SELECT key, a @@ -257,6 +257,28 @@ QUERY id: 0 LIST id: 11, nodes: 2 COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 COLUMN id: 4, column_name: a, result_type: UInt8, source_id: 3 +QUERY id: 0 + PROJECTION COLUMNS + key UInt64 + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.test + GROUP BY + LIST id: 4, nodes: 1 + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + ORDER BY + LIST id: 5, nodes: 2 + SORT id: 6, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 7, function_name: avg, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: a, result_type: UInt8, source_id: 3 + SORT id: 10, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 [0,1,2] [0,1,2] [0,1,2] diff --git a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql index 9e87b5e1da4..3573773b76c 100644 --- a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql +++ b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql @@ -29,6 +29,7 @@ EXPLAIN SYNTAX SELECT key, a FROM test ORDER BY key, a, exp(key + a); EXPLAIN QUERY TREE run_passes=1 SELECT key, a FROM test ORDER BY key, a, exp(key + a); EXPLAIN SYNTAX SELECT key, a FROM test ORDER BY key, exp(key + a); EXPLAIN QUERY TREE run_passes=1 SELECT key, a FROM test ORDER BY key, exp(key + a); +EXPLAIN QUERY TREE run_passes=1 SELECT key FROM test GROUP BY key ORDER BY avg(a), key; set 
optimize_redundant_functions_in_order_by = 0; From 09d9ac6c8ef253d12c34e393970f20a8205eef88 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 2 Jan 2023 19:09:45 +0000 Subject: [PATCH 088/262] Simplify code --- ...ptimizeRedundantFunctionsInOrderByPass.cpp | 28 ++++++------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp index aaa777de13c..29724de0f20 100644 --- a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp @@ -15,24 +15,15 @@ namespace class OptimizeRedundantFunctionsInOrderByVisitor : public InDepthQueryTreeVisitor { - - struct RedundancyVerdict - { - bool redundant = true; - bool done = false; - }; - - static constexpr RedundancyVerdict makeNonRedundant() noexcept { return { .redundant = false, .done = true }; } - std::unordered_set existing_keys; - RedundancyVerdict isRedundantExpression(FunctionNode * function) + bool isRedundantExpression(FunctionNode * function) { if (function->getArguments().getNodes().empty()) - return makeNonRedundant(); + return false; const auto & function_base = function->getFunction(); if (!function_base || !function_base->isDeterministicInScopeOfQuery()) - return makeNonRedundant(); + return false; // TODO: handle constants here for (auto & arg : function->getArguments().getNodes()) @@ -41,24 +32,23 @@ class OptimizeRedundantFunctionsInOrderByVisitor : public InDepthQueryTreeVisito { case QueryTreeNodeType::FUNCTION: { - auto subresult = isRedundantExpression(arg->as()); - if (subresult.done) - return subresult; + if (!isRedundantExpression(arg->as())) + return false; break; } case QueryTreeNodeType::COLUMN: { auto * column = arg->as(); if (!existing_keys.contains(column->getColumnName())) - return makeNonRedundant(); + return false; break; } default: - return makeNonRedundant(); + return false; } } - return {}; + return true; } public: @@ -94,7 +84,7 @@ public: auto & order_by_expr = elem->as()->getExpression(); if (auto * expr = order_by_expr->as()) { - if (isRedundantExpression(expr).redundant) + if (isRedundantExpression(expr)) continue; } else if (auto * column = order_by_expr->as()) From 1f89db78a5876c718a6f865814d2f6bb58319d66 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Tue, 3 Jan 2023 11:00:39 +0100 Subject: [PATCH 089/262] Added is_create_parameterized_view flag in ActionsVisitor, added functions for column manipulation for parameterized view in StorageView, updated tests to fix flaky test issues and addressed small review comments- 40907 Parameterized views as table functions --- .../sql-reference/statements/create/view.md | 2 +- src/Interpreters/ActionsVisitor.cpp | 6 +- src/Interpreters/ActionsVisitor.h | 4 +- src/Interpreters/ExpressionAnalyzer.cpp | 19 ++- src/Interpreters/ExpressionAnalyzer.h | 7 +- .../TranslateQualifiedNamesVisitor.cpp | 22 ++-- src/Interpreters/TreeRewriter.cpp | 22 +--- src/Parsers/FunctionParameterValuesVisitor.h | 1 - src/Storages/StorageSnapshot.cpp | 17 +-- src/Storages/StorageView.cpp | 37 ++++++ src/Storages/StorageView.h | 2 + .../02428_parameterized_view.reference | 4 +- .../0_stateless/02428_parameterized_view.sql | 123 +++++++++--------- 13 files changed, 149 insertions(+), 117 deletions(-) diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index b69d09dd266..91f542be285 100644 --- 
a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -37,7 +37,7 @@ SELECT a, b, c FROM (SELECT ...) ``` ## Parameterized View -This is similar to normal view but can be created with parameter instead of literals and can be used as table functions by substituting the values of the parametes. +Parametrized views are similar to normal views, but can be created with parameters which are not resolved immediately. These views can be used with table functions, which specify the name of the view as function name and the parameter values as its arguments. ``` sql CREATE VIEW view AS SELECT * FROM TABLE WHERE Column1={column1:datatype1} and Column2={column2:datatype2} ... diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 7e4fa5d168a..e1af752b100 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -538,7 +538,8 @@ ActionsMatcher::Data::Data( bool only_consts_, bool create_source_for_in_, AggregationKeysInfo aggregation_keys_info_, - bool build_expression_with_window_functions_) + bool build_expression_with_window_functions_, + bool is_create_parameterized_view_) : WithContext(context_) , set_size_limit(set_size_limit_) , subquery_depth(subquery_depth_) @@ -552,6 +553,7 @@ ActionsMatcher::Data::Data( , actions_stack(std::move(actions_dag), context_) , aggregation_keys_info(aggregation_keys_info_) , build_expression_with_window_functions(build_expression_with_window_functions_) + , is_create_parameterized_view(is_create_parameterized_view_) , next_unique_suffix(actions_stack.getLastActions().getOutputs().size() + 1) { } @@ -1205,7 +1207,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & argument_types.push_back(column.type); argument_names.push_back(column.name); } - else if (query_parameter) + else if (data.is_create_parameterized_view && query_parameter) { const auto data_type = DataTypeFactory::instance().get(query_parameter->type); ColumnWithTypeAndName column(data_type,query_parameter->getColumnName()); diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index fea013fd075..0269371b46e 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -134,6 +134,7 @@ public: ScopeStack actions_stack; AggregationKeysInfo aggregation_keys_info; bool build_expression_with_window_functions; + bool is_create_parameterized_view; /* * Remember the last unique column suffix to avoid quadratic behavior @@ -154,7 +155,8 @@ public: bool only_consts_, bool create_source_for_in_, AggregationKeysInfo aggregation_keys_info_, - bool build_expression_with_window_functions_ = false); + bool build_expression_with_window_functions_ = false, + bool is_create_parameterized_view_ = false); /// Does result of the calculation already exists in the block. 
bool hasColumn(const String & column_name) const; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 1220eca4e45..98c2a49dbaa 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -159,11 +159,13 @@ ExpressionAnalyzer::ExpressionAnalyzer( size_t subquery_depth_, bool do_global, bool is_explain, - PreparedSetsPtr prepared_sets_) + PreparedSetsPtr prepared_sets_, + bool is_create_parameterized_view_) : WithContext(context_) , query(query_), settings(getContext()->getSettings()) , subquery_depth(subquery_depth_) , syntax(syntax_analyzer_result_) + , is_create_parameterized_view(is_create_parameterized_view_) { /// Cache prepared sets because we might run analysis multiple times if (prepared_sets_) @@ -556,7 +558,8 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_ only_consts, !isRemoteStorage() /* create_source_for_in */, getAggregationKeysInfo(), - false /* build_expression_with_window_functions */); + false /* build_expression_with_window_functions */, + is_create_parameterized_view); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -575,7 +578,9 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGP true /* no_makeset */, only_consts, !isRemoteStorage() /* create_source_for_in */, - getAggregationKeysInfo()); + getAggregationKeysInfo(), + false /* build_expression_with_window_functions */, + is_create_parameterized_view); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -596,7 +601,9 @@ void ExpressionAnalyzer::getRootActionsForHaving( false /* no_makeset */, only_consts, true /* create_source_for_in */, - getAggregationKeysInfo()); + getAggregationKeysInfo(), + false /* build_expression_with_window_functions */, + is_create_parameterized_view); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -1319,7 +1326,7 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(columns_after_join); - getRootActions(select_query->where(), only_types, step.actions(), false/*only_consts*/); + getRootActions(select_query->where(), only_types, step.actions()); auto where_column_name = select_query->where()->getColumnName(); step.addRequiredOutput(where_column_name); @@ -1525,7 +1532,7 @@ void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns); - getRootActions(select_query->select(), only_types, step.actions(), false /*only_consts*/); + getRootActions(select_query->select(), only_types, step.actions()); for (const auto & child : select_query->select()->children) appendSelectSkipWindowExpressions(step, child); diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index ddb41a00f84..ba188fb0198 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -158,13 +158,15 @@ protected: size_t subquery_depth_, bool do_global_, bool is_explain_, - PreparedSetsPtr prepared_sets_); + PreparedSetsPtr prepared_sets_, + bool is_create_parameterized_view_ = false); ASTPtr query; const ExtractedSettings settings; size_t subquery_depth; TreeRewriterResultPtr syntax; + bool is_create_parameterized_view; const ConstStoragePtr & storage() const { return syntax->storage; } /// 
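Since this flag only matters while a parameterized view is being created, a short sketch of the flow it enables is given below. The names come from the 02428 test in this series, and the internal column naming follows the substitution helpers added to StorageView further down.

``` sql
-- CREATE runs with is_create_parameterized_view set, so {price:UInt64} is kept
-- as an unresolved query parameter while the SELECT is analyzed.
CREATE VIEW pv1 AS SELECT * FROM Catalog WHERE Price = {price:UInt64};

-- At SELECT time the view acts as a table function and the value is substituted;
-- internally it appears in column names as _CAST(20, 'UInt64'), which
-- replaceQueryParameterWithValue / replaceValueWithQueryParameter translate back and forth.
SELECT Price FROM pv1(price=20);
```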
The main table in FROM clause, if exists. const TableJoin & analyzedJoin() const { return *syntax->analyzed_join; } @@ -318,7 +320,8 @@ public: options_.subquery_depth, do_global_, options_.is_explain, - prepared_sets_) + prepared_sets_, + options_.is_create_parameterized_view) , metadata_snapshot(metadata_snapshot_) , required_result_columns(required_result_columns_) , query_options(options_) diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index ff97eccab58..e4ffa1ef3f1 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -20,6 +20,7 @@ #include #include #include +#include namespace DB @@ -251,20 +252,13 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt if (first_table || !data.join_using_columns.contains(column.name)) { std::string column_name = column.name; - std::string::size_type pos = 0u; - for (const auto & parameter : data.parameter_values) - { - if ((pos = column_name.find(parameter.first)) != std::string::npos) - { - auto parameter_datatype_iterator = data.parameter_types.find(parameter.first); - if (parameter_datatype_iterator != data.parameter_types.end()) - { - String parameter_name("_CAST(" + parameter.second + ", '" + parameter_datatype_iterator->second + "')"); - column_name.replace(pos, parameter.first.size(), parameter_name); - break; - } - } - } + + /// replaceQueryParameterWithValue is used for parameterized view (which are created using query parameters + /// and SELECT is used with substitution of these query parameters ) + if (!data.parameter_values.empty()) + column_name + = StorageView::replaceQueryParameterWithValue(column_name, data.parameter_values, data.parameter_types); + addIdentifier(columns, table.table, column_name); } } diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 22df8c1cbe7..bd3472d5dc1 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -53,6 +53,7 @@ #include #include #include +#include #include @@ -1395,26 +1396,13 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( result.window_function_asts = getWindowFunctions(query, *select_query); result.expressions_with_window_function = getExpressionsWithWindowFunctions(query); + /// replaceQueryParameterWithValue is used for parameterized view (which are created using query parameters + /// and SELECT is used with substitution of these query parameters ) + /// the replaced column names will be used in the next steps if (is_parameterized_view) { for (auto & column : result.source_columns) - { - std::string column_name = column.name; - std::string::size_type pos = 0u; - for (auto & parameter : parameter_values) - { - if ((pos = column_name.find(parameter.first)) != std::string::npos) - { - auto parameter_datatype_iterator = parameter_types.find(parameter.first); - if (parameter_datatype_iterator != parameter_types.end()) - { - String parameter_name("_CAST(" + parameter.second + ", '" + parameter_datatype_iterator->second + "')"); - column.name.replace(pos, parameter.first.size(), parameter_name); - break; - } - } - } - } + column.name = StorageView::replaceQueryParameterWithValue(column.name, parameter_values, parameter_types); } result.collectUsedColumns(query, true, settings.query_plan_optimize_primary_key); diff --git a/src/Parsers/FunctionParameterValuesVisitor.h b/src/Parsers/FunctionParameterValuesVisitor.h index f87257fc979..e6ce0e42d06 100644 
--- a/src/Parsers/FunctionParameterValuesVisitor.h +++ b/src/Parsers/FunctionParameterValuesVisitor.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 00f5160ae11..31770c9a32b 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include namespace DB @@ -112,25 +113,19 @@ NameAndTypePair StorageSnapshot::getColumn(const GetColumnsOptions & options, co return *column; } -Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names,const NameToNameMap & parameter_values) const +Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names, const NameToNameMap & parameter_values) const { Block res; const auto & columns = getMetadataForQuery()->getColumns(); for (const auto & column_name : column_names) { + std::string substituted_column_name = column_name; + /// substituted_column_name is used for parameterized view (which are created using query parameters /// and SELECT is used with substitution of these query parameters ) - std::string substituted_column_name = column_name; - std::string::size_type pos = 0u; - for (const auto & parameter : parameter_values) - { - if ((pos = substituted_column_name.find("_CAST(" + parameter.second)) != std::string::npos) - { - substituted_column_name = substituted_column_name.substr(0,pos) + parameter.first + ")"; - break; - } - } + if (!parameter_values.empty()) + substituted_column_name = StorageView::replaceValueWithQueryParameter(column_name, parameter_values); auto column = columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, substituted_column_name); auto object_column = object_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, substituted_column_name); diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 2446659cebc..df74def509d 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -252,6 +252,43 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ child = view_query; } +String StorageView::replaceQueryParameterWithValue(const String & column_name, const NameToNameMap & parameter_values, const NameToNameMap & parameter_types) +{ + std::string name = column_name; + std::string::size_type pos = 0u; + for (const auto & parameter : parameter_values) + { + if ((pos = name.find(parameter.first)) != std::string::npos) + { + auto parameter_datatype_iterator = parameter_types.find(parameter.first); + if (parameter_datatype_iterator != parameter_types.end()) + { + String parameter_name("_CAST(" + parameter.second + ", '" + parameter_datatype_iterator->second + "')"); + name.replace(pos, parameter.first.size(), parameter_name); + break; + } + else + throw Exception("Datatype not found for query parameter " + parameter.first, ErrorCodes::LOGICAL_ERROR); + } + } + return name; +} + +String StorageView::replaceValueWithQueryParameter(const String & column_name, const NameToNameMap & parameter_values) +{ + String name = column_name; + std::string::size_type pos = 0u; + for (const auto & parameter : parameter_values) + { + if ((pos = name.find("_CAST(" + parameter.second)) != std::string::npos) + { + name = name.substr(0,pos) + parameter.first + ")"; + break; + } + } + return name; +} + ASTPtr StorageView::restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name) { ASTTableExpression * table_expression = getFirstTableExpression(select_query); diff --git 
a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 756106a95d1..6cd4bb171f5 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -44,6 +44,8 @@ public: static void replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name, const bool parameterized_view); static ASTPtr restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name); + static String replaceQueryParameterWithValue (const String & column_name, const NameToNameMap & parameter_values, const NameToNameMap & parameter_types); + static String replaceValueWithQueryParameter (const String & column_name, const NameToNameMap & parameter_values); void setParameterValues (NameToNameMap parameter_values_) { diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference index 38355617601..da3ad8a9a3c 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.reference +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -9,9 +9,9 @@ SELECT FROM ( SELECT * - FROM default.Catalog + FROM default.test_02428_Catalog WHERE Price = _CAST(10, \'UInt64\') -) AS pv1 +) AS test_02428_pv1 50 10 20 diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index b2d4f99a5f1..feedaed0c44 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -1,87 +1,90 @@ -DROP VIEW IF EXISTS pv1; -DROP VIEW IF EXISTS pv2; -DROP VIEW IF EXISTS pv3; -DROP VIEW IF EXISTS pv4; -DROP VIEW IF EXISTS pv5; -DROP VIEW IF EXISTS pv6; -DROP VIEW IF EXISTS pv7; -DROP VIEW IF EXISTS v1; -DROP TABLE IF EXISTS Catalog; -DROP TABLE IF EXISTS system.pv1; -DROP TABLE IF EXISTS system.Catalog; +-- Tags: no-parallel -CREATE TABLE Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; +DROP VIEW IF EXISTS test_02428_pv1; +DROP VIEW IF EXISTS test_02428_pv2; +DROP VIEW IF EXISTS test_02428_pv3; +DROP VIEW IF EXISTS test_02428_pv4; +DROP VIEW IF EXISTS test_02428_pv5; +DROP VIEW IF EXISTS test_02428_pv6; +DROP VIEW IF EXISTS test_02428_pv7; +DROP VIEW IF EXISTS test_02428_v1; +DROP TABLE IF EXISTS test_02428_Catalog; +DROP TABLE IF EXISTS db_02428.pv1; +DROP TABLE IF EXISTS db_02428.Catalog; +DROP DATABASE IF EXISTS db_02428; -INSERT INTO Catalog VALUES ('Pen', 10, 3); -INSERT INTO Catalog VALUES ('Book', 50, 2); -INSERT INTO Catalog VALUES ('Paper', 20, 1); +CREATE TABLE test_02428_Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; -CREATE VIEW pv1 AS SELECT * FROM Catalog WHERE Price={price:UInt64}; -SELECT Price FROM pv1(price=20); -SELECT Price FROM `pv1`(price=20); +INSERT INTO test_02428_Catalog VALUES ('Pen', 10, 3); +INSERT INTO test_02428_Catalog VALUES ('Book', 50, 2); +INSERT INTO test_02428_Catalog VALUES ('Paper', 20, 1); + +CREATE VIEW test_02428_pv1 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64}; +SELECT Price FROM test_02428_pv1(price=20); +SELECT Price FROM `test_02428_pv1`(price=20); set param_p=10; -SELECT Price FROM pv1; -- { serverError UNKNOWN_QUERY_PARAMETER} -SELECT Price FROM pv1(price={p:UInt64}); +SELECT Price FROM test_02428_pv1; -- { serverError UNKNOWN_QUERY_PARAMETER} +SELECT Price FROM test_02428_pv1(price={p:UInt64}); set param_l=1; -SELECT Price FROM pv1(price=50) LIMIT ({l:UInt64}); +SELECT Price FROM test_02428_pv1(price=50) LIMIT ({l:UInt64}); -DETACH TABLE pv1; -ATTACH TABLE pv1; +DETACH TABLE 
test_02428_pv1; +ATTACH TABLE test_02428_pv1; -EXPLAIN SYNTAX SELECT * from pv1(price=10); +EXPLAIN SYNTAX SELECT * from test_02428_pv1(price=10); -INSERT INTO pv1 VALUES ('Bag', 50, 2); -- { serverError NOT_IMPLEMENTED} +INSERT INTO test_02428_pv1 VALUES ('Bag', 50, 2); -- { serverError NOT_IMPLEMENTED} SELECT Price FROM pv123(price=20); -- { serverError UNKNOWN_FUNCTION } -CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price=10; +CREATE VIEW test_02428_v1 AS SELECT * FROM test_02428_Catalog WHERE Price=10; -SELECT Price FROM v1(price=10); -- { serverError UNKNOWN_FUNCTION } +SELECT Price FROM test_02428_v1(price=10); -- { serverError UNKNOWN_FUNCTION } -CREATE VIEW pv2 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64}; -SELECT Price FROM pv2(price=50,quantity=2); +CREATE VIEW test_02428_pv2 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64}; +SELECT Price FROM test_02428_pv2(price=50,quantity=2); -SELECT Price FROM pv2(price=50); -- { serverError UNKNOWN_QUERY_PARAMETER} +SELECT Price FROM test_02428_pv2(price=50); -- { serverError UNKNOWN_QUERY_PARAMETER} -CREATE VIEW pv3 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity=3; -SELECT Price FROM pv3(price=10); +CREATE VIEW test_02428_pv3 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} AND Quantity=3; +SELECT Price FROM test_02428_pv3(price=10); -CREATE VIEW pv4 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={price:UInt64}; -- {serverError DUPLICATE_COLUMN} +CREATE VIEW test_02428_pv4 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} AND Quantity={price:UInt64}; -- {serverError DUPLICATE_COLUMN} -CREATE DATABASE test_02428; +CREATE DATABASE db_02428; -CREATE TABLE test_02428.Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; +CREATE TABLE db_02428.Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; -INSERT INTO test_02428.Catalog VALUES ('Pen', 10, 3); -INSERT INTO test_02428.Catalog VALUES ('Book', 50, 2); -INSERT INTO test_02428.Catalog VALUES ('Paper', 20, 1); +INSERT INTO db_02428.Catalog VALUES ('Pen', 10, 3); +INSERT INTO db_02428.Catalog VALUES ('Book', 50, 2); +INSERT INTO db_02428.Catalog VALUES ('Paper', 20, 1); -CREATE VIEW test_02428.pv1 AS SELECT * FROM test_02428.Catalog WHERE Price={price:UInt64}; -SELECT Price FROM test_02428.pv1(price=20); -SELECT Price FROM `test_02428.pv1`(price=20); -- { serverError UNKNOWN_FUNCTION } +CREATE VIEW db_02428.pv1 AS SELECT * FROM db_02428.Catalog WHERE Price={price:UInt64}; +SELECT Price FROM db_02428.pv1(price=20); +SELECT Price FROM `db_02428.pv1`(price=20); -- { serverError UNKNOWN_FUNCTION } -INSERT INTO Catalog VALUES ('Book2', 30, 8); -INSERT INTO Catalog VALUES ('Book3', 30, 8); +INSERT INTO test_02428_Catalog VALUES ('Book2', 30, 8); +INSERT INTO test_02428_Catalog VALUES ('Book3', 30, 8); -CREATE VIEW pv5 AS SELECT Price FROM Catalog WHERE {price:UInt64} HAVING Quantity in (SELECT {quantity:UInt64}) LIMIT {limit:UInt64}; -SELECT Price FROM pv5(price=30, quantity=8,limit=1); +CREATE VIEW test_02428_pv5 AS SELECT Price FROM test_02428_Catalog WHERE {price:UInt64} HAVING Quantity in (SELECT {quantity:UInt64}) LIMIT {limit:UInt64}; +SELECT Price FROM test_02428_pv5(price=30, quantity=8,limit=1); -CREATE VIEW pv6 AS SELECT Price+{price:UInt64} FROM Catalog GROUP BY Price+{price:UInt64} ORDER BY Price+{price:UInt64}; -SELECT * FROM pv6(price=10); +CREATE VIEW test_02428_pv6 AS SELECT Price+{price:UInt64} FROM 
test_02428_Catalog GROUP BY Price+{price:UInt64} ORDER BY Price+{price:UInt64}; +SELECT * FROM test_02428_pv6(price=10); -CREATE VIEW pv7 AS SELECT Price/{price:UInt64} FROM Catalog ORDER BY Price; -SELECT * FROM pv7(price=10); +CREATE VIEW test_02428_pv7 AS SELECT Price/{price:UInt64} FROM test_02428_Catalog ORDER BY Price; +SELECT * FROM test_02428_pv7(price=10); -DROP VIEW pv1; -DROP VIEW pv2; -DROP VIEW pv3; -DROP VIEW pv5; -DROP VIEW pv6; -DROP VIEW pv7; -DROP VIEW v1; -DROP TABLE Catalog; -DROP TABLE test_02428.pv1; -DROP TABLE test_02428.Catalog; -DROP DATABASE test_02428; \ No newline at end of file +DROP VIEW test_02428_pv1; +DROP VIEW test_02428_pv2; +DROP VIEW test_02428_pv3; +DROP VIEW test_02428_pv5; +DROP VIEW test_02428_pv6; +DROP VIEW test_02428_pv7; +DROP VIEW test_02428_v1; +DROP TABLE test_02428_Catalog; +DROP TABLE db_02428.pv1; +DROP TABLE db_02428.Catalog; +DROP DATABASE db_02428; \ No newline at end of file From 11fa29d243fd45e0944b7e06b4c7f53a76238a07 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 3 Jan 2023 17:59:04 +0000 Subject: [PATCH 090/262] Get rid of recursion --- ...ptimizeRedundantFunctionsInOrderByPass.cpp | 101 ++++++++++-------- 1 file changed, 58 insertions(+), 43 deletions(-) diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp index 29724de0f20..8136052cbd5 100644 --- a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp @@ -15,42 +15,6 @@ namespace class OptimizeRedundantFunctionsInOrderByVisitor : public InDepthQueryTreeVisitor { - std::unordered_set existing_keys; - - bool isRedundantExpression(FunctionNode * function) - { - if (function->getArguments().getNodes().empty()) - return false; - const auto & function_base = function->getFunction(); - if (!function_base || !function_base->isDeterministicInScopeOfQuery()) - return false; - - // TODO: handle constants here - for (auto & arg : function->getArguments().getNodes()) - { - switch (arg->getNodeType()) - { - case QueryTreeNodeType::FUNCTION: - { - if (!isRedundantExpression(arg->as())) - return false; - break; - } - case QueryTreeNodeType::COLUMN: - { - auto * column = arg->as(); - if (!existing_keys.contains(column->getColumnName())) - return false; - break; - } - default: - return false; - } - } - - return true; - } - public: bool needChildVisit(QueryTreeNodePtr & node, QueryTreeNodePtr & /*parent*/) { @@ -82,14 +46,22 @@ public: for (auto & elem : order_by.getNodes()) { auto & order_by_expr = elem->as()->getExpression(); - if (auto * expr = order_by_expr->as()) + switch (order_by_expr->getNodeType()) { - if (isRedundantExpression(expr)) - continue; - } - else if (auto * column = order_by_expr->as()) - { - existing_keys.insert(column->getColumnName()); + case QueryTreeNodeType::FUNCTION: + { + if (isRedundantExpression(order_by_expr)) + continue; + break; + } + case QueryTreeNodeType::COLUMN: + { + auto * column = order_by_expr->as(); + existing_keys.insert(column->getColumnName()); + break; + } + default: + break; } new_order_by_nodes.push_back(elem); @@ -99,6 +71,49 @@ public: if (new_order_by_nodes.size() < order_by.getNodes().size()) order_by.getNodes() = std::move(new_order_by_nodes); } + +private: + std::unordered_set existing_keys; + + bool isRedundantExpression(QueryTreeNodePtr function) + { + QueryTreeNodes nodes_to_process{ function }; + while (!nodes_to_process.empty()) + { + auto node = 
nodes_to_process.back(); + nodes_to_process.pop_back(); + + // TODO: handle constants here + switch (node->getNodeType()) + { + case QueryTreeNodeType::FUNCTION: + { + auto * function_node = node->as(); + const auto & function_arguments = function_node->getArguments().getNodes(); + if (function_arguments.empty()) + return false; + const auto & function_base = function_node->getFunction(); + if (!function_base || !function_base->isDeterministicInScopeOfQuery()) + return false; + + // Process arguments in order + for (auto it = function_arguments.rbegin(); it != function_arguments.rend(); ++it) + nodes_to_process.push_back(*it); + break; + } + case QueryTreeNodeType::COLUMN: + { + auto * column = node->as(); + if (!existing_keys.contains(column->getColumnName())) + return false; + break; + } + default: + return false; + } + } + return true; + } }; } From 15f20cb4e87531e364bc4cd5e34f745fd5387245 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 3 Jan 2023 18:13:31 +0000 Subject: [PATCH 091/262] Fix column comparison --- ...ptimizeRedundantFunctionsInOrderByPass.cpp | 10 +++---- ..._redundant_functions_in_order_by.reference | 28 +++++++++++++++++++ .../01323_redundant_functions_in_order_by.sql | 9 ++++++ 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp index 8136052cbd5..105fc0ef00a 100644 --- a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp @@ -1,7 +1,7 @@ -#include #include #include #include +#include #include #include #include @@ -56,8 +56,7 @@ public: } case QueryTreeNodeType::COLUMN: { - auto * column = order_by_expr->as(); - existing_keys.insert(column->getColumnName()); + existing_keys.insert(order_by_expr); break; } default: @@ -73,7 +72,7 @@ public: } private: - std::unordered_set existing_keys; + QueryTreeNodePtrWithHashSet existing_keys; bool isRedundantExpression(QueryTreeNodePtr function) { @@ -103,8 +102,7 @@ private: } case QueryTreeNodeType::COLUMN: { - auto * column = node->as(); - if (!existing_keys.contains(column->getColumnName())) + if (!existing_keys.contains(node)) return false; break; } diff --git a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference index ae160ed35d6..c69f8bb2c46 100644 --- a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference +++ b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference @@ -279,6 +279,34 @@ QUERY id: 0 SORT id: 10, sort_direction: ASCENDING, with_fill: 0 EXPRESSION COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 +QUERY id: 0 + PROJECTION COLUMNS + t1.id UInt64 + t2.id UInt64 + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: id, result_type: UInt64, source_id: 5 + JOIN TREE + JOIN id: 6, strictness: ALL, kind: INNER + LEFT TABLE EXPRESSION + TABLE id: 3, table_name: default.t1 + RIGHT TABLE EXPRESSION + TABLE id: 5, table_name: default.t2 + JOIN EXPRESSION + FUNCTION id: 7, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 8, nodes: 2 + COLUMN id: 9, column_name: id, result_type: UInt64, source_id: 3 + COLUMN id: 10, column_name: id, result_type: UInt64, source_id: 5 + ORDER BY + LIST id: 11, nodes: 2 + SORT id: 12, sort_direction: 
ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 13, column_name: id, result_type: UInt64, source_id: 3 + SORT id: 14, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 15, column_name: id, result_type: UInt64, source_id: 5 [0,1,2] [0,1,2] [0,1,2] diff --git a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql index 3573773b76c..5cdc4164d56 100644 --- a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql +++ b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql @@ -31,6 +31,13 @@ EXPLAIN SYNTAX SELECT key, a FROM test ORDER BY key, exp(key + a); EXPLAIN QUERY TREE run_passes=1 SELECT key, a FROM test ORDER BY key, exp(key + a); EXPLAIN QUERY TREE run_passes=1 SELECT key FROM test GROUP BY key ORDER BY avg(a), key; +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +CREATE TABLE t1 (id UInt64) ENGINE = MergeTree() ORDER BY id; +CREATE TABLE t2 (id UInt64) ENGINE = MergeTree() ORDER BY id; + +EXPLAIN QUERY TREE run_passes=1 SELECT * FROM t1 INNER JOIN t2 ON t1.id = t2.id ORDER BY t1.id, t2.id; + set optimize_redundant_functions_in_order_by = 0; SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(x)); @@ -46,4 +53,6 @@ EXPLAIN SYNTAX SELECT * FROM (SELECT number + 2 AS key FROM numbers(4)) s FULL J EXPLAIN SYNTAX SELECT key, a FROM test ORDER BY key, a, exp(key + a); EXPLAIN SYNTAX SELECT key, a FROM test ORDER BY key, exp(key + a); +DROP TABLE t1; +DROP TABLE t2; DROP TABLE test; From 0e743254dfd253cba19c5772c23398cf969640ae Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 4 Jan 2023 08:19:15 +0100 Subject: [PATCH 092/262] Removed no-parallel tag from test and removed an exception from StorageView - 40907 Parameterized views as table functions --- src/Storages/StorageView.cpp | 2 -- tests/queries/0_stateless/02428_parameterized_view.sql | 2 -- 2 files changed, 4 deletions(-) diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index df74def509d..13202d8f782 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -267,8 +267,6 @@ String StorageView::replaceQueryParameterWithValue(const String & column_name, c name.replace(pos, parameter.first.size(), parameter_name); break; } - else - throw Exception("Datatype not found for query parameter " + parameter.first, ErrorCodes::LOGICAL_ERROR); } } return name; diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index feedaed0c44..fbc1d8b2970 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -1,5 +1,3 @@ --- Tags: no-parallel - DROP VIEW IF EXISTS test_02428_pv1; DROP VIEW IF EXISTS test_02428_pv2; DROP VIEW IF EXISTS test_02428_pv3; From 6f0c0252f03dc504b454c6a36d792ea1f4da2363 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 4 Jan 2023 14:55:32 +0000 Subject: [PATCH 093/262] Fix tests --- tests/queries/0_stateless/02500_numbers_inference.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02500_numbers_inference.reference b/tests/queries/0_stateless/02500_numbers_inference.reference index bff7211f66a..7e1bb6510bb 100644 --- a/tests/queries/0_stateless/02500_numbers_inference.reference +++ b/tests/queries/0_stateless/02500_numbers_inference.reference @@ -16,5 +16,5 @@ c1 Nullable(Float64) c1 Nullable(Float64) c1 
Array(Nullable(Float64)) c1 Array(Nullable(Float64)) -c1 Array(Nullable(Float64)) -c1 Array(Nullable(Float64)) +c1 Nullable(String) +c1 Nullable(String) From 1f3d75cbf256c493b248dcddfb729fd0e4fb55fc Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 4 Jan 2023 14:58:17 +0000 Subject: [PATCH 094/262] Better --- src/Formats/SchemaInferenceUtils.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 5db9b04a6c1..6d0853f6169 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -98,14 +98,14 @@ namespace void transformIntegersAndFloatsToFloats(DataTypes & data_types, TypeIndexesSet & type_indexes) { bool have_floats = type_indexes.contains(TypeIndex::Float64); - bool have_integers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64); + bool have_integers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64); if (!have_integers || !have_floats) return; for (auto & type : data_types) { WhichDataType which(type); - if (which.isFloat64() || which.isInt64() || which.isUInt64()) + if (which.isInt64() || which.isUInt64()) type = std::make_shared(); } From 712de132d9a9927e8e93e079e81acccbb441cc6c Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 4 Jan 2023 16:05:05 +0000 Subject: [PATCH 095/262] Fix special build --- src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp index 105fc0ef00a..8c9db191bbd 100644 --- a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp @@ -16,7 +16,7 @@ namespace class OptimizeRedundantFunctionsInOrderByVisitor : public InDepthQueryTreeVisitor { public: - bool needChildVisit(QueryTreeNodePtr & node, QueryTreeNodePtr & /*parent*/) + static bool needChildVisit(QueryTreeNodePtr & node, QueryTreeNodePtr & /*parent*/) { if (node->as()) return false; From baf6297f1d9686f7a1fe949ebab405317a1b9722 Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 4 Jan 2023 21:50:57 +0000 Subject: [PATCH 096/262] add fast and cancellable shared_mutex alternatives --- src/Common/ErrorCodes.cpp | 1 + src/Common/Threading.cpp | 511 +++++++++++++++++++++++++++ src/Common/Threading.h | 282 +++++++++++++++ src/Common/tests/gtest_threading.cpp | 369 +++++++++++++++++++ 4 files changed, 1163 insertions(+) create mode 100644 src/Common/Threading.cpp create mode 100644 src/Common/Threading.h create mode 100644 src/Common/tests/gtest_threading.cpp diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 95333eccbcd..dec63d114eb 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -643,6 +643,7 @@ M(672, INVALID_SCHEDULER_NODE) \ M(673, RESOURCE_ACCESS_DENIED) \ M(674, RESOURCE_NOT_FOUND) \ + M(675, THREAD_WAS_CANCELLED) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/Threading.cpp b/src/Common/Threading.cpp new file mode 100644 index 00000000000..4d135ef93a6 --- /dev/null +++ b/src/Common/Threading.cpp @@ -0,0 +1,511 @@ +#include +#include + +#ifdef OS_LINUX /// Because of 'sigqueue' functions, RT signals and futex. 
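// The futex wrappers defined below rely on the usual futex(2) contract (an informal sketch, not a full specification):
//   FUTEX_WAIT blocks only while the 32-bit word at `address` still holds `value`, and may return spuriously;
//   FUTEX_WAKE wakes at most `count` threads currently blocked on `address`.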
+ +#include + +#include + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int THREAD_WAS_CANCELLED; +} + +namespace +{ + inline long futexWait(void * address, UInt32 value) + { + return syscall(SYS_futex, address, FUTEX_WAIT_PRIVATE, value, nullptr, nullptr, 0); + } + + inline long futexWake(void * address, int count) + { + return syscall(SYS_futex, address, FUTEX_WAKE_PRIVATE, count, nullptr, nullptr, 0); + } + + // inline void waitFetch(std::atomic & address, UInt32 & value) + // { + // futexWait(&address, value); + // value = address.load(); + // } + + // inline void wakeOne(std::atomic & address) + // { + // futexWake(&address, 1); + // } + + // inline void wakeAll(std::atomic & address) + // { + // futexWake(&address, INT_MAX); + // } + + inline constexpr UInt32 lowerValue(UInt64 value) + { + return UInt32(value & 0xffffffffull); + } + + inline constexpr UInt32 upperValue(UInt64 value) + { + return UInt32(value >> 32ull); + } + + inline UInt32 * lowerAddress(void * address) + { + return reinterpret_cast(address) + (std::endian::native == std::endian::big); + } + + inline UInt32 * upperAddress(void * address) + { + return reinterpret_cast(address) + (std::endian::native == std::endian::little); + } + + inline void waitLowerFetch(std::atomic & address, UInt64 & value) + { + futexWait(lowerAddress(&address), lowerValue(value)); + value = address.load(); + } + + inline bool cancellableWaitLowerFetch(std::atomic & address, UInt64 & value) + { + bool res = CancelToken::local().wait(lowerAddress(&address), lowerValue(value)); + value = address.load(); + return res; + } + + inline void wakeLowerOne(std::atomic & address) + { + syscall(SYS_futex, lowerAddress(&address), FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0); + } + + // inline void wakeLowerAll(std::atomic & address) + // { + // syscall(SYS_futex, lowerAddress(&address), FUTEX_WAKE_PRIVATE, INT_MAX, nullptr, nullptr, 0); + // } + + inline void waitUpperFetch(std::atomic & address, UInt64 & value) + { + futexWait(upperAddress(&address), upperValue(value)); + value = address.load(); + } + + inline bool cancellableWaitUpperFetch(std::atomic & address, UInt64 & value) + { + bool res = CancelToken::local().wait(upperAddress(&address), upperValue(value)); + value = address.load(); + return res; + } + + // inline void wakeUpperOne(std::atomic & address) + // { + // syscall(SYS_futex, upperAddress(&address), FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0); + // } + + inline void wakeUpperAll(std::atomic & address) + { + syscall(SYS_futex, upperAddress(&address), FUTEX_WAKE_PRIVATE, INT_MAX, nullptr, nullptr, 0); + } +} + +CancelToken::Registry::Registry() +{ + // setupCancelSignalHandler(); +} + +void CancelToken::Registry::insert(CancelToken * token) +{ + std::lock_guard lock(mutex); + threads[token->thread_id] = token; +} + +void CancelToken::Registry::remove(CancelToken * token) +{ + std::lock_guard lock(mutex); + threads.erase(token->thread_id); +} + +void CancelToken::Registry::signal(UInt64 tid) +{ + std::lock_guard lock(mutex); + if (auto it = threads.find(tid); it != threads.end()) + it->second->signalImpl(); +} + +void CancelToken::Registry::signal(UInt64 tid, int code, const String & message) +{ + std::lock_guard lock(mutex); + if (auto it = threads.find(tid); it != threads.end()) + it->second->signalImpl(code, message); +} + +CancelToken::Registry & CancelToken::Registry::instance() +{ + static Registry registry; + return registry; +} + +CancelToken::CancelToken() + : state(disabled) + , 
thread_id(getThreadId()) +{ + Registry::instance().insert(this); +} + +CancelToken::~CancelToken() +{ + Registry::instance().remove(this); +} + +void CancelToken::signal(UInt64 tid) +{ + Registry::instance().signal(tid); +} + +void CancelToken::signal(UInt64 tid, int code, const String & message) +{ + Registry::instance().signal(tid, code, message); +} + +bool CancelToken::wait(UInt32 * address, UInt32 value) +{ + chassert((reinterpret_cast(address) & canceled) == 0); // An `address` must be 2-byte aligned + if (value & signaled) // Can happen after spurious wake-up due to cancel of other thread + { + // static std::atomic x{0}; + // if (x++ > 5) + // sleep(3600); + return true; // Spin-wait unless signal is handled + } + + UInt64 s = state.load(); + while (true) + { + DBG("s={}", s); + if (s & disabled) + { + // Start non-cancellable wait on futex. Spurious wake-up is possible. + futexWait(address, value); + return true; // Disabled - true is forced + } + if (s & canceled) + return false; // Has already been canceled + if (state.compare_exchange_strong(s, reinterpret_cast(address))) + break; // This futex has been "acquired" by this token + } + + // Start cancellable wait. Spurious wake-up is possible. + DBG("start cancellable wait address={} value={}", static_cast(address), value); + futexWait(address, value); + + // "Release" futex and check for cancellation + s = state.load(); + while (true) + { + DBG("finish cancellable wait, s={}", s); + chassert((s & disabled) != disabled); // `disable()` must not be called from another thread + if (s & canceled) + { + if (s == canceled) + break; // Signaled; futex "release" has been done by the signaling thread + else + { + s = state.load(); + continue; // To avoid race (may lead to futex destruction) we have to wait for signaling thread to finish + } + } + if (state.compare_exchange_strong(s, 0)) + return true; // There was no cancellation; futex "released" + } + + // Reset signaled bit + reinterpret_cast *>(address)->fetch_and(~signaled); + return false; +} + +void CancelToken::raise() +{ + std::unique_lock lock(signal_mutex); + DBG("raise code={} msg={}", exception_code, exception_message); + if (exception_code != 0) + throw DB::Exception( + std::exchange(exception_code, 0), + std::exchange(exception_message, {})); + else + throw DB::Exception(ErrorCodes::THREAD_WAS_CANCELLED, "Thread was cancelled"); +} + +void CancelToken::notifyOne(UInt32 * address) +{ + futexWake(address, 1); +} + +void CancelToken::notifyAll(UInt32 * address) +{ + futexWake(address, INT_MAX); +} + +void CancelToken::signalImpl() +{ + signalImpl(0, {}); +} + +std::mutex CancelToken::signal_mutex; + +void CancelToken::signalImpl(int code, const String & message) +{ + // Serialize all signaling threads to avoid races due to concurrent signal()/raise() calls + std::unique_lock lock(signal_mutex); + + UInt64 s = state.load(); + while (true) + { + DBG("s={}", s); + if (s & canceled) + return; // Already cancelled - don't signal twice + if (state.compare_exchange_strong(s, s | canceled)) + break; // It is the cancelling thread - should deliver signal if necessary + } + + DBG("cancel tid={} code={} msg={}", thread_id, code, message); + exception_code = code; + exception_message = message; + + if ((s & disabled) == disabled) + return; // Cancellation is disabled - just signal token for later, but don't wake + std::atomic * address = reinterpret_cast *>(s & disabled); + DBG("address={}", static_cast(address)); + if (address == nullptr) + return; // Thread is currently not waiting on 
futex - wake-up not required + + // Set signaled bit + UInt32 value = address->load(); + while (true) + { + if (value & signaled) // Already signaled, just spin-wait until previous signal is handled by waiter + value = address->load(); + else if (address->compare_exchange_strong(value, value | signaled)) + break; + } + + // Wake all threads waiting on `address`, one of them will be cancelled and others will get spurious wake-ups + // Woken canceled thread will reset signaled bit + DBG("wake"); + futexWake(address, INT_MAX); + + // Signaling thread must remove address from state to notify canceled thread that `futexWake()` is done, thus `wake()` can return. + // Otherwise we may have race condition: signaling thread may try to wake futex that has been already destructed. + state.store(canceled); +} + +Cancellable::Cancellable() +{ + CancelToken::local().reset(); +} + +Cancellable::~Cancellable() +{ + CancelToken::local().disable(); +} + +NotCancellable::NotCancellable() +{ + CancelToken::local().disable(); +} + +NotCancellable::~NotCancellable() +{ + CancelToken::local().enable(); +} + +CancellableSharedMutex::CancellableSharedMutex() + : state(0) + , waiters(0) +{} + +void CancellableSharedMutex::lock() +{ + UInt64 value = state.load(); + while (true) + { + DBG("#A r={} w={} rs={} ws={}", value & readers, (value & writers) != 0, (value & readers_signaled) != 0, (value & writers_signaled) != 0); + if (value & writers) + { + waiters++; + if (!cancellableWaitUpperFetch(state, value)) + { + waiters--; + CancelToken::local().raise(); + } + else + waiters--; + } + else if (state.compare_exchange_strong(value, value | writers)) + break; + } + + value |= writers; + while (value & readers) + { + DBG("#B r={} w={} rs={} ws={}", value & readers, (value & writers) != 0, (value & readers_signaled) != 0, (value & writers_signaled) != 0); + if (!cancellableWaitLowerFetch(state, value)) + { + state.fetch_and(~writers); + wakeUpperAll(state); + CancelToken::local().raise(); + } + } +} + +bool CancellableSharedMutex::try_lock() +{ + UInt64 value = state.load(); + if ((value & (readers | writers)) == 0 && state.compare_exchange_strong(value, value | writers)) + return true; + return false; +} + +void CancellableSharedMutex::unlock() +{ + UInt64 value = state.fetch_and(~writers); + DBG("r={} w={} rs={} ws={}", value & readers, (value & writers) != 0, (value & readers_signaled) != 0, (value & writers_signaled) != 0); + if (waiters) + wakeUpperAll(state); +} + +void CancellableSharedMutex::lock_shared() +{ + UInt64 value = state.load(); + while (true) + { + DBG("r={} w={} rs={} ws={}", value & readers, (value & writers) != 0, (value & readers_signaled) != 0, (value & writers_signaled) != 0); + if (value & writers) + { + waiters++; + if (!cancellableWaitUpperFetch(state, value)) + { + waiters--; + CancelToken::local().raise(); + } + else + waiters--; + } + else if (state.compare_exchange_strong(value, value + 1)) // overflow is not realistic + break; + } +} + +bool CancellableSharedMutex::try_lock_shared() +{ + UInt64 value = state.load(); + if (!(value & writers) && state.compare_exchange_strong(value, value + 1)) // overflow is not realistic + return true; + return false; +} + +void CancellableSharedMutex::unlock_shared() +{ + UInt64 value = state.fetch_sub(1) - 1; + DBG("r={} w={} rs={} ws={}", value & readers, (value & writers) != 0, (value & readers_signaled) != 0, (value & writers_signaled) != 0); + if ((value & (writers | readers)) == writers) // If writer is waiting and no more readers + 
wakeLowerOne(state); // Wake writer +} + +FastSharedMutex::FastSharedMutex() + : state(0) + , waiters(0) +{} + +void FastSharedMutex::lock() +{ + UInt64 value = state.load(); + while (true) + { + if (value & writers) + { + waiters++; + waitUpperFetch(state, value); + waiters--; + } + else if (state.compare_exchange_strong(value, value | writers)) + break; + } + + value |= writers; + while (value & readers) + waitLowerFetch(state, value); +} + +bool FastSharedMutex::try_lock() +{ + UInt64 value = 0; + if (state.compare_exchange_strong(value, writers)) + return true; + return false; +} + +void FastSharedMutex::unlock() +{ + state.store(0); + if (waiters) + wakeUpperAll(state); +} + +void FastSharedMutex::lock_shared() +{ + UInt64 value = state.load(); + while (true) + { + if (value & writers) + { + waiters++; + waitUpperFetch(state, value); + waiters--; + } + else if (state.compare_exchange_strong(value, value + 1)) + break; + } +} + +bool FastSharedMutex::try_lock_shared() +{ + UInt64 value = state.load(); + if (!(value & writers) && state.compare_exchange_strong(value, value + 1)) + return true; + return false; +} + +void FastSharedMutex::unlock_shared() +{ + UInt64 value = state.fetch_sub(1) - 1; + if (value == writers) + wakeLowerOne(state); // Wake writer +} + +} + +#else + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int THREAD_WAS_CANCELLED; +} + +void CancelToken::raise() +{ + throw DB::Exception(ErrorCodes::THREAD_WAS_CANCELLED, "Thread was cancelled"); +} + +} + +#endif diff --git a/src/Common/Threading.h b/src/Common/Threading.h new file mode 100644 index 00000000000..14743def476 --- /dev/null +++ b/src/Common/Threading.h @@ -0,0 +1,282 @@ +#pragma once + +#include +#include + +#ifdef OS_LINUX /// Because of futex + +#include +#include +#include + + +// TODO(serxa): for debug only, remove it +#if 0 +#include +#include +#define DBG(...) std::cout << fmt::format("\033[01;3{}m[{}] {} {} {}\033[00m {}:{}\n", 1 + getThreadId() % 8, getThreadId(), reinterpret_cast(this), fmt::format(__VA_ARGS__), __PRETTY_FUNCTION__, __FILE__, __LINE__) +#else +#include +#define DBG(...) UNUSED(__VA_ARGS__) +#endif + +namespace DB +{ + +// Scoped object, enabling thread cancellation (cannot be nested) +struct Cancellable +{ + Cancellable(); + ~Cancellable(); +}; + +// Scoped object, disabling thread cancellation (cannot be nested; must be inside `Cancellable` region) +struct NotCancellable +{ + NotCancellable(); + ~NotCancellable(); +}; + +// Responsible for synchronization needed to deliver thread cancellation signal. +// Basic building block for cancallable synchronization primitives. +// Allows to perform cancellable wait on memory addresses (think futex) +class CancelToken +{ +public: + CancelToken(); + CancelToken(const CancelToken &) = delete; + CancelToken(CancelToken &&) = delete; + CancelToken & operator=(const CancelToken &) = delete; + ~CancelToken(); + + // Returns token for the current thread + static CancelToken & local() + { + static thread_local CancelToken token; + return token; + } + + // Cancellable wait on memory address (futex word). + // Thread will do atomic compare-and-sleep `*address == value`. Waiting will continue until `notify_one()` + // or `notify_all()` will be called with the same `address` or calling thread will be canceled using `signal()`. + // Note that spurious wake-ups are also possible due to cancellation of other waiters on the same `address`. + // WARNING: `address` must be 2-byte aligned and `value` highest bit must be zero. 
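    // Typical call pattern (a sketch of how the primitives below use it, not a required idiom):
    //     if (!CancelToken::local().wait(address, value))
    //         CancelToken::local().raise(); // rethrow the exception delivered by signal()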
+ // Return value: + // true - woken by either notify or spurious wakeup; + // false - iff cancelation signal has been received. + // Implementation details: + // It registers `address` inside token's `state` to allow other threads to wake this thread and deliver cancellation signal. + // Highest bit of `*address` is used for guarantied delivery of the signal, but is guaranteed to be zero on return due to cancellation. + // Intented to be called only by thread associated with this token. + bool wait(UInt32 * address, UInt32 value); + + // Throws `DB::Exception` received from `signal()`. Call it if `wait()` returned false. + // Intented to be called only by thread associated with this token. + [[noreturn]] void raise(); + + // Regular wake by address (futex word). It does not interact with token in any way. We have it here to complement `wait()`. + // Can be called from any thread. + static void notifyOne(UInt32 * address); + static void notifyAll(UInt32 * address); + + // Send cancel signal to thread with specified `tid`. + // If thread was waiting using `wait()` it will be woken up (unless cancellation is disabled). + // Can be called from any thread. + static void signal(UInt64 tid); + static void signal(UInt64 tid, int code, const String & message); + + // Flag used to deliver cancellation into memory address to wake a thread. + // Note that most significat bit at `addresses` to be used with `wait()` is reserved. + static constexpr UInt32 signaled = 1u << 31u; + +private: + friend struct Cancellable; + friend struct NotCancellable; + + // Restores initial state for token to be reused. See `Cancellable` struct. + // Intented to be called only by thread associated with this token. + void reset() + { + state.store(0); + } + + // Enable thread cancellation. See `NotCancellable` struct. + // Intented to be called only by thread associated with this token. + void enable() + { + chassert((state.load() & disabled) == disabled); + state.fetch_and(~disabled); + } + + // Disable thread cancellation. See `NotCancellable` struct. + // Intented to be called only by thread associated with this token. + void disable() + { + chassert((state.load() & disabled) == 0); + state.fetch_or(disabled); + } + + // Singleton. Maps thread IDs to tokens. + struct Registry; + friend struct Registry; + struct Registry + { + Registry(); + + std::mutex mutex; + std::unordered_map threads; // By thread ID + + void insert(CancelToken * token); + void remove(CancelToken * token); + void signal(UInt64 tid); + void signal(UInt64 tid, int code, const String & message); + + static Registry & instance(); + }; + + // Cancels this token and wakes thread if necessary. + // Can be called from any thread. + void signalImpl(); + void signalImpl(int code, const String & message); + + // Lower bit: cancel signal received flag + static constexpr UInt64 canceled = 1; + + // Upper bits - possible values: + // 1) all zeros: token is enabed, i.e. wait() call can return false, thread is not waiting on any address; + // 2) all ones: token is disabled, i.e. wait() call cannot be cancelled; + // 3) specific `address`: token is enabled and thread is currently waiting on this `address`. 
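    // Worked illustration of the encoding (example values, following the layout described above):
    //     state == 0x0000000000000000 -> enabled, not waiting, no cancel signal
    //     state == 0xFFFFFFFFFFFFFFFE -> disabled (i.e. ~canceled), no cancel signal
    //     state == UInt64(address)    -> enabled and waiting on `address` (2-byte aligned, so the low bit stays 0)
    //     state | canceled            -> the low bit marks a received cancel signal on top of any state above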
+ static constexpr UInt64 disabled = ~canceled; + static_assert(sizeof(UInt32 *) == sizeof(UInt64)); // State must be able to hold an address + + // All signal handling logic should be globally serialized using this mutex + static std::mutex signal_mutex; + + // Cancellation state + alignas(64) std::atomic state; + [[maybe_unused]] char padding[64 - sizeof(state)]; + + // Cancellation exception + int exception_code; + String exception_message; + + // Token is permanently attached to a single thread. There is one-to-one mapping between threads and tokens. + const UInt64 thread_id; +}; + +class CancellableSharedMutex +{ +public: + CancellableSharedMutex(); + ~CancellableSharedMutex() = default; + CancellableSharedMutex(const CancellableSharedMutex &) = delete; + CancellableSharedMutex & operator=(const CancellableSharedMutex &) = delete; + + // Exclusive ownership + void lock(); + bool try_lock(); + void unlock(); + + // Shared ownership + void lock_shared(); + bool try_lock_shared(); + void unlock_shared(); + +private: + // State 64-bits layout: + // 1b - 31b - 1b - 31b + // signaled - writers - signaled - readers + // 63------------------------------------0 + // Two 32-bit words are used for cancellable waiting, so each has its own separate signaled bit + static constexpr UInt64 readers = (1ull << 32ull) - 1ull - CancelToken::signaled; + static constexpr UInt64 readers_signaled = CancelToken::signaled; + static constexpr UInt64 writers = readers << 32ull; + static constexpr UInt64 writers_signaled = readers_signaled << 32ull; + + alignas(64) std::atomic state; + std::atomic waiters; +}; + +class FastSharedMutex +{ +public: + FastSharedMutex(); + ~FastSharedMutex() = default; + FastSharedMutex(const FastSharedMutex &) = delete; + FastSharedMutex & operator=(const FastSharedMutex &) = delete; + + // Exclusive ownership + void lock(); + bool try_lock(); + void unlock(); + + // Shared ownership + void lock_shared(); + bool try_lock_shared(); + void unlock_shared(); + +private: + static constexpr UInt64 readers = (1ull << 32ull) - 1ull; // Lower 32 bits of state + static constexpr UInt64 writers = ~readers; // Upper 32 bits of state + + alignas(64) std::atomic state; + std::atomic waiters; +}; + +} + +#else + +#include + +// WARNING: We support cancellable synchronization primitives only on linux for now + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int THREAD_WAS_CANCELLED; +} + +struct Cancellable +{ + Cancellable() = default; + ~Cancellable() = default; +}; + +struct NotCancellable +{ + NotCancellable() = default; + ~NotCancellable() = default; +}; + +class CancelToken +{ +public: + CancelToken() = default; + CancelToken(const CancelToken &) = delete; + CancelToken(CancelToken &&) = delete; + CancelToken & operator=(const CancelToken &) = delete; + ~CancelToken() = default; + + static CancelToken & local() + { + static CancelToken token; + return token; + } + + bool wait(UInt32 *, UInt32) { return true; } + [[noreturn]] void raise(); + static void notifyOne(UInt32 *) {} + static void notifyAll(UInt32 *) {} + static void signal(UInt64) {} + static void signal(UInt64, int, const String &) {} +}; + +using CancellableSharedMutex = std::shared_mutex; +using FastSharedMutex = std::shared_mutex; + +} + +#endif diff --git a/src/Common/tests/gtest_threading.cpp b/src/Common/tests/gtest_threading.cpp new file mode 100644 index 00000000000..d9cb8748eeb --- /dev/null +++ b/src/Common/tests/gtest_threading.cpp @@ -0,0 +1,369 @@ +#include + +#include +#include +#include +#include +#include 
+ +#include "Common/Exception.h" +#include +#include + +#include +#include + + +namespace DB +{ + namespace ErrorCodes + { + extern const int THREAD_WAS_CANCELLED; + } +} + +struct NoCancel {}; + +// for all PerfTests +static constexpr int requests = 512 * 1024; +static constexpr int max_threads = 16; + +template +void TestSharedMutex() +{ + // Test multiple readers can acquire lock + for (int readers = 1; readers <= 128; readers *= 2) + { + T sm; + std::atomic test(0); + std::barrier sync(readers + 1); + + std::vector threads; + threads.reserve(readers); + auto reader = [&] + { + [[maybe_unused]] Status status; + std::shared_lock lock(sm); + test++; + sync.arrive_and_wait(); + }; + + for (int i = 0; i < readers; i++) + threads.emplace_back(reader); + + { // writer + [[maybe_unused]] Status status; + sync.arrive_and_wait(); // wait for all reader to acquire lock to avoid blocking them + std::unique_lock lock(sm); + test++; + } + + for (auto & thread : threads) + thread.join(); + + ASSERT_EQ(test, readers + 1); + } + + // Test multiple writers cannot acquire lock simultaneously + for (int writers = 1; writers <= 128; writers *= 2) + { + T sm; + int test = 0; + std::barrier sync(writers); + std::vector threads; + + threads.reserve(writers); + auto writer = [&] + { + [[maybe_unused]] Status status; + sync.arrive_and_wait(); + std::unique_lock lock(sm); + test++; + }; + + for (int i = 0; i < writers; i++) + threads.emplace_back(writer); + + for (auto & thread : threads) + thread.join(); + + ASSERT_EQ(test, writers); + } +} + +template +void TestSharedMutexCancelReader() +{ + constexpr int readers = 8; + constexpr int tasks_per_reader = 32; + + T sm; + std::atomic successes(0); + std::atomic cancels(0); + std::barrier sync(readers + 1); + std::barrier cancel_sync(readers / 2 + 1); + std::vector threads; + + std::mutex m; + std::vector tids_to_cancel; + + threads.reserve(readers); + auto reader = [&] (int reader_id) + { + if (reader_id % 2 == 0) + { + std::unique_lock lock(m); + tids_to_cancel.emplace_back(getThreadId()); + } + for (int task = 0; task < tasks_per_reader; task++) { + try + { + [[maybe_unused]] Status status; + sync.arrive_and_wait(); // (A) sync with writer + sync.arrive_and_wait(); // (B) wait for writer to acquire unique_lock + std::shared_lock lock(sm); + successes++; + } + catch(DB::Exception & e) + { + ASSERT_EQ(e.code(), DB::ErrorCodes::THREAD_WAS_CANCELLED); + ASSERT_EQ(e.message(), "test"); + cancels++; + cancel_sync.arrive_and_wait(); // (C) sync with writer + } + } + }; + + for (int reader_id = 0; reader_id < readers; reader_id++) + threads.emplace_back(reader, reader_id); + + { // writer + [[maybe_unused]] Status status; + for (int task = 0; task < tasks_per_reader; task++) { + sync.arrive_and_wait(); // (A) wait for readers to finish previous task + ASSERT_EQ(cancels + successes, task * readers); + ASSERT_EQ(cancels, task * readers / 2); + ASSERT_EQ(successes, task * readers / 2); + std::unique_lock lock(sm); + sync.arrive_and_wait(); // (B) sync with readers + //std::unique_lock lock(m); // not needed, already synced using barrier + for (UInt64 tid : tids_to_cancel) + DB::CancelToken::signal(tid, DB::ErrorCodes::THREAD_WAS_CANCELLED, "test"); + + // This sync is crutial. It is needed to hold `lock` long enough. + // It guarantees that every cancelled thread will find `sm` blocked by writer, and thus will begin to wait. + // Wait() call is required for cancellation. Otherwise, fastpath acquire w/o wait will not generate exception. 
+ // And this is the desired behaviour. + cancel_sync.arrive_and_wait(); // (C) wait for cancellation to finish, before unlock. + } + } + + for (auto & thread : threads) + thread.join(); + + ASSERT_EQ(successes, tasks_per_reader * readers / 2); + ASSERT_EQ(cancels, tasks_per_reader * readers / 2); +} + +template +void TestSharedMutexCancelWriter() +{ + constexpr int writers = 8; + constexpr int tasks_per_writer = 32; + + T sm; + std::atomic successes(0); + std::atomic cancels(0); + std::barrier sync(writers); + std::vector threads; + + std::mutex m; + std::vector all_tids; + + threads.reserve(writers); + auto writer = [&] + { + { + std::unique_lock lock(m); + all_tids.emplace_back(getThreadId()); + } + for (int task = 0; task < tasks_per_writer; task++) { + try + { + [[maybe_unused]] Status status; + sync.arrive_and_wait(); // (A) sync all threads before race to acquire the lock + std::unique_lock lock(sm); + successes++; + // Thread that managed to acquire the lock cancels all other waiting writers + //std::unique_lock lock(m); // not needed, already synced using barrier + for (UInt64 tid : all_tids) + { + if (tid != getThreadId()) + DB::CancelToken::signal(tid, DB::ErrorCodes::THREAD_WAS_CANCELLED, "test"); + } + + // This sync is crutial. It is needed to hold `lock` long enough. + // It guarantees that every cancelled thread will find `sm` blocked, and thus will begin to wait. + // Wait() call is required for cancellation. Otherwise, fastpath acquire w/o wait will not generate exception. + // And this is the desired behaviour. + sync.arrive_and_wait(); // (B) wait for cancellation to finish, before unlock. + } + catch(DB::Exception & e) + { + ASSERT_EQ(e.code(), DB::ErrorCodes::THREAD_WAS_CANCELLED); + ASSERT_EQ(e.message(), "test"); + cancels++; + sync.arrive_and_wait(); // (B) sync with race winner + } + } + }; + + for (int writer_id = 0; writer_id < writers; writer_id++) + threads.emplace_back(writer); + + for (auto & thread : threads) + thread.join(); + + ASSERT_EQ(successes, tasks_per_writer); + ASSERT_EQ(cancels, tasks_per_writer * (writers - 1)); +} + +template +void PerfTestSharedMutexReadersOnly() +{ + std::cout << "*** " << demangle(typeid(T).name()) << "/" << demangle(typeid(Status).name()) << " ***" << std::endl; + + for (int thrs = 1; thrs <= max_threads; thrs *= 2) + { + T sm; + std::vector threads; + threads.reserve(thrs); + auto reader = [&] + { + [[maybe_unused]] Status status; + for (int request = requests / thrs; request; request--) + { + std::shared_lock lock(sm); + } + }; + + Stopwatch watch; + for (int i = 0; i < thrs; i++) + threads.emplace_back(reader); + + for (auto & thread : threads) + thread.join(); + + double ns = watch.elapsedNanoseconds(); + std::cout << "thrs = " << thrs << ":\t" << ns / requests << " ns\t" << requests * 1e9 / ns << " rps" << std::endl; + } +} + +template +void PerfTestSharedMutexWritersOnly() +{ + std::cout << "*** " << demangle(typeid(T).name()) << "/" << demangle(typeid(Status).name()) << " ***" << std::endl; + + for (int thrs = 1; thrs <= max_threads; thrs *= 2) + { + int counter = 0; + T sm; + std::vector threads; + threads.reserve(thrs); + auto writer = [&] + { + [[maybe_unused]] Status status; + for (int request = requests / thrs; request; request--) + { + std::unique_lock lock(sm); + ASSERT_TRUE(counter % 2 == 0); + counter++; + std::atomic_signal_fence(std::memory_order::seq_cst); // force complier to generate two separate increment instructions + counter++; + } + }; + + Stopwatch watch; + for (int i = 0; i < thrs; i++) + 
threads.emplace_back(writer); + + for (auto & thread : threads) + thread.join(); + + ASSERT_EQ(counter, requests * 2); + + double ns = watch.elapsedNanoseconds(); + std::cout << "thrs = " << thrs << ":\t" << ns / requests << " ns\t" << requests * 1e9 / ns << " rps" << std::endl; + } +} + +template +void PerfTestSharedMutexRW() +{ + std::cout << "*** " << demangle(typeid(T).name()) << "/" << demangle(typeid(Status).name()) << " ***" << std::endl; + + for (int thrs = 1; thrs <= max_threads; thrs *= 2) + { + int counter = 0; + T sm; + std::vector threads; + threads.reserve(thrs); + auto reader = [&] + { + [[maybe_unused]] Status status; + for (int request = requests / thrs / 2; request; request--) + { + { + std::shared_lock lock(sm); + ASSERT_TRUE(counter % 2 == 0); + } + { + std::unique_lock lock(sm); + ASSERT_TRUE(counter % 2 == 0); + counter++; + std::atomic_signal_fence(std::memory_order::seq_cst); // force complier to generate two separate increment instructions + counter++; + } + } + }; + + Stopwatch watch; + for (int i = 0; i < thrs; i++) + threads.emplace_back(reader); + + for (auto & thread : threads) + thread.join(); + + ASSERT_EQ(counter, requests); + + double ns = watch.elapsedNanoseconds(); + std::cout << "thrs = " << thrs << ":\t" << ns / requests << " ns\t" << requests * 1e9 / ns << " rps" << std::endl; + } +} + +TEST(Threading, SharedMutexSmokeCancellableEnabled) { TestSharedMutex(); } +TEST(Threading, SharedMutexSmokeCancellableDisabled) { TestSharedMutex(); } +TEST(Threading, SharedMutexSmokeFast) { TestSharedMutex(); } +TEST(Threading, SharedMutexSmokeStd) { TestSharedMutex(); } + +TEST(Threading, PerfTestSharedMutexReadersOnlyCancellableEnabled) { PerfTestSharedMutexReadersOnly(); } +TEST(Threading, PerfTestSharedMutexReadersOnlyCancellableDisabled) { PerfTestSharedMutexReadersOnly(); } +TEST(Threading, PerfTestSharedMutexReadersOnlyFast) { PerfTestSharedMutexReadersOnly(); } +TEST(Threading, PerfTestSharedMutexReadersOnlyStd) { PerfTestSharedMutexReadersOnly(); } + +TEST(Threading, PerfTestSharedMutexWritersOnlyCancellableEnabled) { PerfTestSharedMutexWritersOnly(); } +TEST(Threading, PerfTestSharedMutexWritersOnlyCancellableDisabled) { PerfTestSharedMutexWritersOnly(); } +TEST(Threading, PerfTestSharedMutexWritersOnlyFast) { PerfTestSharedMutexWritersOnly(); } +TEST(Threading, PerfTestSharedMutexWritersOnlyStd) { PerfTestSharedMutexWritersOnly(); } + +TEST(Threading, PerfTestSharedMutexRWCancellableEnabled) { PerfTestSharedMutexRW(); } +TEST(Threading, PerfTestSharedMutexRWCancellableDisabled) { PerfTestSharedMutexRW(); } +TEST(Threading, PerfTestSharedMutexRWFast) { PerfTestSharedMutexRW(); } +TEST(Threading, PerfTestSharedMutexRWStd) { PerfTestSharedMutexRW(); } + +#ifdef OS_LINUX /// These tests require cancellability + +TEST(Threading, SharedMutexCancelReaderCancellableEnabled) { TestSharedMutexCancelReader(); } +TEST(Threading, SharedMutexCancelWriterCancellableEnabled) { TestSharedMutexCancelWriter(); } + +#endif From 3ea04f0429bda92ebce56688caf5f21638a38a1b Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 4 Jan 2023 22:59:29 +0000 Subject: [PATCH 097/262] fix typos --- src/Common/Threading.cpp | 2 +- src/Common/Threading.h | 21 ++++++++------------- src/Common/tests/gtest_threading.cpp | 8 ++++---- 3 files changed, 13 insertions(+), 18 deletions(-) diff --git a/src/Common/Threading.cpp b/src/Common/Threading.cpp index 4d135ef93a6..0b0f347eb38 100644 --- a/src/Common/Threading.cpp +++ b/src/Common/Threading.cpp @@ -1,7 +1,7 @@ #include #include -#ifdef OS_LINUX /// 
Because of 'sigqueue' functions, RT signals and futex. +#ifdef OS_LINUX /// Because of futex #include diff --git a/src/Common/Threading.h b/src/Common/Threading.h index 14743def476..4d9e21193d3 100644 --- a/src/Common/Threading.h +++ b/src/Common/Threading.h @@ -63,15 +63,15 @@ public: // WARNING: `address` must be 2-byte aligned and `value` highest bit must be zero. // Return value: // true - woken by either notify or spurious wakeup; - // false - iff cancelation signal has been received. + // false - iff cancellation signal has been received. // Implementation details: // It registers `address` inside token's `state` to allow other threads to wake this thread and deliver cancellation signal. - // Highest bit of `*address` is used for guarantied delivery of the signal, but is guaranteed to be zero on return due to cancellation. - // Intented to be called only by thread associated with this token. + // Highest bit of `*address` is used for guaranteed delivery of the signal, but is guaranteed to be zero on return due to cancellation. + // Intended to be called only by thread associated with this token. bool wait(UInt32 * address, UInt32 value); // Throws `DB::Exception` received from `signal()`. Call it if `wait()` returned false. - // Intented to be called only by thread associated with this token. + // Intended to be called only by thread associated with this token. [[noreturn]] void raise(); // Regular wake by address (futex word). It does not interact with token in any way. We have it here to complement `wait()`. @@ -86,7 +86,7 @@ public: static void signal(UInt64 tid, int code, const String & message); // Flag used to deliver cancellation into memory address to wake a thread. - // Note that most significat bit at `addresses` to be used with `wait()` is reserved. + // Note that most significant bit at `addresses` to be used with `wait()` is reserved. static constexpr UInt32 signaled = 1u << 31u; private: @@ -94,14 +94,14 @@ private: friend struct NotCancellable; // Restores initial state for token to be reused. See `Cancellable` struct. - // Intented to be called only by thread associated with this token. + // Intended to be called only by thread associated with this token. void reset() { state.store(0); } // Enable thread cancellation. See `NotCancellable` struct. - // Intented to be called only by thread associated with this token. + // Intended to be called only by thread associated with this token. void enable() { chassert((state.load() & disabled) == disabled); @@ -109,7 +109,7 @@ private: } // Disable thread cancellation. See `NotCancellable` struct. - // Intented to be called only by thread associated with this token. + // Intended to be called only by thread associated with this token. void disable() { chassert((state.load() & disabled) == 0); @@ -234,11 +234,6 @@ private: namespace DB { -namespace ErrorCodes -{ - extern const int THREAD_WAS_CANCELLED; -} - struct Cancellable { Cancellable() = default; diff --git a/src/Common/tests/gtest_threading.cpp b/src/Common/tests/gtest_threading.cpp index d9cb8748eeb..5ac3cc35448 100644 --- a/src/Common/tests/gtest_threading.cpp +++ b/src/Common/tests/gtest_threading.cpp @@ -150,7 +150,7 @@ void TestSharedMutexCancelReader() for (UInt64 tid : tids_to_cancel) DB::CancelToken::signal(tid, DB::ErrorCodes::THREAD_WAS_CANCELLED, "test"); - // This sync is crutial. It is needed to hold `lock` long enough. + // This sync is crucial. It is needed to hold `lock` long enough. 
// It guarantees that every cancelled thread will find `sm` blocked by writer, and thus will begin to wait. // Wait() call is required for cancellation. Otherwise, fastpath acquire w/o wait will not generate exception. // And this is the desired behaviour. @@ -202,7 +202,7 @@ void TestSharedMutexCancelWriter() DB::CancelToken::signal(tid, DB::ErrorCodes::THREAD_WAS_CANCELLED, "test"); } - // This sync is crutial. It is needed to hold `lock` long enough. + // This sync is crucial. It is needed to hold `lock` long enough. // It guarantees that every cancelled thread will find `sm` blocked, and thus will begin to wait. // Wait() call is required for cancellation. Otherwise, fastpath acquire w/o wait will not generate exception. // And this is the desired behaviour. @@ -278,7 +278,7 @@ void PerfTestSharedMutexWritersOnly() std::unique_lock lock(sm); ASSERT_TRUE(counter % 2 == 0); counter++; - std::atomic_signal_fence(std::memory_order::seq_cst); // force complier to generate two separate increment instructions + std::atomic_signal_fence(std::memory_order::seq_cst); // force compiler to generate two separate increment instructions counter++; } }; @@ -321,7 +321,7 @@ void PerfTestSharedMutexRW() std::unique_lock lock(sm); ASSERT_TRUE(counter % 2 == 0); counter++; - std::atomic_signal_fence(std::memory_order::seq_cst); // force complier to generate two separate increment instructions + std::atomic_signal_fence(std::memory_order::seq_cst); // force compiler to generate two separate increment instructions counter++; } } From e9e3414ae1dababe0fda07ad5086c5d433584e4f Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 4 Jan 2023 23:04:04 +0000 Subject: [PATCH 098/262] remove debug print --- src/Common/Threading.cpp | 15 +-------------- src/Common/Threading.h | 11 ----------- 2 files changed, 1 insertion(+), 25 deletions(-) diff --git a/src/Common/Threading.cpp b/src/Common/Threading.cpp index 0b0f347eb38..860e26efc76 100644 --- a/src/Common/Threading.cpp +++ b/src/Common/Threading.cpp @@ -188,7 +188,6 @@ bool CancelToken::wait(UInt32 * address, UInt32 value) UInt64 s = state.load(); while (true) { - DBG("s={}", s); if (s & disabled) { // Start non-cancellable wait on futex. Spurious wake-up is possible. @@ -202,14 +201,12 @@ bool CancelToken::wait(UInt32 * address, UInt32 value) } // Start cancellable wait. Spurious wake-up is possible. 
- DBG("start cancellable wait address={} value={}", static_cast(address), value); futexWait(address, value); // "Release" futex and check for cancellation s = state.load(); while (true) { - DBG("finish cancellable wait, s={}", s); chassert((s & disabled) != disabled); // `disable()` must not be called from another thread if (s & canceled) { @@ -233,7 +230,6 @@ bool CancelToken::wait(UInt32 * address, UInt32 value) void CancelToken::raise() { std::unique_lock lock(signal_mutex); - DBG("raise code={} msg={}", exception_code, exception_message); if (exception_code != 0) throw DB::Exception( std::exchange(exception_code, 0), @@ -267,21 +263,18 @@ void CancelToken::signalImpl(int code, const String & message) UInt64 s = state.load(); while (true) { - DBG("s={}", s); if (s & canceled) return; // Already cancelled - don't signal twice if (state.compare_exchange_strong(s, s | canceled)) break; // It is the cancelling thread - should deliver signal if necessary } - DBG("cancel tid={} code={} msg={}", thread_id, code, message); exception_code = code; exception_message = message; if ((s & disabled) == disabled) return; // Cancellation is disabled - just signal token for later, but don't wake std::atomic * address = reinterpret_cast *>(s & disabled); - DBG("address={}", static_cast(address)); if (address == nullptr) return; // Thread is currently not waiting on futex - wake-up not required @@ -297,7 +290,6 @@ void CancelToken::signalImpl(int code, const String & message) // Wake all threads waiting on `address`, one of them will be cancelled and others will get spurious wake-ups // Woken canceled thread will reset signaled bit - DBG("wake"); futexWake(address, INT_MAX); // Signaling thread must remove address from state to notify canceled thread that `futexWake()` is done, thus `wake()` can return. 
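To make the wait/cancel contract spelled out in the comments above concrete, here is a minimal sketch (not part of the patch) of how a synchronization primitive can build a cancellable block-until-changed wait out of `CancelToken::wait()` and `CancelToken::raise()`. It assumes the `Common/Threading.h` API introduced in this series and a futex word that satisfies the documented requirements (2-byte aligned, highest bit reserved for signal delivery).

```cpp
#include <Common/Threading.h>   // DB::CancelToken, as introduced in this patch series
#include <base/types.h>         // UInt32
#include <atomic>

/// Block the calling thread while `word` still holds `expected`.
/// Returns normally after a real (or spurious) wake-up once the value has changed;
/// throws the DB::Exception delivered by CancelToken::signal() if this thread is cancelled.
inline void cancellableWaitWhile(std::atomic<UInt32> & word, UInt32 expected)
{
    while (word.load() == expected)
    {
        // wait() returns false only when a cancellation signal has been received ...
        if (!DB::CancelToken::local().wait(reinterpret_cast<UInt32 *>(&word), expected))
            DB::CancelToken::local().raise(); // ... and raise() rethrows the code/message passed to signal()
    }
}
```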
@@ -335,7 +327,6 @@ void CancellableSharedMutex::lock() UInt64 value = state.load(); while (true) { - DBG("#A r={} w={} rs={} ws={}", value & readers, (value & writers) != 0, (value & readers_signaled) != 0, (value & writers_signaled) != 0); if (value & writers) { waiters++; @@ -354,7 +345,6 @@ void CancellableSharedMutex::lock() value |= writers; while (value & readers) { - DBG("#B r={} w={} rs={} ws={}", value & readers, (value & writers) != 0, (value & readers_signaled) != 0, (value & writers_signaled) != 0); if (!cancellableWaitLowerFetch(state, value)) { state.fetch_and(~writers); @@ -374,8 +364,7 @@ bool CancellableSharedMutex::try_lock() void CancellableSharedMutex::unlock() { - UInt64 value = state.fetch_and(~writers); - DBG("r={} w={} rs={} ws={}", value & readers, (value & writers) != 0, (value & readers_signaled) != 0, (value & writers_signaled) != 0); + state.fetch_and(~writers); if (waiters) wakeUpperAll(state); } @@ -385,7 +374,6 @@ void CancellableSharedMutex::lock_shared() UInt64 value = state.load(); while (true) { - DBG("r={} w={} rs={} ws={}", value & readers, (value & writers) != 0, (value & readers_signaled) != 0, (value & writers_signaled) != 0); if (value & writers) { waiters++; @@ -413,7 +401,6 @@ bool CancellableSharedMutex::try_lock_shared() void CancellableSharedMutex::unlock_shared() { UInt64 value = state.fetch_sub(1) - 1; - DBG("r={} w={} rs={} ws={}", value & readers, (value & writers) != 0, (value & readers_signaled) != 0, (value & writers_signaled) != 0); if ((value & (writers | readers)) == writers) // If writer is waiting and no more readers wakeLowerOne(state); // Wake writer } diff --git a/src/Common/Threading.h b/src/Common/Threading.h index 4d9e21193d3..08f0242a1df 100644 --- a/src/Common/Threading.h +++ b/src/Common/Threading.h @@ -9,17 +9,6 @@ #include #include - -// TODO(serxa): for debug only, remove it -#if 0 -#include -#include -#define DBG(...) std::cout << fmt::format("\033[01;3{}m[{}] {} {} {}\033[00m {}:{}\n", 1 + getThreadId() % 8, getThreadId(), reinterpret_cast(this), fmt::format(__VA_ARGS__), __PRETTY_FUNCTION__, __FILE__, __LINE__) -#else -#include -#define DBG(...) 
UNUSED(__VA_ARGS__) -#endif - namespace DB { From f706cf8903cc0f93e47763c842f832144805b543 Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 4 Jan 2023 23:49:29 +0000 Subject: [PATCH 099/262] fix style --- src/Common/Threading.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/Common/Threading.cpp b/src/Common/Threading.cpp index 860e26efc76..2a013e6485e 100644 --- a/src/Common/Threading.cpp +++ b/src/Common/Threading.cpp @@ -1,6 +1,14 @@ #include #include +namespace DB +{ +namespace ErrorCodes +{ + extern const int THREAD_WAS_CANCELLED; +} +} + #ifdef OS_LINUX /// Because of futex #include @@ -15,11 +23,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int THREAD_WAS_CANCELLED; -} - namespace { inline long futexWait(void * address, UInt32 value) @@ -483,11 +486,6 @@ void FastSharedMutex::unlock_shared() namespace DB { -namespace ErrorCodes -{ - extern const int THREAD_WAS_CANCELLED; -} - void CancelToken::raise() { throw DB::Exception(ErrorCodes::THREAD_WAS_CANCELLED, "Thread was cancelled"); From c4e896f73a0a71ee4c8fb048856a0b9d1ac9011e Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 5 Jan 2023 00:23:10 +0000 Subject: [PATCH 100/262] cleanup --- src/Common/Threading.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Common/Threading.cpp b/src/Common/Threading.cpp index 2a013e6485e..caf255cab64 100644 --- a/src/Common/Threading.cpp +++ b/src/Common/Threading.cpp @@ -181,12 +181,7 @@ bool CancelToken::wait(UInt32 * address, UInt32 value) { chassert((reinterpret_cast(address) & canceled) == 0); // An `address` must be 2-byte aligned if (value & signaled) // Can happen after spurious wake-up due to cancel of other thread - { - // static std::atomic x{0}; - // if (x++ > 5) - // sleep(3600); return true; // Spin-wait unless signal is handled - } UInt64 s = state.load(); while (true) From abf63d0c3365ef421bc0e68ad28c108d07dec2bd Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 5 Jan 2023 00:26:28 +0000 Subject: [PATCH 101/262] cleanup --- src/Common/Threading.cpp | 5 ----- src/Common/Threading.h | 2 -- 2 files changed, 7 deletions(-) diff --git a/src/Common/Threading.cpp b/src/Common/Threading.cpp index caf255cab64..7a32af405de 100644 --- a/src/Common/Threading.cpp +++ b/src/Common/Threading.cpp @@ -118,11 +118,6 @@ namespace } } -CancelToken::Registry::Registry() -{ - // setupCancelSignalHandler(); -} - void CancelToken::Registry::insert(CancelToken * token) { std::lock_guard lock(mutex); diff --git a/src/Common/Threading.h b/src/Common/Threading.h index 08f0242a1df..1398e5b1e0e 100644 --- a/src/Common/Threading.h +++ b/src/Common/Threading.h @@ -110,8 +110,6 @@ private: friend struct Registry; struct Registry { - Registry(); - std::mutex mutex; std::unordered_map threads; // By thread ID From c507d7ecef6840a84e32888528f315c69f230d84 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Thu, 5 Jan 2023 15:27:21 +0100 Subject: [PATCH 102/262] Update src/Common/tests/gtest_threading.cpp Co-authored-by: Antonio Andelic --- src/Common/tests/gtest_threading.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/tests/gtest_threading.cpp b/src/Common/tests/gtest_threading.cpp index 5ac3cc35448..fd84e4f0633 100644 --- a/src/Common/tests/gtest_threading.cpp +++ b/src/Common/tests/gtest_threading.cpp @@ -208,7 +208,7 @@ void TestSharedMutexCancelWriter() // And this is the desired behaviour. sync.arrive_and_wait(); // (B) wait for cancellation to finish, before unlock. 
} - catch(DB::Exception & e) + catch (DB::Exception & e) { ASSERT_EQ(e.code(), DB::ErrorCodes::THREAD_WAS_CANCELLED); ASSERT_EQ(e.message(), "test"); From 985dff1dbfe2db124f3919ec6143fc8b312b33a6 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Thu, 5 Jan 2023 15:27:32 +0100 Subject: [PATCH 103/262] Update src/Common/tests/gtest_threading.cpp Co-authored-by: Antonio Andelic --- src/Common/tests/gtest_threading.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/tests/gtest_threading.cpp b/src/Common/tests/gtest_threading.cpp index fd84e4f0633..62a9085cdbe 100644 --- a/src/Common/tests/gtest_threading.cpp +++ b/src/Common/tests/gtest_threading.cpp @@ -124,7 +124,7 @@ void TestSharedMutexCancelReader() std::shared_lock lock(sm); successes++; } - catch(DB::Exception & e) + catch (DB::Exception & e) { ASSERT_EQ(e.code(), DB::ErrorCodes::THREAD_WAS_CANCELLED); ASSERT_EQ(e.message(), "test"); From 3f87e6cd776ad2c99d6c4733d98cdfdc1049ad0f Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Thu, 5 Jan 2023 15:27:51 +0100 Subject: [PATCH 104/262] Update src/Common/Threading.h Co-authored-by: Igor Nikonov <954088+devcrafter@users.noreply.github.com> --- src/Common/Threading.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/Threading.h b/src/Common/Threading.h index 1398e5b1e0e..4b6a372ce2d 100644 --- a/src/Common/Threading.h +++ b/src/Common/Threading.h @@ -27,7 +27,7 @@ struct NotCancellable }; // Responsible for synchronization needed to deliver thread cancellation signal. -// Basic building block for cancallable synchronization primitives. +// Basic building block for cancellable synchronization primitives. // Allows to perform cancellable wait on memory addresses (think futex) class CancelToken { From d217136edea8910dcee40cda2535fa8928506154 Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 5 Jan 2023 20:41:36 +0000 Subject: [PATCH 105/262] review fixes --- src/Common/Threading.cpp | 8 ++++---- src/Common/Threading.h | 18 +++++++++--------- src/Common/tests/gtest_threading.cpp | 10 +++++----- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/Common/Threading.cpp b/src/Common/Threading.cpp index 7a32af405de..0e073162bb8 100644 --- a/src/Common/Threading.cpp +++ b/src/Common/Threading.cpp @@ -53,12 +53,12 @@ namespace inline constexpr UInt32 lowerValue(UInt64 value) { - return UInt32(value & 0xffffffffull); + return static_cast(value & 0xffffffffull); } inline constexpr UInt32 upperValue(UInt64 value) { - return UInt32(value >> 32ull); + return static_cast(value >> 32ull); } inline UInt32 * lowerAddress(void * address) @@ -300,12 +300,12 @@ Cancellable::~Cancellable() CancelToken::local().disable(); } -NotCancellable::NotCancellable() +NonCancellable::NonCancellable() { CancelToken::local().disable(); } -NotCancellable::~NotCancellable() +NonCancellable::~NonCancellable() { CancelToken::local().enable(); } diff --git a/src/Common/Threading.h b/src/Common/Threading.h index 4b6a372ce2d..e21fc5608b2 100644 --- a/src/Common/Threading.h +++ b/src/Common/Threading.h @@ -20,10 +20,10 @@ struct Cancellable }; // Scoped object, disabling thread cancellation (cannot be nested; must be inside `Cancellable` region) -struct NotCancellable +struct NonCancellable { - NotCancellable(); - ~NotCancellable(); + NonCancellable(); + ~NonCancellable(); }; // Responsible for synchronization needed to deliver thread cancellation signal. 
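For orientation, a short usage sketch (not part of the patch) showing how the renamed scoped guards are meant to compose: `Cancellable` opts a thread into cancellation, `NonCancellable` protects a nested region, and another thread delivers the signal with `CancelToken::signal()`. It follows the pattern of the gtest above and assumes the `THREAD_WAS_CANCELLED` error code is declared the same way as in the tests.

```cpp
#include <Common/Threading.h>   // CancellableSharedMutex, Cancellable, NonCancellable, CancelToken
#include <Common/Exception.h>
#include <shared_mutex>         // std::shared_lock
#include <iostream>

namespace DB::ErrorCodes { extern const int THREAD_WAS_CANCELLED; } // declared as in the gtest

void cancellableReader(DB::CancellableSharedMutex & sm)
{
    DB::Cancellable cancellable;   // opt this thread into cancellation (resets its CancelToken)
    try
    {
        std::shared_lock lock(sm); // may throw if CancelToken::signal() arrives while we wait behind a writer
        DB::NonCancellable guard;  // nested region that must not be interrupted
        // ... read the shared state ...
    }
    catch (DB::Exception & e)      // carries the code and message passed to CancelToken::signal()
    {
        std::cout << "reader cancelled: " << e.message() << " (code " << e.code() << ")\n";
    }
}

// Elsewhere, knowing the reader's thread id (e.g. collected via getThreadId()):
//     DB::CancelToken::signal(reader_tid, DB::ErrorCodes::THREAD_WAS_CANCELLED, "shutting down");
```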
@@ -80,7 +80,7 @@ public: private: friend struct Cancellable; - friend struct NotCancellable; + friend struct NonCancellable; // Restores initial state for token to be reused. See `Cancellable` struct. // Intended to be called only by thread associated with this token. @@ -89,7 +89,7 @@ private: state.store(0); } - // Enable thread cancellation. See `NotCancellable` struct. + // Enable thread cancellation. See `NonCancellable` struct. // Intended to be called only by thread associated with this token. void enable() { @@ -97,7 +97,7 @@ private: state.fetch_and(~disabled); } - // Disable thread cancellation. See `NotCancellable` struct. + // Disable thread cancellation. See `NonCancellable` struct. // Intended to be called only by thread associated with this token. void disable() { @@ -227,10 +227,10 @@ struct Cancellable ~Cancellable() = default; }; -struct NotCancellable +struct NonCancellable { - NotCancellable() = default; - ~NotCancellable() = default; + NonCancellable() = default; + ~NonCancellable() = default; }; class CancelToken diff --git a/src/Common/tests/gtest_threading.cpp b/src/Common/tests/gtest_threading.cpp index 62a9085cdbe..767739deb46 100644 --- a/src/Common/tests/gtest_threading.cpp +++ b/src/Common/tests/gtest_threading.cpp @@ -44,8 +44,8 @@ void TestSharedMutex() { [[maybe_unused]] Status status; std::shared_lock lock(sm); - test++; sync.arrive_and_wait(); + test++; }; for (int i = 0; i < readers; i++) @@ -94,8 +94,8 @@ void TestSharedMutex() template void TestSharedMutexCancelReader() { - constexpr int readers = 8; - constexpr int tasks_per_reader = 32; + static constexpr int readers = 8; + static constexpr int tasks_per_reader = 32; T sm; std::atomic successes(0); @@ -168,8 +168,8 @@ void TestSharedMutexCancelReader() template void TestSharedMutexCancelWriter() { - constexpr int writers = 8; - constexpr int tasks_per_writer = 32; + static constexpr int writers = 8; + static constexpr int tasks_per_writer = 32; T sm; std::atomic successes(0); From 5cde7762ad0574c2c01d8f0e988c3f2f63ba27a0 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Thu, 5 Jan 2023 15:54:44 -0500 Subject: [PATCH 106/262] WIP --- .../table-engines/integrations/deltalake.md | 33 ++++ .../table-functions/deltalake.md | 184 ++++++++++++++++++ 2 files changed, 217 insertions(+) create mode 100644 docs/en/engines/table-engines/integrations/deltalake.md create mode 100644 docs/en/sql-reference/table-functions/deltalake.md diff --git a/docs/en/engines/table-engines/integrations/deltalake.md b/docs/en/engines/table-engines/integrations/deltalake.md new file mode 100644 index 00000000000..44407e34e38 --- /dev/null +++ b/docs/en/engines/table-engines/integrations/deltalake.md @@ -0,0 +1,33 @@ +--- +slug: /en/engines/table-engines/integrations/deltalake +sidebar_label: DeltaLake +--- + +# DeltaLake Table Engine + +This engine provides a read-only integration with existing Delta Lake tables in Amazon S3. + +## Create Table + +Note that the Delta Lake table must already exist in S3, this command does not take DDL parameters to create a new table. + +``` sql +CREATE TABLE deltalake + ENGINE = DeltaLake(path, [aws_access_key_id, aws_secret_access_key,]) +``` + +**Engine parameters** + +- `path` — Bucket url with path to the existing Delta Lake table. +- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. 
If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3). + +**Example** + +```sql +CREATE TABLE deltalake ENGINE=DeltaLake('http://mars-doc-test.s3.amazonaws.com/clickhouse-bucket-3/test_table/', 'ABC123', 'Abc+123') +``` + +## See also + +- [DeltaLake table function](../../../sql-reference/table-functions/deltalake.md) + diff --git a/docs/en/sql-reference/table-functions/deltalake.md b/docs/en/sql-reference/table-functions/deltalake.md new file mode 100644 index 00000000000..7e3fffe4d8b --- /dev/null +++ b/docs/en/sql-reference/table-functions/deltalake.md @@ -0,0 +1,184 @@ +--- +slug: /en/sql-reference/table-functions/deltalake +sidebar_label: DeltLake +--- + +# DeltaLake Table Function + +Provides a read-only table-like interface to [Delta Lake](https://github.com/delta-io/delta) tables in [Amazon S3](https://aws.amazon.com/s3/). + +For example, to query an existing Delta Lake table named `deltalake` in S3: +```sql +CREATE TABLE dl_hits + ENGINE = DeltaLake('https://clickhouse-public-datasets.s3.amazonaws.com/delta_lake/hits/','',''); + +SHOW TABLES; + +DESCRIBE dl_hits; + +SELECT URL, Referer, UserAgent FROM dl_hits WHERE URL IS NOT NULL LIMIT 10; + +SELECT URL, Referer, UserAgent FROM deltaLake('https://clickhouse-public-datasets.s3.amazonaws.com/delta_lake/hits/') WHERE URL IS NOT NULL LIMIT 10; + +``` + +**Syntax** + +``` sql +s3(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) +``` + +**Arguments** + +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `format` — The [format](../../interfaces/formats.md#formats) of the file. +- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. +- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension. + +**Returned value** + +A table with the specified structure for reading or writing data in the specified file. 
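Before the generic examples below, a minimal sketch of calling the table function directly; it reuses the public dataset URL and column names from the example at the top of this page (credentials are omitted because that bucket is public; for private buckets they can be passed the same way as in the `CREATE TABLE dl_hits` example above).

```sql
-- Read a few rows straight from the Delta Lake table without creating a table object first
SELECT URL, UserAgent
FROM deltaLake('https://clickhouse-public-datasets.s3.amazonaws.com/delta_lake/hits/')
WHERE URL IS NOT NULL
LIMIT 5;
```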
+ +**Examples** + +Selecting the first two rows from the table from S3 file `https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv`: + +``` sql +SELECT * +FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') +LIMIT 2; +``` + +``` text +┌─column1─┬─column2─┬─column3─┐ +│ 1 │ 2 │ 3 │ +│ 3 │ 2 │ 1 │ +└─────────┴─────────┴─────────┘ +``` + +The similar but from file with `gzip` compression: + +``` sql +SELECT * +FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') +LIMIT 2; +``` + +``` text +┌─column1─┬─column2─┬─column3─┐ +│ 1 │ 2 │ 3 │ +│ 3 │ 2 │ 1 │ +└─────────┴─────────┴─────────┘ +``` + +## Usage + +Suppose that we have several files with following URIs on S3: + +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_1.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_2.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_3.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_4.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_1.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_2.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_3.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_4.csv' + +Count the amount of rows in files ending with numbers from 1 to 3: + +``` sql +SELECT count(*) +FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32') +``` + +``` text +┌─count()─┐ +│ 18 │ +└─────────┘ +``` + +Count the total amount of rows in all files in these two directories: + +``` sql +SELECT count(*) +FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32') +``` + +``` text +┌─count()─┐ +│ 24 │ +└─────────┘ +``` + +:::warning +If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. +::: + +Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: + +``` sql +SELECT count(*) +FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32'); +``` + +``` text +┌─count()─┐ +│ 12 │ +└─────────┘ +``` + +Insert data into file `test-data.csv.gz`: + +``` sql +INSERT INTO FUNCTION s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +VALUES ('test-data', 1), ('test-data-2', 2); +``` + +Insert data into file `test-data.csv.gz` from existing table: + +``` sql +INSERT INTO FUNCTION s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +SELECT name, value FROM existing_table; +``` + +Glob ** can be used for recursive directory traversal. 
Consider the below example, it will fetch all files from `my-test-bucket-768` directory recursively: + +``` sql +SELECT * FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**', 'CSV', 'name String, value UInt32', 'gzip'); +``` + +The below get data from all `test-data.csv.gz` files from any folder inside `my-test-bucket` directory recursively: + +``` sql +SELECT * FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); +``` + +## Partitioned Write + +If you specify `PARTITION BY` expression when inserting data into `S3` table, a separate file is created for each partition value. Splitting the data into separate files helps to improve reading operations efficiency. + +**Examples** + +1. Using partition ID in a key creates separate files: + +```sql +INSERT INTO TABLE FUNCTION + s3('http://bucket.amazonaws.com/my_bucket/file_{_partition_id}.csv', 'CSV', 'a String, b UInt32, c UInt32') + PARTITION BY a VALUES ('x', 2, 3), ('x', 4, 5), ('y', 11, 12), ('y', 13, 14), ('z', 21, 22), ('z', 23, 24); +``` +As a result, the data is written into three files: `file_x.csv`, `file_y.csv`, and `file_z.csv`. + +2. Using partition ID in a bucket name creates files in different buckets: + +```sql +INSERT INTO TABLE FUNCTION + s3('http://bucket.amazonaws.com/my_bucket_{_partition_id}/file.csv', 'CSV', 'a UInt32, b UInt32, c UInt32') + PARTITION BY a VALUES (1, 2, 3), (1, 4, 5), (10, 11, 12), (10, 13, 14), (20, 21, 22), (20, 23, 24); +``` +As a result, the data is written into three files in different buckets: `my_bucket_1/file.csv`, `my_bucket_10/file.csv`, and `my_bucket_20/file.csv`. + +**See Also** + +- [S3 engine](../../engines/table-engines/integrations/s3.md) + +[Original article](https://clickhouse.com/docs/en/sql-reference/table-functions/s3/) From a67afdff6a1ef47fcb7c70ffcb83f34dedfc4f46 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 5 Jan 2023 22:43:41 +0000 Subject: [PATCH 107/262] Fix: insert delay calculation --- src/Storages/MergeTree/MergeTreeData.cpp | 103 ++++++++++++++--------- src/Storages/MergeTree/MergeTreeData.h | 4 +- 2 files changed, 65 insertions(+), 42 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 30d0570ff11..89d90011398 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3701,7 +3701,7 @@ std::pair MergeTreeData::getMaxPartsCountAndSizeForPartition() c } -size_t MergeTreeData::getMaxInactivePartsCountForPartition() const +size_t MergeTreeData::getMaxOutdatedPartsCountForPartition() const { return getMaxPartsCountAndSizeForPartitionWithState(DataPartState::Outdated).first; } @@ -3722,70 +3722,93 @@ std::optional MergeTreeData::getMinPartDataVersion() const } -void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, ContextPtr query_context) const +void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const ContextPtr & query_context) const { const auto settings = getSettings(); const auto & query_settings = query_context->getSettingsRef(); const size_t parts_count_in_total = getPartsCount(); + + /// check if have too many parts in total if (parts_count_in_total >= settings->max_parts_in_total) { ProfileEvents::increment(ProfileEvents::RejectedInserts); - throw Exception("Too many parts (" + toString(parts_count_in_total) + ") in all partitions in total. This indicates wrong choice of partition key. 
The threshold can be modified with 'max_parts_in_total' setting in element in config.xml or with per-table setting.", ErrorCodes::TOO_MANY_PARTS); + throw Exception( + ErrorCodes::TOO_MANY_PARTS, + "Too many parts ({}) in all partitions in total. This indicates wrong choice of partition key. The threshold can be modified " + "with 'max_parts_in_total' setting in element in config.xml or with per-table setting.", + toString(parts_count_in_total)); } - auto [parts_count_in_partition, size_of_partition] = getMaxPartsCountAndSizeForPartition(); - ssize_t k_inactive = -1; - if (settings->inactive_parts_to_throw_insert > 0 || settings->inactive_parts_to_delay_insert > 0) + size_t outdated_parts_over_threshold = [&]() -> size_t { - size_t inactive_parts_count_in_partition = getMaxInactivePartsCountForPartition(); - if (settings->inactive_parts_to_throw_insert > 0 && inactive_parts_count_in_partition >= settings->inactive_parts_to_throw_insert) + size_t outdated_parts_count_in_partition = 0; + if (settings->inactive_parts_to_throw_insert > 0 || settings->inactive_parts_to_delay_insert > 0) + outdated_parts_count_in_partition = getMaxOutdatedPartsCountForPartition(); + + if (settings->inactive_parts_to_throw_insert > 0 && outdated_parts_count_in_partition >= settings->inactive_parts_to_throw_insert) { ProfileEvents::increment(ProfileEvents::RejectedInserts); throw Exception( ErrorCodes::TOO_MANY_PARTS, "Too many inactive parts ({}). Parts cleaning are processing significantly slower than inserts", - inactive_parts_count_in_partition); + outdated_parts_count_in_partition); } - k_inactive = static_cast(inactive_parts_count_in_partition) - static_cast(settings->inactive_parts_to_delay_insert); - } + if (settings->inactive_parts_to_delay_insert > 0 && outdated_parts_count_in_partition >= settings->inactive_parts_to_delay_insert) + return outdated_parts_count_in_partition - settings->inactive_parts_to_delay_insert + 1; - auto parts_to_delay_insert = query_settings.parts_to_delay_insert ? query_settings.parts_to_delay_insert : settings->parts_to_delay_insert; - auto parts_to_throw_insert = query_settings.parts_to_throw_insert ? query_settings.parts_to_throw_insert : settings->parts_to_throw_insert; + return 0; + }(); + auto [parts_count_in_partition, size_of_partition] = getMaxPartsCountAndSizeForPartition(); size_t average_part_size = parts_count_in_partition ? size_of_partition / parts_count_in_partition : 0; - bool parts_are_large_enough_in_average = settings->max_avg_part_size_for_too_many_parts - && average_part_size > settings->max_avg_part_size_for_too_many_parts; - - if (parts_count_in_partition >= parts_to_throw_insert && !parts_are_large_enough_in_average) + const auto active_parts_to_delay_insert + = query_settings.parts_to_delay_insert ? query_settings.parts_to_delay_insert : settings->parts_to_delay_insert; + const auto active_parts_to_throw_insert + = query_settings.parts_to_throw_insert ? query_settings.parts_to_throw_insert : settings->parts_to_throw_insert; + size_t active_parts_over_threshold = [&](size_t parts_count) -> size_t { - ProfileEvents::increment(ProfileEvents::RejectedInserts); - throw Exception( - ErrorCodes::TOO_MANY_PARTS, - "Too many parts ({} with average size of {}). 
Merges are processing significantly slower than inserts", - parts_count_in_partition, ReadableSize(average_part_size)); - } + bool parts_are_large_enough_in_average + = settings->max_avg_part_size_for_too_many_parts && average_part_size > settings->max_avg_part_size_for_too_many_parts; - if (k_inactive < 0 && (parts_count_in_partition < parts_to_delay_insert || parts_are_large_enough_in_average)) + if (parts_count >= active_parts_to_throw_insert && !parts_are_large_enough_in_average) + { + ProfileEvents::increment(ProfileEvents::RejectedInserts); + throw Exception( + ErrorCodes::TOO_MANY_PARTS, + "Too many parts ({} with average size of {}). Merges are processing significantly slower than inserts", + parts_count, + ReadableSize(average_part_size)); + } + if (active_parts_to_delay_insert > 0 && parts_count >= active_parts_to_delay_insert && !parts_are_large_enough_in_average) + /// if parts_count == parts_to_delay_insert -> we're 1 part over threshold + return parts_count - active_parts_to_delay_insert + 1; + + return 0; + }(parts_count_in_partition); + + /// no need for delay + if (!active_parts_over_threshold && !outdated_parts_over_threshold) return; - const ssize_t k_active = ssize_t(parts_count_in_partition) - ssize_t(parts_to_delay_insert); - size_t max_k; - size_t k; - if (k_active > k_inactive) + const UInt64 delay_milliseconds = [&]() -> UInt64 { - max_k = parts_to_throw_insert - parts_to_delay_insert; - k = k_active + 1; - } - else - { - max_k = settings->inactive_parts_to_throw_insert - settings->inactive_parts_to_delay_insert; - k = k_inactive + 1; - } + size_t parts_over_threshold = std::max(active_parts_over_threshold, outdated_parts_over_threshold); + size_t allowed_parts_over_threshold = 1; + if (active_parts_over_threshold >= outdated_parts_over_threshold) + allowed_parts_over_threshold = active_parts_to_throw_insert - active_parts_to_delay_insert; + else + allowed_parts_over_threshold + = (settings->inactive_parts_to_throw_insert > 0 + ? settings->inactive_parts_to_throw_insert - settings->inactive_parts_to_delay_insert + : outdated_parts_over_threshold); - const UInt64 max_delay_milliseconds = (settings->max_delay_to_insert > 0 ? settings->max_delay_to_insert * 1000 : 1000); - /// min() as a save guard here - const UInt64 delay_milliseconds - = std::min(max_delay_milliseconds, static_cast(::pow(max_delay_milliseconds, static_cast(k) / max_k))); + chassert(parts_over_threshold <= allowed_parts_over_threshold); + + const UInt64 max_delay_milliseconds = (settings->max_delay_to_insert > 0 ? settings->max_delay_to_insert * 1000 : 1000); + double delay_factor = static_cast(parts_over_threshold) / allowed_parts_over_threshold; + /// min() as a save guard here + return std::min(max_delay_milliseconds, static_cast(max_delay_milliseconds * delay_factor)); + }(); ProfileEvents::increment(ProfileEvents::DelayedInserts); ProfileEvents::increment(ProfileEvents::DelayedInsertsMilliseconds, delay_milliseconds); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 670c755cf72..f846ba5e184 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -533,7 +533,7 @@ public: std::pair getMaxPartsCountAndSizeForPartitionWithState(DataPartState state) const; std::pair getMaxPartsCountAndSizeForPartition() const; - size_t getMaxInactivePartsCountForPartition() const; + size_t getMaxOutdatedPartsCountForPartition() const; /// Get min value of part->info.getDataVersion() for all active parts. 
/// Makes sense only for ordinary MergeTree engines because for them block numbering doesn't depend on partition. @@ -553,7 +553,7 @@ public: /// If the table contains too many active parts, sleep for a while to give them time to merge. /// If until is non-null, wake up from the sleep earlier if the event happened. - void delayInsertOrThrowIfNeeded(Poco::Event * until, ContextPtr query_context) const; + void delayInsertOrThrowIfNeeded(Poco::Event * until, const ContextPtr & query_context) const; /// Renames temporary part to a permanent part and adds it to the parts set. /// It is assumed that the part does not intersect with existing parts. From 00b2c96ce8bb27fec49589ea86ab4255580557b5 Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 6 Jan 2023 22:48:19 +0000 Subject: [PATCH 108/262] fix special builds --- src/Common/Threading.cpp | 5 ++--- src/Common/Threading.h | 2 ++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Common/Threading.cpp b/src/Common/Threading.cpp index 0e073162bb8..cc7d119fa8c 100644 --- a/src/Common/Threading.cpp +++ b/src/Common/Threading.cpp @@ -1,5 +1,4 @@ #include -#include namespace DB { @@ -25,12 +24,12 @@ namespace DB namespace { - inline long futexWait(void * address, UInt32 value) + inline Int64 futexWait(void * address, UInt32 value) { return syscall(SYS_futex, address, FUTEX_WAIT_PRIVATE, value, nullptr, nullptr, 0); } - inline long futexWake(void * address, int count) + inline Int64 futexWake(void * address, int count) { return syscall(SYS_futex, address, FUTEX_WAKE_PRIVATE, count, nullptr, nullptr, 0); } diff --git a/src/Common/Threading.h b/src/Common/Threading.h index e21fc5608b2..7a656b05ff1 100644 --- a/src/Common/Threading.h +++ b/src/Common/Threading.h @@ -3,6 +3,8 @@ #include #include +#include + #ifdef OS_LINUX /// Because of futex #include From 705c8f01affcdaff9a1abef8e3c955dae1eb0881 Mon Sep 17 00:00:00 2001 From: serxa Date: Sat, 7 Jan 2023 00:31:53 +0000 Subject: [PATCH 109/262] fix tests --- src/Common/Threading.cpp | 13 +++++++------ src/Common/Threading.h | 6 +++++- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/Common/Threading.cpp b/src/Common/Threading.cpp index cc7d119fa8c..714f45d8c90 100644 --- a/src/Common/Threading.cpp +++ b/src/Common/Threading.cpp @@ -143,32 +143,33 @@ void CancelToken::Registry::signal(UInt64 tid, int code, const String & message) it->second->signalImpl(code, message); } -CancelToken::Registry & CancelToken::Registry::instance() +const std::shared_ptr & CancelToken::Registry::instance() { - static Registry registry; + static std::shared_ptr registry{new Registry()}; // shared_ptr is used to enforce correct destruction order of tokens and registry return registry; } CancelToken::CancelToken() : state(disabled) , thread_id(getThreadId()) + , registry(Registry::instance()) { - Registry::instance().insert(this); + registry->insert(this); } CancelToken::~CancelToken() { - Registry::instance().remove(this); + registry->remove(this); } void CancelToken::signal(UInt64 tid) { - Registry::instance().signal(tid); + Registry::instance()->signal(tid); } void CancelToken::signal(UInt64 tid, int code, const String & message) { - Registry::instance().signal(tid, code, message); + Registry::instance()->signal(tid, code, message); } bool CancelToken::wait(UInt32 * address, UInt32 value) diff --git a/src/Common/Threading.h b/src/Common/Threading.h index 7a656b05ff1..d5d32e73b67 100644 --- a/src/Common/Threading.h +++ b/src/Common/Threading.h @@ -10,6 +10,7 @@ #include #include #include +#include 
namespace DB { @@ -120,7 +121,7 @@ private: void signal(UInt64 tid); void signal(UInt64 tid, int code, const String & message); - static Registry & instance(); + static const std::shared_ptr & instance(); }; // Cancels this token and wakes thread if necessary. @@ -151,6 +152,9 @@ private: // Token is permanently attached to a single thread. There is one-to-one mapping between threads and tokens. const UInt64 thread_id; + + // To avoid `Registry` destruction before last `Token` destruction + const std::shared_ptr registry; }; class CancellableSharedMutex From bb71ec7f0e50ad15b18ad47f7d9cab9a6510fccf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Jan 2023 02:47:32 +0100 Subject: [PATCH 110/262] Supposedly fix the "Download script failed" error --- tests/ci/get_previous_release_tag.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tests/ci/get_previous_release_tag.py b/tests/ci/get_previous_release_tag.py index b9ad51379d2..373d1656a18 100755 --- a/tests/ci/get_previous_release_tag.py +++ b/tests/ci/get_previous_release_tag.py @@ -6,6 +6,7 @@ import logging import requests # type: ignore CLICKHOUSE_TAGS_URL = "https://api.github.com/repos/ClickHouse/ClickHouse/tags" +CLICKHOUSE_PACKAGE_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-common-static_{version}_amd64.deb" VERSION_PATTERN = r"(v(?:\d+\.)?(?:\d+\.)?(?:\d+\.)?\d+-[a-zA-Z]*)" @@ -42,7 +43,29 @@ def find_previous_release(server_version, releases): for release in releases: if release.version < server_version: - return True, release + + # Check if the artifact exists on GitHub. + # It can be not true for a short period of time + # after creating a tag for a new release before uploading the packages. 
+ if ( + requests.head( + CLICKHOUSE_PACKAGE_URL.format( + version=release.version, type=release.type + ), + total=10, + read=10, + connect=10, + backoff_factor=0.3, + ).status_code + != 404 + ): + return True, release + else: + print( + "The tag {version}-{type} exists bug the package is not yet available on GitHub".format( + version=release.version, type=release.type + ) + ) return False, None From 8ca1740c37d7b4f19cc00ec1ee108d5ba9c51bad Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Jan 2023 04:48:42 +0300 Subject: [PATCH 111/262] Update get_previous_release_tag.py --- tests/ci/get_previous_release_tag.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/get_previous_release_tag.py b/tests/ci/get_previous_release_tag.py index 373d1656a18..6551ba80ecd 100755 --- a/tests/ci/get_previous_release_tag.py +++ b/tests/ci/get_previous_release_tag.py @@ -62,7 +62,7 @@ def find_previous_release(server_version, releases): return True, release else: print( - "The tag {version}-{type} exists bug the package is not yet available on GitHub".format( + "The tag {version}-{type} exists but the package is not yet available on GitHub".format( version=release.version, type=release.type ) ) From a4470dd1b27bf5b1f5e251434bb017d39b12eb27 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Mon, 9 Jan 2023 09:01:44 +0100 Subject: [PATCH 112/262] Update src/Common/Threading.cpp Co-authored-by: Antonio Andelic --- src/Common/Threading.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Common/Threading.cpp b/src/Common/Threading.cpp index 714f45d8c90..ae32a1a1052 100644 --- a/src/Common/Threading.cpp +++ b/src/Common/Threading.cpp @@ -350,9 +350,7 @@ void CancellableSharedMutex::lock() bool CancellableSharedMutex::try_lock() { UInt64 value = state.load(); - if ((value & (readers | writers)) == 0 && state.compare_exchange_strong(value, value | writers)) - return true; - return false; + return (value & (readers | writers)) == 0 && state.compare_exchange_strong(value, value | writers); } void CancellableSharedMutex::unlock() From 20c7c0b1eff01457fcc9ad4933c64f40c84341f6 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 9 Jan 2023 18:21:31 +0800 Subject: [PATCH 113/262] change as request --- .../functions/date-time-functions.md | 11 ++++++-- src/Common/DateLUTImpl.h | 26 +++++++++++++++++++ src/Functions/DateTimeTransforms.h | 20 ++++++++------ src/Functions/toDayOfWeek.cpp | 5 ++-- .../02521_to_custom_day_of_week.reference | 7 +++++ .../02521_to_custom_day_of_week.sql | 10 +++++++ 6 files changed, 67 insertions(+), 12 deletions(-) create mode 100644 tests/queries/0_stateless/02521_to_custom_day_of_week.reference create mode 100644 tests/queries/0_stateless/02521_to_custom_day_of_week.sql diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 89fa72de8bf..4ff89414e0a 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -207,9 +207,16 @@ Converts a date or date with time to a UInt8 number containing the number of the Aliases: `DAYOFMONTH`, `DAY`. -## toDayOfWeek +## toDayOfWeek(date\[,mode\]) -Converts a date or date with time to a UInt8 number containing the number of the day of the week (Monday is 1, and Sunday is 7). +Converts a date or date with time to a UInt8 number containing the number of the day of the week. 
The two-argument form of toDayOfWeek() enables you to specify whether the week starts on Monday or Sunday, and whether the return value should be in the range from 0 to 6 or from 1-7. If the mode argument is ommited, the default mode is 0. + +| Mode | First day of week | Range | +|------|-------------------|------------------------------------------------| +| 0 | Monday | 1-7, Monday = 1, Tuesday = 2, ..., Sunday = 7 | +| 1 | Monday | 0-6, Monday = 0, Tuesday = 1, ..., Sunday = 6 | +| 2 | Sunday | 0-6, Sunday = 0, Monday = 1, ..., Saturday = 6 | +| 3 | Sunday | 1-7, Sunday = 1, Monday = 2, ..., Saturday = 7 | Alias: `DAYOFWEEK`. diff --git a/src/Common/DateLUTImpl.h b/src/Common/DateLUTImpl.h index 84f063f9555..6bf530008dc 100644 --- a/src/Common/DateLUTImpl.h +++ b/src/Common/DateLUTImpl.h @@ -622,6 +622,25 @@ public: template inline UInt8 toDayOfWeek(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_week; } + template + inline UInt8 toDayOfWeek(DateOrTime v, UInt8 week_day_mode) const + { + /// 0: Sun = 7, Mon = 1 + /// 1: Sun = 6, Mon = 0 + /// 2: Sun = 0, Mon = 1 + /// 3: Sun = 1, Mon = 2 + week_day_mode = check_week_day_mode(week_day_mode); + auto res = toDayOfWeek(v); + + bool start_from_sunday = week_day_mode & (1 << 1); + bool zero_based = (week_day_mode == 1 || week_day_mode == 2); + if (start_from_sunday) + res = res % 7 + 1; + if (zero_based) + --res; + return res; + } + template inline UInt8 toDayOfMonth(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_month; } @@ -844,6 +863,13 @@ public: return week_format; } + /// Check and change mode to effective. + inline UInt8 check_week_day_mode(UInt8 mode) const /// NOLINT + { + return mode & 3; + } + + /** Calculate weekday from d. * Returns 0 for monday, 1 for tuesday... */ diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index f4163a336ef..56e4a0e2668 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -786,21 +786,25 @@ struct ToDayOfWeekImpl { static constexpr auto name = "toDayOfWeek"; - static inline UInt8 execute(Int64 t, const DateLUTImpl & time_zone) + static inline UInt8 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(t); } + static inline UInt8 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(t); } + static inline UInt8 execute(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(ExtendedDayNum(d)); } + static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(DayNum(d)); } + static inline UInt8 execute(Int64 t, UInt8 week_day_mode, const DateLUTImpl & time_zone) { - return time_zone.toDayOfWeek(t); + return time_zone.toDayOfWeek(t, week_day_mode); } - static inline UInt8 execute(UInt32 t, const DateLUTImpl & time_zone) + static inline UInt8 execute(UInt32 t, UInt8 week_day_mode, const DateLUTImpl & time_zone) { - return time_zone.toDayOfWeek(t); + return time_zone.toDayOfWeek(t, week_day_mode); } - static inline UInt8 execute(Int32 d, const DateLUTImpl & time_zone) + static inline UInt8 execute(Int32 d, UInt8 week_day_mode, const DateLUTImpl & time_zone) { - return time_zone.toDayOfWeek(ExtendedDayNum(d)); + return time_zone.toDayOfWeek(ExtendedDayNum(d), week_day_mode); } - static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone) + static inline UInt8 execute(UInt16 d, UInt8 week_day_mode, const DateLUTImpl & time_zone) { - return time_zone.toDayOfWeek(DayNum(d)); + return time_zone.toDayOfWeek(DayNum(d), 
week_day_mode); } using FactorTransform = ToMondayImpl; diff --git a/src/Functions/toDayOfWeek.cpp b/src/Functions/toDayOfWeek.cpp index 354d4dea894..09271cbe55d 100644 --- a/src/Functions/toDayOfWeek.cpp +++ b/src/Functions/toDayOfWeek.cpp @@ -1,13 +1,14 @@ #include #include -#include #include +#include + namespace DB { -using FunctionToDayOfWeek = FunctionDateOrDateTimeToSomething; +using FunctionToDayOfWeek = FunctionCustomWeekToSomething; REGISTER_FUNCTION(ToDayOfWeek) { diff --git a/tests/queries/0_stateless/02521_to_custom_day_of_week.reference b/tests/queries/0_stateless/02521_to_custom_day_of_week.reference new file mode 100644 index 00000000000..660dff37b72 --- /dev/null +++ b/tests/queries/0_stateless/02521_to_custom_day_of_week.reference @@ -0,0 +1,7 @@ +1 7 +1 7 +0 6 +1 0 +2 1 +1 7 +0 6 diff --git a/tests/queries/0_stateless/02521_to_custom_day_of_week.sql b/tests/queries/0_stateless/02521_to_custom_day_of_week.sql new file mode 100644 index 00000000000..5475e15a984 --- /dev/null +++ b/tests/queries/0_stateless/02521_to_custom_day_of_week.sql @@ -0,0 +1,10 @@ + +with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon), toDayOfWeek(date_sun); +with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 0), toDayOfWeek(date_sun, 0); +with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 1), toDayOfWeek(date_sun, 1); +with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 2), toDayOfWeek(date_sun, 2); +with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 3), toDayOfWeek(date_sun, 3); +with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 4), toDayOfWeek(date_sun, 4); +with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 5), toDayOfWeek(date_sun, 5); + +select toDayOfWeek(today(), -1); -- { serverError 43 } From 10aa2207b312c3a713b611693730cd7b2fa32bca Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 3 Jan 2023 12:55:12 +0100 Subject: [PATCH 114/262] Add typing to stopwatch.py --- tests/ci/stopwatch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ci/stopwatch.py b/tests/ci/stopwatch.py index db174550c03..1ab6737530c 100644 --- a/tests/ci/stopwatch.py +++ b/tests/ci/stopwatch.py @@ -9,9 +9,9 @@ class Stopwatch: self.start_time_str_value = self.start_time.strftime("%Y-%m-%d %H:%M:%S") @property - def duration_seconds(self): + def duration_seconds(self) -> float: return (datetime.datetime.utcnow() - self.start_time).total_seconds() @property - def start_time_str(self): + def start_time_str(self) -> str: return self.start_time_str_value From db96f9e3db4309fb51bad74986fcca144c8c7d17 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Tue, 3 Jan 2023 15:23:19 +0100 Subject: [PATCH 115/262] Rework test_results list/tuple/whatever into class --- tests/ci/ast_fuzzer_check.py | 20 ++-- tests/ci/bugfix_validate_check.py | 23 ++-- tests/ci/clickhouse_helper.py | 42 ++++---- tests/ci/codebrowser_check.py | 9 +- tests/ci/compatibility_check.py | 67 +++++++----- tests/ci/docker_images_check.py | 90 ++++++++-------- tests/ci/docker_manifests_merge.py | 5 +- tests/ci/docker_server.py | 17 +-- tests/ci/docker_test.py | 40 ++++--- tests/ci/docs_check.py | 41 ++++--- tests/ci/docs_release.py | 33 +++--- tests/ci/fast_test_check.py | 44 ++++---- tests/ci/functional_test_check.py | 45 ++++---- tests/ci/integration_test_check.py | 47 ++++---- tests/ci/jepsen_check.py | 31 +++--- tests/ci/report.py | 168 ++++++++++++++++++++--------- tests/ci/sqlancer_check.py | 37 +++---- tests/ci/stress_check.py | 35 +++--- tests/ci/style_check.py | 20 ++-- tests/ci/unit_tests_check.py | 38 ++++--- tests/ci/upload_result_helper.py | 55 +++++----- 21 files changed, 512 insertions(+), 395 deletions(-) diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index 096edeed149..2a7dc0ad947 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -7,6 +7,10 @@ import sys from github import Github +from build_download_helper import get_build_name_for_check, read_build_urls +from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from commit_status_helper import post_commit_status +from docker_pull_helper import get_image_with_version from env_helper import ( GITHUB_REPOSITORY, GITHUB_RUN_URL, @@ -14,15 +18,12 @@ from env_helper import ( REPO_COPY, TEMP_PATH, ) -from s3_helper import S3Helper from get_robot_token import get_best_robot_token from pr_info import PRInfo -from build_download_helper import get_build_name_for_check, read_build_urls -from docker_pull_helper import get_image_with_version -from commit_status_helper import post_commit_status -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from stopwatch import Stopwatch +from report import TestResult from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch IMAGE_NAME = "clickhouse/fuzzer" @@ -148,16 +149,15 @@ if __name__ == "__main__": status = "failure" description = "Task failed: $?=" + str(retcode) + test_result = TestResult(description, "OK") if "fail" in status: - test_result = [(description, "FAIL")] - else: - test_result = [(description, "OK")] + test_result.status = "FAIL" ch_helper = ClickHouseHelper() prepared_events = prepare_tests_results_for_clickhouse( pr_info, - test_result, + [test_result], status, stopwatch.duration_seconds, stopwatch.start_time_str, diff --git a/tests/ci/bugfix_validate_check.py b/tests/ci/bugfix_validate_check.py index 6bdf3b1f7d2..14ea58500bc 100644 --- a/tests/ci/bugfix_validate_check.py +++ b/tests/ci/bugfix_validate_check.py @@ -1,18 +1,19 @@ #!/usr/bin/env python3 +from typing import List, Tuple import argparse import csv -import itertools import logging import os from github import Github -from s3_helper import S3Helper +from commit_status_helper import post_commit_status from get_robot_token import get_best_robot_token from pr_info import PRInfo +from report import TestResults, TestResult +from s3_helper import S3Helper from upload_result_helper import upload_results -from commit_status_helper import post_commit_status def parse_args(): @@ -21,11 +22,9 @@ def parse_args(): return 
parser.parse_args() -def post_commit_status_from_file(file_path): - res = [] +def post_commit_status_from_file(file_path: str) -> List[str]: with open(file_path, "r", encoding="utf-8") as f: - fin = csv.reader(f, delimiter="\t") - res = list(itertools.islice(fin, 1)) + res = list(csv.reader(f, delimiter="\t")) if len(res) < 1: raise Exception(f'Can\'t read from "{file_path}"') if len(res[0]) != 3: @@ -33,22 +32,22 @@ def post_commit_status_from_file(file_path): return res[0] -def process_result(file_path): - test_results = [] +def process_result(file_path: str) -> Tuple[bool, TestResults]: + test_results = [] # type: TestResults state, report_url, description = post_commit_status_from_file(file_path) prefix = os.path.basename(os.path.dirname(file_path)) is_ok = state == "success" if is_ok and report_url == "null": - return is_ok, None + return is_ok, test_results status = f'OK: Bug reproduced (Report)' if not is_ok: status = f'Bug is not reproduced (Report)' - test_results.append([f"{prefix}: {description}", status]) + test_results.append(TestResult(f"{prefix}: {description}", status)) return is_ok, test_results -def process_all_results(file_paths): +def process_all_results(file_paths: str) -> Tuple[bool, TestResults]: any_ok = False all_results = [] for status_path in file_paths: diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index c82d9da05e9..f914bb42d99 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -1,10 +1,14 @@ #!/usr/bin/env python3 -import time -import logging +from typing import List import json +import logging +import time import requests # type: ignore + from get_robot_token import get_parameter_from_ssm +from pr_info import PRInfo +from report import TestResults class InsertException(Exception): @@ -129,14 +133,14 @@ class ClickHouseHelper: def prepare_tests_results_for_clickhouse( - pr_info, - test_results, - check_status, - check_duration, - check_start_time, - report_url, - check_name, -): + pr_info: PRInfo, + test_results: TestResults, + check_status: str, + check_duration: float, + check_start_time: str, + report_url: str, + check_name: str, +) -> List[dict]: pull_request_url = "https://github.com/ClickHouse/ClickHouse/commits/master" base_ref = "master" @@ -172,13 +176,11 @@ def prepare_tests_results_for_clickhouse( result = [common_properties] for test_result in test_results: current_row = common_properties.copy() - test_name = test_result[0] - test_status = test_result[1] + test_name = test_result.name + test_status = test_result.status - test_time = 0 - if len(test_result) > 2 and test_result[2]: - test_time = test_result[2] - current_row["test_duration_ms"] = int(float(test_time) * 1000) + test_time = test_result.time or 0 + current_row["test_duration_ms"] = int(test_time * 1000) current_row["test_name"] = test_name current_row["test_status"] = test_status result.append(current_row) @@ -186,7 +188,9 @@ def prepare_tests_results_for_clickhouse( return result -def mark_flaky_tests(clickhouse_helper, check_name, test_results): +def mark_flaky_tests( + clickhouse_helper: ClickHouseHelper, check_name: str, test_results: TestResults +) -> None: try: query = f"""SELECT DISTINCT test_name FROM checks @@ -202,7 +206,7 @@ WHERE logging.info("Found flaky tests: %s", ", ".join(master_failed_tests)) for test_result in test_results: - if test_result[1] == "FAIL" and test_result[0] in master_failed_tests: - test_result[1] = "FLAKY" + if test_result.status == "FAIL" and test_result.name in master_failed_tests: + 
test_result.status = "FLAKY" except Exception as ex: logging.error("Exception happened during flaky tests fetch %s", ex) diff --git a/tests/ci/codebrowser_check.py b/tests/ci/codebrowser_check.py index a86749c794c..9fa202a357c 100644 --- a/tests/ci/codebrowser_check.py +++ b/tests/ci/codebrowser_check.py @@ -7,6 +7,8 @@ import logging from github import Github +from commit_status_helper import post_commit_status +from docker_pull_helper import get_image_with_version from env_helper import ( IMAGES_PATH, REPO_COPY, @@ -14,10 +16,9 @@ from env_helper import ( S3_TEST_REPORTS_BUCKET, TEMP_PATH, ) -from commit_status_helper import post_commit_status -from docker_pull_helper import get_image_with_version from get_robot_token import get_best_robot_token from pr_info import PRInfo +from report import TestResult from s3_helper import S3Helper from stopwatch import Stopwatch from tee_popen import TeePopen @@ -80,9 +81,9 @@ if __name__ == "__main__": "HTML report" ) - test_results = [(index_html, "Look at the report")] + test_result = TestResult(index_html, "Look at the report") - report_url = upload_results(s3_helper, 0, pr_info.sha, test_results, [], NAME) + report_url = upload_results(s3_helper, 0, pr_info.sha, [test_result], [], NAME) print(f"::notice ::Report url: {report_url}") diff --git a/tests/ci/compatibility_check.py b/tests/ci/compatibility_check.py index 2b61501a0dd..7d8086973bb 100644 --- a/tests/ci/compatibility_check.py +++ b/tests/ci/compatibility_check.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from distutils.version import StrictVersion +from typing import List, Tuple import logging import os import subprocess @@ -8,21 +9,22 @@ import sys from github import Github -from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token -from pr_info import PRInfo from build_download_helper import download_builds_filter -from upload_result_helper import upload_results -from docker_pull_helper import get_images_with_versions -from commit_status_helper import post_commit_status from clickhouse_helper import ( ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse, ) -from stopwatch import Stopwatch +from commit_status_helper import post_commit_status +from docker_pull_helper import get_images_with_versions +from env_helper import TEMP_PATH, REPORTS_PATH +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from report import TestResults, TestResult from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch +from upload_result_helper import upload_results IMAGE_UBUNTU = "clickhouse/test-old-ubuntu" IMAGE_CENTOS = "clickhouse/test-old-centos" @@ -31,18 +33,18 @@ DOWNLOAD_RETRIES_COUNT = 5 CHECK_NAME = "Compatibility check" -def process_os_check(log_path): +def process_os_check(log_path: str) -> TestResult: name = os.path.basename(log_path) with open(log_path, "r") as log: line = log.read().split("\n")[0].strip() if line != "OK": - return (name, "FAIL") + return TestResult(name, "FAIL") else: - return (name, "OK") + return TestResult(name, "OK") -def process_glibc_check(log_path): - bad_lines = [] +def process_glibc_check(log_path: str) -> TestResults: + test_results = [] # type: TestResults with open(log_path, "r") as log: for line in log: if line.strip(): @@ -50,32 +52,36 @@ def process_glibc_check(log_path): symbol_with_glibc = columns[-2] # sysconf@GLIBC_2.2.5 _, version = symbol_with_glibc.split("@GLIBC_") if version == "PRIVATE": 
- bad_lines.append((symbol_with_glibc, "FAIL")) + test_results.append(TestResult(symbol_with_glibc, "FAIL")) elif StrictVersion(version) > MAX_GLIBC_VERSION: - bad_lines.append((symbol_with_glibc, "FAIL")) - if not bad_lines: - bad_lines.append(("glibc check", "OK")) - return bad_lines + test_results.append(TestResult(symbol_with_glibc, "FAIL")) + if not test_results: + test_results.append(TestResult("glibc check", "OK")) + return test_results -def process_result(result_folder, server_log_folder): - summary = process_glibc_check(os.path.join(result_folder, "glibc.log")) +def process_result( + result_folder: str, server_log_folder: str +) -> Tuple[str, str, TestResults, List[str]]: + test_results = process_glibc_check(os.path.join(result_folder, "glibc.log")) status = "success" description = "Compatibility check passed" - if len(summary) > 1 or summary[0][1] != "OK": + if len(test_results) > 1 or test_results[0].status != "OK": status = "failure" description = "glibc check failed" if status == "success": for operating_system in ("ubuntu:12.04", "centos:5"): - result = process_os_check(os.path.join(result_folder, operating_system)) - if result[1] != "OK": + test_result = process_os_check( + os.path.join(result_folder, operating_system) + ) + if test_result.status != "OK": status = "failure" description = f"Old {operating_system} failed" - summary += [result] + test_results += [test_result] break - summary += [result] + test_results += [test_result] server_log_path = os.path.join(server_log_folder, "clickhouse-server.log") stderr_log_path = os.path.join(server_log_folder, "stderr.log") @@ -90,7 +96,7 @@ def process_result(result_folder, server_log_folder): if os.path.exists(client_stderr_log_path): result_logs.append(client_stderr_log_path) - return status, description, summary, result_logs + return status, description, test_results, result_logs def get_run_commands( @@ -109,13 +115,12 @@ def get_run_commands( ] -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() temp_path = TEMP_PATH - repo_path = REPO_COPY reports_path = REPORTS_PATH pr_info = PRInfo() @@ -201,5 +206,9 @@ if __name__ == "__main__": ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - if state == "error": + if state == "failure": sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 51cbbf6f0af..f5b707be48f 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -8,6 +8,7 @@ import shutil import subprocess import time import sys +from pathlib import Path from typing import Any, Dict, List, Optional, Set, Tuple, Union from github import Github @@ -17,6 +18,7 @@ from commit_status_helper import post_commit_status from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP, GITHUB_RUN_URL from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo +from report import TestResults, TestResult from s3_helper import S3Helper from stopwatch import Stopwatch from upload_result_helper import upload_results @@ -182,11 +184,12 @@ def build_and_push_dummy_image( image: DockerImage, version_string: str, push: bool, -) -> Tuple[bool, str]: +) -> Tuple[bool, Path]: dummy_source = "ubuntu:20.04" logging.info("Building docker image %s as %s", image.repo, dummy_source) - build_log = os.path.join( - TEMP_PATH, f"build_and_push_log_{image.repo.replace('/', '_')}_{version_string}" + build_log = ( + Path(TEMP_PATH) + / 
f"build_and_push_log_{image.repo.replace('/', '_')}_{version_string}.log" ) with open(build_log, "wb") as bl: cmd = ( @@ -213,7 +216,7 @@ def build_and_push_one_image( additional_cache: str, push: bool, child: bool, -) -> Tuple[bool, str]: +) -> Tuple[bool, Path]: if image.only_amd64 and platform.machine() not in ["amd64", "x86_64"]: return build_and_push_dummy_image(image, version_string, push) logging.info( @@ -222,8 +225,9 @@ def build_and_push_one_image( version_string, image.full_path, ) - build_log = os.path.join( - TEMP_PATH, f"build_and_push_log_{image.repo.replace('/', '_')}_{version_string}" + build_log = ( + Path(TEMP_PATH) + / f"build_and_push_log_{image.repo.replace('/', '_')}_{version_string}.log" ) push_arg = "" if push: @@ -273,27 +277,42 @@ def process_single_image( additional_cache: str, push: bool, child: bool, -) -> List[Tuple[str, str, str]]: +) -> TestResults: logging.info("Image will be pushed with versions %s", ", ".join(versions)) - result = [] + results = [] # type: TestResults for ver in versions: + stopwatch = Stopwatch() for i in range(5): success, build_log = build_and_push_one_image( image, ver, additional_cache, push, child ) if success: - result.append((image.repo + ":" + ver, build_log, "OK")) + results.append( + TestResult( + image.repo + ":" + ver, + "OK", + stopwatch.duration_seconds, + [build_log], + ) + ) break logging.info( "Got error will retry %s time and sleep for %s seconds", i, i * 5 ) time.sleep(i * 5) else: - result.append((image.repo + ":" + ver, build_log, "FAIL")) + results.append( + TestResult( + image.repo + ":" + ver, + "FAIL", + stopwatch.duration_seconds, + [build_log], + ) + ) logging.info("Processing finished") image.built = True - return result + return results def process_image_with_parents( @@ -302,41 +321,19 @@ def process_image_with_parents( additional_cache: str, push: bool, child: bool = False, -) -> List[Tuple[str, str, str]]: - result = [] # type: List[Tuple[str,str,str]] +) -> TestResults: + results = [] # type: TestResults if image.built: - return result + return results if image.parent is not None: - result += process_image_with_parents( + results += process_image_with_parents( image.parent, versions, additional_cache, push, False ) child = True - result += process_single_image(image, versions, additional_cache, push, child) - return result - - -def process_test_results( - s3_client: S3Helper, test_results: List[Tuple[str, str, str]], s3_path_prefix: str -) -> Tuple[str, List[Tuple[str, str]]]: - overall_status = "success" - processed_test_results = [] - for image, build_log, status in test_results: - if status != "OK": - overall_status = "failure" - url_part = "" - if build_log is not None and os.path.exists(build_log): - build_url = s3_client.upload_test_report_to_s3( - build_log, s3_path_prefix + "/" + os.path.basename(build_log) - ) - url_part += f'build_log' - if url_part: - test_name = image + " (" + url_part + ")" - else: - test_name = image - processed_test_results.append((test_name, status)) - return overall_status, processed_test_results + results += process_single_image(image, versions, additional_cache, push, child) + return results def parse_args() -> argparse.Namespace: @@ -440,7 +437,7 @@ def main(): image_versions, result_version = gen_versions(pr_info, args.suffix) result_images = {} - images_processing_result = [] + test_results = [] # type: TestResults additional_cache = "" if pr_info.release_pr or pr_info.merged_pr: additional_cache = str(pr_info.release_pr or pr_info.merged_pr) @@ -448,7 +445,7 @@ def 
main(): for image in changed_images: # If we are in backport PR, then pr_info.release_pr is defined # We use it as tag to reduce rebuilding time - images_processing_result += process_image_with_parents( + test_results += process_image_with_parents( image, image_versions, additional_cache, args.push ) result_images[image.repo] = result_version @@ -466,12 +463,9 @@ def main(): s3_helper = S3Helper() - s3_path_prefix = ( - str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(" ", "_") - ) - status, test_results = process_test_results( - s3_helper, images_processing_result, s3_path_prefix - ) + status = "success" + if [r for r in test_results if r.status != "OK"]: + status = "failure" url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME) @@ -495,7 +489,7 @@ def main(): ch_helper = ClickHouseHelper() ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - if status == "error": + if status == "failure": sys.exit(1) diff --git a/tests/ci/docker_manifests_merge.py b/tests/ci/docker_manifests_merge.py index e39731c9ff3..9a77a91647e 100644 --- a/tests/ci/docker_manifests_merge.py +++ b/tests/ci/docker_manifests_merge.py @@ -14,6 +14,7 @@ from commit_status_helper import post_commit_status from env_helper import RUNNER_TEMP from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo +from report import TestResults, TestResult from s3_helper import S3Helper from stopwatch import Stopwatch from upload_result_helper import upload_results @@ -189,11 +190,11 @@ def main(): merged = merge_images(to_merge) status = "success" - test_results = [] # type: List[Tuple[str, str]] + test_results = [] # type: TestResults for image, versions in merged.items(): for tags in versions: manifest, test_result = create_manifest(image, tags, args.push) - test_results.append((manifest, test_result)) + test_results.append(TestResult(manifest, test_result)) if test_result != "OK": status = "failure" diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index fd28e5a1890..544ab4e0a90 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -8,7 +8,7 @@ import subprocess import sys import time from os import path as p, makedirs -from typing import List, Tuple +from typing import List from github import Github @@ -20,6 +20,7 @@ from env_helper import CI, GITHUB_RUN_URL, RUNNER_TEMP, S3_BUILDS_BUCKET, S3_DOW from get_robot_token import get_best_robot_token, get_parameter_from_ssm from git_helper import Git from pr_info import PRInfo +from report import TestResults, TestResult from s3_helper import S3Helper from stopwatch import Stopwatch from upload_result_helper import upload_results @@ -235,8 +236,8 @@ def build_and_push_image( os: str, tag: str, version: ClickHouseVersion, -) -> List[Tuple[str, str]]: - result = [] +) -> TestResults: + result = [] # type: TestResults if os != "ubuntu": tag += f"-{os}" init_args = ["docker", "buildx", "build", "--build-arg BUILDKIT_INLINE_CACHE=1"] @@ -270,9 +271,9 @@ def build_and_push_image( cmd = " ".join(cmd_args) logging.info("Building image %s:%s for arch %s: %s", image.repo, tag, arch, cmd) if retry_popen(cmd) != 0: - result.append((f"{image.repo}:{tag}-{arch}", "FAIL")) + result.append(TestResult(f"{image.repo}:{tag}-{arch}", "FAIL")) return result - result.append((f"{image.repo}:{tag}-{arch}", "OK")) + result.append(TestResult(f"{image.repo}:{tag}-{arch}", "OK")) with open(metadata_path, "rb") as m: metadata = json.load(m) 
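As an aside, a minimal sketch of the status-aggregation idiom the docker checks switch to once results are TestResult objects instead of tuples. The image names and timings are made up, and the import assumes tests/ci/report.py is importable.

``` python
from report import TestResult, TestResults  # added to tests/ci/report.py by this patch

# Invented example data: one entry per pushed tag/arch
test_results = [
    TestResult("clickhouse/clickhouse-server:head-amd64", "OK", 12.3),
    TestResult("clickhouse/clickhouse-server:head-arm64", "FAIL", 7.1),
]  # type: TestResults

# The overall check state flips to "failure" as soon as any entry is not OK
status = "success"
if any(result.status != "OK" for result in test_results):
    status = "failure"

print(status)  # prints: failure
```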
digests.append(metadata["containerimage.digest"]) @@ -283,7 +284,7 @@ def build_and_push_image( ) logging.info("Pushing merged %s:%s image: %s", image.repo, tag, cmd) if retry_popen(cmd) != 0: - result.append((f"{image.repo}:{tag}", "FAIL")) + result.append(TestResult(f"{image.repo}:{tag}", "FAIL")) return result else: logging.info( @@ -323,7 +324,7 @@ def main(): logging.info("Following tags will be created: %s", ", ".join(tags)) status = "success" - test_results = [] # type: List[Tuple[str, str]] + test_results = [] # type: TestResults for os in args.os: for tag in tags: test_results.extend( @@ -331,7 +332,7 @@ def main(): image, args.push, args.bucket_prefix, os, tag, args.version ) ) - if test_results[-1][1] != "OK": + if test_results[-1].status != "OK": status = "failure" pr_info = pr_info or PRInfo() diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py index 8b18a580ed7..e7b54652272 100644 --- a/tests/ci/docker_test.py +++ b/tests/ci/docker_test.py @@ -3,9 +3,11 @@ import os import unittest from unittest.mock import patch, MagicMock +from pathlib import Path from env_helper import GITHUB_RUN_URL from pr_info import PRInfo +from report import TestResult import docker_images_check as di with patch("git_helper.Git"): @@ -223,40 +225,48 @@ class TestDockerImageCheck(unittest.TestCase): @patch("docker_images_check.build_and_push_one_image") def test_process_image_with_parents(self, mock_build): - mock_build.side_effect = lambda v, w, x, y, z: (True, f"{v.repo}_{w}.log") + mock_build.side_effect = lambda v, w, x, y, z: (True, Path(f"{v.repo}_{w}.log")) im1 = di.DockerImage("path1", "repo1", False) im2 = di.DockerImage("path2", "repo2", False, im1) im3 = di.DockerImage("path3", "repo3", False, im2) im4 = di.DockerImage("path4", "repo4", False, im1) # We use list to have determined order of image builgings images = [im4, im1, im3, im2, im1] - results = [ + test_results = [ di.process_image_with_parents(im, ["v1", "v2", "latest"], "", True) for im in images ] + # The time is random, so we check it's not None and greater than 0, + # and then set to 1 + for results in test_results: + for result in results: + self.assertIsNotNone(result.time) + self.assertGreater(result.time, 0) # type: ignore + result.time = 1 + self.maxDiff = None expected = [ [ # repo4 -> repo1 - ("repo1:v1", "repo1_v1.log", "OK"), - ("repo1:v2", "repo1_v2.log", "OK"), - ("repo1:latest", "repo1_latest.log", "OK"), - ("repo4:v1", "repo4_v1.log", "OK"), - ("repo4:v2", "repo4_v2.log", "OK"), - ("repo4:latest", "repo4_latest.log", "OK"), + TestResult("repo1:v1", "OK", 1, [Path("repo1_v1.log")]), + TestResult("repo1:v2", "OK", 1, [Path("repo1_v2.log")]), + TestResult("repo1:latest", "OK", 1, [Path("repo1_latest.log")]), + TestResult("repo4:v1", "OK", 1, [Path("repo4_v1.log")]), + TestResult("repo4:v2", "OK", 1, [Path("repo4_v2.log")]), + TestResult("repo4:latest", "OK", 1, [Path("repo4_latest.log")]), ], [], # repo1 is built [ # repo3 -> repo2 -> repo1 - ("repo2:v1", "repo2_v1.log", "OK"), - ("repo2:v2", "repo2_v2.log", "OK"), - ("repo2:latest", "repo2_latest.log", "OK"), - ("repo3:v1", "repo3_v1.log", "OK"), - ("repo3:v2", "repo3_v2.log", "OK"), - ("repo3:latest", "repo3_latest.log", "OK"), + TestResult("repo2:v1", "OK", 1, [Path("repo2_v1.log")]), + TestResult("repo2:v2", "OK", 1, [Path("repo2_v2.log")]), + TestResult("repo2:latest", "OK", 1, [Path("repo2_latest.log")]), + TestResult("repo3:v1", "OK", 1, [Path("repo3_v1.log")]), + TestResult("repo3:v2", "OK", 1, [Path("repo3_v2.log")]), + TestResult("repo3:latest", 
"OK", 1, [Path("repo3_latest.log")]), ], [], # repo2 -> repo1 are built [], # repo1 is built ] - self.assertEqual(results, expected) + self.assertEqual(test_results, expected) class TestDockerServer(unittest.TestCase): diff --git a/tests/ci/docs_check.py b/tests/ci/docs_check.py index cac1c3aea7c..4378c857afe 100644 --- a/tests/ci/docs_check.py +++ b/tests/ci/docs_check.py @@ -4,24 +4,27 @@ import logging import subprocess import os import sys + from github import Github -from env_helper import TEMP_PATH, REPO_COPY -from s3_helper import S3Helper -from pr_info import PRInfo -from get_robot_token import get_best_robot_token -from upload_result_helper import upload_results -from docker_pull_helper import get_image_with_version -from commit_status_helper import post_commit_status, get_commit from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from stopwatch import Stopwatch +from commit_status_helper import post_commit_status, get_commit +from docker_pull_helper import get_image_with_version +from env_helper import TEMP_PATH, REPO_COPY +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from report import TestResults, TestResult from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch from tee_popen import TeePopen +from upload_result_helper import upload_results NAME = "Docs Check" -if __name__ == "__main__": + +def main(): parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Script to check the docs integrity", @@ -98,7 +101,7 @@ if __name__ == "__main__": subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) files = os.listdir(test_output) - lines = [] + test_results = [] # type: TestResults additional_files = [] if not files: logging.error("No output files after docs check") @@ -111,27 +114,27 @@ if __name__ == "__main__": with open(path, "r", encoding="utf-8") as check_file: for line in check_file: if "ERROR" in line: - lines.append((line.split(":")[-1], "FAIL")) - if lines: + test_results.append(TestResult(line.split(":")[-1], "FAIL")) + if test_results: status = "failure" description = "Found errors in docs" elif status != "failure": - lines.append(("No errors found", "OK")) + test_results.append(TestResult("No errors found", "OK")) else: - lines.append(("Non zero exit code", "FAIL")) + test_results.append(TestResult("Non zero exit code", "FAIL")) s3_helper = S3Helper() ch_helper = ClickHouseHelper() report_url = upload_results( - s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME + s3_helper, pr_info.number, pr_info.sha, test_results, additional_files, NAME ) print("::notice ::Report url: {report_url}") post_commit_status(gh, pr_info.sha, NAME, description, status, report_url) prepared_events = prepare_tests_results_for_clickhouse( pr_info, - lines, + test_results, status, stopwatch.duration_seconds, stopwatch.start_time_str, @@ -140,5 +143,9 @@ if __name__ == "__main__": ) ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - if status == "error": + if status == "failure": sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/docs_release.py b/tests/ci/docs_release.py index f1f420318be..1b93aba99ba 100644 --- a/tests/ci/docs_release.py +++ b/tests/ci/docs_release.py @@ -7,16 +7,17 @@ import sys from github import Github -from env_helper import TEMP_PATH, REPO_COPY, CLOUDFLARE_TOKEN -from s3_helper import S3Helper -from pr_info import 
PRInfo -from get_robot_token import get_best_robot_token -from ssh import SSHKey -from upload_result_helper import upload_results -from docker_pull_helper import get_image_with_version from commit_status_helper import get_commit +from docker_pull_helper import get_image_with_version +from env_helper import TEMP_PATH, REPO_COPY, CLOUDFLARE_TOKEN +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from report import TestResults, TestResult from rerun_helper import RerunHelper +from s3_helper import S3Helper +from ssh import SSHKey from tee_popen import TeePopen +from upload_result_helper import upload_results NAME = "Docs Release" @@ -32,7 +33,7 @@ def parse_args() -> argparse.Namespace: return parser.parse_args() -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) args = parse_args() @@ -84,7 +85,7 @@ if __name__ == "__main__": subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) files = os.listdir(test_output) - lines = [] + test_results = [] # type: TestResults additional_files = [] if not files: logging.error("No output files after docs release") @@ -97,19 +98,19 @@ if __name__ == "__main__": with open(path, "r", encoding="utf-8") as check_file: for line in check_file: if "ERROR" in line: - lines.append((line.split(":")[-1], "FAIL")) - if lines: + test_results.append(TestResult(line.split(":")[-1], "FAIL")) + if test_results: status = "failure" description = "Found errors in docs" elif status != "failure": - lines.append(("No errors found", "OK")) + test_results.append(TestResult("No errors found", "OK")) else: - lines.append(("Non zero exit code", "FAIL")) + test_results.append(TestResult("Non zero exit code", "FAIL")) s3_helper = S3Helper() report_url = upload_results( - s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME + s3_helper, pr_info.number, pr_info.sha, test_results, additional_files, NAME ) print("::notice ::Report url: {report_url}") commit = get_commit(gh, pr_info.sha) @@ -119,3 +120,7 @@ if __name__ == "__main__": if status == "failure": sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index 0f4c1b19707..7a87a93c26d 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -6,29 +6,31 @@ import os import csv import sys import atexit +from pathlib import Path from typing import List, Tuple from github import Github -from env_helper import CACHES_PATH, TEMP_PATH -from pr_info import FORCE_TESTS_LABEL, PRInfo -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token -from upload_result_helper import upload_results -from docker_pull_helper import get_image_with_version -from commit_status_helper import ( - post_commit_status, - update_mergeable_check, -) +from ccache_utils import get_ccache_if_not_exists, upload_ccache from clickhouse_helper import ( ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse, ) -from stopwatch import Stopwatch +from commit_status_helper import ( + post_commit_status, + update_mergeable_check, +) +from docker_pull_helper import get_image_with_version +from env_helper import CACHES_PATH, TEMP_PATH +from get_robot_token import get_best_robot_token +from pr_info import FORCE_TESTS_LABEL, PRInfo +from report import TestResults, read_test_results from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch from tee_popen import TeePopen -from ccache_utils import 
get_ccache_if_not_exists, upload_ccache +from upload_result_helper import upload_results NAME = "Fast test" @@ -53,8 +55,8 @@ def get_fasttest_cmd( def process_results( result_folder: str, -) -> Tuple[str, str, List[Tuple[str, str]], List[str]]: - test_results = [] # type: List[Tuple[str, str]] +) -> Tuple[str, str, TestResults, List[str]]: + test_results = [] # type: TestResults additional_files = [] # Just upload all files from result_folder. # If task provides processed results, then it's responsible for content of @@ -78,17 +80,15 @@ def process_results( return "error", "Invalid check_status.tsv", test_results, additional_files state, description = status[0][0], status[0][1] - results_path = os.path.join(result_folder, "test_results.tsv") - if os.path.exists(results_path): - with open(results_path, "r", encoding="utf-8") as results_file: - test_results = list(csv.reader(results_file, delimiter="\t")) # type: ignore + results_path = Path(result_folder) / "test_results.tsv" + test_results = read_test_results(results_path) if len(test_results) == 0: return "error", "Empty test_results.tsv", test_results, additional_files return state, description, test_results, additional_files -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() @@ -175,7 +175,6 @@ if __name__ == "__main__": "test_log.txt" in test_output_files or "test_result.txt" in test_output_files ) test_result_exists = "test_results.tsv" in test_output_files - test_results = [] # type: List[Tuple[str, str]] if "submodule_log.txt" not in test_output_files: description = "Cannot clone repository" state = "failure" @@ -210,7 +209,6 @@ if __name__ == "__main__": test_results, [run_log_path] + additional_logs, NAME, - True, ) print(f"::notice ::Report url: {report_url}") post_commit_status(gh, pr_info.sha, NAME, description, state, report_url) @@ -232,3 +230,7 @@ if __name__ == "__main__": print(f"'{FORCE_TESTS_LABEL}' enabled, will report success") else: sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index e7689a198cd..3653aefeb77 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -7,18 +7,17 @@ import os import subprocess import sys import atexit +from pathlib import Path from typing import List, Tuple from github import Github -from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token -from pr_info import FORCE_TESTS_LABEL, PRInfo from build_download_helper import download_all_deb_packages -from download_release_packages import download_last_release -from upload_result_helper import upload_results -from docker_pull_helper import get_image_with_version +from clickhouse_helper import ( + ClickHouseHelper, + mark_flaky_tests, + prepare_tests_results_for_clickhouse, +) from commit_status_helper import ( post_commit_status, get_commit, @@ -26,14 +25,17 @@ from commit_status_helper import ( post_commit_status_to_file, update_mergeable_check, ) -from clickhouse_helper import ( - ClickHouseHelper, - mark_flaky_tests, - prepare_tests_results_for_clickhouse, -) -from stopwatch import Stopwatch +from docker_pull_helper import get_image_with_version +from download_release_packages import download_last_release +from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH +from get_robot_token import get_best_robot_token +from pr_info import FORCE_TESTS_LABEL, PRInfo +from report import 
TestResults, read_test_results from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch from tee_popen import TeePopen +from upload_result_helper import upload_results NO_CHANGES_MSG = "Nothing to run" @@ -126,8 +128,8 @@ def get_tests_to_run(pr_info): def process_results( result_folder: str, server_log_path: str, -) -> Tuple[str, str, List[Tuple[str, str]], List[str]]: - test_results = [] # type: List[Tuple[str, str]] +) -> Tuple[str, str, TestResults, List[str]]: + test_results = [] # type: TestResults additional_files = [] # Just upload all files from result_folder. # If task provides processed results, then it's responsible for content of result_folder. @@ -161,16 +163,15 @@ def process_results( return "error", "Invalid check_status.tsv", test_results, additional_files state, description = status[0][0], status[0][1] - results_path = os.path.join(result_folder, "test_results.tsv") + results_path = Path(result_folder) / "test_results.tsv" - if os.path.exists(results_path): + if results_path.exists(): logging.info("Found test_results.tsv") else: logging.info("Files in result folder %s", os.listdir(result_folder)) return "error", "Not found test_results.tsv", test_results, additional_files - with open(results_path, "r", encoding="utf-8") as results_file: - test_results = list(csv.reader(results_file, delimiter="\t")) # type: ignore + test_results = read_test_results(results_path) if len(test_results) == 0: return "error", "Empty test_results.tsv", test_results, additional_files @@ -195,7 +196,7 @@ def parse_args(): return parser.parse_args() -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() @@ -377,3 +378,7 @@ if __name__ == "__main__": print(f"'{FORCE_TESTS_LABEL}' enabled, will report success") else: sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index a6935e22091..85933e27309 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -7,31 +7,33 @@ import logging import os import subprocess import sys +from pathlib import Path from typing import List, Tuple from github import Github -from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token -from pr_info import PRInfo from build_download_helper import download_all_deb_packages -from download_release_packages import download_last_release -from upload_result_helper import upload_results -from docker_pull_helper import get_images_with_versions -from commit_status_helper import ( - post_commit_status, - override_status, - post_commit_status_to_file, -) from clickhouse_helper import ( ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse, ) -from stopwatch import Stopwatch +from commit_status_helper import ( + post_commit_status, + override_status, + post_commit_status_to_file, +) +from docker_pull_helper import get_images_with_versions +from download_release_packages import download_last_release +from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from report import TestResults, read_test_results from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch from tee_popen import TeePopen +from upload_result_helper import upload_results # When update, update @@ -90,8 +92,8 @@ def 
get_env_for_runner(build_path, repo_path, result_path, work_path): def process_results( result_folder: str, -) -> Tuple[str, str, List[Tuple[str, str]], List[str]]: - test_results = [] # type: List[Tuple[str, str]] +) -> Tuple[str, str, TestResults, List[str]]: + test_results = [] # type: TestResults additional_files = [] # Just upload all files from result_folder. # If task provides processed results, then it's responsible for content of result_folder. @@ -115,10 +117,8 @@ def process_results( return "error", "Invalid check_status.tsv", test_results, additional_files state, description = status[0][0], status[0][1] - results_path = os.path.join(result_folder, "test_results.tsv") - if os.path.exists(results_path): - with open(results_path, "r", encoding="utf-8") as results_file: - test_results = list(csv.reader(results_file, delimiter="\t")) # type: ignore + results_path = Path(result_folder) / "test_results.tsv" + test_results = read_test_results(results_path, False) if len(test_results) == 0: return "error", "Empty test_results.tsv", test_results, additional_files @@ -142,7 +142,7 @@ def parse_args(): return parser.parse_args() -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() @@ -271,7 +271,6 @@ if __name__ == "__main__": test_results, [output_path_log] + additional_logs, check_name_with_group, - False, ) print(f"::notice:: {check_name} Report url: {report_url}") @@ -303,5 +302,9 @@ if __name__ == "__main__": ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - if state == "error": + if state == "failure": sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/jepsen_check.py b/tests/ci/jepsen_check.py index 3ddc0089791..fc18cc4a5ca 100644 --- a/tests/ci/jepsen_check.py +++ b/tests/ci/jepsen_check.py @@ -11,20 +11,21 @@ import boto3 # type: ignore import requests # type: ignore from github import Github +from build_download_helper import get_build_name_for_check +from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from commit_status_helper import post_commit_status +from compress_files import compress_fast from env_helper import REPO_COPY, TEMP_PATH, S3_BUILDS_BUCKET, S3_DOWNLOAD -from stopwatch import Stopwatch -from upload_result_helper import upload_results -from s3_helper import S3Helper from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo -from compress_files import compress_fast -from commit_status_helper import post_commit_status -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from version_helper import get_version_from_repo -from tee_popen import TeePopen -from ssh import SSHKey -from build_download_helper import get_build_name_for_check +from report import TestResults, TestResult from rerun_helper import RerunHelper +from s3_helper import S3Helper +from ssh import SSHKey +from stopwatch import Stopwatch +from tee_popen import TeePopen +from upload_result_helper import upload_results +from version_helper import get_version_from_repo JEPSEN_GROUP_NAME = "jepsen_group" @@ -44,8 +45,8 @@ CRASHED_TESTS_ANCHOR = "# Crashed tests" FAILED_TESTS_ANCHOR = "# Failed tests" -def _parse_jepsen_output(path): - test_results = [] +def _parse_jepsen_output(path: str) -> TestResults: + test_results = [] # type: TestResults current_type = "" with open(path, "r") as f: for line in f: @@ -59,7 +60,7 @@ def _parse_jepsen_output(path): if ( line.startswith("store/clickhouse") or 
line.startswith("clickhouse") ) and current_type: - test_results.append((line.strip(), current_type)) + test_results.append(TestResult(line.strip(), current_type)) return test_results @@ -266,7 +267,7 @@ if __name__ == "__main__": additional_data = [] try: test_result = _parse_jepsen_output(jepsen_log_path) - if any(r[1] == "FAIL" for r in test_result): + if any(r.status == "FAIL" for r in test_result): status = "failure" description = "Found invalid analysis (ノಥ益ಥ)ノ ┻━┻" @@ -279,7 +280,7 @@ if __name__ == "__main__": print("Exception", ex) status = "failure" description = "No Jepsen output log" - test_result = [("No Jepsen output log", "FAIL")] + test_result = [TestResult("No Jepsen output log", "FAIL")] s3_helper = S3Helper() report_url = upload_results( diff --git a/tests/ci/report.py b/tests/ci/report.py index 6c152c927ef..2409d1ba6d8 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -1,4 +1,9 @@ # -*- coding: utf-8 -*- +from ast import literal_eval +from dataclasses import dataclass +from pathlib import Path +from typing import List, Optional, Tuple +import csv import os import datetime @@ -167,6 +172,66 @@ HTML_TEST_PART = """ BASE_HEADERS = ["Test name", "Test status"] +@dataclass +class TestResult: + name: str + status: str + # the following fields are optional + time: Optional[float] = None + log_files: Optional[List[Path]] = None + raw_logs: Optional[str] = None + # the field for uploaded logs URLs + log_urls: Optional[List[str]] = None + + def set_raw_logs(self, raw_logs: str) -> None: + self.raw_logs = raw_logs + + def set_log_files(self, log_files_literal: str) -> None: + self.log_files = [] + log_paths = literal_eval(log_files_literal) + if not isinstance(log_paths, list): + raise ValueError( + f"Malformed input: must be a list literal: {log_files_literal}" + ) + for log_path in log_paths: + file = Path(log_path) + assert file.exists() + self.log_files.append(file) + + +TestResults = List[TestResult] + + +def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestResults: + results = [] # type: TestResults + with open(results_path, "r", encoding="utf-8") as descriptor: + reader = csv.reader(descriptor, delimiter="\t") + for line in reader: + name = line[0] + status = line[1] + time = None + if len(line) >= 3 and line[2]: + # The value can be emtpy, but when it's not, + # it's the time spent on the test + try: + time = float(line[2]) + except ValueError: + pass + + result = TestResult(name, status, time) + if len(line) == 4 and line[3]: + # The value can be emtpy, but when it's not, + # the 4th value is a pythonic list, e.g. 
['file1', 'file2'] + if with_raw_logs: + result.set_raw_logs(line[3]) + else: + result.set_log_files(line[3]) + + results.append(result) + + return results + + class ReportColorTheme: class ReportColor: yellow = "#FFB400" @@ -178,6 +243,9 @@ class ReportColorTheme: bugfixcheck = (ReportColor.yellow, ReportColor.blue, ReportColor.blue) +ColorTheme = Tuple[str, str, str] + + def _format_header(header, branch_name, branch_url=None): result = " ".join([w.capitalize() for w in header.split(" ")]) result = result.replace("Clickhouse", "ClickHouse") @@ -192,7 +260,7 @@ def _format_header(header, branch_name, branch_url=None): return result -def _get_status_style(status, colortheme=None): +def _get_status_style(status: str, colortheme: Optional[ColorTheme] = None) -> str: ok_statuses = ("OK", "success", "PASSED") fail_statuses = ("FAIL", "failure", "error", "FAILED", "Timeout") @@ -230,80 +298,80 @@ def _get_html_url(url): def create_test_html_report( - header, - test_result, - raw_log_url, - task_url, - job_url, - branch_url, - branch_name, - commit_url, - additional_urls=None, - with_raw_logs=False, - statuscolors=None, -): + header: str, + test_results: TestResults, + raw_log_url: str, + task_url: str, + job_url: str, + branch_url: str, + branch_name: str, + commit_url: str, + additional_urls: Optional[List[str]] = None, + statuscolors: Optional[ColorTheme] = None, +) -> str: if additional_urls is None: additional_urls = [] - if test_result: + if test_results: rows_part = "" num_fails = 0 has_test_time = False - has_test_logs = False + has_log_urls = False - if with_raw_logs: - # Display entires with logs at the top (they correspond to failed tests) - test_result.sort(key=lambda result: len(result) <= 3) + # Display entires with logs at the top (they correspond to failed tests) + test_results.sort( + key=lambda result: result.raw_logs is not None + or result.log_files is not None + ) - for result in test_result: - test_name = result[0] - test_status = result[1] - - test_logs = None - test_time = None - if len(result) > 2: - test_time = result[2] - has_test_time = True - - if len(result) > 3: - test_logs = result[3] - has_test_logs = True + for test_result in test_results: + colspan = 0 + if test_result.log_files is not None: + has_log_urls = True row = "" - is_fail = test_status in ("FAIL", "FLAKY") - if is_fail and with_raw_logs and test_logs is not None: + is_fail = test_result.status in ("FAIL", "FLAKY") + if is_fail and test_result.raw_logs is not None: row = '' - row += "" + test_name + "" - style = _get_status_style(test_status, colortheme=statuscolors) + row += "" + test_result.name + "" + colspan += 1 + style = _get_status_style(test_result.status, colortheme=statuscolors) # Allow to quickly scroll to the first failure. - is_fail_id = "" + fail_id = "" if is_fail: num_fails = num_fails + 1 - is_fail_id = 'id="fail' + str(num_fails) + '" ' + fail_id = f'id="fail{num_fails}" ' - row += f'{test_status}' + row += f'{test_result.status}' + colspan += 1 - if test_time is not None: - row += "" + test_time + "" + if test_result.time is not None: + has_test_time = True + row += f"{test_result.time}" + colspan += 1 - if test_logs is not None and not with_raw_logs: - test_logs_html = "
".join([_get_html_url(url) for url in test_logs]) + if test_result.log_urls is not None: + test_logs_html = "
".join( + [_get_html_url(url) for url in test_result.log_urls] + ) row += "" + test_logs_html + "" + colspan += 1 row += "" rows_part += row - if test_logs is not None and with_raw_logs: - row = '' - # TODO: compute colspan too - row += '
' + test_logs + "
" - row += "" + if test_result.raw_logs is not None: + row = ( + '' + f'
{test_result.raw_logs}
' + "" + ) rows_part += row headers = BASE_HEADERS if has_test_time: headers.append("Test time, sec.") - if has_test_logs and not with_raw_logs: + if has_log_urls: headers.append("Logs") headers_html = "".join(["" + h + "" for h in headers]) @@ -319,7 +387,7 @@ def create_test_html_report( if "?" in raw_log_name: raw_log_name = raw_log_name.split("?")[0] - result = HTML_BASE_TEST_TEMPLATE.format( + html = HTML_BASE_TEST_TEMPLATE.format( title=_format_header(header, branch_name), header=_format_header(header, branch_name, branch_url), raw_log_name=raw_log_name, @@ -331,7 +399,7 @@ def create_test_html_report( commit_url=commit_url, additional_urls=additional_html_urls, ) - return result + return html HTML_BASE_BUILD_TEMPLATE = """ diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py index ce6d89a7267..66a61ae9991 100644 --- a/tests/ci/sqlancer_check.py +++ b/tests/ci/sqlancer_check.py @@ -4,27 +4,27 @@ import logging import subprocess import os import sys -from typing import List, Tuple +from typing import List from github import Github +from build_download_helper import get_build_name_for_check, read_build_urls +from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from commit_status_helper import post_commit_status +from docker_pull_helper import get_image_with_version from env_helper import ( GITHUB_REPOSITORY, GITHUB_RUN_URL, REPORTS_PATH, - REPO_COPY, TEMP_PATH, ) -from s3_helper import S3Helper from get_robot_token import get_best_robot_token from pr_info import PRInfo -from build_download_helper import get_build_name_for_check, read_build_urls -from docker_pull_helper import get_image_with_version -from commit_status_helper import post_commit_status -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from upload_result_helper import upload_results -from stopwatch import Stopwatch +from report import TestResults, TestResult from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch +from upload_result_helper import upload_results IMAGE_NAME = "clickhouse/sqlancer-test" @@ -48,13 +48,12 @@ def get_commit(gh, commit_sha): return commit -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() temp_path = TEMP_PATH - repo_path = REPO_COPY reports_path = REPORTS_PATH check_name = sys.argv[1] @@ -108,11 +107,6 @@ if __name__ == "__main__": subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) - check_name_lower = ( - check_name.lower().replace("(", "").replace(")", "").replace(" ", "") - ) - s3_prefix = f"{pr_info.number}/{pr_info.sha}/{check_name_lower}/" - tests = [ "TLPGroupBy", "TLPHaving", @@ -138,7 +132,7 @@ if __name__ == "__main__": report_url = GITHUB_RUN_URL status = "success" - test_results = [] # type: List[Tuple[str, str]] + test_results = [] # type: TestResults # Try to get status message saved by the SQLancer try: # with open( @@ -146,13 +140,13 @@ if __name__ == "__main__": # ) as status_f: # status = status_f.readline().rstrip("\n") if os.path.exists(os.path.join(workspace_path, "server_crashed.log")): - test_results.append(("Server crashed", "FAIL")) + test_results.append(TestResult("Server crashed", "FAIL")) with open( os.path.join(workspace_path, "summary.tsv"), "r", encoding="utf-8" ) as summary_f: for line in summary_f: l = line.rstrip("\n").split("\t") - test_results.append((l[0], l[1])) + test_results.append(TestResult(l[0], l[1])) with open( os.path.join(workspace_path, 
"description.txt"), "r", encoding="utf-8" @@ -169,7 +163,6 @@ if __name__ == "__main__": test_results, paths, check_name, - False, ) post_commit_status(gh, pr_info.sha, check_name, description, status, report_url) @@ -192,3 +185,7 @@ if __name__ == "__main__": print(f"::notice Result: '{status}', '{description}', '{report_url}'") post_commit_status(gh, pr_info.sha, check_name, description, status, report_url) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 37277538867..4116dbc52ce 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -5,26 +5,28 @@ import logging import subprocess import os import sys +from pathlib import Path from typing import List, Tuple from github import Github -from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token -from pr_info import PRInfo from build_download_helper import download_all_deb_packages -from upload_result_helper import upload_results -from docker_pull_helper import get_image_with_version -from commit_status_helper import post_commit_status from clickhouse_helper import ( ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse, ) -from stopwatch import Stopwatch +from commit_status_helper import post_commit_status +from docker_pull_helper import get_image_with_version +from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from report import TestResults, read_test_results from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch from tee_popen import TeePopen +from upload_result_helper import upload_results def get_run_command( @@ -48,8 +50,8 @@ def get_run_command( def process_results( result_folder: str, server_log_path: str, run_log_path: str -) -> Tuple[str, str, List[Tuple[str, str]], List[str]]: - test_results = [] # type: List[Tuple[str, str]] +) -> Tuple[str, str, TestResults, List[str]]: + test_results = [] # type: TestResults additional_files = [] # Just upload all files from result_folder. 
# If task provides processed results, then it's responsible for content @@ -91,16 +93,15 @@ def process_results( return "error", "Invalid check_status.tsv", test_results, additional_files state, description = status[0][0], status[0][1] - results_path = os.path.join(result_folder, "test_results.tsv") - with open(results_path, "r", encoding="utf-8") as results_file: - test_results = list(csv.reader(results_file, delimiter="\t")) # type: ignore + results_path = Path(result_folder) / "test_results.tsv" + test_results = read_test_results(results_path, False) if len(test_results) == 0: raise Exception("Empty results") return state, description, test_results, additional_files -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() @@ -185,5 +186,9 @@ if __name__ == "__main__": ) ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - if state == "error": + if state == "failure": sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 78c98813a72..9350785b33b 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -6,7 +6,7 @@ import logging import os import subprocess import sys - +from pathlib import Path from typing import List, Tuple @@ -22,6 +22,7 @@ from get_robot_token import get_best_robot_token from github_helper import GitHub from git_helper import git_runner from pr_info import PRInfo +from report import TestResults, read_test_results from rerun_helper import RerunHelper from s3_helper import S3Helper from ssh import SSHKey @@ -40,8 +41,8 @@ GIT_PREFIX = ( # All commits to remote are done as robot-clickhouse def process_result( result_folder: str, -) -> Tuple[str, str, List[Tuple[str, str]], List[str]]: - test_results = [] # type: List[Tuple[str, str]] +) -> Tuple[str, str, TestResults, List[str]]: + test_results = [] # type: TestResults additional_files = [] # Just upload all files from result_folder. 
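For completeness, a small sketch of the check_status.tsv contract these process_result helpers validate: exactly one tab-separated row of state and description. The path and contents below are illustrative only.

``` python
import csv
from pathlib import Path

status_path = Path("/tmp/check_status.tsv")  # invented path
status_path.write_text("success\tStyle check passed\n", encoding="utf-8")

with open(status_path, "r", encoding="utf-8") as status_file:
    status = list(csv.reader(status_file, delimiter="\t"))

# The same validation the CI scripts perform before trusting the file
if len(status) != 1 or len(status[0]) != 2:
    state, description = "error", "Invalid check_status.tsv"
else:
    state, description = status[0][0], status[0][1]

print(state, description)
```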
# If task provides processed results, then it's responsible @@ -57,7 +58,7 @@ def process_result( status = [] status_path = os.path.join(result_folder, "check_status.tsv") if os.path.exists(status_path): - logging.info("Found test_results.tsv") + logging.info("Found check_status.tsv") with open(status_path, "r", encoding="utf-8") as status_file: status = list(csv.reader(status_file, delimiter="\t")) if len(status) != 1 or len(status[0]) != 2: @@ -66,9 +67,8 @@ def process_result( state, description = status[0][0], status[0][1] try: - results_path = os.path.join(result_folder, "test_results.tsv") - with open(results_path, "r", encoding="utf-8") as fd: - test_results = list(csv.reader(fd, delimiter="\t")) # type: ignore + results_path = Path(result_folder) / "test_results.tsv" + test_results = read_test_results(results_path) if len(test_results) == 0: raise Exception("Empty results") @@ -134,7 +134,7 @@ def commit_push_staged(pr_info: PRInfo) -> None: git_runner(push_cmd) -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) logging.getLogger("git_helper").setLevel(logging.DEBUG) args = parse_args() @@ -205,3 +205,7 @@ if __name__ == "__main__": if state in ["error", "failure"]: sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/unit_tests_check.py b/tests/ci/unit_tests_check.py index 7c4fa0e9fe4..915a77f3d48 100644 --- a/tests/ci/unit_tests_check.py +++ b/tests/ci/unit_tests_check.py @@ -9,22 +9,23 @@ from typing import List, Tuple from github import Github -from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token -from pr_info import PRInfo from build_download_helper import download_unit_tests -from upload_result_helper import upload_results -from docker_pull_helper import get_image_with_version -from commit_status_helper import post_commit_status, update_mergeable_check from clickhouse_helper import ( ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse, ) -from stopwatch import Stopwatch +from commit_status_helper import post_commit_status, update_mergeable_check +from docker_pull_helper import get_image_with_version +from env_helper import TEMP_PATH, REPORTS_PATH +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from report import TestResults, TestResult from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch from tee_popen import TeePopen +from upload_result_helper import upload_results IMAGE_NAME = "clickhouse/unit-test" @@ -40,20 +41,20 @@ def get_test_name(line): def process_results( result_folder: str, -) -> Tuple[str, str, List[Tuple[str, str]], List[str]]: +) -> Tuple[str, str, TestResults, List[str]]: OK_SIGN = "OK ]" FAILED_SIGN = "FAILED ]" SEGFAULT = "Segmentation fault" SIGNAL = "received signal SIG" PASSED = "PASSED" - summary = [] # type: List[Tuple[str, str]] + test_results = [] # type: TestResults total_counter = 0 failed_counter = 0 result_log_path = f"{result_folder}/test_result.txt" if not os.path.exists(result_log_path): logging.info("No output log on path %s", result_log_path) - return "error", "No output log", summary, [] + return "error", "No output log", test_results, [] status = "success" description = "" @@ -64,13 +65,13 @@ def process_results( logging.info("Found ok line: '%s'", line) test_name = get_test_name(line.strip()) logging.info("Test name: '%s'", test_name) - summary.append((test_name, "OK")) + 
test_results.append(TestResult(test_name, "OK")) total_counter += 1 elif FAILED_SIGN in line and "listed below" not in line and "ms)" in line: logging.info("Found fail line: '%s'", line) test_name = get_test_name(line.strip()) logging.info("Test name: '%s'", test_name) - summary.append((test_name, "FAIL")) + test_results.append(TestResult(test_name, "FAIL")) total_counter += 1 failed_counter += 1 elif SEGFAULT in line: @@ -99,16 +100,15 @@ def process_results( f"fail: {failed_counter}, passed: {total_counter - failed_counter}" ) - return status, description, summary, [result_log_path] + return status, description, test_results, [result_log_path] -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() temp_path = TEMP_PATH - repo_path = REPO_COPY reports_path = REPORTS_PATH check_name = sys.argv[1] @@ -182,5 +182,9 @@ if __name__ == "__main__": ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - if state == "error": + if state == "failure": sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/upload_result_helper.py b/tests/ci/upload_result_helper.py index 9fcd3733acb..d6476865bba 100644 --- a/tests/ci/upload_result_helper.py +++ b/tests/ci/upload_result_helper.py @@ -1,6 +1,6 @@ +from typing import List import os import logging -import ast from env_helper import ( GITHUB_JOB_URL, @@ -8,34 +8,35 @@ from env_helper import ( GITHUB_RUN_URL, GITHUB_SERVER_URL, ) -from report import ReportColorTheme, create_test_html_report +from report import ReportColorTheme, TestResults, create_test_html_report +from s3_helper import S3Helper def process_logs( - s3_client, additional_logs, s3_path_prefix, test_results, with_raw_logs -): + s3_client: S3Helper, + additional_logs: List[str], + s3_path_prefix: str, + test_results: TestResults, +) -> List[str]: logging.info("Upload files to s3 %s", additional_logs) processed_logs = {} # type: ignore # Firstly convert paths of logs from test_results to urls to s3. for test_result in test_results: - if len(test_result) <= 3 or with_raw_logs: + if test_result.log_files is None: continue # Convert from string repr of list to list. 
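A short, hypothetical illustration of TestResult.set_log_files, which takes over the literal_eval conversion that used to happen inline here. The log names are invented and are created first because set_log_files asserts the paths exist.

``` python
from pathlib import Path

from report import TestResult  # tests/ci/report.py from this patch

for name in ("query_run.log", "server_stderr.log"):  # invented log names
    Path("/tmp", name).touch()

result = TestResult("some_failed_test", "FAIL")
result.set_log_files("['/tmp/query_run.log', '/tmp/server_stderr.log']")

print(result.log_files)
# [PosixPath('/tmp/query_run.log'), PosixPath('/tmp/server_stderr.log')]
```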
- test_log_paths = ast.literal_eval(test_result[3]) - test_log_urls = [] - for log_path in test_log_paths: - if log_path in processed_logs: - test_log_urls.append(processed_logs[log_path]) - elif log_path: + test_result.log_urls = [] + for path in test_result.log_files: + if path.as_posix() in processed_logs: + test_result.log_urls.append(processed_logs[path]) + elif path: url = s3_client.upload_test_report_to_s3( - log_path, s3_path_prefix + "/" + os.path.basename(log_path) + path.as_posix(), s3_path_prefix + "/" + path.name ) - test_log_urls.append(url) - processed_logs[log_path] = url - - test_result[3] = test_log_urls + test_result.log_urls.append(url) + processed_logs[path] = url additional_urls = [] for log_path in additional_logs: @@ -50,20 +51,18 @@ def process_logs( def upload_results( - s3_client, - pr_number, - commit_sha, - test_results, - additional_files, - check_name, - with_raw_logs=True, - statuscolors=None, -): + s3_client: S3Helper, + pr_number: int, + commit_sha: str, + test_results: TestResults, + additional_files: List[str], + check_name: str, +) -> str: s3_path_prefix = f"{pr_number}/{commit_sha}/" + check_name.lower().replace( " ", "_" ).replace("(", "_").replace(")", "_").replace(",", "_") additional_urls = process_logs( - s3_client, additional_files, s3_path_prefix, test_results, with_raw_logs + s3_client, additional_files, s3_path_prefix, test_results ) branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commits/master" @@ -74,8 +73,7 @@ def upload_results( commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{commit_sha}" if additional_urls: - raw_log_url = additional_urls[0] - additional_urls.pop(0) + raw_log_url = additional_urls.pop(0) else: raw_log_url = GITHUB_JOB_URL() @@ -93,7 +91,6 @@ def upload_results( branch_name, commit_url, additional_urls, - with_raw_logs, statuscolors=statuscolors, ) with open("report.html", "w", encoding="utf-8") as f: From 84861c2b7c590b6689972e403e92162cced9c91a Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 3 Jan 2023 23:47:46 +0100 Subject: [PATCH 116/262] Add TODO to style-check image --- docker/test/style/process_style_check_result.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/test/style/process_style_check_result.py b/docker/test/style/process_style_check_result.py index 2edf6ba3591..bc06df1af31 100755 --- a/docker/test/style/process_style_check_result.py +++ b/docker/test/style/process_style_check_result.py @@ -6,6 +6,8 @@ import argparse import csv +# TODO: add typing and log files to the fourth column, think about launching +# everything from the python and not bash def process_result(result_folder): status = "success" description = "" From 36e402b10db0be81ad064ddc2ea0fb62c3fa870f Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 4 Jan 2023 12:18:53 +0100 Subject: [PATCH 117/262] Add typing to create_build_html_report --- tests/ci/build_report_check.py | 35 ++++++++-------------------------- tests/ci/report.py | 30 ++++++++++++++++++++--------- 2 files changed, 29 insertions(+), 36 deletions(-) diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 1de401cde9c..0bdfb7c9ac0 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -10,13 +10,14 @@ from typing import Dict, List, Tuple from github import Github from env_helper import ( + GITHUB_JOB_URL, GITHUB_REPOSITORY, GITHUB_RUN_URL, GITHUB_SERVER_URL, REPORTS_PATH, TEMP_PATH, ) -from report import create_build_html_report +from report import create_build_html_report, BuildResult, BuildResults from s3_helper import S3Helper from get_robot_token import get_best_robot_token from pr_info import NeedsDataType, PRInfo @@ -31,24 +32,6 @@ from rerun_helper import RerunHelper NEEDS_DATA_PATH = os.getenv("NEEDS_DATA_PATH", "") -class BuildResult: - def __init__( - self, - compiler, - build_type, - sanitizer, - status, - elapsed_seconds, - with_coverage, - ): - self.compiler = compiler - self.build_type = build_type - self.sanitizer = sanitizer - self.status = status - self.elapsed_seconds = elapsed_seconds - self.with_coverage = with_coverage - - def group_by_artifacts(build_urls: List[str]) -> Dict[str, List[str]]: groups = { "apk": [], @@ -81,7 +64,7 @@ def group_by_artifacts(build_urls: List[str]) -> Dict[str, List[str]]: def get_failed_report( job_name: str, -) -> Tuple[List[BuildResult], List[List[str]], List[str]]: +) -> Tuple[BuildResults, List[List[str]], List[str]]: message = f"{job_name} failed" build_result = BuildResult( compiler="unknown", @@ -89,14 +72,13 @@ def get_failed_report( sanitizer="unknown", status=message, elapsed_seconds=0, - with_coverage=False, ) return [build_result], [[""]], [GITHUB_RUN_URL] def process_report( build_report: dict, -) -> Tuple[List[BuildResult], List[List[str]], List[str]]: +) -> Tuple[BuildResults, List[List[str]], List[str]]: build_config = build_report["build_config"] build_result = BuildResult( compiler=build_config["compiler"], @@ -104,7 +86,6 @@ def process_report( sanitizer=build_config["sanitizer"], status="success" if build_report["status"] else "failure", elapsed_seconds=build_report["elapsed_seconds"], - with_coverage=False, ) build_results = [] build_urls = [] @@ -207,9 +188,9 @@ def main(): logging.info("Got exactly %s builds", len(builds_report_map)) # Group build artifacts by groups - build_results = [] # type: List[BuildResult] - build_artifacts = [] # - build_logs = [] + build_results = [] # type: BuildResults + build_artifacts = [] # type: List[List[str]] + build_logs = [] # type: List[str] for build_report in build_reports: _build_results, build_artifacts_url, build_logs_url = process_report( @@ -244,7 +225,7 @@ def main(): branch_name = f"PR #{pr_info.number}" branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/pull/{pr_info.number}" commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{pr_info.sha}" - task_url = GITHUB_RUN_URL + task_url = GITHUB_JOB_URL() report = create_build_html_report( build_check_name, build_results, diff --git a/tests/ci/report.py b/tests/ci/report.py index 2409d1ba6d8..d7f6e1e71d0 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -232,6 +232,18 @@ def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestRes return results +@dataclass +class BuildResult: + compiler: 
str + build_type: str + sanitizer: str + status: str + elapsed_seconds: int + + +BuildResults = List[BuildResult] + + class ReportColorTheme: class ReportColor: yellow = "#FFB400" @@ -447,15 +459,15 @@ LINK_TEMPLATE = '{text}' def create_build_html_report( - header, - build_results, - build_logs_urls, - artifact_urls_list, - task_url, - branch_url, - branch_name, - commit_url, -): + header: str, + build_results: BuildResults, + build_logs_urls: List[str], + artifact_urls_list: List[List[str]], + task_url: str, + branch_url: str, + branch_name: str, + commit_url: str, +) -> str: rows = "" for (build_result, build_log_url, artifact_urls) in zip( build_results, build_logs_urls, artifact_urls_list From fc4d6e41cff684e1960e5a24082575be326aed51 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 4 Jan 2023 16:52:32 +0100 Subject: [PATCH 118/262] Fix possible issue in process_logs by strict typing --- tests/ci/report.py | 2 +- tests/ci/upload_result_helper.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/ci/report.py b/tests/ci/report.py index d7f6e1e71d0..95f60794448 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -187,7 +187,7 @@ class TestResult: self.raw_logs = raw_logs def set_log_files(self, log_files_literal: str) -> None: - self.log_files = [] + self.log_files = [] # type: Optional[List[Path]] log_paths = literal_eval(log_files_literal) if not isinstance(log_paths, list): raise ValueError( diff --git a/tests/ci/upload_result_helper.py b/tests/ci/upload_result_helper.py index d6476865bba..b988e240b0e 100644 --- a/tests/ci/upload_result_helper.py +++ b/tests/ci/upload_result_helper.py @@ -1,4 +1,5 @@ -from typing import List +from pathlib import Path +from typing import Dict, List import os import logging @@ -20,7 +21,7 @@ def process_logs( ) -> List[str]: logging.info("Upload files to s3 %s", additional_logs) - processed_logs = {} # type: ignore + processed_logs = {} # type: Dict[Path, str] # Firstly convert paths of logs from test_results to urls to s3. for test_result in test_results: if test_result.log_files is None: @@ -29,7 +30,7 @@ def process_logs( # Convert from string repr of list to list. test_result.log_urls = [] for path in test_result.log_files: - if path.as_posix() in processed_logs: + if path in processed_logs: test_result.log_urls.append(processed_logs[path]) elif path: url = s3_client.upload_test_report_to_s3( From 425f7459d3f740cbb22d04cf6769451e33e7d420 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 5 Jan 2023 13:54:31 +0100 Subject: [PATCH 119/262] Improve report for docker_server.py --- tests/ci/docker_server.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 544ab4e0a90..031b7bb61ab 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -251,7 +251,9 @@ def build_and_push_image( # `docker buildx build --load` does not support multiple images currently # images must be built separately and merged together with `docker manifest` digests = [] + multiplatform_sw = Stopwatch() for arch in BUCKETS: + single_sw = Stopwatch() arch_tag = f"{tag}-{arch}" metadata_path = p.join(TEMP_PATH, arch_tag) dockerfile = p.join(image.full_path, f"Dockerfile.{os}") @@ -271,9 +273,15 @@ def build_and_push_image( cmd = " ".join(cmd_args) logging.info("Building image %s:%s for arch %s: %s", image.repo, tag, arch, cmd) if retry_popen(cmd) != 0: - result.append(TestResult(f"{image.repo}:{tag}-{arch}", "FAIL")) + result.append( + TestResult( + f"{image.repo}:{tag}-{arch}", "FAIL", single_sw.duration_seconds + ) + ) return result - result.append(TestResult(f"{image.repo}:{tag}-{arch}", "OK")) + result.append( + TestResult(f"{image.repo}:{tag}-{arch}", "OK", single_sw.duration_seconds) + ) with open(metadata_path, "rb") as m: metadata = json.load(m) digests.append(metadata["containerimage.digest"]) @@ -284,8 +292,15 @@ def build_and_push_image( ) logging.info("Pushing merged %s:%s image: %s", image.repo, tag, cmd) if retry_popen(cmd) != 0: - result.append(TestResult(f"{image.repo}:{tag}", "FAIL")) + result.append( + TestResult( + f"{image.repo}:{tag}", "FAIL", multiplatform_sw.duration_seconds + ) + ) return result + result.append( + TestResult(f"{image.repo}:{tag}", "OK", multiplatform_sw.duration_seconds) + ) else: logging.info( "Merging is available only on push, separate %s images are created", From c38bb5ec00b8d3d39209eb921407235a96b080db Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 5 Jan 2023 14:16:07 +0100 Subject: [PATCH 120/262] Add Path as an option for TeePopen log_file --- tests/ci/tee_popen.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/ci/tee_popen.py b/tests/ci/tee_popen.py index b74069c16ab..f80678fe8ba 100644 --- a/tests/ci/tee_popen.py +++ b/tests/ci/tee_popen.py @@ -1,10 +1,11 @@ #!/usr/bin/env python3 from io import TextIOWrapper +from pathlib import Path from subprocess import Popen, PIPE, STDOUT from threading import Thread from time import sleep -from typing import Optional +from typing import Optional, Union import logging import os import sys @@ -18,7 +19,7 @@ class TeePopen: def __init__( self, command: str, - log_file: str, + log_file: Union[str, Path], env: Optional[dict] = None, timeout: Optional[int] = None, ): @@ -63,7 +64,7 @@ class TeePopen: self.wait() self.log_file.close() - def wait(self): + def wait(self) -> int: if self.process.stdout is not None: for line in self.process.stdout: sys.stdout.write(line) From 64bbdee8248e87c436f5af0447e608f9fb68e48c Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 9 Jan 2023 19:36:51 +0800 Subject: [PATCH 121/262] fix stule --- src/Functions/toDayOfWeek.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Functions/toDayOfWeek.cpp b/src/Functions/toDayOfWeek.cpp index 09271cbe55d..06343714b9d 100644 --- a/src/Functions/toDayOfWeek.cpp +++ b/src/Functions/toDayOfWeek.cpp @@ -3,8 +3,6 @@ #include #include - - namespace DB { From 7764fd9ac9f2388cc94d382909a49f76e98fbf83 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 9 Jan 2023 12:47:55 +0100 Subject: [PATCH 122/262] Fix possible cannot-read-all-data --- src/Storages/FileLog/StorageFileLog.cpp | 130 ++++++++++++------------ src/Storages/FileLog/StorageFileLog.h | 10 +- 2 files changed, 74 insertions(+), 66 deletions(-) diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 94d5f7441ec..b1b54a1700a 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -224,76 +224,48 @@ void StorageFileLog::loadFiles() void StorageFileLog::serialize() const { for (const auto & [inode, meta] : file_infos.meta_by_inode) - { - auto full_name = getFullMetaPath(meta.file_name); - if (!disk->exists(full_name)) - { - disk->createFile(full_name); - } - else - { - checkOffsetIsValid(full_name, meta.last_writen_position); - } - auto out = disk->writeFile(full_name); - writeIntText(inode, *out); - writeChar('\n', *out); - writeIntText(meta.last_writen_position, *out); - } + serialize(inode, meta); } void StorageFileLog::serialize(UInt64 inode, const FileMeta & file_meta) const { - auto full_name = getFullMetaPath(file_meta.file_name); - if (!disk->exists(full_name)) + auto full_path = getFullMetaPath(file_meta.file_name); + if (disk->exists(full_path)) { - disk->createFile(full_name); + checkOffsetIsValid(file_meta.file_name, file_meta.last_writen_position); } else { - checkOffsetIsValid(full_name, file_meta.last_writen_position); + disk->createFile(full_path); + } + + try + { + auto out = disk->writeFile(full_path); + writeIntText(inode, *out); + writeChar('\n', *out); + writeIntText(file_meta.last_writen_position, *out); + } + catch (...) 
+ { + disk->removeFile(full_path); + throw; } - auto out = disk->writeFile(full_name); - writeIntText(inode, *out); - writeChar('\n', *out); - writeIntText(file_meta.last_writen_position, *out); } void StorageFileLog::deserialize() { if (!disk->exists(metadata_base_path)) return; + /// In case of single file (not a watched directory), /// iterated directory always has one file inside. for (const auto dir_iter = disk->iterateDirectory(metadata_base_path); dir_iter->isValid(); dir_iter->next()) { - auto full_name = getFullMetaPath(dir_iter->name()); - if (!disk->isFile(full_name)) - { - throw Exception( - ErrorCodes::BAD_FILE_TYPE, - "The file {} under {} is not a regular file when deserializing meta files", - dir_iter->name(), - metadata_base_path); - } - - auto in = disk->readFile(full_name); - FileMeta meta; - UInt64 inode, last_written_pos; - - if (!tryReadIntText(inode, *in)) - { - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed", dir_iter->path()); - } - assertChar('\n', *in); - if (!tryReadIntText(last_written_pos, *in)) - { - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed", dir_iter->path()); - } - - meta.file_name = dir_iter->name(); - meta.last_writen_position = last_written_pos; - - file_infos.meta_by_inode.emplace(inode, meta); + auto [metadata, inode] = readMetadata(dir_iter->name()); + if (!metadata) + continue; + file_infos.meta_by_inode.emplace(inode, metadata); } } @@ -488,23 +460,51 @@ void StorageFileLog::storeMetas(size_t start, size_t end) } } -void StorageFileLog::checkOffsetIsValid(const String & full_name, UInt64 offset) const +void StorageFileLog::checkOffsetIsValid(const String & filename, UInt64 offset) const { - auto in = disk->readFile(full_name); - UInt64 _, last_written_pos; - - if (!tryReadIntText(_, *in)) + auto [metadata, _] = readMetadata(filename); + if (metadata.last_writen_position > offset) { - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed", full_name); - } - assertChar('\n', *in); - if (!tryReadIntText(last_written_pos, *in)) - { - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed", full_name); - } - if (last_written_pos > offset) throw Exception( - ErrorCodes::LOGICAL_ERROR, "Last stored last_written_pos in meta file {} is bigger than current last_written_pos", full_name); + ErrorCodes::LOGICAL_ERROR, + "Last stored last_written_position in meta file {} is bigger than current last_written_pos ({} > {})", + filename, metadata.last_writen_position, offset); + } +} + +StorageFileLog::ReadMetadataResult StorageFileLog::readMetadata(const String & filename) const +{ + auto full_path = getFullMetaPath(filename); + if (!disk->isFile(full_path)) + { + throw Exception( + ErrorCodes::BAD_FILE_TYPE, + "The file {} under {} is not a regular file", + filename, metadata_base_path); + } + + auto in = disk->readFile(full_path); + FileMeta metadata; + UInt64 inode, last_written_pos; + + if (in->eof()) /// File is empty. 
+ { + disk->removeFile(full_path); + return {}; + } + + if (!tryReadIntText(inode, *in)) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed (1)", full_path); + + if (!checkChar('\n', *in)) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed (2)", full_path); + + if (!tryReadIntText(last_written_pos, *in)) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed (3)", full_path); + + metadata.file_name = filename; + metadata.last_writen_position = last_written_pos; + return { metadata, inode }; } size_t StorageFileLog::getMaxBlockSize() const diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index 9737c31acb6..c0c5ac904b5 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ b/src/Storages/FileLog/StorageFileLog.h @@ -82,6 +82,7 @@ public: String file_name; UInt64 last_writen_position = 0; UInt64 last_open_end = 0; + bool operator!() const { return file_name.empty(); } }; using InodeToFileMeta = std::unordered_map; @@ -202,7 +203,14 @@ private: void serialize(UInt64 inode, const FileMeta & file_meta) const; void deserialize(); - void checkOffsetIsValid(const String & full_name, UInt64 offset) const; + void checkOffsetIsValid(const String & filename, UInt64 offset) const; + + struct ReadMetadataResult + { + FileMeta metadata; + UInt64 inode = 0; + }; + ReadMetadataResult readMetadata(const String & filename) const; }; } From 1e4fe038f562029fc24a0e7a33e5d428ea0474f9 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 5 Jan 2023 14:16:31 +0100 Subject: [PATCH 123/262] Add logs to docker_server reports --- tests/ci/docker_server.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 031b7bb61ab..fbe934367b4 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -7,6 +7,7 @@ import logging import subprocess import sys import time +from pathlib import Path from os import path as p, makedirs from typing import List @@ -23,6 +24,7 @@ from pr_info import PRInfo from report import TestResults, TestResult from s3_helper import S3Helper from stopwatch import Stopwatch +from tee_popen import TeePopen from upload_result_helper import upload_results from version_helper import ( ClickHouseVersion, @@ -117,7 +119,7 @@ def parse_args() -> argparse.Namespace: return parser.parse_args() -def retry_popen(cmd: str) -> int: +def retry_popen(cmd: str, log_file: Path) -> int: max_retries = 5 for retry in range(max_retries): # From time to time docker build may failed. 
Curl issues, or even push @@ -130,18 +132,14 @@ def retry_popen(cmd: str) -> int: cmd, ) time.sleep(progressive_sleep) - with subprocess.Popen( + with TeePopen( cmd, - shell=True, - stderr=subprocess.STDOUT, - stdout=subprocess.PIPE, - universal_newlines=True, + log_file=log_file, ) as process: - for line in process.stdout: # type: ignore - print(line, end="") retcode = process.wait() if retcode == 0: return 0 + return retcode @@ -272,15 +270,24 @@ def build_and_push_image( ) cmd = " ".join(cmd_args) logging.info("Building image %s:%s for arch %s: %s", image.repo, tag, arch, cmd) - if retry_popen(cmd) != 0: + log_file = Path(TEMP_PATH) / f"{image.repo.replace('/', '__')}:{tag}-{arch}.log" + if retry_popen(cmd, log_file) != 0: result.append( TestResult( - f"{image.repo}:{tag}-{arch}", "FAIL", single_sw.duration_seconds + f"{image.repo}:{tag}-{arch}", + "FAIL", + single_sw.duration_seconds, + [log_file], ) ) return result result.append( - TestResult(f"{image.repo}:{tag}-{arch}", "OK", single_sw.duration_seconds) + TestResult( + f"{image.repo}:{tag}-{arch}", + "OK", + single_sw.duration_seconds, + [log_file], + ) ) with open(metadata_path, "rb") as m: metadata = json.load(m) @@ -291,7 +298,7 @@ def build_and_push_image( f"--tag {image.repo}:{tag} {' '.join(digests)}" ) logging.info("Pushing merged %s:%s image: %s", image.repo, tag, cmd) - if retry_popen(cmd) != 0: + if retry_popen(cmd, Path("/dev/null")) != 0: result.append( TestResult( f"{image.repo}:{tag}", "FAIL", multiplatform_sw.duration_seconds From 4bc2c614068055df675262eeb1fc9f2f56b8c635 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 9 Jan 2023 20:37:16 +0800 Subject: [PATCH 124/262] change as request --- src/Common/DateLUTImpl.h | 23 ++++++++++++++--------- src/Functions/DateTimeTransforms.h | 4 ---- src/Functions/dateName.cpp | 2 +- src/Functions/formatDateTime.cpp | 8 ++++---- 4 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/Common/DateLUTImpl.h b/src/Common/DateLUTImpl.h index 6bf530008dc..1e7f11d3c9e 100644 --- a/src/Common/DateLUTImpl.h +++ b/src/Common/DateLUTImpl.h @@ -39,6 +39,15 @@ enum class WeekModeFlag : UInt8 }; using YearWeek = std::pair; +/// Modes for toDayOfWeek() function. +enum class WeekDayMode +{ + WeekStartsMonday1 = 0, + WeekStartsMonday0 = 1, + WeekStartsSunday0 = 2, + WeekStartsSunday1 = 3 +}; + /** Lookup table to conversion of time to date, and to month / year / day of week / day of month and so on. * First time was implemented for OLAPServer, that needed to do billions of such transformations. */ @@ -625,15 +634,11 @@ public: template inline UInt8 toDayOfWeek(DateOrTime v, UInt8 week_day_mode) const { - /// 0: Sun = 7, Mon = 1 - /// 1: Sun = 6, Mon = 0 - /// 2: Sun = 0, Mon = 1 - /// 3: Sun = 1, Mon = 2 - week_day_mode = check_week_day_mode(week_day_mode); + WeekDayMode mode = check_week_day_mode(week_day_mode); auto res = toDayOfWeek(v); - bool start_from_sunday = week_day_mode & (1 << 1); - bool zero_based = (week_day_mode == 1 || week_day_mode == 2); + bool start_from_sunday = (mode == WeekDayMode::WeekStartsSunday0 || mode == WeekDayMode::WeekStartsSunday1); + bool zero_based = (mode == WeekDayMode::WeekStartsMonday0 || mode == WeekDayMode::WeekStartsSunday0); if (start_from_sunday) res = res % 7 + 1; if (zero_based) @@ -864,9 +869,9 @@ public: } /// Check and change mode to effective. 
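// Illustration added by the editor, not part of the patch: the removed inline comment documented the
// four modes; with the conversion above, a Monday maps to 1, 0, 1, 2 and a Sunday to 7, 6, 0, 1 for
// modes 0..3 respectively. A hypothetical spot-check (helper usage is an assumption, not from this diff):
//
//   const auto & lut = DateLUT::instance("UTC");
//   ExtendedDayNum monday(19723);                    // 2024-01-01 is a Monday
//   chassert(lut.toDayOfWeek(monday, 0) == 1);       // WeekStartsMonday1: Mon = 1 ... Sun = 7
//   chassert(lut.toDayOfWeek(monday, 1) == 0);       // WeekStartsMonday0: Mon = 0 ... Sun = 6
//   chassert(lut.toDayOfWeek(monday, 2) == 1);       // WeekStartsSunday0: Sun = 0 ... Sat = 6
//   chassert(lut.toDayOfWeek(monday, 3) == 2);       // WeekStartsSunday1: Sun = 1 ... Sat = 7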
- inline UInt8 check_week_day_mode(UInt8 mode) const /// NOLINT + inline WeekDayMode check_week_day_mode(UInt8 mode) const /// NOLINT { - return mode & 3; + return static_cast(mode & 3); } diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 56e4a0e2668..56a7a960ac9 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -786,10 +786,6 @@ struct ToDayOfWeekImpl { static constexpr auto name = "toDayOfWeek"; - static inline UInt8 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(t); } - static inline UInt8 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(t); } - static inline UInt8 execute(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(ExtendedDayNum(d)); } - static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(DayNum(d)); } static inline UInt8 execute(Int64 t, UInt8 week_day_mode, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(t, week_day_mode); diff --git a/src/Functions/dateName.cpp b/src/Functions/dateName.cpp index 36c0be49190..bfb190b9a08 100644 --- a/src/Functions/dateName.cpp +++ b/src/Functions/dateName.cpp @@ -276,7 +276,7 @@ private: { static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { - const auto day = ToDayOfWeekImpl::execute(source, timezone); + const auto day = ToDayOfWeekImpl::execute(source, 0, timezone); static constexpr std::string_view day_names[] = { "Monday", diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index e7c9a1b5103..c01f32f68ae 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -344,13 +344,13 @@ private: static size_t mysqlDayOfWeek(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - *dest = '0' + ToDayOfWeekImpl::execute(source, timezone); + *dest = '0' + ToDayOfWeekImpl::execute(source, 0, timezone); return 1; } static size_t mysqlDayOfWeek0To6(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - auto day = ToDayOfWeekImpl::execute(source, timezone); + auto day = ToDayOfWeekImpl::execute(source, 0, timezone); *dest = '0' + (day == 7 ? 
0 : day); return 1; } @@ -499,13 +499,13 @@ private: static size_t jodaDayOfWeek1Based(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - auto week_day = ToDayOfWeekImpl::execute(source, timezone); + auto week_day = ToDayOfWeekImpl::execute(source, 0, timezone); return writeNumberWithPadding(dest, week_day, min_represent_digits); } static size_t jodaDayOfWeekText(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - auto week_day = ToDayOfWeekImpl::execute(source, timezone); + auto week_day = ToDayOfWeekImpl::execute(source, 0, timezone); if (week_day == 7) week_day = 0; From a8da7b4c20964a08eccb83b56afaa56df41146f2 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 9 Jan 2023 20:39:46 +0800 Subject: [PATCH 125/262] change as request --- src/Common/DateLUTImpl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Common/DateLUTImpl.h b/src/Common/DateLUTImpl.h index 1e7f11d3c9e..3d496e088bb 100644 --- a/src/Common/DateLUTImpl.h +++ b/src/Common/DateLUTImpl.h @@ -628,6 +628,7 @@ public: template inline Int16 toYear(DateOrTime v) const { return lut[toLUTIndex(v)].year; } + /// 1-based, starts on Monday template inline UInt8 toDayOfWeek(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_week; } @@ -635,7 +636,7 @@ public: inline UInt8 toDayOfWeek(DateOrTime v, UInt8 week_day_mode) const { WeekDayMode mode = check_week_day_mode(week_day_mode); - auto res = toDayOfWeek(v); + UInt8 res = toDayOfWeek(v); bool start_from_sunday = (mode == WeekDayMode::WeekStartsSunday0 || mode == WeekDayMode::WeekStartsSunday1); bool zero_based = (mode == WeekDayMode::WeekStartsMonday0 || mode == WeekDayMode::WeekStartsSunday0); From 88c3c2946b172de296824ac5dd6cbbfb3ed8b380 Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 9 Jan 2023 14:58:44 +0000 Subject: [PATCH 126/262] review fixes + split into files --- src/Common/CancelToken.cpp | 243 +++++++++++ src/Common/{Threading.h => CancelToken.h} | 135 ++---- src/Common/CancelableSharedMutex.cpp | 115 +++++ src/Common/CancelableSharedMutex.h | 62 +++ src/Common/ErrorCodes.cpp | 2 +- src/Common/SharedMutex.cpp | 80 ++++ src/Common/SharedMutex.h | 47 +++ src/Common/Threading.cpp | 484 ---------------------- src/Common/futex.h | 97 +++++ src/Common/tests/gtest_threading.cpp | 56 +-- 10 files changed, 710 insertions(+), 611 deletions(-) create mode 100644 src/Common/CancelToken.cpp rename src/Common/{Threading.h => CancelToken.h} (61%) create mode 100644 src/Common/CancelableSharedMutex.cpp create mode 100644 src/Common/CancelableSharedMutex.h create mode 100644 src/Common/SharedMutex.cpp create mode 100644 src/Common/SharedMutex.h delete mode 100644 src/Common/Threading.cpp create mode 100644 src/Common/futex.h diff --git a/src/Common/CancelToken.cpp b/src/Common/CancelToken.cpp new file mode 100644 index 00000000000..87bcdc26bd4 --- /dev/null +++ b/src/Common/CancelToken.cpp @@ -0,0 +1,243 @@ +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int THREAD_WAS_CANCELED; +} +} + +#ifdef OS_LINUX /// Because of futex + +#include + +#include +#include +#include +#include + +namespace DB +{ + +namespace +{ + inline Int64 futexWait(void * address, UInt32 value) + { + return syscall(SYS_futex, address, FUTEX_WAIT_PRIVATE, value, nullptr, nullptr, 0); + } + + inline Int64 futexWake(void * address, int count) + { + return syscall(SYS_futex, address, FUTEX_WAKE_PRIVATE, count, nullptr, nullptr, 0); + } +} + +void 
CancelToken::Registry::insert(CancelToken * token) +{ + std::lock_guard lock(mutex); + threads[token->thread_id] = token; +} + +void CancelToken::Registry::remove(CancelToken * token) +{ + std::lock_guard lock(mutex); + threads.erase(token->thread_id); +} + +void CancelToken::Registry::signal(UInt64 tid) +{ + std::lock_guard lock(mutex); + if (auto it = threads.find(tid); it != threads.end()) + it->second->signalImpl(); +} + +void CancelToken::Registry::signal(UInt64 tid, int code, const String & message) +{ + std::lock_guard lock(mutex); + if (auto it = threads.find(tid); it != threads.end()) + it->second->signalImpl(code, message); +} + +const std::shared_ptr & CancelToken::Registry::instance() +{ + static std::shared_ptr registry{new Registry()}; // shared_ptr is used to enforce correct destruction order of tokens and registry + return registry; +} + +CancelToken::CancelToken() + : state(disabled) + , thread_id(getThreadId()) + , registry(Registry::instance()) +{ + registry->insert(this); +} + +CancelToken::~CancelToken() +{ + registry->remove(this); +} + +void CancelToken::signal(UInt64 tid) +{ + Registry::instance()->signal(tid); +} + +void CancelToken::signal(UInt64 tid, int code, const String & message) +{ + Registry::instance()->signal(tid, code, message); +} + +bool CancelToken::wait(UInt32 * address, UInt32 value) +{ + chassert((reinterpret_cast(address) & canceled) == 0); // An `address` must be 2-byte aligned + if (value & signaled) // Can happen after spurious wake-up due to cancel of other thread + return true; // Spin-wait unless signal is handled + + UInt64 s = state.load(); + while (true) + { + if (s & disabled) + { + // Start non-cancelable wait on futex. Spurious wake-up is possible. + futexWait(address, value); + return true; // Disabled - true is forced + } + if (s & canceled) + return false; // Has already been canceled + if (state.compare_exchange_strong(s, reinterpret_cast(address))) + break; // This futex has been "acquired" by this token + } + + // Start cancelable wait. Spurious wake-up is possible. 
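// Note added by the editor, not part of the patch: "spurious" here covers two cases: notifyAll() calls
// futexWake(address, INT_MAX), and so does signalImpl() when canceling some other thread parked on the
// same address, so a woken thread may find neither a notification nor its own cancelation and is
// expected to re-check its predicate and call wait() again.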
+ futexWait(address, value); + + // "Release" futex and check for cancelation + s = state.load(); + while (true) + { + chassert((s & disabled) != disabled); // `disable()` must not be called from another thread + if (s & canceled) + { + if (s == canceled) + break; // Signaled; futex "release" has been done by the signaling thread + else + { + s = state.load(); + continue; // To avoid race (may lead to futex destruction) we have to wait for signaling thread to finish + } + } + if (state.compare_exchange_strong(s, 0)) + return true; // There was no cancelation; futex "released" + } + + // Reset signaled bit + reinterpret_cast *>(address)->fetch_and(~signaled); + return false; +} + +void CancelToken::raise() +{ + std::unique_lock lock(signal_mutex); + if (exception_code != 0) + throw DB::Exception( + std::exchange(exception_code, 0), + std::exchange(exception_message, {})); + else + throw DB::Exception(ErrorCodes::THREAD_WAS_CANCELED, "Thread was canceled"); +} + +void CancelToken::notifyOne(UInt32 * address) +{ + futexWake(address, 1); +} + +void CancelToken::notifyAll(UInt32 * address) +{ + futexWake(address, INT_MAX); +} + +void CancelToken::signalImpl() +{ + signalImpl(0, {}); +} + +std::mutex CancelToken::signal_mutex; + +void CancelToken::signalImpl(int code, const String & message) +{ + // Serialize all signaling threads to avoid races due to concurrent signal()/raise() calls + std::unique_lock lock(signal_mutex); + + UInt64 s = state.load(); + while (true) + { + if (s & canceled) + return; // Already canceled - don't signal twice + if (state.compare_exchange_strong(s, s | canceled)) + break; // It is the canceling thread - should deliver signal if necessary + } + + exception_code = code; + exception_message = message; + + if ((s & disabled) == disabled) + return; // Cancelation is disabled - just signal token for later, but don't wake + std::atomic * address = reinterpret_cast *>(s & disabled); + if (address == nullptr) + return; // Thread is currently not waiting on futex - wake-up not required + + // Set signaled bit + UInt32 value = address->load(); + while (true) + { + if (value & signaled) // Already signaled, just spin-wait until previous signal is handled by waiter + value = address->load(); + else if (address->compare_exchange_strong(value, value | signaled)) + break; + } + + // Wake all threads waiting on `address`, one of them will be canceled and others will get spurious wake-ups + // Woken canceled thread will reset signaled bit + futexWake(address, INT_MAX); + + // Signaling thread must remove address from state to notify canceled thread that `futexWake()` is done, thus `wake()` can return. + // Otherwise we may have race condition: signaling thread may try to wake futex that has been already destructed. 
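// Worked example added by the editor, not part of the patch: the interleaving that the ordering around
// the store below rules out:
//   1. signaler: futexWake(address, INT_MAX) above runs while *address is still guaranteed to be alive;
//   2. waiter:   returns from futexWait(), sees the canceled bit, but keeps re-reading `state` until it
//                equals exactly `canceled` (the matching loop in wait() above);
//   3. signaler: the store below publishes "done touching *address";
//   4. waiter:   observes state == canceled, clears the `signaled` bit and returns false from wait(),
//                after which the futex word (e.g. a field of a mutex object) may safely be destroyed.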
+ state.store(canceled); +} + +Cancelable::Cancelable() +{ + CancelToken::local().reset(); +} + +Cancelable::~Cancelable() +{ + CancelToken::local().disable(); +} + +NonCancelable::NonCancelable() +{ + CancelToken::local().disable(); +} + +NonCancelable::~NonCancelable() +{ + CancelToken::local().enable(); +} + +} + +#else + +namespace DB +{ + +void CancelToken::raise() +{ + throw DB::Exception(ErrorCodes::THREAD_WAS_CANCELED, "Thread was canceled"); +} + +} + +#endif diff --git a/src/Common/Threading.h b/src/Common/CancelToken.h similarity index 61% rename from src/Common/Threading.h rename to src/Common/CancelToken.h index d5d32e73b67..27b9d41f0f3 100644 --- a/src/Common/Threading.h +++ b/src/Common/CancelToken.h @@ -15,23 +15,25 @@ namespace DB { -// Scoped object, enabling thread cancellation (cannot be nested) -struct Cancellable +// Scoped object, enabling thread cancelation (cannot be nested). +// Intended to be used once per cancelable task. It erases any previously held cancelation signal. +// Note that by default thread is not cancelable. +struct Cancelable { - Cancellable(); - ~Cancellable(); + Cancelable(); + ~Cancelable(); }; -// Scoped object, disabling thread cancellation (cannot be nested; must be inside `Cancellable` region) -struct NonCancellable +// Scoped object, disabling thread cancelation (cannot be nested; must be inside `Cancelable` region) +struct NonCancelable { - NonCancellable(); - ~NonCancellable(); + NonCancelable(); + ~NonCancelable(); }; -// Responsible for synchronization needed to deliver thread cancellation signal. -// Basic building block for cancellable synchronization primitives. -// Allows to perform cancellable wait on memory addresses (think futex) +// Responsible for synchronization needed to deliver thread cancelation signal. +// Basic building block for cancelable synchronization primitives. +// Allows to perform cancelable wait on memory addresses (think futex) class CancelToken { public: @@ -39,6 +41,7 @@ public: CancelToken(const CancelToken &) = delete; CancelToken(CancelToken &&) = delete; CancelToken & operator=(const CancelToken &) = delete; + CancelToken & operator=(CancelToken &&) = delete; ~CancelToken(); // Returns token for the current thread @@ -48,17 +51,17 @@ public: return token; } - // Cancellable wait on memory address (futex word). + // Cancelable wait on memory address (futex word). // Thread will do atomic compare-and-sleep `*address == value`. Waiting will continue until `notify_one()` // or `notify_all()` will be called with the same `address` or calling thread will be canceled using `signal()`. - // Note that spurious wake-ups are also possible due to cancellation of other waiters on the same `address`. + // Note that spurious wake-ups are also possible due to cancelation of other waiters on the same `address`. // WARNING: `address` must be 2-byte aligned and `value` highest bit must be zero. // Return value: // true - woken by either notify or spurious wakeup; - // false - iff cancellation signal has been received. + // false - iff cancelation signal has been received. // Implementation details: - // It registers `address` inside token's `state` to allow other threads to wake this thread and deliver cancellation signal. - // Highest bit of `*address` is used for guaranteed delivery of the signal, but is guaranteed to be zero on return due to cancellation. + // It registers `address` inside token's `state` to allow other threads to wake this thread and deliver cancelation signal. 
+ // Highest bit of `*address` is used for guaranteed delivery of the signal, but is guaranteed to be zero on return due to cancelation. // Intended to be called only by thread associated with this token. bool wait(UInt32 * address, UInt32 value); @@ -72,27 +75,27 @@ public: static void notifyAll(UInt32 * address); // Send cancel signal to thread with specified `tid`. - // If thread was waiting using `wait()` it will be woken up (unless cancellation is disabled). + // If thread was waiting using `wait()` it will be woken up (unless cancelation is disabled). // Can be called from any thread. static void signal(UInt64 tid); static void signal(UInt64 tid, int code, const String & message); - // Flag used to deliver cancellation into memory address to wake a thread. + // Flag used to deliver cancelation into memory address to wake a thread. // Note that most significant bit at `addresses` to be used with `wait()` is reserved. static constexpr UInt32 signaled = 1u << 31u; private: - friend struct Cancellable; - friend struct NonCancellable; + friend struct Cancelable; + friend struct NonCancelable; - // Restores initial state for token to be reused. See `Cancellable` struct. + // Restores initial state for token to be reused. See `Cancelable` struct. // Intended to be called only by thread associated with this token. void reset() { state.store(0); } - // Enable thread cancellation. See `NonCancellable` struct. + // Enable thread cancelation. See `NonCancelable` struct. // Intended to be called only by thread associated with this token. void enable() { @@ -100,7 +103,7 @@ private: state.fetch_and(~disabled); } - // Disable thread cancellation. See `NonCancellable` struct. + // Disable thread cancelation. See `NonCancelable` struct. // Intended to be called only by thread associated with this token. void disable() { @@ -109,8 +112,6 @@ private: } // Singleton. Maps thread IDs to tokens. - struct Registry; - friend struct Registry; struct Registry { std::mutex mutex; @@ -134,7 +135,7 @@ private: // Upper bits - possible values: // 1) all zeros: token is enabed, i.e. wait() call can return false, thread is not waiting on any address; - // 2) all ones: token is disabled, i.e. wait() call cannot be cancelled; + // 2) all ones: token is disabled, i.e. wait() call cannot be canceled; // 3) specific `address`: token is enabled and thread is currently waiting on this `address`. 
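// Usage sketch added by the editor, not part of the patch; it relies only on the declarations in this
// header (wait/notifyAll/signal/raise and the Cancelable scope) plus getThreadId(); `flag` and
// `waiter_tid` are illustrative names.
//
//   std::atomic<UInt32> flag{0};                     // futex word; highest bit is reserved for `signaled`
//
//   // Waiting thread (assume it published its getThreadId() as waiter_tid):
//   DB::Cancelable cancelable_scope;                 // makes this thread cancelable
//   while (flag.load() == 0)
//   {
//       if (!DB::CancelToken::local().wait(reinterpret_cast<UInt32 *>(&flag), 0))
//           DB::CancelToken::local().raise();        // rethrows the code/message passed to signal()
//   }
//
//   // Another thread, normal wake-up path:
//   flag.store(1);
//   DB::CancelToken::notifyAll(reinterpret_cast<UInt32 *>(&flag));
//
//   // Another thread, cancelation path:
//   DB::CancelToken::signal(waiter_tid, error_code, "canceled");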
static constexpr UInt64 disabled = ~canceled; static_assert(sizeof(UInt32 *) == sizeof(UInt64)); // State must be able to hold an address @@ -142,11 +143,11 @@ private: // All signal handling logic should be globally serialized using this mutex static std::mutex signal_mutex; - // Cancellation state + // Cancelation state alignas(64) std::atomic state; [[maybe_unused]] char padding[64 - sizeof(state)]; - // Cancellation exception + // Cancelation exception int exception_code; String exception_message; @@ -157,86 +158,25 @@ private: const std::shared_ptr registry; }; -class CancellableSharedMutex -{ -public: - CancellableSharedMutex(); - ~CancellableSharedMutex() = default; - CancellableSharedMutex(const CancellableSharedMutex &) = delete; - CancellableSharedMutex & operator=(const CancellableSharedMutex &) = delete; - - // Exclusive ownership - void lock(); - bool try_lock(); - void unlock(); - - // Shared ownership - void lock_shared(); - bool try_lock_shared(); - void unlock_shared(); - -private: - // State 64-bits layout: - // 1b - 31b - 1b - 31b - // signaled - writers - signaled - readers - // 63------------------------------------0 - // Two 32-bit words are used for cancellable waiting, so each has its own separate signaled bit - static constexpr UInt64 readers = (1ull << 32ull) - 1ull - CancelToken::signaled; - static constexpr UInt64 readers_signaled = CancelToken::signaled; - static constexpr UInt64 writers = readers << 32ull; - static constexpr UInt64 writers_signaled = readers_signaled << 32ull; - - alignas(64) std::atomic state; - std::atomic waiters; -}; - -class FastSharedMutex -{ -public: - FastSharedMutex(); - ~FastSharedMutex() = default; - FastSharedMutex(const FastSharedMutex &) = delete; - FastSharedMutex & operator=(const FastSharedMutex &) = delete; - - // Exclusive ownership - void lock(); - bool try_lock(); - void unlock(); - - // Shared ownership - void lock_shared(); - bool try_lock_shared(); - void unlock_shared(); - -private: - static constexpr UInt64 readers = (1ull << 32ull) - 1ull; // Lower 32 bits of state - static constexpr UInt64 writers = ~readers; // Upper 32 bits of state - - alignas(64) std::atomic state; - std::atomic waiters; -}; - } #else -#include - -// WARNING: We support cancellable synchronization primitives only on linux for now +// WARNING: We support cancelable synchronization primitives only on linux for now namespace DB { -struct Cancellable +struct Cancelable { - Cancellable() = default; - ~Cancellable() = default; + Cancelable() = default; + ~Cancelable() = default; }; -struct NonCancellable +struct NonCancelable { - NonCancellable() = default; - ~NonCancellable() = default; + NonCancelable() = default; + ~NonCancelable() = default; }; class CancelToken @@ -262,9 +202,6 @@ public: static void signal(UInt64, int, const String &) {} }; -using CancellableSharedMutex = std::shared_mutex; -using FastSharedMutex = std::shared_mutex; - } #endif diff --git a/src/Common/CancelableSharedMutex.cpp b/src/Common/CancelableSharedMutex.cpp new file mode 100644 index 00000000000..c8ca93309ee --- /dev/null +++ b/src/Common/CancelableSharedMutex.cpp @@ -0,0 +1,115 @@ +#include + +#ifdef OS_LINUX /// Because of futex + +#include + +namespace DB +{ + +namespace +{ + inline bool cancelableWaitUpperFetch(std::atomic & address, UInt64 & value) + { + bool res = CancelToken::local().wait(upperHalfAddress(&address), upperHalf(value)); + value = address.load(); + return res; + } + + inline bool cancelableWaitLowerFetch(std::atomic & address, UInt64 & value) + { + 
bool res = CancelToken::local().wait(lowerHalfAddress(&address), lowerHalf(value)); + value = address.load(); + return res; + } +} + +CancelableSharedMutex::CancelableSharedMutex() + : state(0) + , waiters(0) +{} + +void CancelableSharedMutex::lock() +{ + UInt64 value = state.load(); + while (true) + { + if (value & writers) + { + waiters++; + if (!cancelableWaitUpperFetch(state, value)) + { + waiters--; + CancelToken::local().raise(); + } + else + waiters--; + } + else if (state.compare_exchange_strong(value, value | writers)) + break; + } + + value |= writers; + while (value & readers) + { + if (!cancelableWaitLowerFetch(state, value)) + { + state.fetch_and(~writers); + futexWakeUpperAll(state); + CancelToken::local().raise(); + } + } +} + +bool CancelableSharedMutex::try_lock() +{ + UInt64 value = state.load(); + return (value & (readers | writers)) == 0 && state.compare_exchange_strong(value, value | writers); +} + +void CancelableSharedMutex::unlock() +{ + state.fetch_and(~writers); + if (waiters) + futexWakeUpperAll(state); +} + +void CancelableSharedMutex::lock_shared() +{ + UInt64 value = state.load(); + while (true) + { + if (value & writers) + { + waiters++; + if (!cancelableWaitUpperFetch(state, value)) + { + waiters--; + CancelToken::local().raise(); + } + else + waiters--; + } + else if (state.compare_exchange_strong(value, value + 1)) // overflow is not realistic + break; + } +} + +bool CancelableSharedMutex::try_lock_shared() +{ + UInt64 value = state.load(); + if (!(value & writers) && state.compare_exchange_strong(value, value + 1)) // overflow is not realistic + return true; + return false; +} + +void CancelableSharedMutex::unlock_shared() +{ + UInt64 value = state.fetch_sub(1) - 1; + if ((value & (writers | readers)) == writers) // If writer is waiting and no more readers + futexWakeLowerOne(state); // Wake writer +} + +} + +#endif diff --git a/src/Common/CancelableSharedMutex.h b/src/Common/CancelableSharedMutex.h new file mode 100644 index 00000000000..f989e8d5beb --- /dev/null +++ b/src/Common/CancelableSharedMutex.h @@ -0,0 +1,62 @@ +#pragma once + +#ifdef OS_LINUX /// Because of futex + +#include +#include +#include +#include // for std::unique_lock and std::shared_lock + +namespace DB +{ + +// Reimplementation of `std::shared_mutex` that can interoperate with thread cancelation via `CancelToken::signal()`. +// It has cancelation point on waiting during `lock()` and `shared_lock()`. +// NOTE: It has NO cancelation points on fast code path, when locking does not require waiting. 
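// Usage sketch added by the editor, not part of the patch, modeled on the unit test changes later in this
// patch; `reader_tid` is an illustrative name (obtained via getThreadId() on the reader thread).
//
//   DB::CancelableSharedMutex sm;
//
//   // Reader thread: a blocked lock_shared() is a cancelation point.
//   try
//   {
//       DB::Cancelable cancelable_scope;             // cancelation is disabled by default
//       std::shared_lock lock(sm);                   // blocks while a writer holds the mutex
//       // ... read the shared state ...
//   }
//   catch (DB::Exception & e)
//   {
//       // e.code() == DB::ErrorCodes::THREAD_WAS_CANCELED when the wait was canceled
//   }
//
//   // Any other thread:
//   DB::CancelToken::signal(reader_tid, DB::ErrorCodes::THREAD_WAS_CANCELED, "canceled");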
+class CancelableSharedMutex +{ +public: + CancelableSharedMutex(); + ~CancelableSharedMutex() = default; + CancelableSharedMutex(const CancelableSharedMutex &) = delete; + CancelableSharedMutex & operator=(const CancelableSharedMutex &) = delete; + + // Exclusive ownership + void lock(); + bool try_lock(); + void unlock(); + + // Shared ownership + void lock_shared(); + bool try_lock_shared(); + void unlock_shared(); + +private: + // State 64-bits layout: + // 1b - 31b - 1b - 31b + // signaled - writers - signaled - readers + // 63------------------------------------0 + // Two 32-bit words are used for cancelable waiting, so each has its own separate signaled bit + static constexpr UInt64 readers = (1ull << 32ull) - 1ull - CancelToken::signaled; + static constexpr UInt64 readers_signaled = CancelToken::signaled; + static constexpr UInt64 writers = readers << 32ull; + static constexpr UInt64 writers_signaled = readers_signaled << 32ull; + + alignas(64) std::atomic state; + std::atomic waiters; +}; + +} + +#else + +// WARNING: We support cancelable synchronization primitives only on linux for now + +namespace DB +{ + +using CancelableSharedMutex = std::shared_mutex; + +} + +#endif diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 6dbeefe1823..0ad4cbb9e6f 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -645,7 +645,7 @@ M(674, RESOURCE_NOT_FOUND) \ M(675, CANNOT_PARSE_IPV4) \ M(676, CANNOT_PARSE_IPV6) \ - M(677, THREAD_WAS_CANCELLED) \ + M(677, THREAD_WAS_CANCELED) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/SharedMutex.cpp b/src/Common/SharedMutex.cpp new file mode 100644 index 00000000000..3a69c106800 --- /dev/null +++ b/src/Common/SharedMutex.cpp @@ -0,0 +1,80 @@ +#include + +#ifdef OS_LINUX /// Because of futex + +#include + +#include + +namespace DB +{ + +void SharedMutex::lock() +{ + UInt64 value = state.load(); + while (true) + { + if (value & writers) + { + waiters++; + futexWaitUpperFetch(state, value); + waiters--; + } + else if (state.compare_exchange_strong(value, value | writers)) + break; + } + + value |= writers; + while (value & readers) + futexWaitLowerFetch(state, value); +} + +bool SharedMutex::try_lock() +{ + UInt64 value = 0; + if (state.compare_exchange_strong(value, writers)) + return true; + return false; +} + +void SharedMutex::unlock() +{ + state.store(0); + if (waiters) + futexWakeUpperAll(state); +} + +void SharedMutex::lock_shared() +{ + UInt64 value = state.load(); + while (true) + { + if (value & writers) + { + waiters++; + futexWaitUpperFetch(state, value); + waiters--; + } + else if (state.compare_exchange_strong(value, value + 1)) + break; + } +} + +bool SharedMutex::try_lock_shared() +{ + UInt64 value = state.load(); + if (!(value & writers) && state.compare_exchange_strong(value, value + 1)) + return true; + return false; +} + +void SharedMutex::unlock_shared() +{ + UInt64 value = state.fetch_sub(1) - 1; + if (value == writers) + futexWakeLowerOne(state); // Wake writer +} + +} + +#endif diff --git a/src/Common/SharedMutex.h b/src/Common/SharedMutex.h new file mode 100644 index 00000000000..ebe730ca419 --- /dev/null +++ b/src/Common/SharedMutex.h @@ -0,0 +1,47 @@ +#pragma once + +#ifdef OS_LINUX /// Because of futex + +#include +#include +#include // for std::unique_lock and std::shared_lock + +namespace DB +{ + +// Faster implementation of `std::shared_mutex` based on a pair of futexes +class SharedMutex +{ +public: + SharedMutex(); + ~SharedMutex() = default; + 
SharedMutex(const SharedMutex &) = delete; + SharedMutex & operator=(const SharedMutex &) = delete; + + // Exclusive ownership + void lock(); + bool try_lock(); + void unlock(); + + // Shared ownership + void lock_shared(); + bool try_lock_shared(); + void unlock_shared(); + +private: + static constexpr UInt64 readers = (1ull << 32ull) - 1ull; // Lower 32 bits of state + static constexpr UInt64 writers = ~readers; // Upper 32 bits of state + + alignas(64) std::atomic state; + std::atomic waiters; +}; + +} + +#else + +using SharedMutex = std::shared_mutex; + +} + +#endif diff --git a/src/Common/Threading.cpp b/src/Common/Threading.cpp deleted file mode 100644 index ae32a1a1052..00000000000 --- a/src/Common/Threading.cpp +++ /dev/null @@ -1,484 +0,0 @@ -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int THREAD_WAS_CANCELLED; -} -} - -#ifdef OS_LINUX /// Because of futex - -#include - -#include - -#include -#include -#include -#include - -namespace DB -{ - -namespace -{ - inline Int64 futexWait(void * address, UInt32 value) - { - return syscall(SYS_futex, address, FUTEX_WAIT_PRIVATE, value, nullptr, nullptr, 0); - } - - inline Int64 futexWake(void * address, int count) - { - return syscall(SYS_futex, address, FUTEX_WAKE_PRIVATE, count, nullptr, nullptr, 0); - } - - // inline void waitFetch(std::atomic & address, UInt32 & value) - // { - // futexWait(&address, value); - // value = address.load(); - // } - - // inline void wakeOne(std::atomic & address) - // { - // futexWake(&address, 1); - // } - - // inline void wakeAll(std::atomic & address) - // { - // futexWake(&address, INT_MAX); - // } - - inline constexpr UInt32 lowerValue(UInt64 value) - { - return static_cast(value & 0xffffffffull); - } - - inline constexpr UInt32 upperValue(UInt64 value) - { - return static_cast(value >> 32ull); - } - - inline UInt32 * lowerAddress(void * address) - { - return reinterpret_cast(address) + (std::endian::native == std::endian::big); - } - - inline UInt32 * upperAddress(void * address) - { - return reinterpret_cast(address) + (std::endian::native == std::endian::little); - } - - inline void waitLowerFetch(std::atomic & address, UInt64 & value) - { - futexWait(lowerAddress(&address), lowerValue(value)); - value = address.load(); - } - - inline bool cancellableWaitLowerFetch(std::atomic & address, UInt64 & value) - { - bool res = CancelToken::local().wait(lowerAddress(&address), lowerValue(value)); - value = address.load(); - return res; - } - - inline void wakeLowerOne(std::atomic & address) - { - syscall(SYS_futex, lowerAddress(&address), FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0); - } - - // inline void wakeLowerAll(std::atomic & address) - // { - // syscall(SYS_futex, lowerAddress(&address), FUTEX_WAKE_PRIVATE, INT_MAX, nullptr, nullptr, 0); - // } - - inline void waitUpperFetch(std::atomic & address, UInt64 & value) - { - futexWait(upperAddress(&address), upperValue(value)); - value = address.load(); - } - - inline bool cancellableWaitUpperFetch(std::atomic & address, UInt64 & value) - { - bool res = CancelToken::local().wait(upperAddress(&address), upperValue(value)); - value = address.load(); - return res; - } - - // inline void wakeUpperOne(std::atomic & address) - // { - // syscall(SYS_futex, upperAddress(&address), FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0); - // } - - inline void wakeUpperAll(std::atomic & address) - { - syscall(SYS_futex, upperAddress(&address), FUTEX_WAKE_PRIVATE, INT_MAX, nullptr, nullptr, 0); - } -} - -void CancelToken::Registry::insert(CancelToken * 
token) -{ - std::lock_guard lock(mutex); - threads[token->thread_id] = token; -} - -void CancelToken::Registry::remove(CancelToken * token) -{ - std::lock_guard lock(mutex); - threads.erase(token->thread_id); -} - -void CancelToken::Registry::signal(UInt64 tid) -{ - std::lock_guard lock(mutex); - if (auto it = threads.find(tid); it != threads.end()) - it->second->signalImpl(); -} - -void CancelToken::Registry::signal(UInt64 tid, int code, const String & message) -{ - std::lock_guard lock(mutex); - if (auto it = threads.find(tid); it != threads.end()) - it->second->signalImpl(code, message); -} - -const std::shared_ptr & CancelToken::Registry::instance() -{ - static std::shared_ptr registry{new Registry()}; // shared_ptr is used to enforce correct destruction order of tokens and registry - return registry; -} - -CancelToken::CancelToken() - : state(disabled) - , thread_id(getThreadId()) - , registry(Registry::instance()) -{ - registry->insert(this); -} - -CancelToken::~CancelToken() -{ - registry->remove(this); -} - -void CancelToken::signal(UInt64 tid) -{ - Registry::instance()->signal(tid); -} - -void CancelToken::signal(UInt64 tid, int code, const String & message) -{ - Registry::instance()->signal(tid, code, message); -} - -bool CancelToken::wait(UInt32 * address, UInt32 value) -{ - chassert((reinterpret_cast(address) & canceled) == 0); // An `address` must be 2-byte aligned - if (value & signaled) // Can happen after spurious wake-up due to cancel of other thread - return true; // Spin-wait unless signal is handled - - UInt64 s = state.load(); - while (true) - { - if (s & disabled) - { - // Start non-cancellable wait on futex. Spurious wake-up is possible. - futexWait(address, value); - return true; // Disabled - true is forced - } - if (s & canceled) - return false; // Has already been canceled - if (state.compare_exchange_strong(s, reinterpret_cast(address))) - break; // This futex has been "acquired" by this token - } - - // Start cancellable wait. Spurious wake-up is possible. 
- futexWait(address, value); - - // "Release" futex and check for cancellation - s = state.load(); - while (true) - { - chassert((s & disabled) != disabled); // `disable()` must not be called from another thread - if (s & canceled) - { - if (s == canceled) - break; // Signaled; futex "release" has been done by the signaling thread - else - { - s = state.load(); - continue; // To avoid race (may lead to futex destruction) we have to wait for signaling thread to finish - } - } - if (state.compare_exchange_strong(s, 0)) - return true; // There was no cancellation; futex "released" - } - - // Reset signaled bit - reinterpret_cast *>(address)->fetch_and(~signaled); - return false; -} - -void CancelToken::raise() -{ - std::unique_lock lock(signal_mutex); - if (exception_code != 0) - throw DB::Exception( - std::exchange(exception_code, 0), - std::exchange(exception_message, {})); - else - throw DB::Exception(ErrorCodes::THREAD_WAS_CANCELLED, "Thread was cancelled"); -} - -void CancelToken::notifyOne(UInt32 * address) -{ - futexWake(address, 1); -} - -void CancelToken::notifyAll(UInt32 * address) -{ - futexWake(address, INT_MAX); -} - -void CancelToken::signalImpl() -{ - signalImpl(0, {}); -} - -std::mutex CancelToken::signal_mutex; - -void CancelToken::signalImpl(int code, const String & message) -{ - // Serialize all signaling threads to avoid races due to concurrent signal()/raise() calls - std::unique_lock lock(signal_mutex); - - UInt64 s = state.load(); - while (true) - { - if (s & canceled) - return; // Already cancelled - don't signal twice - if (state.compare_exchange_strong(s, s | canceled)) - break; // It is the cancelling thread - should deliver signal if necessary - } - - exception_code = code; - exception_message = message; - - if ((s & disabled) == disabled) - return; // Cancellation is disabled - just signal token for later, but don't wake - std::atomic * address = reinterpret_cast *>(s & disabled); - if (address == nullptr) - return; // Thread is currently not waiting on futex - wake-up not required - - // Set signaled bit - UInt32 value = address->load(); - while (true) - { - if (value & signaled) // Already signaled, just spin-wait until previous signal is handled by waiter - value = address->load(); - else if (address->compare_exchange_strong(value, value | signaled)) - break; - } - - // Wake all threads waiting on `address`, one of them will be cancelled and others will get spurious wake-ups - // Woken canceled thread will reset signaled bit - futexWake(address, INT_MAX); - - // Signaling thread must remove address from state to notify canceled thread that `futexWake()` is done, thus `wake()` can return. - // Otherwise we may have race condition: signaling thread may try to wake futex that has been already destructed. 
- state.store(canceled); -} - -Cancellable::Cancellable() -{ - CancelToken::local().reset(); -} - -Cancellable::~Cancellable() -{ - CancelToken::local().disable(); -} - -NonCancellable::NonCancellable() -{ - CancelToken::local().disable(); -} - -NonCancellable::~NonCancellable() -{ - CancelToken::local().enable(); -} - -CancellableSharedMutex::CancellableSharedMutex() - : state(0) - , waiters(0) -{} - -void CancellableSharedMutex::lock() -{ - UInt64 value = state.load(); - while (true) - { - if (value & writers) - { - waiters++; - if (!cancellableWaitUpperFetch(state, value)) - { - waiters--; - CancelToken::local().raise(); - } - else - waiters--; - } - else if (state.compare_exchange_strong(value, value | writers)) - break; - } - - value |= writers; - while (value & readers) - { - if (!cancellableWaitLowerFetch(state, value)) - { - state.fetch_and(~writers); - wakeUpperAll(state); - CancelToken::local().raise(); - } - } -} - -bool CancellableSharedMutex::try_lock() -{ - UInt64 value = state.load(); - return (value & (readers | writers)) == 0 && state.compare_exchange_strong(value, value | writers); -} - -void CancellableSharedMutex::unlock() -{ - state.fetch_and(~writers); - if (waiters) - wakeUpperAll(state); -} - -void CancellableSharedMutex::lock_shared() -{ - UInt64 value = state.load(); - while (true) - { - if (value & writers) - { - waiters++; - if (!cancellableWaitUpperFetch(state, value)) - { - waiters--; - CancelToken::local().raise(); - } - else - waiters--; - } - else if (state.compare_exchange_strong(value, value + 1)) // overflow is not realistic - break; - } -} - -bool CancellableSharedMutex::try_lock_shared() -{ - UInt64 value = state.load(); - if (!(value & writers) && state.compare_exchange_strong(value, value + 1)) // overflow is not realistic - return true; - return false; -} - -void CancellableSharedMutex::unlock_shared() -{ - UInt64 value = state.fetch_sub(1) - 1; - if ((value & (writers | readers)) == writers) // If writer is waiting and no more readers - wakeLowerOne(state); // Wake writer -} - -FastSharedMutex::FastSharedMutex() - : state(0) - , waiters(0) -{} - -void FastSharedMutex::lock() -{ - UInt64 value = state.load(); - while (true) - { - if (value & writers) - { - waiters++; - waitUpperFetch(state, value); - waiters--; - } - else if (state.compare_exchange_strong(value, value | writers)) - break; - } - - value |= writers; - while (value & readers) - waitLowerFetch(state, value); -} - -bool FastSharedMutex::try_lock() -{ - UInt64 value = 0; - if (state.compare_exchange_strong(value, writers)) - return true; - return false; -} - -void FastSharedMutex::unlock() -{ - state.store(0); - if (waiters) - wakeUpperAll(state); -} - -void FastSharedMutex::lock_shared() -{ - UInt64 value = state.load(); - while (true) - { - if (value & writers) - { - waiters++; - waitUpperFetch(state, value); - waiters--; - } - else if (state.compare_exchange_strong(value, value + 1)) - break; - } -} - -bool FastSharedMutex::try_lock_shared() -{ - UInt64 value = state.load(); - if (!(value & writers) && state.compare_exchange_strong(value, value + 1)) - return true; - return false; -} - -void FastSharedMutex::unlock_shared() -{ - UInt64 value = state.fetch_sub(1) - 1; - if (value == writers) - wakeLowerOne(state); // Wake writer -} - -} - -#else - -namespace DB -{ - -void CancelToken::raise() -{ - throw DB::Exception(ErrorCodes::THREAD_WAS_CANCELLED, "Thread was cancelled"); -} - -} - -#endif diff --git a/src/Common/futex.h b/src/Common/futex.h new file mode 100644 index 
00000000000..33279ff4831 --- /dev/null +++ b/src/Common/futex.h @@ -0,0 +1,97 @@ +#pragma once + +#ifdef OS_LINUX + +#include + +#include + +#include +#include +#include +#include + +namespace DB +{ + +inline Int64 futexWait(void * address, UInt32 value) +{ + return syscall(SYS_futex, address, FUTEX_WAIT_PRIVATE, value, nullptr, nullptr, 0); +} + +inline Int64 futexWake(void * address, int count) +{ + return syscall(SYS_futex, address, FUTEX_WAKE_PRIVATE, count, nullptr, nullptr, 0); +} + +inline void futexWaitFetch(std::atomic & address, UInt32 & value) +{ + futexWait(&address, value); + value = address.load(); +} + +inline void futexWakeOne(std::atomic & address) +{ + futexWake(&address, 1); +} + +inline void futexWakeAll(std::atomic & address) +{ + futexWake(&address, INT_MAX); +} + +inline constexpr UInt32 lowerHalf(UInt64 value) +{ + return static_cast(value & 0xffffffffull); +} + +inline constexpr UInt32 upperHalf(UInt64 value) +{ + return static_cast(value >> 32ull); +} + +inline UInt32 * lowerHalfAddress(void * address) +{ + return reinterpret_cast(address) + (std::endian::native == std::endian::big); +} + +inline UInt32 * upperHalfAddress(void * address) +{ + return reinterpret_cast(address) + (std::endian::native == std::endian::little); +} + +inline void futexWaitLowerFetch(std::atomic & address, UInt64 & value) +{ + futexWait(lowerHalfAddress(&address), lowerHalf(value)); + value = address.load(); +} + +inline void futexWakeLowerOne(std::atomic & address) +{ + futexWake(lowerHalfAddress(&address), 1); +} + +inline void futexWakeLowerAll(std::atomic & address) +{ + futexWake(lowerHalfAddress(&address), INT_MAX); +} + +inline void futexWaitUpperFetch(std::atomic & address, UInt64 & value) +{ + futexWait(upperHalfAddress(&address), upperHalf(value)); + value = address.load(); +} + +inline void futexWakeUpperOne(std::atomic & address) +{ + futexWake(upperHalfAddress(&address), 1); +} + +inline void futexWakeUpperAll(std::atomic & address) +{ + futexWake(upperHalfAddress(&address), INT_MAX); +} + +} + +#endif diff --git a/src/Common/tests/gtest_threading.cpp b/src/Common/tests/gtest_threading.cpp index 767739deb46..a4ed7ad6642 100644 --- a/src/Common/tests/gtest_threading.cpp +++ b/src/Common/tests/gtest_threading.cpp @@ -7,7 +7,9 @@ #include #include "Common/Exception.h" -#include +#include +#include +#include #include #include @@ -18,7 +20,7 @@ namespace DB { namespace ErrorCodes { - extern const int THREAD_WAS_CANCELLED; + extern const int THREAD_WAS_CANCELED; } } @@ -126,7 +128,7 @@ void TestSharedMutexCancelReader() } catch (DB::Exception & e) { - ASSERT_EQ(e.code(), DB::ErrorCodes::THREAD_WAS_CANCELLED); + ASSERT_EQ(e.code(), DB::ErrorCodes::THREAD_WAS_CANCELED); ASSERT_EQ(e.message(), "test"); cancels++; cancel_sync.arrive_and_wait(); // (C) sync with writer @@ -148,13 +150,13 @@ void TestSharedMutexCancelReader() sync.arrive_and_wait(); // (B) sync with readers //std::unique_lock lock(m); // not needed, already synced using barrier for (UInt64 tid : tids_to_cancel) - DB::CancelToken::signal(tid, DB::ErrorCodes::THREAD_WAS_CANCELLED, "test"); + DB::CancelToken::signal(tid, DB::ErrorCodes::THREAD_WAS_CANCELED, "test"); // This sync is crucial. It is needed to hold `lock` long enough. - // It guarantees that every cancelled thread will find `sm` blocked by writer, and thus will begin to wait. - // Wait() call is required for cancellation. Otherwise, fastpath acquire w/o wait will not generate exception. 
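Aside, not part of the patch: the `lowerHalfAddress`/`upperHalfAddress` helpers in the new `futex.h` above pick which 32-bit word of a 64-bit state to hand to the futex syscall, and the `+ (std::endian::native == std::endian::big)` offset is the subtle part. A minimal standalone check of that indexing logic:

```cpp
#include <bit>
#include <cstdint>
#include <cstdio>

int main()
{
    // Upper half = 0x11111111, lower half = 0x22222222, regardless of byte order.
    uint64_t state = 0x1111111122222222ull;
    auto * words = reinterpret_cast<uint32_t *>(&state);

    // On little-endian the low 32 bits live in words[0], on big-endian in words[1],
    // matching the "+ (std::endian::native == std::endian::big)" adjustment above.
    const size_t lower = (std::endian::native == std::endian::big) ? 1 : 0;
    std::printf("lower=%08x upper=%08x\n", words[lower], words[1 - lower]);
    return 0;
}
```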
+ // It guarantees that every canceled thread will find `sm` blocked by writer, and thus will begin to wait. + // Wait() call is required for cancelation. Otherwise, fastpath acquire w/o wait will not generate exception. // And this is the desired behaviour. - cancel_sync.arrive_and_wait(); // (C) wait for cancellation to finish, before unlock. + cancel_sync.arrive_and_wait(); // (C) wait for cancelation to finish, before unlock. } } @@ -199,18 +201,18 @@ void TestSharedMutexCancelWriter() for (UInt64 tid : all_tids) { if (tid != getThreadId()) - DB::CancelToken::signal(tid, DB::ErrorCodes::THREAD_WAS_CANCELLED, "test"); + DB::CancelToken::signal(tid, DB::ErrorCodes::THREAD_WAS_CANCELED, "test"); } // This sync is crucial. It is needed to hold `lock` long enough. - // It guarantees that every cancelled thread will find `sm` blocked, and thus will begin to wait. - // Wait() call is required for cancellation. Otherwise, fastpath acquire w/o wait will not generate exception. + // It guarantees that every canceled thread will find `sm` blocked, and thus will begin to wait. + // Wait() call is required for cancelation. Otherwise, fastpath acquire w/o wait will not generate exception. // And this is the desired behaviour. - sync.arrive_and_wait(); // (B) wait for cancellation to finish, before unlock. + sync.arrive_and_wait(); // (B) wait for cancelation to finish, before unlock. } catch (DB::Exception & e) { - ASSERT_EQ(e.code(), DB::ErrorCodes::THREAD_WAS_CANCELLED); + ASSERT_EQ(e.code(), DB::ErrorCodes::THREAD_WAS_CANCELED); ASSERT_EQ(e.message(), "test"); cancels++; sync.arrive_and_wait(); // (B) sync with race winner @@ -341,29 +343,29 @@ void PerfTestSharedMutexRW() } } -TEST(Threading, SharedMutexSmokeCancellableEnabled) { TestSharedMutex(); } -TEST(Threading, SharedMutexSmokeCancellableDisabled) { TestSharedMutex(); } -TEST(Threading, SharedMutexSmokeFast) { TestSharedMutex(); } +TEST(Threading, SharedMutexSmokeCancelableEnabled) { TestSharedMutex(); } +TEST(Threading, SharedMutexSmokeCancelableDisabled) { TestSharedMutex(); } +TEST(Threading, SharedMutexSmokeFast) { TestSharedMutex(); } TEST(Threading, SharedMutexSmokeStd) { TestSharedMutex(); } -TEST(Threading, PerfTestSharedMutexReadersOnlyCancellableEnabled) { PerfTestSharedMutexReadersOnly(); } -TEST(Threading, PerfTestSharedMutexReadersOnlyCancellableDisabled) { PerfTestSharedMutexReadersOnly(); } -TEST(Threading, PerfTestSharedMutexReadersOnlyFast) { PerfTestSharedMutexReadersOnly(); } +TEST(Threading, PerfTestSharedMutexReadersOnlyCancelableEnabled) { PerfTestSharedMutexReadersOnly(); } +TEST(Threading, PerfTestSharedMutexReadersOnlyCancelableDisabled) { PerfTestSharedMutexReadersOnly(); } +TEST(Threading, PerfTestSharedMutexReadersOnlyFast) { PerfTestSharedMutexReadersOnly(); } TEST(Threading, PerfTestSharedMutexReadersOnlyStd) { PerfTestSharedMutexReadersOnly(); } -TEST(Threading, PerfTestSharedMutexWritersOnlyCancellableEnabled) { PerfTestSharedMutexWritersOnly(); } -TEST(Threading, PerfTestSharedMutexWritersOnlyCancellableDisabled) { PerfTestSharedMutexWritersOnly(); } -TEST(Threading, PerfTestSharedMutexWritersOnlyFast) { PerfTestSharedMutexWritersOnly(); } +TEST(Threading, PerfTestSharedMutexWritersOnlyCancelableEnabled) { PerfTestSharedMutexWritersOnly(); } +TEST(Threading, PerfTestSharedMutexWritersOnlyCancelableDisabled) { PerfTestSharedMutexWritersOnly(); } +TEST(Threading, PerfTestSharedMutexWritersOnlyFast) { PerfTestSharedMutexWritersOnly(); } TEST(Threading, PerfTestSharedMutexWritersOnlyStd) { 
PerfTestSharedMutexWritersOnly(); } -TEST(Threading, PerfTestSharedMutexRWCancellableEnabled) { PerfTestSharedMutexRW(); } -TEST(Threading, PerfTestSharedMutexRWCancellableDisabled) { PerfTestSharedMutexRW(); } -TEST(Threading, PerfTestSharedMutexRWFast) { PerfTestSharedMutexRW(); } +TEST(Threading, PerfTestSharedMutexRWCancelableEnabled) { PerfTestSharedMutexRW(); } +TEST(Threading, PerfTestSharedMutexRWCancelableDisabled) { PerfTestSharedMutexRW(); } +TEST(Threading, PerfTestSharedMutexRWFast) { PerfTestSharedMutexRW(); } TEST(Threading, PerfTestSharedMutexRWStd) { PerfTestSharedMutexRW(); } -#ifdef OS_LINUX /// These tests require cancellability +#ifdef OS_LINUX /// These tests require cancelability -TEST(Threading, SharedMutexCancelReaderCancellableEnabled) { TestSharedMutexCancelReader(); } -TEST(Threading, SharedMutexCancelWriterCancellableEnabled) { TestSharedMutexCancelWriter(); } +TEST(Threading, SharedMutexCancelReaderCancelableEnabled) { TestSharedMutexCancelReader(); } +TEST(Threading, SharedMutexCancelWriterCancelableEnabled) { TestSharedMutexCancelWriter(); } #endif From 534db794c1953e9fe89d2fae6517504ca86be93d Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 9 Jan 2023 15:05:41 +0000 Subject: [PATCH 127/262] more review fixes --- src/Common/CancelToken.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/CancelToken.cpp b/src/Common/CancelToken.cpp index 87bcdc26bd4..0d80bdcb5b2 100644 --- a/src/Common/CancelToken.cpp +++ b/src/Common/CancelToken.cpp @@ -138,7 +138,7 @@ bool CancelToken::wait(UInt32 * address, UInt32 value) void CancelToken::raise() { - std::unique_lock lock(signal_mutex); + std::unique_lock lock(signal_mutex); if (exception_code != 0) throw DB::Exception( std::exchange(exception_code, 0), @@ -167,7 +167,7 @@ std::mutex CancelToken::signal_mutex; void CancelToken::signalImpl(int code, const String & message) { // Serialize all signaling threads to avoid races due to concurrent signal()/raise() calls - std::unique_lock lock(signal_mutex); + std::unique_lock lock(signal_mutex); UInt64 s = state.load(); while (true) From 4236bc32ee29c34af6b6d25d4a64c547208fe3f7 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 22 Dec 2022 11:06:38 +0100 Subject: [PATCH 128/262] Analyzer duplicate alias crash fix --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 17 ++++------------- ...analyzer_duplicate_alias_crash_fix.reference | 0 ...02513_analyzer_duplicate_alias_crash_fix.sql | 4 ++++ 3 files changed, 8 insertions(+), 13 deletions(-) create mode 100644 tests/queries/0_stateless/02513_analyzer_duplicate_alias_crash_fix.reference create mode 100644 tests/queries/0_stateless/02513_analyzer_duplicate_alias_crash_fix.sql diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index e93548d34ed..d7a686d4dfa 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -2020,7 +2020,10 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveTableIdentifierFromDatabaseCatalog(con StorageID storage_id(database_name, table_name); storage_id = context->resolveStorageID(storage_id); - auto storage = DatabaseCatalog::instance().getTable(storage_id, context); + auto storage = DatabaseCatalog::instance().tryGetTable(storage_id, context); + if (!storage) + return {}; + auto storage_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); auto storage_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), context); 
@@ -4084,8 +4087,6 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi auto & in_second_argument = function_in_arguments_nodes[1]; auto * table_node = in_second_argument->as(); auto * table_function_node = in_second_argument->as(); - auto * query_node = in_second_argument->as(); - auto * union_node = in_second_argument->as(); if (table_node && dynamic_cast(table_node->getStorage().get()) != nullptr) { @@ -4118,16 +4119,6 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi in_second_argument = std::move(in_second_argument_query_node); } - else if (query_node || union_node) - { - IdentifierResolveScope subquery_scope(in_second_argument, &scope /*parent_scope*/); - subquery_scope.subquery_depth = scope.subquery_depth + 1; - - if (query_node) - resolveQuery(in_second_argument, subquery_scope); - else if (union_node) - resolveUnion(in_second_argument, subquery_scope); - } } /// Initialize function argument columns diff --git a/tests/queries/0_stateless/02513_analyzer_duplicate_alias_crash_fix.reference b/tests/queries/0_stateless/02513_analyzer_duplicate_alias_crash_fix.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02513_analyzer_duplicate_alias_crash_fix.sql b/tests/queries/0_stateless/02513_analyzer_duplicate_alias_crash_fix.sql new file mode 100644 index 00000000000..fb50ea2c4ca --- /dev/null +++ b/tests/queries/0_stateless/02513_analyzer_duplicate_alias_crash_fix.sql @@ -0,0 +1,4 @@ +SET allow_experimental_analyzer = 1; + +SELECT toUInt64(NULL) AS x FROM (SELECT 1) HAVING x IN + (SELECT NULL FROM (SELECT x IN (SELECT x IN (SELECT 1), x IN (SELECT 1) FROM (SELECT 1 WHERE x IN (SELECT NULL FROM (SELECT NULL)))))); From ae56ac1b56d3258ab7af1afaae1222c31367745a Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 9 Jan 2023 16:46:07 +0000 Subject: [PATCH 129/262] add TSA support --- src/Common/CancelableSharedMutex.h | 15 ++++++++------- src/Common/SharedMutex.h | 15 ++++++++------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/Common/CancelableSharedMutex.h b/src/Common/CancelableSharedMutex.h index f989e8d5beb..0e5f48b4a93 100644 --- a/src/Common/CancelableSharedMutex.h +++ b/src/Common/CancelableSharedMutex.h @@ -4,6 +4,7 @@ #include #include +#include #include #include // for std::unique_lock and std::shared_lock @@ -13,7 +14,7 @@ namespace DB // Reimplementation of `std::shared_mutex` that can interoperate with thread cancelation via `CancelToken::signal()`. // It has cancelation point on waiting during `lock()` and `shared_lock()`. // NOTE: It has NO cancelation points on fast code path, when locking does not require waiting. 
-class CancelableSharedMutex +class TSA_CAPABILITY("CancelableSharedMutex") CancelableSharedMutex { public: CancelableSharedMutex(); @@ -22,14 +23,14 @@ public: CancelableSharedMutex & operator=(const CancelableSharedMutex &) = delete; // Exclusive ownership - void lock(); - bool try_lock(); - void unlock(); + void lock() TSA_ACQUIRE(); + bool try_lock() TSA_TRY_ACQUIRE(true); + void unlock() TSA_RELEASE(); // Shared ownership - void lock_shared(); - bool try_lock_shared(); - void unlock_shared(); + void lock_shared() TSA_ACQUIRE_SHARED(); + bool try_lock_shared() TSA_TRY_ACQUIRE_SHARED(true); + void unlock_shared() TSA_RELEASE_SHARED(); private: // State 64-bits layout: diff --git a/src/Common/SharedMutex.h b/src/Common/SharedMutex.h index ebe730ca419..26c649c6fa8 100644 --- a/src/Common/SharedMutex.h +++ b/src/Common/SharedMutex.h @@ -3,6 +3,7 @@ #ifdef OS_LINUX /// Because of futex #include +#include #include #include // for std::unique_lock and std::shared_lock @@ -10,7 +11,7 @@ namespace DB { // Faster implementation of `std::shared_mutex` based on a pair of futexes -class SharedMutex +class TSA_CAPABILITY("SharedMutex") SharedMutex { public: SharedMutex(); @@ -19,14 +20,14 @@ public: SharedMutex & operator=(const SharedMutex &) = delete; // Exclusive ownership - void lock(); - bool try_lock(); - void unlock(); + void lock() TSA_ACQUIRE(); + bool try_lock() TSA_TRY_ACQUIRE(true); + void unlock() TSA_RELEASE(); // Shared ownership - void lock_shared(); - bool try_lock_shared(); - void unlock_shared(); + void lock_shared() TSA_ACQUIRE_SHARED(); + bool try_lock_shared() TSA_TRY_ACQUIRE_SHARED(true); + void unlock_shared() TSA_RELEASE_SHARED(); private: static constexpr UInt64 readers = (1ull << 32ull) - 1ull; // Lower 32 bits of state From d561f66419868928769e4dbf47b8d751e6d47e26 Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 9 Jan 2023 16:48:39 +0000 Subject: [PATCH 130/262] more TSA support --- base/base/defines.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/base/base/defines.h b/base/base/defines.h index 52310362991..a516e4f575c 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -144,6 +144,13 @@ # define TSA_REQUIRES_SHARED(...) __attribute__((requires_shared_capability(__VA_ARGS__))) /// thread needs shared possession of given capability # define TSA_ACQUIRED_AFTER(...) __attribute__((acquired_after(__VA_ARGS__))) /// annotated lock must be locked after given lock # define TSA_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) /// disable TSA for a function +# define TSA_CAPABILITY(...) __attribute__((capability(__VA_ARGS__))) /// object of a class can be used as capability +# define TSA_ACQUIRE(...) __attribute__((acquire_capability(__VA_ARGS__))) /// function acquires a capability, but does not release it +# define TSA_TRY_ACQUIRE(...) __attribute__((try_acquire_capability(__VA_ARGS__))) /// function tries to acquire a capability and returns a boolean value indicating success or failure +# define TSA_RELEASE(...) __attribute__((release_capability(__VA_ARGS__))) /// function releases the given capability +# define TSA_ACQUIRE_SHARED(...) __attribute__((acquire_shared_capability(__VA_ARGS__))) /// function acquires a shared capability, but does not release it +# define TSA_TRY_ACQUIRE_SHARED(...) __attribute__((try_acquire_shared_capability(__VA_ARGS__))) /// function tries to acquire a shared capability and returns a boolean value indicating success or failure +# define TSA_RELEASE_SHARED(...) 
__attribute__((release_shared_capability(__VA_ARGS__))) /// function releases the given shared capability /// Macros for suppressing TSA warnings for specific reads/writes (instead of suppressing it for the whole function) /// They use a lambda function to apply function attribute to a single statement. This enable us to suppress warnings locally instead of From b1407b1070e5eb44daac8cd8df40ff7d54e1d888 Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 10 Jan 2023 01:19:42 +0000 Subject: [PATCH 131/262] fix TSA support --- base/base/defines.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/base/base/defines.h b/base/base/defines.h index a516e4f575c..391e97ab406 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -171,6 +171,13 @@ # define TSA_REQUIRES(...) # define TSA_REQUIRES_SHARED(...) # define TSA_NO_THREAD_SAFETY_ANALYSIS +# define TSA_CAPABILITY(...) +# define TSA_ACQUIRE(...) +# define TSA_TRY_ACQUIRE(...) +# define TSA_RELEASE(...) +# define TSA_ACQUIRE_SHARED(...) +# define TSA_TRY_ACQUIRE_SHARED(...) +# define TSA_RELEASE_SHARED(...) # define TSA_SUPPRESS_WARNING_FOR_READ(x) (x) # define TSA_SUPPRESS_WARNING_FOR_WRITE(x) (x) From 0f80ad6e069a68117487f3108022d0bfe2abe4dd Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 10 Jan 2023 01:26:59 +0000 Subject: [PATCH 132/262] make style-check happy --- src/Common/CancelToken.cpp | 6 +++--- src/Common/CancelToken.h | 28 ++++++++++++++-------------- src/Common/CancelableSharedMutex.h | 6 +++--- src/Common/tests/gtest_threading.cpp | 10 +++++----- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/Common/CancelToken.cpp b/src/Common/CancelToken.cpp index 0d80bdcb5b2..f1d2b9d119f 100644 --- a/src/Common/CancelToken.cpp +++ b/src/Common/CancelToken.cpp @@ -112,7 +112,7 @@ bool CancelToken::wait(UInt32 * address, UInt32 value) // Start cancelable wait. Spurious wake-up is possible. futexWait(address, value); - // "Release" futex and check for cancelation + // "Release" futex and check for cancellation s = state.load(); while (true) { @@ -128,7 +128,7 @@ bool CancelToken::wait(UInt32 * address, UInt32 value) } } if (state.compare_exchange_strong(s, 0)) - return true; // There was no cancelation; futex "released" + return true; // There was no cancellation; futex "released" } // Reset signaled bit @@ -182,7 +182,7 @@ void CancelToken::signalImpl(int code, const String & message) exception_message = message; if ((s & disabled) == disabled) - return; // Cancelation is disabled - just signal token for later, but don't wake + return; // cancellation is disabled - just signal token for later, but don't wake std::atomic * address = reinterpret_cast *>(s & disabled); if (address == nullptr) return; // Thread is currently not waiting on futex - wake-up not required diff --git a/src/Common/CancelToken.h b/src/Common/CancelToken.h index 27b9d41f0f3..22afdfe38f4 100644 --- a/src/Common/CancelToken.h +++ b/src/Common/CancelToken.h @@ -15,8 +15,8 @@ namespace DB { -// Scoped object, enabling thread cancelation (cannot be nested). -// Intended to be used once per cancelable task. It erases any previously held cancelation signal. +// Scoped object, enabling thread cancellation (cannot be nested). +// Intended to be used once per cancelable task. It erases any previously held cancellation signal. // Note that by default thread is not cancelable. 
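For context, not part of the patch: the `TSA_*` capability macros added to `base/base/defines.h` feed clang's `-Wthread-safety` analysis, which is what the annotations on `SharedMutex` and `CancelableSharedMutex` above enable. A minimal sketch of a guarded counter; the include paths and the pre-existing `TSA_GUARDED_BY` macro are assumptions:

```cpp
#include <Common/SharedMutex.h>   // the mutex annotated in this patch (assumed include path)
#include <base/defines.h>         // TSA_* macros (assumed include path)

// Compile with clang and -Wthread-safety to see the diagnostics described in the comments.
class Counter
{
public:
    void increment()
    {
        mutex.lock();
        ++value;            // OK: the analysis sees the exclusive capability of `mutex` held here
        mutex.unlock();
    }

    void incrementBroken()
    {
        ++value;            // clang would warn: writing 'value' requires holding 'mutex' exclusively
    }

private:
    DB::SharedMutex mutex;
    int value TSA_GUARDED_BY(mutex) = 0;
};
```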
struct Cancelable { @@ -24,14 +24,14 @@ struct Cancelable ~Cancelable(); }; -// Scoped object, disabling thread cancelation (cannot be nested; must be inside `Cancelable` region) +// Scoped object, disabling thread cancellation (cannot be nested; must be inside `Cancelable` region) struct NonCancelable { NonCancelable(); ~NonCancelable(); }; -// Responsible for synchronization needed to deliver thread cancelation signal. +// Responsible for synchronization needed to deliver thread cancellation signal. // Basic building block for cancelable synchronization primitives. // Allows to perform cancelable wait on memory addresses (think futex) class CancelToken @@ -54,14 +54,14 @@ public: // Cancelable wait on memory address (futex word). // Thread will do atomic compare-and-sleep `*address == value`. Waiting will continue until `notify_one()` // or `notify_all()` will be called with the same `address` or calling thread will be canceled using `signal()`. - // Note that spurious wake-ups are also possible due to cancelation of other waiters on the same `address`. + // Note that spurious wake-ups are also possible due to cancellation of other waiters on the same `address`. // WARNING: `address` must be 2-byte aligned and `value` highest bit must be zero. // Return value: // true - woken by either notify or spurious wakeup; - // false - iff cancelation signal has been received. + // false - iff cancellation signal has been received. // Implementation details: - // It registers `address` inside token's `state` to allow other threads to wake this thread and deliver cancelation signal. - // Highest bit of `*address` is used for guaranteed delivery of the signal, but is guaranteed to be zero on return due to cancelation. + // It registers `address` inside token's `state` to allow other threads to wake this thread and deliver cancellation signal. + // Highest bit of `*address` is used for guaranteed delivery of the signal, but is guaranteed to be zero on return due to cancellation. // Intended to be called only by thread associated with this token. bool wait(UInt32 * address, UInt32 value); @@ -75,12 +75,12 @@ public: static void notifyAll(UInt32 * address); // Send cancel signal to thread with specified `tid`. - // If thread was waiting using `wait()` it will be woken up (unless cancelation is disabled). + // If thread was waiting using `wait()` it will be woken up (unless cancellation is disabled). // Can be called from any thread. static void signal(UInt64 tid); static void signal(UInt64 tid, int code, const String & message); - // Flag used to deliver cancelation into memory address to wake a thread. + // Flag used to deliver cancellation into memory address to wake a thread. // Note that most significant bit at `addresses` to be used with `wait()` is reserved. static constexpr UInt32 signaled = 1u << 31u; @@ -95,7 +95,7 @@ private: state.store(0); } - // Enable thread cancelation. See `NonCancelable` struct. + // Enable thread cancellation. See `NonCancelable` struct. // Intended to be called only by thread associated with this token. void enable() { @@ -103,7 +103,7 @@ private: state.fetch_and(~disabled); } - // Disable thread cancelation. See `NonCancelable` struct. + // Disable thread cancellation. See `NonCancelable` struct. // Intended to be called only by thread associated with this token. 
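Aside, not part of the patch: the `wait()` contract documented above (true on wake-up or spurious wake-up, false only when a cancellation signal was delivered) is meant to be used in a re-check loop, with `raise()` turning the delivered signal into an exception. A minimal sketch under assumed include paths, keeping the highest bit of the waited-on word clear as the warning requires:

```cpp
#include <Common/CancelToken.h>
#include <base/types.h>   // UInt32 (assumed)
#include <atomic>

// Block until `flag` becomes non-zero, or rethrow the cancellation signal.
// The calling thread should be inside a Cancelable scope for signals to interrupt the wait.
void waitForFlagOrCancel(std::atomic<UInt32> & flag)
{
    UInt32 observed = flag.load();
    while (observed == 0)
    {
        if (!DB::CancelToken::local().wait(reinterpret_cast<UInt32 *>(&flag), observed))
            DB::CancelToken::local().raise();   // cancellation received: throw the stored exception
        observed = flag.load();                 // spurious wake-ups are possible, so re-check
    }
}
```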
void disable() { @@ -143,11 +143,11 @@ private: // All signal handling logic should be globally serialized using this mutex static std::mutex signal_mutex; - // Cancelation state + // Cancellation state alignas(64) std::atomic state; [[maybe_unused]] char padding[64 - sizeof(state)]; - // Cancelation exception + // Cancellation exception int exception_code; String exception_message; diff --git a/src/Common/CancelableSharedMutex.h b/src/Common/CancelableSharedMutex.h index 0e5f48b4a93..dfd9631c564 100644 --- a/src/Common/CancelableSharedMutex.h +++ b/src/Common/CancelableSharedMutex.h @@ -11,9 +11,9 @@ namespace DB { -// Reimplementation of `std::shared_mutex` that can interoperate with thread cancelation via `CancelToken::signal()`. -// It has cancelation point on waiting during `lock()` and `shared_lock()`. -// NOTE: It has NO cancelation points on fast code path, when locking does not require waiting. +// Reimplementation of `std::shared_mutex` that can interoperate with thread cancellation via `CancelToken::signal()`. +// It has cancellation point on waiting during `lock()` and `shared_lock()`. +// NOTE: It has NO cancellation points on fast code path, when locking does not require waiting. class TSA_CAPABILITY("CancelableSharedMutex") CancelableSharedMutex { public: diff --git a/src/Common/tests/gtest_threading.cpp b/src/Common/tests/gtest_threading.cpp index a4ed7ad6642..8662e93e81b 100644 --- a/src/Common/tests/gtest_threading.cpp +++ b/src/Common/tests/gtest_threading.cpp @@ -154,9 +154,9 @@ void TestSharedMutexCancelReader() // This sync is crucial. It is needed to hold `lock` long enough. // It guarantees that every canceled thread will find `sm` blocked by writer, and thus will begin to wait. - // Wait() call is required for cancelation. Otherwise, fastpath acquire w/o wait will not generate exception. + // Wait() call is required for cancellation. Otherwise, fastpath acquire w/o wait will not generate exception. // And this is the desired behaviour. - cancel_sync.arrive_and_wait(); // (C) wait for cancelation to finish, before unlock. + cancel_sync.arrive_and_wait(); // (C) wait for cancellation to finish, before unlock. } } @@ -206,9 +206,9 @@ void TestSharedMutexCancelWriter() // This sync is crucial. It is needed to hold `lock` long enough. // It guarantees that every canceled thread will find `sm` blocked, and thus will begin to wait. - // Wait() call is required for cancelation. Otherwise, fastpath acquire w/o wait will not generate exception. + // Wait() call is required for cancellation. Otherwise, fastpath acquire w/o wait will not generate exception. // And this is the desired behaviour. - sync.arrive_and_wait(); // (B) wait for cancelation to finish, before unlock. + sync.arrive_and_wait(); // (B) wait for cancellation to finish, before unlock. 
} catch (DB::Exception & e) { @@ -363,7 +363,7 @@ TEST(Threading, PerfTestSharedMutexRWCancelableDisabled) { PerfTestSharedMutexRW TEST(Threading, PerfTestSharedMutexRWFast) { PerfTestSharedMutexRW(); } TEST(Threading, PerfTestSharedMutexRWStd) { PerfTestSharedMutexRW(); } -#ifdef OS_LINUX /// These tests require cancelability +#ifdef OS_LINUX /// These tests require cancellability TEST(Threading, SharedMutexCancelReaderCancelableEnabled) { TestSharedMutexCancelReader(); } TEST(Threading, SharedMutexCancelWriterCancelableEnabled) { TestSharedMutexCancelWriter(); } From 09c1cecb01f2b4ad339f4d0641351b3c0363ad3a Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 10 Jan 2023 10:56:13 +0800 Subject: [PATCH 133/262] fix build error --- src/Functions/dateDiff.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index 60668f81edf..d43ef2d4caf 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -225,8 +225,8 @@ public: } else if constexpr (std::is_same_v>>) { - auto x_day_of_week = TransformDateTime64(transform_x.getScaleMultiplier()).execute(x, timezone_x); - auto y_day_of_week = TransformDateTime64(transform_y.getScaleMultiplier()).execute(y, timezone_y); + auto x_day_of_week = TransformDateTime64(transform_x.getScaleMultiplier()).execute(x, 0, timezone_x); + auto y_day_of_week = TransformDateTime64(transform_y.getScaleMultiplier()).execute(y, 0, timezone_y); if ((x_day_of_week > y_day_of_week) || ((x_day_of_week == y_day_of_week) && (a_comp.time.hour > b_comp.time.hour)) || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) From 7aef7d95de16d6e69d603f14c2fae31d30f98e17 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 10 Jan 2023 09:15:07 +0000 Subject: [PATCH 134/262] Small fixes for keeper_map tests --- tests/integration/test_keeper_map/test.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_keeper_map/test.py b/tests/integration/test_keeper_map/test.py index 71f6343101a..d2a3d4f3748 100644 --- a/tests/integration/test_keeper_map/test.py +++ b/tests/integration/test_keeper_map/test.py @@ -47,13 +47,25 @@ def remove_children(client, path): def test_create_keeper_map(started_cluster): + node.query("DROP TABLE IF EXISTS test_keeper_map SYNC") + node.query("DROP TABLE IF EXISTS test_keeper_map_another SYNC") + node.query( "CREATE TABLE test_keeper_map (key UInt64, value UInt64) ENGINE = KeeperMap('/test1') PRIMARY KEY(key);" ) zk_client = get_genuine_zk() def assert_children_size(path, expected_size): - assert len(zk_client.get_children(path)) == expected_size + children_size = 0 + # 4 secs should be more than enough for replica to sync + for _ in range(10): + children_size = len(zk_client.get_children(path)) + if children_size == expected_size: + return + sleep(0.4) + assert ( + False + ), f"Invalid number of children for '{path}': actual {children_size}, expected {expected_size}" def assert_root_children_size(expected_size): assert_children_size("/test_keeper_map/test1", expected_size) @@ -138,6 +150,8 @@ def test_create_drop_keeper_map_concurrent(started_cluster): def test_keeper_map_without_zk(started_cluster): + node.query("DROP TABLE IF EXISTS test_keeper_map SYNC") + def assert_keeper_exception_after_partition(query): with PartitionManager() as pm: pm.drop_instance_zk_connections(node) From 74ba9d1f2bdced9ea69093f4745f44d1336c3298 Mon Sep 17 00:00:00 2001 From: Smita 
Kulkarni Date: Tue, 10 Jan 2023 10:15:38 +0100 Subject: [PATCH 135/262] Updated test to .sh to use unique database name - 40907 Parameterized views as table functions --- .../02428_parameterized_view.reference | 7 ++ .../0_stateless/02428_parameterized_view.sh | 88 +++++++++++++++++++ .../0_stateless/02428_parameterized_view.sql | 88 ------------------- 3 files changed, 95 insertions(+), 88 deletions(-) create mode 100755 tests/queries/0_stateless/02428_parameterized_view.sh delete mode 100644 tests/queries/0_stateless/02428_parameterized_view.sql diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference index da3ad8a9a3c..db3ffd0b01e 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.reference +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -1,5 +1,6 @@ 20 20 +ERROR 10 50 SELECT @@ -12,9 +13,15 @@ FROM FROM default.test_02428_Catalog WHERE Price = _CAST(10, \'UInt64\') ) AS test_02428_pv1 +ERROR +ERROR +ERROR 50 +ERROR 10 +ERROR 20 +ERROR 30 20 30 diff --git a/tests/queries/0_stateless/02428_parameterized_view.sh b/tests/queries/0_stateless/02428_parameterized_view.sh new file mode 100755 index 00000000000..44c1976a654 --- /dev/null +++ b/tests/queries/0_stateless/02428_parameterized_view.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +export CLICKHOUSE_TEST_UNIQUE_NAME="${CLICKHOUSE_TEST_NAME}_${CLICKHOUSE_DATABASE}" + +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv1" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv2" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv3" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv4" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv5" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv6" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv7" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_v1" +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_02428_Catalog" +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ${CLICKHOUSE_TEST_UNIQUE_NAME}.pv1" +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog" +$CLICKHOUSE_CLIENT -q "DROP DATABASE IF EXISTS ${CLICKHOUSE_TEST_UNIQUE_NAME}" + +$CLICKHOUSE_CLIENT -q "CREATE TABLE test_02428_Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory" + +$CLICKHOUSE_CLIENT -q "INSERT INTO test_02428_Catalog VALUES ('Pen', 10, 3)" +$CLICKHOUSE_CLIENT -q "INSERT INTO test_02428_Catalog VALUES ('Book', 50, 2)" +$CLICKHOUSE_CLIENT -q "INSERT INTO test_02428_Catalog VALUES ('Paper', 20, 1)" + +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv1 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64}" +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv1(price=20)" +$CLICKHOUSE_CLIENT -q "SELECT Price FROM \`test_02428_pv1\`(price=20)" + +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv1" 2>&1 | grep -Fq "UNKNOWN_QUERY_PARAMETER" && echo 'ERROR' || echo 'OK' +$CLICKHOUSE_CLIENT --param_p 10 -q "SELECT Price FROM test_02428_pv1(price={p:UInt64})" + +$CLICKHOUSE_CLIENT --param_l 1 -q "SELECT Price FROM test_02428_pv1(price=50) LIMIT ({l:UInt64})" +$CLICKHOUSE_CLIENT -q "DETACH TABLE test_02428_pv1" +$CLICKHOUSE_CLIENT -q "ATTACH TABLE test_02428_pv1" + +$CLICKHOUSE_CLIENT -q "EXPLAIN SYNTAX SELECT * from test_02428_pv1(price=10)" + +$CLICKHOUSE_CLIENT -q "INSERT INTO test_02428_pv1 VALUES 
('Bag', 50, 2)" 2>&1 | grep -Fq "NOT_IMPLEMENTED" && echo 'ERROR' || echo 'OK' + +$CLICKHOUSE_CLIENT -q "SELECT Price FROM pv123(price=20)" 2>&1 | grep -Fq "UNKNOWN_FUNCTION" && echo 'ERROR' || echo 'OK' + +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_v1 AS SELECT * FROM test_02428_Catalog WHERE Price=10" + +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_v1(price=10)" 2>&1 | grep -Fq "UNKNOWN_FUNCTION" && echo 'ERROR' || echo 'OK' + +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv2 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64}" + +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv2(price=50,quantity=2)" +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv2(price=50)" 2>&1 | grep -Fq "UNKNOWN_QUERY_PARAMETER" && echo 'ERROR' || echo 'OK' + +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv3 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} AND Quantity=3" +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv3(price=10)" + +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv4 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} AND Quantity={price:UInt64}" 2>&1 | grep -Fq "DUPLICATE_COLUMN" && echo 'ERROR' || echo 'OK' + +$CLICKHOUSE_CLIENT -q "CREATE DATABASE ${CLICKHOUSE_TEST_UNIQUE_NAME}" +$CLICKHOUSE_CLIENT -q "CREATE TABLE ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory" +$CLICKHOUSE_CLIENT -q "INSERT INTO ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog VALUES ('Pen', 10, 3)" +$CLICKHOUSE_CLIENT -q "INSERT INTO ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog VALUES ('Book', 50, 2)" +$CLICKHOUSE_CLIENT -q "INSERT INTO ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog VALUES ('Paper', 20, 1)" +$CLICKHOUSE_CLIENT -q "CREATE VIEW ${CLICKHOUSE_TEST_UNIQUE_NAME}.pv1 AS SELECT * FROM ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog WHERE Price={price:UInt64}" +$CLICKHOUSE_CLIENT -q "SELECT Price FROM ${CLICKHOUSE_TEST_UNIQUE_NAME}.pv1(price=20)" +$CLICKHOUSE_CLIENT -q "SELECT Price FROM \`${CLICKHOUSE_TEST_UNIQUE_NAME}.pv1\`(price=20)" 2>&1 | grep -Fq "UNKNOWN_FUNCTION" && echo 'ERROR' || echo 'OK' + + +$CLICKHOUSE_CLIENT -q "INSERT INTO test_02428_Catalog VALUES ('Book2', 30, 8)" +$CLICKHOUSE_CLIENT -q "INSERT INTO test_02428_Catalog VALUES ('Book3', 30, 8)" +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv5 AS SELECT Price FROM test_02428_Catalog WHERE {price:UInt64} HAVING Quantity in (SELECT {quantity:UInt64}) LIMIT {limit:UInt64}" +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv5(price=30, quantity=8, limit=1)" +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv6 AS SELECT Price+{price:UInt64} FROM test_02428_Catalog GROUP BY Price+{price:UInt64} ORDER BY Price+{price:UInt64}" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02428_pv6(price=10)" +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv7 AS SELECT Price/{price:UInt64} FROM test_02428_Catalog ORDER BY Price" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02428_pv7(price=10)" + +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv1" +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv2" +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv3" +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv5" +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv6" +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv7" +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_v1" +$CLICKHOUSE_CLIENT -q "DROP TABLE test_02428_Catalog" +$CLICKHOUSE_CLIENT -q "DROP TABLE ${CLICKHOUSE_TEST_UNIQUE_NAME}.pv1" +$CLICKHOUSE_CLIENT -q "DROP TABLE ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog" +$CLICKHOUSE_CLIENT -q 
"DROP DATABASE ${CLICKHOUSE_TEST_UNIQUE_NAME}" \ No newline at end of file diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql deleted file mode 100644 index fbc1d8b2970..00000000000 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ /dev/null @@ -1,88 +0,0 @@ -DROP VIEW IF EXISTS test_02428_pv1; -DROP VIEW IF EXISTS test_02428_pv2; -DROP VIEW IF EXISTS test_02428_pv3; -DROP VIEW IF EXISTS test_02428_pv4; -DROP VIEW IF EXISTS test_02428_pv5; -DROP VIEW IF EXISTS test_02428_pv6; -DROP VIEW IF EXISTS test_02428_pv7; -DROP VIEW IF EXISTS test_02428_v1; -DROP TABLE IF EXISTS test_02428_Catalog; -DROP TABLE IF EXISTS db_02428.pv1; -DROP TABLE IF EXISTS db_02428.Catalog; -DROP DATABASE IF EXISTS db_02428; - -CREATE TABLE test_02428_Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; - -INSERT INTO test_02428_Catalog VALUES ('Pen', 10, 3); -INSERT INTO test_02428_Catalog VALUES ('Book', 50, 2); -INSERT INTO test_02428_Catalog VALUES ('Paper', 20, 1); - -CREATE VIEW test_02428_pv1 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64}; -SELECT Price FROM test_02428_pv1(price=20); -SELECT Price FROM `test_02428_pv1`(price=20); - -set param_p=10; -SELECT Price FROM test_02428_pv1; -- { serverError UNKNOWN_QUERY_PARAMETER} -SELECT Price FROM test_02428_pv1(price={p:UInt64}); - -set param_l=1; -SELECT Price FROM test_02428_pv1(price=50) LIMIT ({l:UInt64}); - -DETACH TABLE test_02428_pv1; -ATTACH TABLE test_02428_pv1; - -EXPLAIN SYNTAX SELECT * from test_02428_pv1(price=10); - -INSERT INTO test_02428_pv1 VALUES ('Bag', 50, 2); -- { serverError NOT_IMPLEMENTED} - -SELECT Price FROM pv123(price=20); -- { serverError UNKNOWN_FUNCTION } - -CREATE VIEW test_02428_v1 AS SELECT * FROM test_02428_Catalog WHERE Price=10; - -SELECT Price FROM test_02428_v1(price=10); -- { serverError UNKNOWN_FUNCTION } - -CREATE VIEW test_02428_pv2 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64}; -SELECT Price FROM test_02428_pv2(price=50,quantity=2); - -SELECT Price FROM test_02428_pv2(price=50); -- { serverError UNKNOWN_QUERY_PARAMETER} - -CREATE VIEW test_02428_pv3 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} AND Quantity=3; -SELECT Price FROM test_02428_pv3(price=10); - -CREATE VIEW test_02428_pv4 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} AND Quantity={price:UInt64}; -- {serverError DUPLICATE_COLUMN} - -CREATE DATABASE db_02428; - -CREATE TABLE db_02428.Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; - -INSERT INTO db_02428.Catalog VALUES ('Pen', 10, 3); -INSERT INTO db_02428.Catalog VALUES ('Book', 50, 2); -INSERT INTO db_02428.Catalog VALUES ('Paper', 20, 1); - -CREATE VIEW db_02428.pv1 AS SELECT * FROM db_02428.Catalog WHERE Price={price:UInt64}; -SELECT Price FROM db_02428.pv1(price=20); -SELECT Price FROM `db_02428.pv1`(price=20); -- { serverError UNKNOWN_FUNCTION } - -INSERT INTO test_02428_Catalog VALUES ('Book2', 30, 8); -INSERT INTO test_02428_Catalog VALUES ('Book3', 30, 8); - -CREATE VIEW test_02428_pv5 AS SELECT Price FROM test_02428_Catalog WHERE {price:UInt64} HAVING Quantity in (SELECT {quantity:UInt64}) LIMIT {limit:UInt64}; -SELECT Price FROM test_02428_pv5(price=30, quantity=8,limit=1); - -CREATE VIEW test_02428_pv6 AS SELECT Price+{price:UInt64} FROM test_02428_Catalog GROUP BY Price+{price:UInt64} ORDER BY Price+{price:UInt64}; -SELECT * FROM test_02428_pv6(price=10); - -CREATE VIEW 
test_02428_pv7 AS SELECT Price/{price:UInt64} FROM test_02428_Catalog ORDER BY Price; -SELECT * FROM test_02428_pv7(price=10); - -DROP VIEW test_02428_pv1; -DROP VIEW test_02428_pv2; -DROP VIEW test_02428_pv3; -DROP VIEW test_02428_pv5; -DROP VIEW test_02428_pv6; -DROP VIEW test_02428_pv7; -DROP VIEW test_02428_v1; -DROP TABLE test_02428_Catalog; -DROP TABLE db_02428.pv1; -DROP TABLE db_02428.Catalog; -DROP DATABASE db_02428; \ No newline at end of file From 613ff3387dc885677f61d2a4250100c91dc06425 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 9 Jan 2023 16:33:26 +0100 Subject: [PATCH 136/262] Better --- src/Storages/FileLog/StorageFileLog.cpp | 30 +++++++++++++++++++------ 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index b1b54a1700a..5835dc3294f 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -47,6 +47,8 @@ namespace const auto MAX_THREAD_WORK_DURATION_MS = 60000; } +static constexpr auto TMP_SUFFIX = ".tmp"; + StorageFileLog::StorageFileLog( const StorageID & table_id_, ContextPtr context_, @@ -234,23 +236,24 @@ void StorageFileLog::serialize(UInt64 inode, const FileMeta & file_meta) const { checkOffsetIsValid(file_meta.file_name, file_meta.last_writen_position); } - else - { - disk->createFile(full_path); - } + + std::string tmp_path = full_path + TMP_SUFFIX; + disk->removeFileIfExists(tmp_path); try { - auto out = disk->writeFile(full_path); + disk->createFile(tmp_path); + auto out = disk->writeFile(tmp_path); writeIntText(inode, *out); writeChar('\n', *out); writeIntText(file_meta.last_writen_position, *out); } catch (...) { - disk->removeFile(full_path); + disk->removeFileIfExists(tmp_path); throw; } + disk->replaceFile(tmp_path, full_path); } void StorageFileLog::deserialize() @@ -258,15 +261,28 @@ void StorageFileLog::deserialize() if (!disk->exists(metadata_base_path)) return; + std::vector files_to_remove; + /// In case of single file (not a watched directory), /// iterated directory always has one file inside. 
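Aside, not part of the patch: the `StorageFileLog::serialize()` change above switches to the usual safe-write pattern, writing to a `.tmp` sibling and then replacing the target, expressed there through the `IDisk` interface. A generic standalone sketch of the same pattern with the standard library, for illustration only:

```cpp
#include <filesystem>
#include <fstream>
#include <string>

// Write `contents` to `path` so that readers never observe a half-written file:
// on failure the old file (if any) is left untouched, on success it is replaced.
// A fully crash-safe variant would also fsync the file and directory before the rename.
void atomicWrite(const std::filesystem::path & path, const std::string & contents)
{
    const auto tmp_path = path.string() + ".tmp";
    {
        std::ofstream out(tmp_path, std::ios::binary | std::ios::trunc);
        out.exceptions(std::ios::failbit | std::ios::badbit);
        out << contents;
        out.flush();
    }
    // rename() within one filesystem replaces the destination atomically on POSIX.
    std::filesystem::rename(tmp_path, path);
}
```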
for (const auto dir_iter = disk->iterateDirectory(metadata_base_path); dir_iter->isValid(); dir_iter->next()) { - auto [metadata, inode] = readMetadata(dir_iter->name()); + const auto & filename = dir_iter->name(); + if (filename.ends_with(TMP_SUFFIX)) + { + files_to_remove.push_back(getFullMetaPath(filename)); + continue; + } + + auto [metadata, inode] = readMetadata(filename); if (!metadata) continue; + file_infos.meta_by_inode.emplace(inode, metadata); } + + for (const auto & file : files_to_remove) + disk->removeFile(file); } UInt64 StorageFileLog::getInode(const String & file_name) From c95925a5abae54412ea20746beae712d838d2825 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 10 Jan 2023 11:16:29 +0100 Subject: [PATCH 137/262] Fixed tests --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 51 ++++++++++++------- .../02337_analyzer_columns_basic.sql | 2 +- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index d7a686d4dfa..4aa6422b6b8 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -2870,7 +2870,10 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifierInParentScopes(const if (resolved_identifier) { - bool is_cte = resolved_identifier->as() && resolved_identifier->as()->isCTE(); + auto * subquery_node = resolved_identifier->as(); + auto * union_node = resolved_identifier->as(); + + bool is_cte = (subquery_node && subquery_node->isCTE()) || (union_node && union_node->isCTE()); /** From parent scopes we can resolve table identifiers only as CTE. * Example: SELECT (SELECT 1 FROM a) FROM test_table AS a; @@ -4119,6 +4122,10 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi in_second_argument = std::move(in_second_argument_query_node); } + else + { + resolveExpressionNode(in_second_argument, scope, false /*allow_lambda_expression*/, true /*allow_table_expression*/); + } } /// Initialize function argument columns @@ -4708,13 +4715,29 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id { node = tryResolveIdentifier({unresolved_identifier, IdentifierLookupContext::TABLE_EXPRESSION}, scope).resolved_identifier; - /// If table identifier is resolved as CTE clone it - bool resolved_as_cte = node && node->as() && node->as()->isCTE(); + /// If table identifier is resolved as CTE clone it and resolve + auto * subquery_node = node->as(); + auto * union_node = node->as(); + bool resolved_as_cte = (subquery_node && subquery_node->isCTE()) || (union_node && union_node->isCTE()); if (resolved_as_cte) { node = node->clone(); - node->as().setIsCTE(false); + subquery_node = node->as(); + union_node = node->as(); + + if (subquery_node) + subquery_node->setIsCTE(false); + else + union_node->setIsCTE(false); + + IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/); + subquery_scope.subquery_depth = scope.subquery_depth + 1; + + if (subquery_node) + resolveQuery(node, subquery_scope); + else + resolveUnion(node, subquery_scope); } } @@ -4830,6 +4853,9 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/); subquery_scope.subquery_depth = scope.subquery_depth + 1; + ++subquery_counter; + std::string projection_name = "_subquery_" + std::to_string(subquery_counter); + if (node_type == QueryTreeNodeType::QUERY) resolveQuery(node, subquery_scope); else @@ -4838,9 +4864,8 @@ 
ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id if (!allow_table_expression) evaluateScalarSubqueryIfNeeded(node, subquery_scope.subquery_depth, subquery_scope.context); - ++subquery_counter; if (result_projection_names.empty()) - result_projection_names.push_back("_subquery_" + std::to_string(subquery_counter)); + result_projection_names.push_back(std::move(projection_name)); break; } @@ -5187,11 +5212,6 @@ void QueryAnalyzer::initializeQueryJoinTreeNode(QueryTreeNodePtr & join_tree_nod if (resolved_identifier_query_node || resolved_identifier_union_node) { - if (resolved_identifier_query_node) - resolved_identifier_query_node->setIsCTE(false); - else - resolved_identifier_union_node->setIsCTE(false); - if (table_expression_modifiers.has_value()) { throw Exception(ErrorCodes::UNSUPPORTED_METHOD, @@ -5428,14 +5448,7 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node, [[fallthrough]]; case QueryTreeNodeType::UNION: { - IdentifierResolveScope subquery_scope(join_tree_node, &scope); - subquery_scope.subquery_depth = scope.subquery_depth + 1; - - if (from_node_type == QueryTreeNodeType::QUERY) - resolveQuery(join_tree_node, subquery_scope); - else if (from_node_type == QueryTreeNodeType::UNION) - resolveUnion(join_tree_node, subquery_scope); - + resolveExpressionNode(join_tree_node, scope, false /*allow_lambda_expression*/, true /*allow_table_expression*/); break; } case QueryTreeNodeType::TABLE_FUNCTION: diff --git a/tests/queries/0_stateless/02337_analyzer_columns_basic.sql b/tests/queries/0_stateless/02337_analyzer_columns_basic.sql index 76f9f8b25e4..368a5670d17 100644 --- a/tests/queries/0_stateless/02337_analyzer_columns_basic.sql +++ b/tests/queries/0_stateless/02337_analyzer_columns_basic.sql @@ -31,7 +31,7 @@ INSERT INTO test_table VALUES (0, 'Value'); SELECT 'Table access without table name qualification'; SELECT test_id FROM test_table; -- { serverError 47 } -SELECT test_id FROM test_unknown_table; -- { serverError 60 } +SELECT test_id FROM test_unknown_table; -- { serverError 47 } DESCRIBE (SELECT id FROM test_table); SELECT id FROM test_table; From d7ca0c04fac6a421c4e0570bae85dd38d9ea153a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 10 Jan 2023 11:35:02 +0100 Subject: [PATCH 138/262] remove drop if exists --- tests/integration/test_keeper_map/test.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/integration/test_keeper_map/test.py b/tests/integration/test_keeper_map/test.py index d2a3d4f3748..859481de188 100644 --- a/tests/integration/test_keeper_map/test.py +++ b/tests/integration/test_keeper_map/test.py @@ -47,9 +47,6 @@ def remove_children(client, path): def test_create_keeper_map(started_cluster): - node.query("DROP TABLE IF EXISTS test_keeper_map SYNC") - node.query("DROP TABLE IF EXISTS test_keeper_map_another SYNC") - node.query( "CREATE TABLE test_keeper_map (key UInt64, value UInt64) ENGINE = KeeperMap('/test1') PRIMARY KEY(key);" ) @@ -150,8 +147,6 @@ def test_create_drop_keeper_map_concurrent(started_cluster): def test_keeper_map_without_zk(started_cluster): - node.query("DROP TABLE IF EXISTS test_keeper_map SYNC") - def assert_keeper_exception_after_partition(query): with PartitionManager() as pm: pm.drop_instance_zk_connections(node) From 134cc3e2736887cc41e5d4bfb7914031e774da00 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 10 Jan 2023 14:16:28 +0100 Subject: [PATCH 139/262] Change table name --- tests/integration/test_keeper_map/test.py | 22 +++++++++++----------- 1 
file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/integration/test_keeper_map/test.py b/tests/integration/test_keeper_map/test.py index 859481de188..2e80ada963f 100644 --- a/tests/integration/test_keeper_map/test.py +++ b/tests/integration/test_keeper_map/test.py @@ -154,35 +154,35 @@ def test_keeper_map_without_zk(started_cluster): assert "Coordination::Exception" in error assert_keeper_exception_after_partition( - "CREATE TABLE test_keeper_map (key UInt64, value UInt64) ENGINE = KeeperMap('/test1') PRIMARY KEY(key);" + "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_without_zk') PRIMARY KEY(key);" ) node.query( - "CREATE TABLE test_keeper_map (key UInt64, value UInt64) ENGINE = KeeperMap('/test1') PRIMARY KEY(key);" + "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_without_zk') PRIMARY KEY(key);" ) assert_keeper_exception_after_partition( - "INSERT INTO test_keeper_map VALUES (1, 11)" + "INSERT INTO test_keeper_map_without_zk VALUES (1, 11)" ) - node.query("INSERT INTO test_keeper_map VALUES (1, 11)") + node.query("INSERT INTO test_keeper_map_without_zk VALUES (1, 11)") - assert_keeper_exception_after_partition("SELECT * FROM test_keeper_map") - node.query("SELECT * FROM test_keeper_map") + assert_keeper_exception_after_partition("SELECT * FROM test_keeper_map_without_zk") + node.query("SELECT * FROM test_keeper_map_without_zk") with PartitionManager() as pm: pm.drop_instance_zk_connections(node) node.restart_clickhouse(60) - error = node.query_and_get_error("SELECT * FROM test_keeper_map") + error = node.query_and_get_error("SELECT * FROM test_keeper_map_without_zk") assert "Failed to activate table because of connection issues" in error - node.query("SELECT * FROM test_keeper_map") + node.query("SELECT * FROM test_keeper_map_without_zk") client = get_genuine_zk() - remove_children(client, "/test_keeper_map/test1") + remove_children(client, "/test_keeper_map/test_without_zk") node.restart_clickhouse(60) - error = node.query_and_get_error("SELECT * FROM test_keeper_map") + error = node.query_and_get_error("SELECT * FROM test_keeper_map_without_zk") assert "Failed to activate table because of invalid metadata in ZooKeeper" in error - node.query("DETACH TABLE test_keeper_map") + node.query("DETACH TABLE test_keeper_map_without_zk") client.stop() From 4673b3fe1de44a030ca53ced88bd8d0efe9f94d6 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 10 Jan 2023 16:31:01 +0100 Subject: [PATCH 140/262] Revert "Revert "Custom reading for mutation"" --- src/Interpreters/Context.h | 6 - src/Interpreters/MutationsInterpreter.cpp | 418 +++++++++++++----- src/Interpreters/MutationsInterpreter.h | 72 ++- .../QueryPlan/ReadFromMergeTree.cpp | 1 - .../Sources/ThrowingExceptionSource.h | 32 ++ src/Storages/IStorage.h | 2 + src/Storages/MergeTree/MergeTreeData.h | 2 + .../MergeTree/MergeTreeDataSelectExecutor.h | 14 +- .../MergeTree/MergeTreeSequentialSource.cpp | 112 ++++- .../MergeTree/MergeTreeSequentialSource.h | 13 + src/Storages/MergeTree/MutateTask.cpp | 9 +- 11 files changed, 539 insertions(+), 142 deletions(-) create mode 100644 src/Processors/Sources/ThrowingExceptionSource.h diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 4b7d0685ba3..58478ab79b8 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -377,9 +377,6 @@ private: inline static ContextPtr global_context_instance; - /// A flag, used to mark if reader needs to apply deleted rows 
mask. - bool apply_deleted_mask = true; - /// Temporary data for query execution accounting. TemporaryDataOnDiskScopePtr temp_data_on_disk; public: @@ -973,9 +970,6 @@ public: bool isInternalQuery() const { return is_internal_query; } void setInternalQuery(bool internal) { is_internal_query = internal; } - bool applyDeletedMask() const { return apply_deleted_mask; } - void setApplyDeletedMask(bool apply) { apply_deleted_mask = apply; } - ActionLocksManagerPtr getActionLocksManager() const; enum class ApplicationType diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 3960e0759d6..f8627f1ff85 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -30,6 +30,9 @@ #include #include #include +#include +#include +#include namespace DB @@ -190,7 +193,8 @@ ColumnDependencies getAllColumnDependencies(const StorageMetadataPtr & metadata_ bool isStorageTouchedByMutations( - const StoragePtr & storage, + MergeTreeData & storage, + MergeTreeData::DataPartPtr source_part, const StorageMetadataPtr & metadata_snapshot, const std::vector & commands, ContextMutablePtr context_copy) @@ -199,19 +203,15 @@ bool isStorageTouchedByMutations( return false; bool all_commands_can_be_skipped = true; - auto storage_from_merge_tree_data_part = std::dynamic_pointer_cast(storage); for (const MutationCommand & command : commands) { if (!command.predicate) /// The command touches all rows. return true; - if (command.partition && !storage_from_merge_tree_data_part) - throw Exception("ALTER UPDATE/DELETE ... IN PARTITION is not supported for non-MergeTree tables", ErrorCodes::NOT_IMPLEMENTED); - - if (command.partition && storage_from_merge_tree_data_part) + if (command.partition) { - const String partition_id = storage_from_merge_tree_data_part->getPartitionIDFromQuery(command.partition, context_copy); - if (partition_id == storage_from_merge_tree_data_part->getPartitionId()) + const String partition_id = storage.getPartitionIDFromQuery(command.partition, context_copy); + if (partition_id == source_part->info.partition_id) all_commands_can_be_skipped = false; } else @@ -229,13 +229,15 @@ bool isStorageTouchedByMutations( context_copy->setSetting("allow_asynchronous_read_from_io_pool_for_merge_tree", false); context_copy->setSetting("max_streams_for_merge_tree_reading", Field(0)); - ASTPtr select_query = prepareQueryAffectedAST(commands, storage, context_copy); + ASTPtr select_query = prepareQueryAffectedAST(commands, storage.shared_from_this(), context_copy); + + auto storage_from_part = std::make_shared(source_part); /// Interpreter must be alive, when we use result of execute() method. /// For some reason it may copy context and give it into ExpressionTransform /// after that we will use context from destroyed stack frame in our stream. InterpreterSelectQuery interpreter( - select_query, context_copy, storage, metadata_snapshot, SelectQueryOptions().ignoreLimits().ignoreProjections()); + select_query, context_copy, storage_from_part, metadata_snapshot, SelectQueryOptions().ignoreLimits().ignoreProjections()); auto io = interpreter.execute(); PullingPipelineExecutor executor(io.pipeline); @@ -288,6 +290,57 @@ ASTPtr getPartitionAndPredicateExpressionForMutationCommand( return command.predicate ? 
command.predicate->clone() : partition_predicate_as_ast_func; } +MutationsInterpreter::Source::Source(StoragePtr storage_) : storage(std::move(storage_)) +{ +} + +MutationsInterpreter::Source::Source(MergeTreeData & storage_, MergeTreeData::DataPartPtr source_part_) + : data(&storage_), part(std::move(source_part_)) +{ +} + +StorageSnapshotPtr MutationsInterpreter::Source::getStorageSnapshot(const StorageMetadataPtr & snapshot_, const ContextPtr & context_) const +{ + if (data) + return data->getStorageSnapshot(snapshot_, context_); + + return storage->getStorageSnapshot(snapshot_, context_); +} + +StoragePtr MutationsInterpreter::Source::getStorage() const +{ + if (data) + return data->shared_from_this(); + + return storage; +} + +const MergeTreeData * MutationsInterpreter::Source::getMergeTreeData() const +{ + if (data) + return data; + + return dynamic_cast(storage.get()); +} + +bool MutationsInterpreter::Source::supportsLightweightDelete() const +{ + if (part) + return part->supportLightweightDeleteMutate(); + + return storage->supportsLightweightDelete(); +} + + +bool MutationsInterpreter::Source::hasLightweightDeleteMask() const +{ + return part && part->hasLightweightDelete(); +} + +bool MutationsInterpreter::Source::materializeTTLRecalculateOnly() const +{ + return data && data->getSettings()->materialize_ttl_recalculate_only; +} MutationsInterpreter::MutationsInterpreter( StoragePtr storage_, @@ -297,7 +350,45 @@ MutationsInterpreter::MutationsInterpreter( bool can_execute_, bool return_all_columns_, bool return_deleted_rows_) - : storage(std::move(storage_)) + : MutationsInterpreter( + Source(std::move(storage_)), + metadata_snapshot_, std::move(commands_), std::move(context_), + can_execute_, return_all_columns_, return_deleted_rows_) +{ + if (can_execute_ && dynamic_cast(source.getStorage().get())) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot execute mutation for {}. 
Mutation should be applied to every part separately.", + source.getStorage()->getName()); + } +} + +MutationsInterpreter::MutationsInterpreter( + MergeTreeData & storage_, + MergeTreeData::DataPartPtr source_part_, + const StorageMetadataPtr & metadata_snapshot_, + MutationCommands commands_, + ContextPtr context_, + bool can_execute_, + bool return_all_columns_, + bool return_deleted_rows_) + : MutationsInterpreter( + Source(storage_, std::move(source_part_)), + metadata_snapshot_, std::move(commands_), std::move(context_), + can_execute_, return_all_columns_, return_deleted_rows_) +{ +} + +MutationsInterpreter::MutationsInterpreter( + Source source_, + const StorageMetadataPtr & metadata_snapshot_, + MutationCommands commands_, + ContextPtr context_, + bool can_execute_, + bool return_all_columns_, + bool return_deleted_rows_) + : source(std::move(source_)) , metadata_snapshot(metadata_snapshot_) , commands(std::move(commands_)) , context(Context::createCopy(context_)) @@ -306,12 +397,12 @@ MutationsInterpreter::MutationsInterpreter( , return_all_columns(return_all_columns_) , return_deleted_rows(return_deleted_rows_) { - mutation_ast = prepare(!can_execute); + prepare(!can_execute); } -static NameSet getKeyColumns(const StoragePtr & storage, const StorageMetadataPtr & metadata_snapshot) +static NameSet getKeyColumns(const MutationsInterpreter::Source & source, const StorageMetadataPtr & metadata_snapshot) { - const MergeTreeData * merge_tree_data = dynamic_cast(storage.get()); + const MergeTreeData * merge_tree_data = source.getMergeTreeData(); if (!merge_tree_data) return {}; @@ -333,21 +424,12 @@ static NameSet getKeyColumns(const StoragePtr & storage, const StorageMetadataPt return key_columns; } -static bool materializeTTLRecalculateOnly(const StoragePtr & storage) -{ - auto storage_from_merge_tree_data_part = std::dynamic_pointer_cast(storage); - if (!storage_from_merge_tree_data_part) - return false; - - return storage_from_merge_tree_data_part->materializeTTLRecalculateOnly(); -} - static void validateUpdateColumns( - const StoragePtr & storage, + const MutationsInterpreter::Source & source, const StorageMetadataPtr & metadata_snapshot, const NameSet & updated_columns, const std::unordered_map & column_to_affected_materialized) { - NameSet key_columns = getKeyColumns(storage, metadata_snapshot); + NameSet key_columns = getKeyColumns(source, metadata_snapshot); for (const String & column_name : updated_columns) { @@ -364,7 +446,7 @@ static void validateUpdateColumns( /// Allow to override value of lightweight delete filter virtual column if (!found && column_name == LightweightDeleteDescription::FILTER_COLUMN.name) { - if (!storage->supportsLightweightDelete()) + if (!source.supportsLightweightDelete()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table"); found = true; } @@ -427,7 +509,7 @@ static std::optional> getExpressionsOfUpdatedNestedSubcolumn return res; } -ASTPtr MutationsInterpreter::prepare(bool dry_run) +void MutationsInterpreter::prepare(bool dry_run) { if (is_prepared) throw Exception("MutationsInterpreter is already prepared. 
It is a bug.", ErrorCodes::LOGICAL_ERROR); @@ -448,7 +530,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) } NameSet updated_columns; - bool materialize_ttl_recalculate_only = materializeTTLRecalculateOnly(storage); + bool materialize_ttl_recalculate_only = source.materializeTTLRecalculateOnly(); for (const MutationCommand & command : commands) { @@ -481,7 +563,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) } } - validateUpdateColumns(storage, metadata_snapshot, updated_columns, column_to_affected_materialized); + validateUpdateColumns(source, metadata_snapshot, updated_columns, column_to_affected_materialized); } dependencies = getAllColumnDependencies(metadata_snapshot, updated_columns); @@ -778,15 +860,10 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) stages_copy.back().filters = stage.filters; } - const ASTPtr select_query = prepareInterpreterSelectQuery(stages_copy, /* dry_run = */ true); - InterpreterSelectQuery interpreter{ - select_query, context, storage, metadata_snapshot, - SelectQueryOptions().analyze(/* dry_run = */ false).ignoreLimits().ignoreProjections()}; + prepareMutationStages(stages_copy, true); - auto first_stage_header = interpreter.getSampleBlock(); QueryPlan plan; - auto source = std::make_shared(first_stage_header); - plan.addStep(std::make_unique(Pipe(std::move(source)))); + initQueryPlan(stages_copy.front(), plan); auto pipeline = addStreamsForLaterStages(stages_copy, plan); updated_header = std::make_unique(pipeline.getHeader()); } @@ -801,21 +878,18 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) is_prepared = true; - return prepareInterpreterSelectQuery(stages, dry_run); + prepareMutationStages(stages, dry_run); } -ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & prepared_stages, bool dry_run) +void MutationsInterpreter::prepareMutationStages(std::vector & prepared_stages, bool dry_run) { - auto storage_snapshot = storage->getStorageSnapshot(metadata_snapshot, context); + auto storage_snapshot = source.getStorageSnapshot(metadata_snapshot, context); auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withExtendedObjects(); auto all_columns = storage_snapshot->getColumns(options); /// Add _row_exists column if it is present in the part - if (auto part_storage = dynamic_pointer_cast(storage)) - { - if (part_storage->hasLightweightDeletedMask()) - all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN}); - } + if (source.hasLightweightDeleteMask()) + all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN}); /// Next, for each stage calculate columns changed by this and previous stages. for (size_t i = 0; i < prepared_stages.size(); ++i) @@ -839,7 +913,7 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & /// Now, calculate `expressions_chain` for each stage except the first. /// Do it backwards to propagate information about columns required as input for a stage to the previous stage. - for (size_t i = prepared_stages.size() - 1; i > 0; --i) + for (int64_t i = prepared_stages.size() - 1; i >= 0; --i) { auto & stage = prepared_stages[i]; @@ -859,7 +933,7 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & /// e.g. 
ALTER referencing the same table in scalar subquery bool execute_scalar_subqueries = !dry_run; auto syntax_result = TreeRewriter(context).analyze( - all_asts, all_columns, storage, storage_snapshot, + all_asts, all_columns, source.getStorage(), storage_snapshot, false, true, execute_scalar_subqueries); if (execute_scalar_subqueries && context->hasQueryContext()) @@ -897,6 +971,9 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & } } + if (i == 0 && actions_chain.steps.empty()) + actions_chain.lastStep(syntax_result->required_source_columns); + /// Remove all intermediate columns. actions_chain.addStep(); actions_chain.getLastStep().required_output.clear(); @@ -908,49 +985,198 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & actions_chain.finalize(); - /// Propagate information about columns needed as input. - for (const auto & column : actions_chain.steps.front()->getRequiredColumns()) - prepared_stages[i - 1].output_columns.insert(column.name); - } - - /// Execute first stage as a SELECT statement. - - auto select = std::make_shared(); - - select->setExpression(ASTSelectQuery::Expression::SELECT, std::make_shared()); - for (const auto & column_name : prepared_stages[0].output_columns) - select->select()->children.push_back(std::make_shared(column_name)); - - /// Don't let select list be empty. - if (select->select()->children.empty()) - select->select()->children.push_back(std::make_shared(Field(0))); - - if (!prepared_stages[0].filters.empty()) - { - ASTPtr where_expression; - if (prepared_stages[0].filters.size() == 1) - where_expression = prepared_stages[0].filters[0]; - else + if (i) { - auto coalesced_predicates = std::make_shared(); - coalesced_predicates->name = "and"; - coalesced_predicates->arguments = std::make_shared(); - coalesced_predicates->children.push_back(coalesced_predicates->arguments); - coalesced_predicates->arguments->children = prepared_stages[0].filters; - where_expression = std::move(coalesced_predicates); + /// Propagate information about columns needed as input. + for (const auto & column : actions_chain.steps.front()->getRequiredColumns()) + prepared_stages[i - 1].output_columns.insert(column.name); + } + } +} + +/// This structure re-implements adding virtual columns while reading from MergeTree part. +/// It would be good to unify it with IMergeTreeSelectAlgorithm. 
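+/// Here only two virtual columns are handled: the lightweight delete mask (_row_exists) and _partition_id.
+/// A requested virtual column that is not physically stored in the part is removed from the list of
+/// columns to read and re-added after the reading step as a constant column at its original position
+/// (see addVirtuals()).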
+struct VirtualColumns +{ + struct ColumnAndPosition + { + ColumnWithTypeAndName column; + size_t position; + }; + + using Columns = std::vector; + + Columns virtuals; + Names columns_to_read; + + VirtualColumns(Names required_columns, const MergeTreeData::DataPartPtr & part) : columns_to_read(std::move(required_columns)) + { + for (size_t i = 0; i < columns_to_read.size(); ++i) + { + if (columns_to_read[i] == LightweightDeleteDescription::FILTER_COLUMN.name) + { + LoadedMergeTreeDataPartInfoForReader part_info_reader(part); + if (!part_info_reader.getColumns().contains(LightweightDeleteDescription::FILTER_COLUMN.name)) + { + ColumnWithTypeAndName mask_column; + mask_column.type = LightweightDeleteDescription::FILTER_COLUMN.type; + mask_column.column = mask_column.type->createColumnConst(0, 1); + mask_column.name = std::move(columns_to_read[i]); + + virtuals.emplace_back(ColumnAndPosition{.column = std::move(mask_column), .position = i}); + } + } + else if (columns_to_read[i] == "_partition_id") + { + ColumnWithTypeAndName column; + column.type = std::make_shared(); + column.column = column.type->createColumnConst(0, part->info.partition_id); + column.name = std::move(columns_to_read[i]); + + virtuals.emplace_back(ColumnAndPosition{.column = std::move(column), .position = i}); + } + } + + if (!virtuals.empty()) + { + Names columns_no_virtuals; + columns_no_virtuals.reserve(columns_to_read.size()); + size_t next_virtual = 0; + for (size_t i = 0; i < columns_to_read.size(); ++i) + { + if (next_virtual < virtuals.size() && i == virtuals[next_virtual].position) + ++next_virtual; + else + columns_no_virtuals.emplace_back(std::move(columns_to_read[i])); + } + + columns_to_read.swap(columns_no_virtuals); } - select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); } - return select; + void addVirtuals(QueryPlan & plan) + { + auto dag = std::make_unique(plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); + + for (auto & column : virtuals) + { + const auto & adding_const = dag->addColumn(std::move(column.column)); + auto & outputs = dag->getOutputs(); + outputs.insert(outputs.begin() + column.position, &adding_const); + } + + auto step = std::make_unique(plan.getCurrentDataStream(), std::move(dag)); + plan.addStep(std::move(step)); + } +}; + +void MutationsInterpreter::Source::read( + Stage & first_stage, + QueryPlan & plan, + const StorageMetadataPtr & snapshot_, + const ContextPtr & context_, + bool apply_deleted_mask_, + bool can_execute_) const +{ + auto required_columns = first_stage.expressions_chain.steps.front()->getRequiredColumns().getNames(); + auto storage_snapshot = getStorageSnapshot(snapshot_, context_); + + if (!can_execute_) + { + auto header = storage_snapshot->getSampleBlockForColumns(required_columns); + auto callback = []() + { + return DB::Exception(ErrorCodes::LOGICAL_ERROR, "Cannot execute a mutation because can_execute flag set to false"); + }; + + Pipe pipe(std::make_shared(header, callback)); + + auto read_from_pipe = std::make_unique(std::move(pipe)); + plan.addStep(std::move(read_from_pipe)); + return; + } + + if (data) + { + const auto & steps = first_stage.expressions_chain.steps; + const auto & names = first_stage.filter_column_names; + size_t num_filters = names.size(); + + ActionsDAGPtr filter; + if (!first_stage.filter_column_names.empty()) + { + + ActionsDAG::NodeRawConstPtrs nodes(num_filters); + for (size_t i = 0; i < num_filters; ++i) + nodes[i] = &steps[i]->actions()->findInOutputs(names[i]); + + filter = 
ActionsDAG::buildFilterActionsDAG(nodes, {}, context_); + } + + VirtualColumns virtual_columns(std::move(required_columns), part); + + createMergeTreeSequentialSource( + plan, *data, storage_snapshot, part, std::move(virtual_columns.columns_to_read), apply_deleted_mask_, filter, context_, + &Poco::Logger::get("MutationsInterpreter")); + + virtual_columns.addVirtuals(plan); + } + else + { + auto select = std::make_shared(); + + select->setExpression(ASTSelectQuery::Expression::SELECT, std::make_shared()); + for (const auto & column_name : first_stage.output_columns) + select->select()->children.push_back(std::make_shared(column_name)); + + /// Don't let select list be empty. + if (select->select()->children.empty()) + select->select()->children.push_back(std::make_shared(Field(0))); + + if (!first_stage.filters.empty()) + { + ASTPtr where_expression; + if (first_stage.filters.size() == 1) + where_expression = first_stage.filters[0]; + else + { + auto coalesced_predicates = std::make_shared(); + coalesced_predicates->name = "and"; + coalesced_predicates->arguments = std::make_shared(); + coalesced_predicates->children.push_back(coalesced_predicates->arguments); + coalesced_predicates->arguments->children = first_stage.filters; + where_expression = std::move(coalesced_predicates); + } + select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); + } + + SelectQueryInfo query_info; + query_info.query = std::move(select); + + size_t max_block_size = context_->getSettingsRef().max_block_size; + size_t max_streams = 1; + storage->read(plan, required_columns, storage_snapshot, query_info, context_, QueryProcessingStage::FetchColumns, max_block_size, max_streams); + + if (!plan.isInitialized()) + { + /// It may be possible when there is nothing to read from storage. 
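+ /// In that case substitute an empty source with the expected header,
+ /// so that the rest of the mutation pipeline can still be built on top of this plan.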
+ auto header = storage_snapshot->getSampleBlockForColumns(required_columns); + auto read_from_pipe = std::make_unique(Pipe(std::make_shared(header))); + plan.addStep(std::move(read_from_pipe)); + } + } +} + +void MutationsInterpreter::initQueryPlan(Stage & first_stage, QueryPlan & plan) +{ + source.read(first_stage, plan, metadata_snapshot, context, apply_deleted_mask, can_execute); + addCreatingSetsStep(plan, first_stage.analyzer->getPreparedSets(), context); } QueryPipelineBuilder MutationsInterpreter::addStreamsForLaterStages(const std::vector & prepared_stages, QueryPlan & plan) const { - for (size_t i_stage = 1; i_stage < prepared_stages.size(); ++i_stage) + for (const Stage & stage : prepared_stages) { - const Stage & stage = prepared_stages[i_stage]; - for (size_t i = 0; i < stage.expressions_chain.steps.size(); ++i) { const auto & step = stage.expressions_chain.steps[i]; @@ -988,14 +1214,11 @@ QueryPipelineBuilder MutationsInterpreter::addStreamsForLaterStages(const std::v void MutationsInterpreter::validate() { - if (!select_interpreter) - select_interpreter = std::make_unique(mutation_ast, context, storage, metadata_snapshot, select_limits); - const Settings & settings = context->getSettingsRef(); /// For Replicated* storages mutations cannot employ non-deterministic functions /// because that produces inconsistencies between replicas - if (startsWith(storage->getName(), "Replicated") && !settings.allow_nondeterministic_mutations) + if (startsWith(source.getStorage()->getName(), "Replicated") && !settings.allow_nondeterministic_mutations) { for (const auto & command : commands) { @@ -1012,7 +1235,7 @@ void MutationsInterpreter::validate() } QueryPlan plan; - select_interpreter->buildQueryPlan(plan); + initQueryPlan(stages.front(), plan); auto pipeline = addStreamsForLaterStages(stages, plan); } @@ -1021,23 +1244,8 @@ QueryPipelineBuilder MutationsInterpreter::execute() if (!can_execute) throw Exception("Cannot execute mutations interpreter because can_execute flag set to false", ErrorCodes::LOGICAL_ERROR); - if (!select_interpreter) - { - /// Skip to apply deleted mask for MutateSomePartColumn cases when part has lightweight delete. - if (!apply_deleted_mask) - { - auto context_for_reading = Context::createCopy(context); - context_for_reading->setApplyDeletedMask(apply_deleted_mask); - select_interpreter = std::make_unique(mutation_ast, context_for_reading, storage, metadata_snapshot, select_limits); - } - else - select_interpreter = std::make_unique(mutation_ast, context, storage, metadata_snapshot, select_limits); - } - - QueryPlan plan; - select_interpreter->buildQueryPlan(plan); - + initQueryPlan(stages.front(), plan); auto builder = addStreamsForLaterStages(stages, plan); /// Sometimes we update just part of columns (for example UPDATE mutation) @@ -1069,11 +1277,7 @@ const ColumnDependencies & MutationsInterpreter::getColumnDependencies() const size_t MutationsInterpreter::evaluateCommandsSize() { - for (const MutationCommand & command : commands) - if (unlikely(!command.predicate && !command.partition)) /// The command touches all rows. 
- return mutation_ast->size(); - - return std::max(prepareQueryAffectedAST(commands, storage, context)->size(), mutation_ast->size()); + return prepareQueryAffectedAST(commands, source.getStorage(), context)->size(); } std::optional MutationsInterpreter::getStorageSortDescriptionIfPossible(const Block & header) const @@ -1096,7 +1300,7 @@ std::optional MutationsInterpreter::getStorageSortDescriptionIf ASTPtr MutationsInterpreter::getPartitionAndPredicateExpressionForMutationCommand(const MutationCommand & command) const { - return DB::getPartitionAndPredicateExpressionForMutationCommand(command, storage, context); + return DB::getPartitionAndPredicateExpressionForMutationCommand(command, source.getStorage(), context); } bool MutationsInterpreter::Stage::isAffectingAllColumns(const Names & storage_columns) const diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index 336c5f11162..fbcb56fac6f 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -19,7 +19,8 @@ using QueryPipelineBuilderPtr = std::unique_ptr; /// Return false if the data isn't going to be changed by mutations. bool isStorageTouchedByMutations( - const StoragePtr & storage, + MergeTreeData & storage, + MergeTreeData::DataPartPtr source_part, const StorageMetadataPtr & metadata_snapshot, const std::vector & commands, ContextMutablePtr context_copy @@ -35,6 +36,8 @@ ASTPtr getPartitionAndPredicateExpressionForMutationCommand( /// to this data. class MutationsInterpreter { + struct Stage; + public: /// Storage to mutate, array of mutations commands and context. If you really want to execute mutation /// use can_execute = true, in other cases (validation, amount of commands) it can be false @@ -47,8 +50,18 @@ public: bool return_all_columns_ = false, bool return_deleted_rows_ = false); - void validate(); + /// Special case for MergeTree + MutationsInterpreter( + MergeTreeData & storage_, + MergeTreeData::DataPartPtr source_part_, + const StorageMetadataPtr & metadata_snapshot_, + MutationCommands commands_, + ContextPtr context_, + bool can_execute_, + bool return_all_columns_ = false, + bool return_deleted_rows_ = false); + void validate(); size_t evaluateCommandsSize(); /// The resulting stream will return blocks containing only changed columns and columns, that we need to recalculate indices. @@ -82,19 +95,60 @@ public: void setApplyDeletedMask(bool apply) { apply_deleted_mask = apply; } + /// Internal class which represents a data part for MergeTree + /// or just storage for other storages. + /// The main idea is to create a dedicated reading from MergeTree part. + /// Additionally we propagate some storage properties. + struct Source + { + StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & snapshot_, const ContextPtr & context_) const; + StoragePtr getStorage() const; + const MergeTreeData * getMergeTreeData() const; + + bool supportsLightweightDelete() const; + bool hasLightweightDeleteMask() const; + bool materializeTTLRecalculateOnly() const; + + void read( + Stage & first_stage, + QueryPlan & plan, + const StorageMetadataPtr & snapshot_, + const ContextPtr & context_, + bool apply_deleted_mask_, + bool can_execute_) const; + + explicit Source(StoragePtr storage_); + Source(MergeTreeData & storage_, MergeTreeData::DataPartPtr source_part_); + + private: + StoragePtr storage; + + /// Special case for MergeTree. 
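+ /// If 'data' and 'part' are set, reading is done directly from the single data part;
+ /// otherwise the generic 'storage' interface is used (see Source::read()).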
+ MergeTreeData * data = nullptr; + MergeTreeData::DataPartPtr part; + }; + private: - ASTPtr prepare(bool dry_run); + MutationsInterpreter( + Source source_, + const StorageMetadataPtr & metadata_snapshot_, + MutationCommands commands_, + ContextPtr context_, + bool can_execute_, + bool return_all_columns_, + bool return_deleted_rows_); - struct Stage; + void prepare(bool dry_run); - ASTPtr prepareInterpreterSelectQuery(std::vector &prepared_stages, bool dry_run); + void initQueryPlan(Stage & first_stage, QueryPlan & query_plan); + void prepareMutationStages(std::vector &prepared_stages, bool dry_run); QueryPipelineBuilder addStreamsForLaterStages(const std::vector & prepared_stages, QueryPlan & plan) const; std::optional getStorageSortDescriptionIfPossible(const Block & header) const; ASTPtr getPartitionAndPredicateExpressionForMutationCommand(const MutationCommand & command) const; - StoragePtr storage; + Source source; StorageMetadataPtr metadata_snapshot; MutationCommands commands; ContextPtr context; @@ -103,12 +157,6 @@ private: bool apply_deleted_mask = true; - ASTPtr mutation_ast; - - /// We have to store interpreter because it use own copy of context - /// and some streams from execute method may use it. - std::unique_ptr select_interpreter; - /// A sequence of mutation commands is executed as a sequence of stages. Each stage consists of several /// filters, followed by updating values of some columns. Commands can reuse expressions calculated by the /// previous commands in the same stage, but at the end of each stage intermediate columns are thrown away diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 0d8fe84f9d3..4765b2cbfbe 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -64,7 +64,6 @@ static MergeTreeReaderSettings getMergeTreeReaderSettings( .save_marks_in_cache = true, .checksum_on_read = settings.checksum_on_read, .read_in_order = query_info.input_order_info != nullptr, - .apply_deleted_mask = context->applyDeletedMask(), .use_asynchronous_read_from_pool = settings.allow_asynchronous_read_from_io_pool_for_merge_tree && (settings.max_streams_to_max_threads_ratio > 1 || settings.max_streams_for_merge_tree_reading > 1), }; diff --git a/src/Processors/Sources/ThrowingExceptionSource.h b/src/Processors/Sources/ThrowingExceptionSource.h new file mode 100644 index 00000000000..5abebd89d07 --- /dev/null +++ b/src/Processors/Sources/ThrowingExceptionSource.h @@ -0,0 +1,32 @@ +#pragma once +#include + + +namespace DB +{ + +/// This source is throwing exception at the first attempt to read from it. +/// Can be used as a additional check that pipeline (or its part) is never executed. +class ThrowingExceptionSource : public ISource +{ +public: + + using CallBack = std::function; + + explicit ThrowingExceptionSource(Block header, CallBack callback_) + : ISource(std::move(header)) + , callback(std::move(callback_)) + {} + + String getName() const override { return "ThrowingExceptionSource"; } + +protected: + Chunk generate() override + { + throw callback(); + } + + CallBack callback; +}; + +} diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index cdf273b47df..7d927b51e5f 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -110,6 +110,8 @@ public: /// The name of the table. 
StorageID getStorageID() const; + virtual bool isMergeTree() const { return false; } + /// Returns true if the storage receives data from a remote server or servers. virtual bool isRemote() const { return false; } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 670c755cf72..19efd8f908a 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -424,6 +424,8 @@ public: StoragePolicyPtr getStoragePolicy() const override; + bool isMergeTree() const override { return true; } + bool supportsPrewhere() const override { return true; } bool supportsFinal() const override; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index e302663597d..30d09312245 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -66,6 +66,13 @@ public: size_t num_streams, std::shared_ptr max_block_numbers_to_read = nullptr) const; + static MarkRanges markRangesFromPKRange( + const MergeTreeData::DataPartPtr & part, + const StorageMetadataPtr & metadata_snapshot, + const KeyCondition & key_condition, + const Settings & settings, + Poco::Logger * log); + private: const MergeTreeData & data; Poco::Logger * log; @@ -78,13 +85,6 @@ private: const Settings & settings, Poco::Logger * log); - static MarkRanges markRangesFromPKRange( - const MergeTreeData::DataPartPtr & part, - const StorageMetadataPtr & metadata_snapshot, - const KeyCondition & key_condition, - const Settings & settings, - Poco::Logger * log); - static MarkRanges filterMarksUsingIndex( MergeTreeIndexPtr index_helper, MergeTreeIndexConditionPtr condition, diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 9e0c96fd88a..4539e0b36c5 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -1,9 +1,14 @@ #include #include #include +#include #include +#include +#include #include #include +#include +#include namespace DB { @@ -25,6 +30,8 @@ public: const StorageSnapshotPtr & storage_snapshot_, MergeTreeData::DataPartPtr data_part_, Names columns_to_read_, + std::optional mark_ranges_, + bool apply_deleted_mask, bool read_with_direct_io_, bool take_column_types_from_storage, bool quiet = false); @@ -56,6 +63,8 @@ private: Poco::Logger * log = &Poco::Logger::get("MergeTreeSequentialSource"); + std::optional mark_ranges; + std::shared_ptr mark_cache; using MergeTreeReaderPtr = std::unique_ptr; MergeTreeReaderPtr reader; @@ -76,6 +85,8 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( const StorageSnapshotPtr & storage_snapshot_, MergeTreeData::DataPartPtr data_part_, Names columns_to_read_, + std::optional mark_ranges_, + bool apply_deleted_mask, bool read_with_direct_io_, bool take_column_types_from_storage, bool quiet) @@ -85,6 +96,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( , data_part(std::move(data_part_)) , columns_to_read(std::move(columns_to_read_)) , read_with_direct_io(read_with_direct_io_) + , mark_ranges(std::move(mark_ranges_)) , mark_cache(storage.getContext()->getMarkCache()) { if (!quiet) @@ -126,11 +138,15 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( MergeTreeReaderSettings reader_settings = { .read_settings = read_settings, - .save_marks_in_cache = false + .save_marks_in_cache = false, + .apply_deleted_mask = apply_deleted_mask, }; + if 
(!mark_ranges) + mark_ranges.emplace(MarkRanges{MarkRange(0, data_part->getMarksCount())}); + reader = data_part->getReader(columns_for_reader, storage_snapshot->metadata, - MarkRanges{MarkRange(0, data_part->getMarksCount())}, + *mark_ranges, /* uncompressed_cache = */ nullptr, mark_cache.get(), reader_settings, {}, {}); } @@ -224,8 +240,10 @@ Pipe createMergeTreeSequentialSource( if (need_to_filter_deleted_rows) columns.emplace_back(LightweightDeleteDescription::FILTER_COLUMN.name); + bool apply_deleted_mask = false; + auto column_part_source = std::make_shared( - storage, storage_snapshot, data_part, columns, read_with_direct_io, take_column_types_from_storage, quiet); + storage, storage_snapshot, data_part, columns, std::optional{}, apply_deleted_mask, read_with_direct_io, take_column_types_from_storage, quiet); Pipe pipe(std::move(column_part_source)); @@ -242,4 +260,92 @@ Pipe createMergeTreeSequentialSource( return pipe; } +/// A Query Plan step to read from a single Merge Tree part +/// using Merge Tree Sequential Source (which reads strictly sequentially in a single thread). +/// This step is used for mutations because the usual reading is too tricky. +/// Previously, sequential reading was achieved by changing some settings like max_threads, +/// however, this approach lead to data corruption after some new settings were introduced. +class ReadFromPart final : public ISourceStep +{ +public: + ReadFromPart( + const MergeTreeData & storage_, + const StorageSnapshotPtr & storage_snapshot_, + MergeTreeData::DataPartPtr data_part_, + Names columns_to_read_, + bool apply_deleted_mask_, + ActionsDAGPtr filter_, + ContextPtr context_, + Poco::Logger * log_) + : ISourceStep(DataStream{.header = storage_snapshot_->getSampleBlockForColumns(columns_to_read_)}) + , storage(storage_) + , storage_snapshot(storage_snapshot_) + , data_part(std::move(data_part_)) + , columns_to_read(std::move(columns_to_read_)) + , apply_deleted_mask(apply_deleted_mask_) + , filter(std::move(filter_)) + , context(std::move(context_)) + , log(log_) + { + } + + String getName() const override { return fmt::format("ReadFromPart({})", data_part->name); } + + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override + { + std::optional mark_ranges; + + const auto & metadata_snapshot = storage_snapshot->metadata; + if (filter && metadata_snapshot->hasPrimaryKey()) + { + const auto & primary_key = storage_snapshot->metadata->getPrimaryKey(); + const Names & primary_key_column_names = primary_key.column_names; + KeyCondition key_condition(filter, context, primary_key_column_names, primary_key.expression, NameSet{}); + LOG_DEBUG(log, "Key condition: {}", key_condition.toString()); + + if (!key_condition.alwaysFalse()) + mark_ranges = MergeTreeDataSelectExecutor::markRangesFromPKRange( + data_part, metadata_snapshot, key_condition, context->getSettingsRef(), log); + + if (mark_ranges && mark_ranges->empty()) + { + pipeline.init(Pipe(std::make_unique(output_stream->header))); + return; + } + } + + auto source = std::make_unique( + storage, storage_snapshot, data_part, columns_to_read, std::move(mark_ranges), apply_deleted_mask, false, true); + + pipeline.init(Pipe(std::move(source))); + } + +private: + const MergeTreeData & storage; + StorageSnapshotPtr storage_snapshot; + MergeTreeData::DataPartPtr data_part; + Names columns_to_read; + bool apply_deleted_mask; + ActionsDAGPtr filter; + ContextPtr context; + Poco::Logger * log; +}; + +void createMergeTreeSequentialSource( + 
QueryPlan & plan, + const MergeTreeData & storage, + const StorageSnapshotPtr & storage_snapshot, + MergeTreeData::DataPartPtr data_part, + Names columns_to_read, + bool apply_deleted_mask, + ActionsDAGPtr filter, + ContextPtr context, + Poco::Logger * log) +{ + auto reading = std::make_unique( + storage, storage_snapshot, std::move(data_part), std::move(columns_to_read), apply_deleted_mask, filter, std::move(context), log); + + plan.addStep(std::move(reading)); +} + } diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.h b/src/Storages/MergeTree/MergeTreeSequentialSource.h index c6c29f9d49a..fb249568e8f 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.h +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.h @@ -20,4 +20,17 @@ Pipe createMergeTreeSequentialSource( bool quiet, std::shared_ptr> filtered_rows_count); +class QueryPlan; + +void createMergeTreeSequentialSource( + QueryPlan & plan, + const MergeTreeData & storage, + const StorageSnapshotPtr & storage_snapshot, + MergeTreeData::DataPartPtr data_part, + Names columns_to_read, + bool apply_deleted_mask, + ActionsDAGPtr filter, + ContextPtr context, + Poco::Logger * log); + } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index de68cb6f0ba..3ecb790243d 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -714,8 +714,6 @@ struct MutationContext FutureMergedMutatedPartPtr future_part; MergeTreeData::DataPartPtr source_part; - - StoragePtr storage_from_source_part; StorageMetadataPtr metadata_snapshot; MutationCommandsConstPtr commands; @@ -1478,10 +1476,9 @@ MutateTask::MutateTask( ctx->storage_columns = metadata_snapshot_->getColumns().getAllPhysical(); ctx->txn = txn; ctx->source_part = ctx->future_part->parts[0]; - ctx->storage_from_source_part = std::make_shared(ctx->source_part); ctx->need_prefix = need_prefix_; - auto storage_snapshot = ctx->storage_from_source_part->getStorageSnapshot(ctx->metadata_snapshot, context_); + auto storage_snapshot = ctx->data->getStorageSnapshot(ctx->metadata_snapshot, context_); extendObjectColumns(ctx->storage_columns, storage_snapshot->object_columns, /*with_subcolumns=*/ false); } @@ -1554,7 +1551,7 @@ bool MutateTask::prepare() } if (ctx->source_part->isStoredOnDisk() && !isStorageTouchedByMutations( - ctx->storage_from_source_part, ctx->metadata_snapshot, ctx->commands_for_part, Context::createCopy(context_for_reading))) + *ctx->data, ctx->source_part, ctx->metadata_snapshot, ctx->commands_for_part, Context::createCopy(context_for_reading))) { NameSet files_to_copy_instead_of_hardlinks; auto settings_ptr = ctx->data->getSettings(); @@ -1597,7 +1594,7 @@ bool MutateTask::prepare() if (!ctx->for_interpreter.empty()) { ctx->interpreter = std::make_unique( - ctx->storage_from_source_part, ctx->metadata_snapshot, ctx->for_interpreter, context_for_reading, true); + *ctx->data, ctx->source_part, ctx->metadata_snapshot, ctx->for_interpreter, context_for_reading, true); ctx->materialized_indices = ctx->interpreter->grabMaterializedIndices(); ctx->materialized_projections = ctx->interpreter->grabMaterializedProjections(); ctx->mutation_kind = ctx->interpreter->getMutationKind(); From d945b72d6fe01c216e32f505c5bd85220382b4fb Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 10 Jan 2023 15:40:31 +0000 Subject: [PATCH 141/262] Pull SQLancer image before check run --- tests/ci/sqlancer_check.py | 21 +++++++++++++++++++++ 1 file changed, 21 
insertions(+) diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py index ce6d89a7267..b286d1a63bc 100644 --- a/tests/ci/sqlancer_check.py +++ b/tests/ci/sqlancer_check.py @@ -29,6 +29,11 @@ from rerun_helper import RerunHelper IMAGE_NAME = "clickhouse/sqlancer-test" +def get_pull_command(docker_image): + return ( + f"docker pull --network=host {docker_image}" + ) + def get_run_command(download_url, workspace_path, image): return ( f"docker run " @@ -92,6 +97,21 @@ if __name__ == "__main__": if not os.path.exists(workspace_path): os.makedirs(workspace_path) + pull_command = get_pull_command(docker_image) + + logging.info("Going to pull image %s", pull_command) + + pull_log_path = os.path.join(workspace_path, "pull.log") + with open(pull_log_path, "w", encoding="utf-8") as log: + with subprocess.Popen( + pull_command, shell=True, stderr=log, stdout=log + ) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Pull successfully") + else: + logging.info("Pull failed") + run_command = get_run_command(build_url, workspace_path, docker_image) logging.info("Going to run %s", run_command) @@ -124,6 +144,7 @@ if __name__ == "__main__": paths = [ run_log_path, + pull_log_path, os.path.join(workspace_path, "clickhouse-server.log"), os.path.join(workspace_path, "stderr.log"), os.path.join(workspace_path, "stdout.log"), From fbba28b31e349450456f144704a3795f78d53707 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 9 Jan 2023 11:34:47 +0100 Subject: [PATCH 142/262] Analyzer aggregation without column fix --- src/Interpreters/ExpressionActions.cpp | 8 +-- src/Interpreters/ExpressionActions.h | 2 +- src/Interpreters/TreeRewriter.cpp | 2 +- src/Planner/PlannerJoinTree.cpp | 61 ++++++++++++++++++- .../QueryPlan/ReadFromMergeTree.cpp | 2 +- src/Storages/HDFS/StorageHDFS.cpp | 2 +- src/Storages/StorageFile.cpp | 2 +- src/Storages/StorageMerge.cpp | 4 +- ...lyzer_aggregation_without_column.reference | 1 + ...21_analyzer_aggregation_without_column.sql | 15 +++++ 10 files changed, 85 insertions(+), 14 deletions(-) create mode 100644 tests/queries/0_stateless/02521_analyzer_aggregation_without_column.reference create mode 100644 tests/queries/0_stateless/02521_analyzer_aggregation_without_column.sql diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index d89be9f3e2e..5ea29615942 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -790,10 +790,10 @@ void ExpressionActions::assertDeterministic() const } -std::string ExpressionActions::getSmallestColumn(const NamesAndTypesList & columns) +NameAndTypePair ExpressionActions::getSmallestColumn(const NamesAndTypesList & columns) { std::optional min_size; - String res; + NameAndTypePair result; for (const auto & column : columns) { @@ -807,14 +807,14 @@ std::string ExpressionActions::getSmallestColumn(const NamesAndTypesList & colum if (!min_size || size < *min_size) { min_size = size; - res = column.name; + result = column; } } if (!min_size) throw Exception("No available columns", ErrorCodes::LOGICAL_ERROR); - return res; + return result; } std::string ExpressionActions::dumpActions() const diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index be63b9e0d78..faefe0985f7 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -111,7 +111,7 @@ public: std::string dumpActions() const; JSONBuilder::ItemPtr toTree() const; - static std::string getSmallestColumn(const 
NamesAndTypesList & columns); + static NameAndTypePair getSmallestColumn(const NamesAndTypesList & columns); /// Check if column is always zero. True if it's definite, false if we can't say for sure. /// Call it only after subqueries for sets were executed. diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 20c14b8d7b6..a1b3c8011cd 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1146,7 +1146,7 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select required.insert(std::min_element(columns.begin(), columns.end())->name); else if (!source_columns.empty()) /// If we have no information about columns sizes, choose a column of minimum size of its data type. - required.insert(ExpressionActions::getSmallestColumn(source_columns)); + required.insert(ExpressionActions::getSmallestColumn(source_columns).name); } else if (is_select && storage_snapshot && !columns_context.has_array_join) { diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 3584c9d4caa..999aa32d850 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -81,6 +81,63 @@ void checkAccessRights(const TableNode & table_node, const Names & column_names, query_context->checkAccess(AccessType::SELECT, storage_id, column_names); } +NameAndTypePair chooseSmallestColumnToReadFromStorage(const StoragePtr & storage, const StorageSnapshotPtr & storage_snapshot) +{ + /** We need to read at least one column to find the number of rows. + * We will find a column with minimum . + * Because it is the column that is cheapest to read. + */ + class ColumnWithSize + { + public: + ColumnWithSize(NameAndTypePair column_, ColumnSize column_size_) + : column(std::move(column_)) + , compressed_size(column_size_.data_compressed) + , uncompressed_size(column_size_.data_uncompressed) + , type_size(column.type->haveMaximumSizeOfValue() ? 
column.type->getMaximumSizeOfValueInMemory() : 100) + { + } + + bool operator<(const ColumnWithSize & rhs) const + { + return std::tie(compressed_size, type_size, uncompressed_size) + < std::tie(rhs.compressed_size, rhs.type_size, rhs.uncompressed_size); + } + + NameAndTypePair column; + size_t compressed_size = 0; + size_t uncompressed_size = 0; + size_t type_size = 0; + }; + + std::vector columns_with_sizes; + + auto column_sizes = storage->getColumnSizes(); + auto column_names_and_types = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::AllPhysical).withSubcolumns()); + + if (!column_sizes.empty()) + { + for (auto & column_name_and_type : column_names_and_types) + { + auto it = column_sizes.find(column_name_and_type.name); + if (it == column_sizes.end()) + continue; + + columns_with_sizes.emplace_back(column_name_and_type, it->second); + } + } + + NameAndTypePair result; + + if (!columns_with_sizes.empty()) + result = std::min_element(columns_with_sizes.begin(), columns_with_sizes.end())->column; + else + /// If we have no information about columns sizes, choose a column of minimum size of its data type + result = ExpressionActions::getSmallestColumn(column_names_and_types); + + return result; +} + QueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression, SelectQueryInfo & select_query_info, const SelectQueryOptions & select_query_options, @@ -127,9 +184,7 @@ QueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression, if (columns_names.empty()) { - auto column_names_and_types = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns()); - auto additional_column_to_read = column_names_and_types.front(); - + auto additional_column_to_read = chooseSmallestColumnToReadFromStorage(storage, storage_snapshot); const auto & column_identifier = planner_context->getGlobalPlannerContext()->createColumnIdentifier(additional_column_to_read, table_expression); columns_names.push_back(additional_column_to_read.name); table_expression_data.addColumn(additional_column_to_read, column_identifier); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 0d8fe84f9d3..e5ad2729e6c 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1023,7 +1023,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( if (result.column_names_to_read.empty()) { NamesAndTypesList available_real_columns = metadata_snapshot->getColumns().getAllPhysical(); - result.column_names_to_read.push_back(ExpressionActions::getSmallestColumn(available_real_columns)); + result.column_names_to_read.push_back(ExpressionActions::getSmallestColumn(available_real_columns).name); } // storage_snapshot->check(result.column_names_to_read); diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index bbabd523c45..c7008a317c3 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -599,7 +599,7 @@ Pipe StorageHDFS::read( { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); + fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); columns_description = 
storage_snapshot->getDescriptionForColumns(fetch_columns); block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 95bd0e7c53e..922754c2d8c 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -706,7 +706,7 @@ Pipe StorageFile::read( }); if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); + fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); } else diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 79efab9e9d7..3e279b408d7 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -488,7 +488,7 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu column_names_as_aliases = alias_actions->getRequiredColumns().getNames(); if (column_names_as_aliases.empty()) - column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_metadata_snapshot->getColumns().getAllPhysical())); + column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_metadata_snapshot->getColumns().getAllPhysical()).name); } auto source_pipeline = createSources( @@ -574,7 +574,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( { /// If there are only virtual columns in query, you must request at least one other column. if (real_column_names.empty()) - real_column_names.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); + real_column_names.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); QueryPlan plan; if (StorageView * view = dynamic_cast(storage.get())) diff --git a/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.reference b/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.sql b/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.sql new file mode 100644 index 00000000000..105bce6711c --- /dev/null +++ b/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.sql @@ -0,0 +1,15 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + c0 String ALIAS c1, + c1 String, + c2 String, +) ENGINE = MergeTree ORDER BY c1; + +INSERT INTO test_table VALUES ('a', 'b'); + +SELECT MAX(1) FROM test_table; + +DROP TABLE test_table; From 4571c74fdd4524fb4c7b92ff3b21e40765c4c8fb Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 10 Jan 2023 12:22:33 +0100 Subject: [PATCH 143/262] Fixed build --- src/Storages/StorageS3.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index edd60a364af..9cb992bd24f 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1057,7 +1057,7 @@ Pipe StorageS3::read( { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); if 
(fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); + fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); From ee86afb1256567bb9259106ce1b2116169925a6d Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Tue, 10 Jan 2023 11:14:12 -0500 Subject: [PATCH 144/262] add deltalake --- .../table-functions/deltalake.md | 181 +++--------------- 1 file changed, 24 insertions(+), 157 deletions(-) diff --git a/docs/en/sql-reference/table-functions/deltalake.md b/docs/en/sql-reference/table-functions/deltalake.md index 7e3fffe4d8b..af944d70426 100644 --- a/docs/en/sql-reference/table-functions/deltalake.md +++ b/docs/en/sql-reference/table-functions/deltalake.md @@ -3,182 +3,49 @@ slug: /en/sql-reference/table-functions/deltalake sidebar_label: DeltLake --- -# DeltaLake Table Function +# deltaLake Table Function -Provides a read-only table-like interface to [Delta Lake](https://github.com/delta-io/delta) tables in [Amazon S3](https://aws.amazon.com/s3/). +Provides a read-only table-like interface to [Delta Lake](https://github.com/delta-io/delta) tables in Amazon S3. -For example, to query an existing Delta Lake table named `deltalake` in S3: -```sql -CREATE TABLE dl_hits - ENGINE = DeltaLake('https://clickhouse-public-datasets.s3.amazonaws.com/delta_lake/hits/','',''); - -SHOW TABLES; - -DESCRIBE dl_hits; - -SELECT URL, Referer, UserAgent FROM dl_hits WHERE URL IS NOT NULL LIMIT 10; - -SELECT URL, Referer, UserAgent FROM deltaLake('https://clickhouse-public-datasets.s3.amazonaws.com/delta_lake/hits/') WHERE URL IS NOT NULL LIMIT 10; - -``` - -**Syntax** +## Syntax ``` sql -s3(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) +deltaLake(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) ``` -**Arguments** +## Arguments -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path). -- `format` — The [format](../../interfaces/formats.md#formats) of the file. -- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. -- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension. +- `path` — Bucket url with path to existing Delta Lake table in S3. +- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. These parameters are optional. If credentials are not specified, they are used from the ClickHouse configuration. For more information see [Using S3 for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3). +- `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file. +- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. +- `compression` — Parameter is optional. 
Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, compression will be autodetected by the file extension. **Returned value** -A table with the specified structure for reading or writing data in the specified file. +A table with the specified structure for reading data in the specified Delta Lake table in S3. **Examples** -Selecting the first two rows from the table from S3 file `https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv`: +Selecting rows from the table in S3 `https://clickhouse-public-datasets.s3.amazonaws.com/delta_lake/hits/`: ``` sql -SELECT * -FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') -LIMIT 2; +SELECT + URL, + UserAgent +FROM deltaLake('https://clickhouse-public-datasets.s3.amazonaws.com/delta_lake/hits/') +WHERE URL IS NOT NULL +LIMIT 2 ``` -``` text -┌─column1─┬─column2─┬─column3─┐ -│ 1 │ 2 │ 3 │ -│ 3 │ 2 │ 1 │ -└─────────┴─────────┴─────────┘ +``` response +┌─URL───────────────────────────────────────────────────────────────────┬─UserAgent─┐ +│ http://auto.ria.ua/search/index.kz/jobinmoscow/detail/55089/hasimages │ 1 │ +│ http://auto.ria.ua/search/index.kz/jobinmoscow.ru/gosushi │ 1 │ +└───────────────────────────────────────────────────────────────────────┴───────────┘ ``` -The similar but from file with `gzip` compression: - -``` sql -SELECT * -FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') -LIMIT 2; -``` - -``` text -┌─column1─┬─column2─┬─column3─┐ -│ 1 │ 2 │ 3 │ -│ 3 │ 2 │ 1 │ -└─────────┴─────────┴─────────┘ -``` - -## Usage - -Suppose that we have several files with following URIs on S3: - -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_1.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_2.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_3.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_4.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_1.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_2.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_3.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_4.csv' - -Count the amount of rows in files ending with numbers from 1 to 3: - -``` sql -SELECT count(*) -FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32') -``` - -``` text -┌─count()─┐ -│ 18 │ -└─────────┘ -``` - -Count the total amount of rows in all files in these two directories: - -``` sql -SELECT count(*) -FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32') -``` - -``` text -┌─count()─┐ -│ 24 │ -└─────────┘ -``` - -:::warning -If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. 
-::: - -Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: - -``` sql -SELECT count(*) -FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32'); -``` - -``` text -┌─count()─┐ -│ 12 │ -└─────────┘ -``` - -Insert data into file `test-data.csv.gz`: - -``` sql -INSERT INTO FUNCTION s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') -VALUES ('test-data', 1), ('test-data-2', 2); -``` - -Insert data into file `test-data.csv.gz` from existing table: - -``` sql -INSERT INTO FUNCTION s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') -SELECT name, value FROM existing_table; -``` - -Glob ** can be used for recursive directory traversal. Consider the below example, it will fetch all files from `my-test-bucket-768` directory recursively: - -``` sql -SELECT * FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**', 'CSV', 'name String, value UInt32', 'gzip'); -``` - -The below get data from all `test-data.csv.gz` files from any folder inside `my-test-bucket` directory recursively: - -``` sql -SELECT * FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); -``` - -## Partitioned Write - -If you specify `PARTITION BY` expression when inserting data into `S3` table, a separate file is created for each partition value. Splitting the data into separate files helps to improve reading operations efficiency. - -**Examples** - -1. Using partition ID in a key creates separate files: - -```sql -INSERT INTO TABLE FUNCTION - s3('http://bucket.amazonaws.com/my_bucket/file_{_partition_id}.csv', 'CSV', 'a String, b UInt32, c UInt32') - PARTITION BY a VALUES ('x', 2, 3), ('x', 4, 5), ('y', 11, 12), ('y', 13, 14), ('z', 21, 22), ('z', 23, 24); -``` -As a result, the data is written into three files: `file_x.csv`, `file_y.csv`, and `file_z.csv`. - -2. Using partition ID in a bucket name creates files in different buckets: - -```sql -INSERT INTO TABLE FUNCTION - s3('http://bucket.amazonaws.com/my_bucket_{_partition_id}/file.csv', 'CSV', 'a UInt32, b UInt32, c UInt32') - PARTITION BY a VALUES (1, 2, 3), (1, 4, 5), (10, 11, 12), (10, 13, 14), (20, 21, 22), (20, 23, 24); -``` -As a result, the data is written into three files in different buckets: `my_bucket_1/file.csv`, `my_bucket_10/file.csv`, and `my_bucket_20/file.csv`. 
- **See Also** -- [S3 engine](../../engines/table-engines/integrations/s3.md) +- [deltaLake engine](/docs/en/engines/table-engines/integrations/deltalake.md) -[Original article](https://clickhouse.com/docs/en/sql-reference/table-functions/s3/) From 7cb3e174191e04f288de69e7fc3e4bea16058335 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 10 Jan 2023 16:17:59 +0000 Subject: [PATCH 145/262] black --- tests/ci/sqlancer_check.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py index b286d1a63bc..5b268141484 100644 --- a/tests/ci/sqlancer_check.py +++ b/tests/ci/sqlancer_check.py @@ -30,9 +30,8 @@ IMAGE_NAME = "clickhouse/sqlancer-test" def get_pull_command(docker_image): - return ( - f"docker pull --network=host {docker_image}" - ) + return f"docker pull --network=host {docker_image}" + def get_run_command(download_url, workspace_path, image): return ( From 879ee05218905d1baa1f96e9de2ae4107883c417 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Tue, 10 Jan 2023 11:18:33 -0500 Subject: [PATCH 146/262] fix case of names --- docs/en/engines/table-engines/integrations/deltalake.md | 2 +- docs/en/sql-reference/table-functions/deltalake.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/deltalake.md b/docs/en/engines/table-engines/integrations/deltalake.md index 44407e34e38..5ce044680d4 100644 --- a/docs/en/engines/table-engines/integrations/deltalake.md +++ b/docs/en/engines/table-engines/integrations/deltalake.md @@ -29,5 +29,5 @@ CREATE TABLE deltalake ENGINE=DeltaLake('http://mars-doc-test.s3.amazonaws.com/c ## See also -- [DeltaLake table function](../../../sql-reference/table-functions/deltalake.md) +- [deltaLake table function](../../../sql-reference/table-functions/deltalake.md) diff --git a/docs/en/sql-reference/table-functions/deltalake.md b/docs/en/sql-reference/table-functions/deltalake.md index af944d70426..6468e51d757 100644 --- a/docs/en/sql-reference/table-functions/deltalake.md +++ b/docs/en/sql-reference/table-functions/deltalake.md @@ -47,5 +47,5 @@ LIMIT 2 **See Also** -- [deltaLake engine](/docs/en/engines/table-engines/integrations/deltalake.md) +- [DeltaLake engine](/docs/en/engines/table-engines/integrations/deltalake.md) From da4e9c94309620285afde47e290088fc24882692 Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 10 Jan 2023 16:35:46 +0000 Subject: [PATCH 147/262] fix SharedMutex build --- src/Common/SharedMutex.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Common/SharedMutex.cpp b/src/Common/SharedMutex.cpp index 3a69c106800..31525dbd668 100644 --- a/src/Common/SharedMutex.cpp +++ b/src/Common/SharedMutex.cpp @@ -9,6 +9,11 @@ namespace DB { +SharedMutex::SharedMutex() + : state(0) + , waiters(0) +{} + void SharedMutex::lock() { UInt64 value = state.load(); From 71333afd5a9c3d5c62c19066a24c705e5019d382 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 10 Jan 2023 16:40:10 +0000 Subject: [PATCH 148/262] Fixing a test. 
--- src/Interpreters/MutationsInterpreter.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index f8627f1ff85..e95e53db41b 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -523,11 +523,8 @@ void MutationsInterpreter::prepare(bool dry_run) NamesAndTypesList all_columns = columns_desc.getAllPhysical(); /// Add _row_exists column if it is physically present in the part - if (auto part_storage = dynamic_pointer_cast(storage)) - { - if (part_storage->hasLightweightDeletedMask()) + if (source.hasLightweightDeleteMask()) all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN}); - } NameSet updated_columns; bool materialize_ttl_recalculate_only = source.materializeTTLRecalculateOnly(); From 7701dc571e84626c0bf81ee67e1c4daccd5efaf7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 10 Jan 2023 18:19:38 +0100 Subject: [PATCH 149/262] Update MutationsInterpreter.cpp --- src/Interpreters/MutationsInterpreter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index e95e53db41b..cec03863c69 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -524,7 +524,7 @@ void MutationsInterpreter::prepare(bool dry_run) /// Add _row_exists column if it is physically present in the part if (source.hasLightweightDeleteMask()) - all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN}); + all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN}); NameSet updated_columns; bool materialize_ttl_recalculate_only = source.materializeTTLRecalculateOnly(); From 9a81f27fb22a21be79d36be35b1e28e3be334ed7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 10 Jan 2023 17:32:26 +0000 Subject: [PATCH 150/262] Fix additional_table_filters with minmax/count projection. --- src/Storages/MergeTree/MergeTreeData.cpp | 4 ++++ .../01710_projection_additional_filters.reference | 1 + .../0_stateless/01710_projection_additional_filters.sql | 6 ++++++ 3 files changed, 11 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 358d527ae28..6bcfe5a35bd 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -5960,6 +5960,10 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg if (settings.parallel_replicas_count > 1 || settings.max_parallel_replicas > 1) return std::nullopt; + /// Cannot use projections in case of additional filter. 
+ if (query_info.additional_filter_ast) + return std::nullopt; + auto query_ptr = query_info.original_query; auto * select_query = query_ptr->as(); if (!select_query) diff --git a/tests/queries/0_stateless/01710_projection_additional_filters.reference b/tests/queries/0_stateless/01710_projection_additional_filters.reference index 06b63ea6c2f..31b14cf6359 100644 --- a/tests/queries/0_stateless/01710_projection_additional_filters.reference +++ b/tests/queries/0_stateless/01710_projection_additional_filters.reference @@ -1 +1,2 @@ 0 0 0 +3 diff --git a/tests/queries/0_stateless/01710_projection_additional_filters.sql b/tests/queries/0_stateless/01710_projection_additional_filters.sql index 1633b48ba7e..f12d3e2766b 100644 --- a/tests/queries/0_stateless/01710_projection_additional_filters.sql +++ b/tests/queries/0_stateless/01710_projection_additional_filters.sql @@ -7,3 +7,9 @@ INSERT INTO t SELECT number % 10, number FROM numbers(10000); SELECT count(), min(a), max(a) FROM t SETTINGS additional_table_filters = {'t' : '0'}; DROP TABLE t; + +drop table if exists atf_p; +create table atf_p (x UInt64) engine = MergeTree order by tuple(); +insert into atf_p select number from numbers(10); +select count() from atf_p settings additional_table_filters = {'atf_p': 'x <= 2'}; +drop table atf_p; From 6d86b8dd478e71eeeed957bba8f5f4579e89c46e Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 10 Jan 2023 19:05:02 +0100 Subject: [PATCH 151/262] Fix flaky azure test --- .../test.py | 83 ++++++++++--------- 1 file changed, 46 insertions(+), 37 deletions(-) diff --git a/tests/integration/test_merge_tree_azure_blob_storage/test.py b/tests/integration/test_merge_tree_azure_blob_storage/test.py index e41529eb385..6c1733fc72f 100644 --- a/tests/integration/test_merge_tree_azure_blob_storage/test.py +++ b/tests/integration/test_merge_tree_azure_blob_storage/test.py @@ -42,10 +42,10 @@ def cluster(): # For inserts there is no guarantee that retries will not result in duplicates. # But it is better to retry anyway because 'Connection was closed by the server' error # happens in fact only for inserts because reads already have build-in retries in code. 
-def azure_query(node, query, try_num=3): +def azure_query(node, query, try_num=3, settings={}): for i in range(try_num): try: - return node.query(query) + return node.query(query, settings=settings) except Exception as ex: retriable_errors = [ "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response" @@ -80,7 +80,7 @@ def create_table(node, table_name, **additional_settings): ORDER BY (dt, id) SETTINGS {",".join((k+"="+repr(v) for k, v in settings.items()))}""" - node.query(f"DROP TABLE IF EXISTS {table_name}") + azure_query(node, f"DROP TABLE IF EXISTS {table_name}") azure_query(node, create_table_statement) assert ( azure_query(node, f"SELECT COUNT(*) FROM {table_name} FORMAT Values") == "(0)" @@ -230,9 +230,9 @@ def test_alter_table_columns(cluster): f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096, -1)}", ) - node.query(f"ALTER TABLE {TABLE_NAME} ADD COLUMN col1 UInt64 DEFAULT 1") + azure_query(node, f"ALTER TABLE {TABLE_NAME} ADD COLUMN col1 UInt64 DEFAULT 1") # To ensure parts have been merged - node.query(f"OPTIMIZE TABLE {TABLE_NAME}") + azure_query(node, f"OPTIMIZE TABLE {TABLE_NAME}") assert ( azure_query(node, f"SELECT sum(col1) FROM {TABLE_NAME} FORMAT Values") @@ -245,7 +245,8 @@ def test_alter_table_columns(cluster): == "(4096)" ) - node.query( + azure_query( + node, f"ALTER TABLE {TABLE_NAME} MODIFY COLUMN col1 String", settings={"mutations_sync": 2}, ) @@ -271,26 +272,27 @@ def test_attach_detach_partition(cluster): == "(8192)" ) - node.query(f"ALTER TABLE {TABLE_NAME} DETACH PARTITION '2020-01-03'") + azure_query(node, f"ALTER TABLE {TABLE_NAME} DETACH PARTITION '2020-01-03'") assert ( azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(4096)" ) - node.query(f"ALTER TABLE {TABLE_NAME} ATTACH PARTITION '2020-01-03'") + azure_query(node, f"ALTER TABLE {TABLE_NAME} ATTACH PARTITION '2020-01-03'") assert ( azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(8192)" ) - node.query(f"ALTER TABLE {TABLE_NAME} DROP PARTITION '2020-01-03'") + azure_query(node, f"ALTER TABLE {TABLE_NAME} DROP PARTITION '2020-01-03'") assert ( azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(4096)" ) - node.query(f"ALTER TABLE {TABLE_NAME} DETACH PARTITION '2020-01-04'") - node.query( + azure_query(node, f"ALTER TABLE {TABLE_NAME} DETACH PARTITION '2020-01-04'") + azure_query( + node, f"ALTER TABLE {TABLE_NAME} DROP DETACHED PARTITION '2020-01-04'", settings={"allow_drop_detached": 1}, ) @@ -314,16 +316,18 @@ def test_move_partition_to_another_disk(cluster): == "(8192)" ) - node.query( - f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-04' TO DISK '{LOCAL_DISK}'" + azure_query( + node, + f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-04' TO DISK '{LOCAL_DISK}'", ) assert ( azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(8192)" ) - node.query( - f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-04' TO DISK '{AZURE_BLOB_STORAGE_DISK}'" + azure_query( + node, + f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-04' TO DISK '{AZURE_BLOB_STORAGE_DISK}'", ) assert ( azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") @@ -344,14 +348,14 @@ def test_table_manipulations(cluster): f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096)}" ) - node.query(f"RENAME TABLE {TABLE_NAME} TO {renamed_table}") + azure_query(node, f"RENAME TABLE {TABLE_NAME} TO {renamed_table}") assert ( 
azure_query(node, f"SELECT count(*) FROM {renamed_table} FORMAT Values") == "(8192)" ) - node.query(f"RENAME TABLE {renamed_table} TO {TABLE_NAME}") - assert node.query(f"CHECK TABLE {TABLE_NAME} FORMAT Values") == "(1)" + azure_query(node, f"RENAME TABLE {renamed_table} TO {TABLE_NAME}") + assert azure_query(node, f"CHECK TABLE {TABLE_NAME} FORMAT Values") == "(1)" node.query(f"DETACH TABLE {TABLE_NAME}") node.query(f"ATTACH TABLE {TABLE_NAME}") @@ -360,7 +364,7 @@ def test_table_manipulations(cluster): == "(8192)" ) - node.query(f"TRUNCATE TABLE {TABLE_NAME}") + azure_query(node, f"TRUNCATE TABLE {TABLE_NAME}") assert ( azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(0)" ) @@ -395,11 +399,13 @@ def test_move_replace_partition_to_another_table(cluster): create_table(node, table_clone_name) - node.query( - f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-03' TO TABLE {table_clone_name}" + azure_query( + node, + f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-03' TO TABLE {table_clone_name}", ) - node.query( - f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-05' TO TABLE {table_clone_name}" + azure_query( + node, + f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-05' TO TABLE {table_clone_name}", ) assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" assert ( @@ -428,11 +434,13 @@ def test_move_replace_partition_to_another_table(cluster): == "(1024)" ) - node.query( - f"ALTER TABLE {TABLE_NAME} REPLACE PARTITION '2020-01-03' FROM {table_clone_name}" + azure_query( + node, + f"ALTER TABLE {TABLE_NAME} REPLACE PARTITION '2020-01-03' FROM {table_clone_name}", ) - node.query( - f"ALTER TABLE {TABLE_NAME} REPLACE PARTITION '2020-01-05' FROM {table_clone_name}" + azure_query( + node, + f"ALTER TABLE {TABLE_NAME} REPLACE PARTITION '2020-01-05' FROM {table_clone_name}", ) assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" assert ( @@ -448,16 +456,16 @@ def test_move_replace_partition_to_another_table(cluster): == "(512)" ) - node.query(f"DROP TABLE {table_clone_name} NO DELAY") + azure_query(node, f"DROP TABLE {table_clone_name} NO DELAY") assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" assert ( azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(1024)" ) - node.query(f"ALTER TABLE {TABLE_NAME} FREEZE") + azure_query(node, f"ALTER TABLE {TABLE_NAME} FREEZE") - node.query(f"DROP TABLE {TABLE_NAME} NO DELAY") + azure_query(node, f"DROP TABLE {TABLE_NAME} NO DELAY") def test_freeze_unfreeze(cluster): @@ -470,20 +478,21 @@ def test_freeze_unfreeze(cluster): azure_query( node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}" ) - node.query(f"ALTER TABLE {TABLE_NAME} FREEZE WITH NAME '{backup1}'") + azure_query(node, f"ALTER TABLE {TABLE_NAME} FREEZE WITH NAME '{backup1}'") azure_query( node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096)}" ) - node.query(f"ALTER TABLE {TABLE_NAME} FREEZE WITH NAME '{backup2}'") + azure_query(node, f"ALTER TABLE {TABLE_NAME} FREEZE WITH NAME '{backup2}'") azure_query(node, f"TRUNCATE TABLE {TABLE_NAME}") # Unfreeze single partition from backup1. - node.query( - f"ALTER TABLE {TABLE_NAME} UNFREEZE PARTITION '2020-01-03' WITH NAME '{backup1}'" + azure_query( + node, + f"ALTER TABLE {TABLE_NAME} UNFREEZE PARTITION '2020-01-03' WITH NAME '{backup1}'", ) # Unfreeze all partitions from backup2. 
- node.query(f"ALTER TABLE {TABLE_NAME} UNFREEZE WITH NAME '{backup2}'") + azure_query(node, f"ALTER TABLE {TABLE_NAME} UNFREEZE WITH NAME '{backup2}'") def test_apply_new_settings(cluster): @@ -524,8 +533,8 @@ def test_big_insert(cluster): node, f"INSERT INTO {TABLE_NAME} {check_query}", ) - assert azure_query(node, f"SELECT * FROM {TABLE_NAME} ORDER BY id") == node.query( - check_query + assert azure_query(node, f"SELECT * FROM {TABLE_NAME} ORDER BY id") == azure_query( + node, check_query ) blob_container_client = cluster.blob_service_client.get_container_client( From 8fa1b070c6a02021b7b3cb858cf7185526720ace Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 10 Jan 2023 19:55:06 +0100 Subject: [PATCH 152/262] minor cleanup in stress/run.sh --- docker/test/stress/run.sh | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index f994e6c2269..7f3e551edbc 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -128,18 +128,12 @@ EOL function stop() { + local max_tries="${1:-90}" local pid # Preserve the pid, since the server can hung after the PID will be deleted. pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)" - clickhouse stop $max_tries --do-not-kill && return - - if [ -n "$1" ] - then - # temporarily disable it in BC check - clickhouse stop --force - return - fi + clickhouse stop --max-tries "$max_tries" --do-not-kill && return # We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces. kill -TERM "$(pidof gdb)" ||: @@ -465,7 +459,8 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then clickhouse stop --force ) - stop 1 + # Use bigger timeout for previous version + stop 300 mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log # Start new server From 2af17ec1c0c1217ab5fda4cc95248b7e8852ed8b Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 10 Jan 2023 22:19:58 +0300 Subject: [PATCH 153/262] Update clickhouse-test --- tests/clickhouse-test | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 2709ad1eecf..a5c6b3e0bb8 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1544,8 +1544,11 @@ def check_server_started(args): print(" OK") sys.stdout.flush() return True - except (ConnectionError, http.client.ImproperConnectionState): - print(".", end="") + except (ConnectionError, http.client.ImproperConnectionState) as e: + if args.hung_check: + print("Connection error, will retry: ", str(e)) + else: + print(".", end="") sys.stdout.flush() retry_count -= 1 sleep(0.5) From 035dc33707f5663f1612130c078434946932b9c7 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 10 Jan 2023 19:22:18 +0000 Subject: [PATCH 154/262] Fix builds --- src/Core/Settings.h | 2 +- src/Core/SettingsEnums.cpp | 2 +- src/Core/SettingsEnums.h | 2 +- src/IO/ReadBufferFromFileBase.cpp | 1 - src/IO/ReadBufferFromFileBase.h | 2 +- 5 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c9ade637340..b8d46244b6c 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -595,7 +595,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 
'min_insert_block_size_rows'). It is a safety threshold.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ - M(StorageFileReadMethod, storage_file_read_method, LocalFSReadMethod::mmap, "Method of reading data from storage file, one of: read, pread, mmap.", 0) \ + M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::mmap, "Method of reading data from storage file, one of: read, pread, mmap.", 0) \ M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, pread_threadpool.", 0) \ M(String, remote_filesystem_read_method, "threadpool", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \ M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 26fcff1d410..3d5326ec0d6 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -163,7 +163,7 @@ IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS, {{"clickhouse", Dialect::clickhouse}, {"kusto", Dialect::kusto}}) -IMPLEMENT_SETTING_ENUM(StorageFileReadMethod, ErrorCodes::BAD_ARGUMENTS, +IMPLEMENT_SETTING_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS, {{"mmap", LocalFSReadMethod::mmap}, {"pread", LocalFSReadMethod::pread}, {"read", LocalFSReadMethod::read}}) diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 236bc7e9b10..8c66c7926a2 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -193,5 +193,5 @@ enum class Dialect DECLARE_SETTING_ENUM(Dialect) -DECLARE_SETTING_ENUM_WITH_RENAME(StorageFileReadMethod, LocalFSReadMethod) +DECLARE_SETTING_ENUM(LocalFSReadMethod) } diff --git a/src/IO/ReadBufferFromFileBase.cpp b/src/IO/ReadBufferFromFileBase.cpp index 7ea16d679bc..d94cf12294b 100644 --- a/src/IO/ReadBufferFromFileBase.cpp +++ b/src/IO/ReadBufferFromFileBase.cpp @@ -1,4 +1,3 @@ -#include #include #include diff --git a/src/IO/ReadBufferFromFileBase.h b/src/IO/ReadBufferFromFileBase.h index cc4a131b10b..b77db29bc23 100644 --- a/src/IO/ReadBufferFromFileBase.h +++ b/src/IO/ReadBufferFromFileBase.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include From 5cf1b1f61df98fb13b84db39a2e159b80061e1a6 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Tue, 10 Jan 2023 16:09:59 -0500 Subject: [PATCH 155/262] feedback --- docs/en/engines/table-engines/integrations/deltalake.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/deltalake.md b/docs/en/engines/table-engines/integrations/deltalake.md index 5ce044680d4..eb4d8e934a7 100644 --- a/docs/en/engines/table-engines/integrations/deltalake.md +++ b/docs/en/engines/table-engines/integrations/deltalake.md @@ -5,7 +5,7 @@ sidebar_label: DeltaLake # DeltaLake Table Engine -This engine provides a read-only integration with existing Delta Lake tables in Amazon S3. +This engine provides a read-only integration with existing [Delta Lake](https://github.com/delta-io/delta) tables in Amazon S3. 
## Create Table From 75c04945bd5cd80328837a3ed3b9b28efd43f103 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Tue, 10 Jan 2023 16:18:50 -0500 Subject: [PATCH 156/262] spelling --- docs/en/sql-reference/table-functions/deltalake.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/deltalake.md b/docs/en/sql-reference/table-functions/deltalake.md index 6468e51d757..10e7c20e17a 100644 --- a/docs/en/sql-reference/table-functions/deltalake.md +++ b/docs/en/sql-reference/table-functions/deltalake.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/table-functions/deltalake -sidebar_label: DeltLake +sidebar_label: DeltaLake --- # deltaLake Table Function From 563e0e76f929e1366971ab895db7230657eaf802 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Tue, 10 Jan 2023 16:59:34 -0500 Subject: [PATCH 157/262] init --- .../table-engines/integrations/hudi.md | 33 +++++++++++++++++++ docs/en/sql-reference/table-functions/hudi.md | 31 +++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 docs/en/engines/table-engines/integrations/hudi.md create mode 100644 docs/en/sql-reference/table-functions/hudi.md diff --git a/docs/en/engines/table-engines/integrations/hudi.md b/docs/en/engines/table-engines/integrations/hudi.md new file mode 100644 index 00000000000..6da1634ba5a --- /dev/null +++ b/docs/en/engines/table-engines/integrations/hudi.md @@ -0,0 +1,33 @@ +--- +slug: /en/engines/table-engines/integrations/hudi +sidebar_label: Hudi +--- + +# Hudi Table Engine + +This engine provides a read-only integration with existing Apache [Hudi](https://hudi.apache.org/) tables in Amazon S3. + +## Create Table + +Note that the Hudi table must already exist in S3, this command does not take DDL parameters to create a new table. + +``` sql +CREATE TABLE hudi_table + ENGINE = Hudi(path, [aws_access_key_id, aws_secret_access_key,]) +``` + +**Engine parameters** + +- `path` — Bucket url with the path to an existing Hudi table. +- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3). + +**Example** + +```sql +CREATE TABLE hudi_table ENGINE=Hudi('http://mars-doc-test.s3.amazonaws.com/clickhouse-bucket-3/test_table/', 'ABC123', 'Abc+123') +``` + +## See also + +- [hudi table function](/docs/en/sql-reference/table-functions/hudi.md) + diff --git a/docs/en/sql-reference/table-functions/hudi.md b/docs/en/sql-reference/table-functions/hudi.md new file mode 100644 index 00000000000..c1ccd0cda2f --- /dev/null +++ b/docs/en/sql-reference/table-functions/hudi.md @@ -0,0 +1,31 @@ +--- +slug: /en/sql-reference/table-functions/hudi +sidebar_label: Hudi +--- + +# hudi Table Function + +Provides a read-only table-like interface to Apache [Hudi](https://hudi.apache.org/) tables in Amazon S3. + +## Syntax + +``` sql +hudi(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) +``` + +## Arguments + +- `path` — Bucket url with the path to an existing Hudi table in S3. +- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. These parameters are optional. 
If credentials are not specified, they are used from the ClickHouse configuration. For more information see [Using S3 for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3). +- `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file. +- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. +- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, compression will be autodetected by the file extension. + +**Returned value** + +A table with the specified structure for reading data in the specified Hudi table in S3. + +**See Also** + +- [Hudi engine](/docs/en/engines/table-engines/integrations/hudi.md) + From e75df5e76b9f4e5dcdd139e56e3cc2a282bc49a8 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 10 Jan 2023 22:14:54 +0000 Subject: [PATCH 158/262] Test + fix some review comments --- src/Storages/MergeTree/MergeTreeData.cpp | 48 ++++++++++--------- src/Storages/MergeTree/MergeTreeSettings.h | 1 + ...rrect_dealy_for_insert_bug_44902.reference | 6 +++ ...21_incorrect_dealy_for_insert_bug_44902.sh | 24 ++++++++++ 4 files changed, 57 insertions(+), 22 deletions(-) create mode 100644 tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.reference create mode 100755 tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.sh diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 89d90011398..996da3f4b7e 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3739,7 +3739,7 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const Contex toString(parts_count_in_total)); } - size_t outdated_parts_over_threshold = [&]() -> size_t + size_t outdated_parts_over_threshold = 0; { size_t outdated_parts_count_in_partition = 0; if (settings->inactive_parts_to_throw_insert > 0 || settings->inactive_parts_to_delay_insert > 0) @@ -3754,10 +3754,8 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const Contex outdated_parts_count_in_partition); } if (settings->inactive_parts_to_delay_insert > 0 && outdated_parts_count_in_partition >= settings->inactive_parts_to_delay_insert) - return outdated_parts_count_in_partition - settings->inactive_parts_to_delay_insert + 1; - - return 0; - }(); + outdated_parts_over_threshold = outdated_parts_count_in_partition - settings->inactive_parts_to_delay_insert + 1; + } auto [parts_count_in_partition, size_of_partition] = getMaxPartsCountAndSizeForPartition(); size_t average_part_size = parts_count_in_partition ? size_of_partition / parts_count_in_partition : 0; @@ -3765,50 +3763,56 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const Contex = query_settings.parts_to_delay_insert ? query_settings.parts_to_delay_insert : settings->parts_to_delay_insert; const auto active_parts_to_throw_insert = query_settings.parts_to_throw_insert ? 
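A usage sketch for the `hudi` table function documented above, in the same style as the deltaLake example: the bucket URL is the illustrative one from the engine page, and only the mandatory path argument is passed, so credentials are taken from the configuration.

``` sql
SELECT count(*)
FROM hudi('http://mars-doc-test.s3.amazonaws.com/clickhouse-bucket-3/test_table/')
```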
query_settings.parts_to_throw_insert : settings->parts_to_throw_insert; - size_t active_parts_over_threshold = [&](size_t parts_count) -> size_t + size_t active_parts_over_threshold = 0; { bool parts_are_large_enough_in_average = settings->max_avg_part_size_for_too_many_parts && average_part_size > settings->max_avg_part_size_for_too_many_parts; - if (parts_count >= active_parts_to_throw_insert && !parts_are_large_enough_in_average) + if (parts_count_in_partition >= active_parts_to_throw_insert && !parts_are_large_enough_in_average) { ProfileEvents::increment(ProfileEvents::RejectedInserts); throw Exception( ErrorCodes::TOO_MANY_PARTS, "Too many parts ({} with average size of {}). Merges are processing significantly slower than inserts", - parts_count, + parts_count_in_partition, ReadableSize(average_part_size)); } - if (active_parts_to_delay_insert > 0 && parts_count >= active_parts_to_delay_insert && !parts_are_large_enough_in_average) + if (active_parts_to_delay_insert > 0 && parts_count_in_partition >= active_parts_to_delay_insert + && !parts_are_large_enough_in_average) /// if parts_count == parts_to_delay_insert -> we're 1 part over threshold - return parts_count - active_parts_to_delay_insert + 1; - - return 0; - }(parts_count_in_partition); + active_parts_over_threshold = parts_count_in_partition - active_parts_to_delay_insert + 1; + } /// no need for delay if (!active_parts_over_threshold && !outdated_parts_over_threshold) return; - const UInt64 delay_milliseconds = [&]() -> UInt64 + UInt64 delay_milliseconds = 0; { - size_t parts_over_threshold = std::max(active_parts_over_threshold, outdated_parts_over_threshold); + size_t parts_over_threshold = 0; size_t allowed_parts_over_threshold = 1; if (active_parts_over_threshold >= outdated_parts_over_threshold) + { + parts_over_threshold = active_parts_over_threshold; allowed_parts_over_threshold = active_parts_to_throw_insert - active_parts_to_delay_insert; + } else - allowed_parts_over_threshold - = (settings->inactive_parts_to_throw_insert > 0 - ? settings->inactive_parts_to_throw_insert - settings->inactive_parts_to_delay_insert - : outdated_parts_over_threshold); + { + parts_over_threshold = outdated_parts_over_threshold; + allowed_parts_over_threshold = outdated_parts_over_threshold; + if (settings->inactive_parts_to_throw_insert > 0) + allowed_parts_over_threshold = settings->inactive_parts_to_throw_insert - settings->inactive_parts_to_delay_insert; + } - chassert(parts_over_threshold <= allowed_parts_over_threshold); + chassert(allowed_parts_over_threshold > 0 && parts_over_threshold <= allowed_parts_over_threshold); const UInt64 max_delay_milliseconds = (settings->max_delay_to_insert > 0 ? 
settings->max_delay_to_insert * 1000 : 1000); double delay_factor = static_cast(parts_over_threshold) / allowed_parts_over_threshold; + UInt64 min_delay_milliseconds = settings->min_delay_to_insert_ms; /// min() as a save guard here - return std::min(max_delay_milliseconds, static_cast(max_delay_milliseconds * delay_factor)); - }(); + delay_milliseconds = std::max( + min_delay_milliseconds, std::min(max_delay_milliseconds, static_cast(max_delay_milliseconds * delay_factor))); + } ProfileEvents::increment(ProfileEvents::DelayedInserts); ProfileEvents::increment(ProfileEvents::DelayedInsertsMilliseconds, delay_milliseconds); diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 37e9bf5779c..0b8188f67c7 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -74,6 +74,7 @@ struct Settings; M(UInt64, inactive_parts_to_throw_insert, 0, "If more than this number inactive parts in single partition, throw 'Too many inactive parts ...' exception.", 0) \ M(UInt64, max_avg_part_size_for_too_many_parts, 10ULL * 1024 * 1024 * 1024, "The 'too many parts' check according to 'parts_to_delay_insert' and 'parts_to_throw_insert' will be active only if the average part size (in the relevant partition) is not larger than the specified threshold. If it is larger than the specified threshold, the INSERTs will be neither delayed or rejected. This allows to have hundreds of terabytes in a single table on a single server if the parts are successfully merged to larger parts. This does not affect the thresholds on inactive parts or total parts.", 0) \ M(UInt64, max_delay_to_insert, 1, "Max delay of inserting data into MergeTree table in seconds, if there are a lot of unmerged parts in single partition.", 0) \ + M(UInt64, min_delay_to_insert_ms, 10, "Min delay of inserting data into MergeTree table in milliseconds, if there are a lot of unmerged parts in single partition.", 0) \ M(UInt64, max_parts_in_total, 100000, "If more than this number active parts in all partitions in total, throw 'Too many parts ...' exception.", 0) \ \ /* Part removal settings. */ \ diff --git a/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.reference b/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.reference new file mode 100644 index 00000000000..c104ff58aff --- /dev/null +++ b/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.reference @@ -0,0 +1,6 @@ +0 +300 +500 +750 +1000 +TOO_MANY_PARTS diff --git a/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.sh b/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.sh new file mode 100755 index 00000000000..6cbd77b262a --- /dev/null +++ b/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_02521_insert_delay" +# Create MergeTree with settings which allow to insert maximum 5 parts, on 6th it'll throw TOO_MANY_PARTS +$CLICKHOUSE_CLIENT -q "CREATE TABLE test_02521_insert_delay (key UInt32, value String) Engine=MergeTree() ORDER BY tuple() SETTINGS parts_to_delay_insert=1, parts_to_throw_insert=5, max_delay_to_insert=1, min_delay_to_insert_ms=300" +$CLICKHOUSE_CLIENT -q "SYSTEM STOP MERGES test_02521_insert_delay" + +# Every delay is increased by max_delay_to_insert*1000/(parts_to_throw_insert - parts_to_delay_insert + 1), here it's 250ms +# 0-indexed INSERT - no delay, 1-indexed INSERT - 300ms instead of 250ms due to min_delay_to_insert_ms +for i in {0..4} +do + query_id="${CLICKHOUSE_DATABASE}_02521_${i}_$RANDOM$RANDOM" + $CLICKHOUSE_CLIENT --query_id="$query_id" -q "INSERT INTO test_02521_insert_delay SELECT number, toString(number) FROM numbers(${i}, 1)" + $CLICKHOUSE_CLIENT -q "system flush logs" + $CLICKHOUSE_CLIENT --param_query_id="$query_id" -q "select ProfileEvents['DelayedInsertsMilliseconds'] as delay from system.query_log where event_date >= yesterday() and query_id = {query_id:String} order by delay desc limit 1" +done + +$CLICKHOUSE_CLIENT -q "INSERT INTO test_02521_insert_delay VALUES(0, 'This query throws error')" 2>&1 | grep -o 'TOO_MANY_PARTS' + +$CLICKHOUSE_CLIENT -q "DROP TABLE test_02521_insert_delay" From a704cf804a222da2c9c1b9a2219659e7fbe7cff7 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 10 Jan 2023 22:17:28 +0000 Subject: [PATCH 159/262] fix --- tests/ci/sqlancer_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py index 5b268141484..0e328122b9d 100644 --- a/tests/ci/sqlancer_check.py +++ b/tests/ci/sqlancer_check.py @@ -30,7 +30,7 @@ IMAGE_NAME = "clickhouse/sqlancer-test" def get_pull_command(docker_image): - return f"docker pull --network=host {docker_image}" + return f"docker pull {docker_image}" def get_run_command(download_url, workspace_path, image): From 6027b8ee4df1d2327a9b97d712fbd4e9e12fb2a8 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 10 Jan 2023 23:23:07 +0000 Subject: [PATCH 160/262] Remove redundant code --- .../Passes/AggregateFunctionsArithmericOperationsPass.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index c25cff117d2..01072e0b3fc 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -156,7 +156,6 @@ private: { argument->getResultType() }, function_aggregate_function->getParameters(), properties); - auto function_result_type = aggregate_function->getReturnType(); function_node.resolveAsAggregateFunction(std::move(aggregate_function)); } From 9b16b3f48f09d949881ef00ad28a6f466662e334 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 11 Jan 2023 01:03:01 +0100 Subject: [PATCH 161/262] Fix potential memory leak --- contrib/azure | 2 +- src/CMakeLists.txt | 5 ++++ src/Disks/tests/gtest_azure_xml_reader.cpp | 27 ++++++++++++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 src/Disks/tests/gtest_azure_xml_reader.cpp diff --git a/contrib/azure b/contrib/azure index ef75afc075f..000f7ee8fd2 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ 
-Subproject commit ef75afc075fc71fbcd8fe28dcda3794ae265fd1c +Subproject commit 000f7ee8fd22fa69e5ddb8fd6fd36b12c7a1bc2f diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d8a7dba72ac..b20b4a860d3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -606,5 +606,10 @@ if (ENABLE_TESTS) target_link_libraries(unit_tests_dbms PRIVATE ch_contrib::yaml_cpp) endif() + if (TARGET ch_contrib::azure_sdk) + target_link_libraries(unit_tests_dbms PRIVATE ch_contrib::azure_sdk) + endif() + + add_check(unit_tests_dbms) endif () diff --git a/src/Disks/tests/gtest_azure_xml_reader.cpp b/src/Disks/tests/gtest_azure_xml_reader.cpp new file mode 100644 index 00000000000..3caf34f938a --- /dev/null +++ b/src/Disks/tests/gtest_azure_xml_reader.cpp @@ -0,0 +1,27 @@ +#include +#include +#include + +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include + +#include + + +TEST(AzureXMLWrapper, TestLeak) +{ + std::string str = "world"; + + { + Azure::Storage::_internal::XmlReader reader(str.c_str(), str.length()); + reader.Read(); + Azure::Storage::_internal::XmlReader reader2(std::move(reader)); + Azure::Storage::_internal::XmlReader reader3 = std::move(reader2); + } +} + +#endif From be4d79e92426e7d0ac899698986d2e08d7d07c3a Mon Sep 17 00:00:00 2001 From: Zhiguo Zhou Date: Wed, 14 Dec 2022 13:25:05 +0800 Subject: [PATCH 162/262] Deallocate memory of profile events out of critical section To further shrink the critical section for releasing memory of the profile events (ProfileEventsCountersAndMemory), this commit puts the dealloaction out of the critical section while keeping the memory move under lock. This change could mitigate the contention for ThreadGroupStatus::mutex. --- src/Interpreters/ThreadStatusExt.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index e96a8a4b188..4b757e0be7e 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -342,11 +342,14 @@ void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits) query_id.clear(); query_context.reset(); + /// The memory of thread_group->finished_threads_counters_memory is temporarily moved to this vector, which is deallocated out of critical section. 
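The approach described in the commit message above (take the finished counters out of the shared container while holding the lock, but let the memory actually be released only after the lock is dropped) can be illustrated with a minimal standalone sketch; the class and member names here are illustrative stand-ins, not the real ClickHouse types:

```cpp
#include <mutex>
#include <vector>

struct Counters { /* per-thread profile counters and memory stats */ };

class ThreadGroupSketch
{
    std::mutex mutex;
    std::vector<Counters> finished; // guarded by mutex

public:
    void drainFinished()
    {
        std::vector<Counters> move_to_temp; // destroyed after the lock is released
        {
            std::lock_guard<std::mutex> guard(mutex);
            move_to_temp = std::move(finished); // only a cheap buffer handoff happens under the lock
        }
        // move_to_temp goes out of scope here, so the deallocation
        // no longer contends on the mutex
    }
};
```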
+ std::vector move_to_temp; + /// Avoid leaking of ThreadGroupStatus::finished_threads_counters_memory /// (this is in case someone uses system thread but did not call getProfileEventsCountersAndMemoryForThreads()) { std::lock_guard guard(thread_group->mutex); - auto stats = std::move(thread_group->finished_threads_counters_memory); + move_to_temp = std::move(thread_group->finished_threads_counters_memory); } thread_group.reset(); From 67943676630fb45e8c68b5dfbe199c80031a2cbb Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 11 Jan 2023 10:38:17 +0800 Subject: [PATCH 163/262] fix uts --- .../0_stateless/00189_time_zones_long.sql | 10 +- ...00921_datetime64_compatibility_long.python | 136 +++++++++--------- 2 files changed, 77 insertions(+), 69 deletions(-) diff --git a/tests/queries/0_stateless/00189_time_zones_long.sql b/tests/queries/0_stateless/00189_time_zones_long.sql index cf1b9e9ae1d..5760f6c0447 100644 --- a/tests/queries/0_stateless/00189_time_zones_long.sql +++ b/tests/queries/0_stateless/00189_time_zones_long.sql @@ -120,11 +120,11 @@ SELECT toDayOfMonth(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toDayOfWeek */ SELECT 'toDayOfWeek'; -SELECT toDayOfWeek(toDateTime(1412106600), 'Asia/Istanbul'); -SELECT toDayOfWeek(toDateTime(1412106600), 'Europe/Paris'); -SELECT toDayOfWeek(toDateTime(1412106600), 'Europe/London'); -SELECT toDayOfWeek(toDateTime(1412106600), 'Asia/Tokyo'); -SELECT toDayOfWeek(toDateTime(1412106600), 'Pacific/Pitcairn'); +SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Asia/Istanbul'); +SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Europe/Paris'); +SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Europe/London'); +SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Asia/Tokyo'); +SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Pacific/Pitcairn'); /* toHour */ diff --git a/tests/queries/0_stateless/00921_datetime64_compatibility_long.python b/tests/queries/0_stateless/00921_datetime64_compatibility_long.python index e3cd7ee6d36..2706c0f5b12 100644 --- a/tests/queries/0_stateless/00921_datetime64_compatibility_long.python +++ b/tests/queries/0_stateless/00921_datetime64_compatibility_long.python @@ -7,14 +7,14 @@ import sys import argparse # Create SQL statement to verify dateTime64 is accepted as argument to functions taking DateTime. -FUNCTIONS=""" +FUNCTIONS = """ toTimeZone(N, 'UTC') toYear(N, 'Asia/Istanbul') toQuarter(N, 'Asia/Istanbul') toMonth(N, 'Asia/Istanbul') toDayOfYear(N, 'Asia/Istanbul') toDayOfMonth(N, 'Asia/Istanbul') -toDayOfWeek(N, 'Asia/Istanbul') +toDayOfWeek(N, 0, 'Asia/Istanbul') toHour(N, 'Asia/Istanbul') toMinute(N, 'Asia/Istanbul') toSecond(N, 'Asia/Istanbul') @@ -90,68 +90,51 @@ formatDateTime(N, '%C %d %D %e %F %H %I %j %m %M %p %R %S %T %u %V %w %y %Y %%', extra_ops = [ # With same type: ( - ['N {op} N'], + ["N {op} N"], { - 'op': - [ - '- ', # does not work, but should it? - '+ ', # does not work, but should it? - '!=', '==', # equality and inequality supposed to take sub-second part in account - '< ', - '<=', - '> ', - '>=' + "op": [ + "- ", # does not work, but should it? + "+ ", # does not work, but should it? + "!=", + "==", # equality and inequality supposed to take sub-second part in account + "< ", + "<=", + "> ", + ">=", ] - } + }, ), # With other DateTime types: ( - [ - 'N {op} {arg}', - '{arg} {op} N' - ], + ["N {op} {arg}", "{arg} {op} N"], { - 'op': - [ - '-', # does not work, but should it? - '!=', '==', + "op": [ + "-", # does not work, but should it? 
+ "!=", + "==", # these are naturally expected to work, but they don't: - '< ', - '<=', - '> ', - '>=' + "< ", + "<=", + "> ", + ">=", ], - 'arg': ['DT', 'D', 'DT64'], - } + "arg": ["DT", "D", "DT64"], + }, ), # With arithmetic types ( - [ - 'N {op} {arg}', - '{arg} {op} N' - ], + ["N {op} {arg}", "{arg} {op} N"], { - 'op': - [ - '+ ', - '- ', - '==', - '!=', - '< ', - '<=', - '> ', - '>=' - ], - 'arg': - [ - 'toUInt8(1)', - 'toInt8(-1)', - 'toUInt16(1)', - 'toInt16(-1)', - 'toUInt32(1)', - 'toInt32(-1)', - 'toUInt64(1)', - 'toInt64(-1)' + "op": ["+ ", "- ", "==", "!=", "< ", "<=", "> ", ">="], + "arg": [ + "toUInt8(1)", + "toInt8(-1)", + "toUInt16(1)", + "toInt16(-1)", + "toUInt32(1)", + "toInt32(-1)", + "toUInt64(1)", + "toInt64(-1)", ], }, ), @@ -167,14 +150,17 @@ for funcs, args in extra_ops: # filter out empty lines and commented out lines COMMENTED_OUT_LINE_RE = re.compile(r"^\s*#") -FUNCTIONS = list([f for f in FUNCTIONS if len(f) != 0 and COMMENTED_OUT_LINE_RE.match(f) == None]) -TYPES = ['D', 'DT', 'DT64'] +FUNCTIONS = list( + [f for f in FUNCTIONS if len(f) != 0 and COMMENTED_OUT_LINE_RE.match(f) == None] +) +TYPES = ["D", "DT", "DT64"] + def escape_string(s): if sys.version_info[0] > 2: - return s.encode('unicode_escape').decode('utf-8').replace("'", "\\'") + return s.encode("unicode_escape").decode("utf-8").replace("'", "\\'") else: - return s.encode('string-escape').decode('utf-8') + return s.encode("string-escape").decode("utf-8") def execute_functions_for_types(functions, types): @@ -186,18 +172,39 @@ def execute_functions_for_types(functions, types): WITH \ toDateTime64('2019-09-16 19:20:11.234', 3, 'Europe/Minsk') as DT64, \ toDateTime('2019-09-16 19:20:11', 'Europe/Minsk') as DT, \ -toDate('2019-09-16') as D, {X} as N".format(X=dt) - print(("""{prologue} SELECT toTypeName(r), {func} as r FORMAT CSV;""".format(prologue=prologue, func=func))) +toDate('2019-09-16') as D, {X} as N".format( + X=dt + ) + print( + ( + """{prologue} SELECT toTypeName(r), {func} as r FORMAT CSV;""".format( + prologue=prologue, func=func + ) + ) + ) print("""SELECT '------------------------------------------';""") + def main(): def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('--functions_re', type=re.compile, help="RE to enable functions", default=None) - parser.add_argument('--types_re', - type=lambda s: re.compile('^(' + s + ')$'), - help="RE to enable types, supported types: " + ",".join(TYPES), default=None) - parser.add_argument('--list_functions', action='store_true', help="List all functions to be tested and exit") + parser.add_argument( + "--functions_re", + type=re.compile, + help="RE to enable functions", + default=None, + ) + parser.add_argument( + "--types_re", + type=lambda s: re.compile("^(" + s + ")$"), + help="RE to enable types, supported types: " + ",".join(TYPES), + default=None, + ) + parser.add_argument( + "--list_functions", + action="store_true", + help="List all functions to be tested and exit", + ) return parser.parse_args() args = parse_args() @@ -223,5 +230,6 @@ def main(): execute_functions_for_types(functions, types) -if __name__ == '__main__': + +if __name__ == "__main__": exit(main()) From 1167ae47e95fcc80da5b793c1dddf004b0e9ef0b Mon Sep 17 00:00:00 2001 From: MeenaRenganathan22 Date: Tue, 10 Jan 2023 20:56:16 -0800 Subject: [PATCH 164/262] Changes to support the CRC32 in PowerPC to address the WeakHash collision issue. 
Update the reference to support the hash values based on the specific platform --- .gitmodules | 3 + contrib/CMakeLists.txt | 1 + contrib/crc32-vpmsum | 1 + contrib/crc32-vpmsum-cmake/CMakeLists.txt | 12 + contrib/crc32-vpmsum-cmake/README.md | 8 + contrib/crc32-vpmsum-cmake/crc32_constants.h | 1206 +++++++++++++++++ contrib/crc32-vpmsum-cmake/vec_crc32.h | 29 + src/CMakeLists.txt | 4 + src/Common/HashTable/Hash.h | 8 + src/Functions/CMakeLists.txt | 4 + src/Functions/FunctionsStringHash.cpp | 12 + src/Functions/FunctionsStringSimilarity.cpp | 6 + .../01016_simhash_minhash.ppc64le.reference | 148 ++ .../0_stateless/01016_simhash_minhash.python | 394 ++++++ .../0_stateless/01016_simhash_minhash.sh | 8 + .../0_stateless/01016_simhash_minhash.sql | 115 -- ...=> 01016_simhash_minhash.x86_64.reference} | 0 17 files changed, 1844 insertions(+), 115 deletions(-) create mode 160000 contrib/crc32-vpmsum create mode 100644 contrib/crc32-vpmsum-cmake/CMakeLists.txt create mode 100644 contrib/crc32-vpmsum-cmake/README.md create mode 100644 contrib/crc32-vpmsum-cmake/crc32_constants.h create mode 100644 contrib/crc32-vpmsum-cmake/vec_crc32.h create mode 100644 tests/queries/0_stateless/01016_simhash_minhash.ppc64le.reference create mode 100644 tests/queries/0_stateless/01016_simhash_minhash.python create mode 100755 tests/queries/0_stateless/01016_simhash_minhash.sh delete mode 100644 tests/queries/0_stateless/01016_simhash_minhash.sql rename tests/queries/0_stateless/{01016_simhash_minhash.reference => 01016_simhash_minhash.x86_64.reference} (100%) diff --git a/.gitmodules b/.gitmodules index 26824cb57ff..b4673f113b7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -327,3 +327,6 @@ [submodule "contrib/aws-s2n-tls"] path = contrib/aws-s2n-tls url = https://github.com/ClickHouse/s2n-tls +[submodule "contrib/crc32-vpmsum"] + path = contrib/crc32-vpmsum + url = https://github.com/antonblanchard/crc32-vpmsum.git diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 27b4a7ddb5c..f5d1315cc02 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -55,6 +55,7 @@ else () endif () add_contrib (miniselect-cmake miniselect) add_contrib (pdqsort-cmake pdqsort) +add_contrib (crc32-vpmsum-cmake crc32-vpmsum) add_contrib (sparsehash-c11-cmake sparsehash-c11) add_contrib (abseil-cpp-cmake abseil-cpp) add_contrib (magic-enum-cmake magic_enum) diff --git a/contrib/crc32-vpmsum b/contrib/crc32-vpmsum new file mode 160000 index 00000000000..45215543938 --- /dev/null +++ b/contrib/crc32-vpmsum @@ -0,0 +1 @@ +Subproject commit 452155439389311fc7d143621eaf56a258e02476 diff --git a/contrib/crc32-vpmsum-cmake/CMakeLists.txt b/contrib/crc32-vpmsum-cmake/CMakeLists.txt new file mode 100644 index 00000000000..bb7d5618410 --- /dev/null +++ b/contrib/crc32-vpmsum-cmake/CMakeLists.txt @@ -0,0 +1,12 @@ +if (NOT ARCH_PPC64LE) + message(STATUS "crc32-vpmsum library is only supported on ppc64le") + return() +endif() + +SET(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/crc32-vpmsum") + +add_library(_crc32-vpmsum + "${LIBRARY_DIR}/vec_crc32.c" + ) +target_include_directories(_crc32-vpmsum SYSTEM BEFORE PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}") +add_library(ch_contrib::crc32-vpmsum ALIAS _crc32-vpmsum) diff --git a/contrib/crc32-vpmsum-cmake/README.md b/contrib/crc32-vpmsum-cmake/README.md new file mode 100644 index 00000000000..9ea8133e331 --- /dev/null +++ b/contrib/crc32-vpmsum-cmake/README.md @@ -0,0 +1,8 @@ +# To Generate crc32_constants.h + +- Run make file in `../crc32-vpmsum` diretory using folling options and CRC 
polynomial. These options should use the same polynomial and order used by intel intrinisic functions +```bash +make crc32_constants.h CRC="0x11EDC6F41" OPTIONS="-x -r -c" +``` +- move the generated `crc32_constants.h` into this directory +- To understand more about this go here: https://masterchef2209.wordpress.com/2020/06/17/guide-to-intel-sse4-2-crc-intrinisics-implementation-for-simde/ diff --git a/contrib/crc32-vpmsum-cmake/crc32_constants.h b/contrib/crc32-vpmsum-cmake/crc32_constants.h new file mode 100644 index 00000000000..aea525c9038 --- /dev/null +++ b/contrib/crc32-vpmsum-cmake/crc32_constants.h @@ -0,0 +1,1206 @@ +/* +* +* THIS FILE IS GENERATED WITH +./crc32_constants -x -r -c 0x11EDC6F41 + +* This is from https://github.com/antonblanchard/crc32-vpmsum/ +* DO NOT MODIFY IT MANUALLY! +* +*/ + +#define CRC 0x1edc6f41 +#define CRC_XOR +#define REFLECT +#define MAX_SIZE 32768 + +#ifndef __ASSEMBLER__ +#ifdef CRC_TABLE +static const unsigned int crc_table[] = { + 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, + 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, + 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, + 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, + 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, + 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, + 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, + 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, + 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, + 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, + 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, + 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, + 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, + 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a, + 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, + 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, + 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, + 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, + 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, + 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198, + 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, + 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, + 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, + 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, + 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, + 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, + 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, + 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, + 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, + 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, + 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, + 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829, + 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, + 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, + 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, + 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, + 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, + 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc, + 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, + 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, + 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, + 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, + 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, + 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982, + 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, + 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, + 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, + 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, + 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, + 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, + 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, + 0x8ecee914, 
0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, + 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, + 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, + 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, + 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, + 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, + 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, + 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, + 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, + 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, + 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e, + 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, + 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,}; + +#endif /* CRC_TABLE */ +#ifdef POWER8_INTRINSICS + +/* Constants */ + +/* Reduce 262144 kbits to 1024 bits */ +static const __vector unsigned long long vcrc_const[255] + __attribute__((aligned (16))) = { +#ifdef __LITTLE_ENDIAN__ + /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ + { 0x000000009c37c408, 0x00000000b6ca9e20 }, + /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */ + { 0x00000001b51df26c, 0x00000000350249a8 }, + /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ + { 0x000000000724b9d0, 0x00000001862dac54 }, + /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ + { 0x00000001c00532fe, 0x00000001d87fb48c }, + /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ + { 0x00000000f05a9362, 0x00000001f39b699e }, + /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ + { 0x00000001e1007970, 0x0000000101da11b4 }, + /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ + { 0x00000000a57366ee, 0x00000001cab571e0 }, + /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ + { 0x0000000192011284, 0x00000000c7020cfe }, + /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ + { 0x0000000162716d9a, 0x00000000cdaed1ae }, + /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ + { 0x00000000cd97ecde, 0x00000001e804effc }, + /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ + { 0x0000000058812bc0, 0x0000000077c3ea3a }, + /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ + { 0x0000000088b8c12e, 0x0000000068df31b4 }, + /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ + { 0x00000001230b234c, 0x00000000b059b6c2 }, + /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ + { 0x00000001120b416e, 0x0000000145fb8ed8 }, + /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ + { 0x00000001974aecb0, 0x00000000cbc09168 }, + /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ + { 0x000000008ee3f226, 0x000000005ceeedc2 }, + /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ + { 0x00000001089aba9a, 0x0000000047d74e86 }, + /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ + { 0x0000000065113872, 0x00000001407e9e22 }, + /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ + { 0x000000005c07ec10, 0x00000001da967bda }, + /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ + { 0x0000000187590924, 0x000000006c898368 }, + /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ + { 0x00000000e35da7c6, 0x00000000f2d14c98 }, + /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ + { 0x000000000415855a, 0x00000001993c6ad4 }, + /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ + { 0x0000000073617758, 0x000000014683d1ac }, + /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ + { 0x0000000176021d28, 0x00000001a7c93e6c }, + /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ + { 0x00000001c358fd0a, 0x000000010211e90a }, + /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ + { 0x00000001ff7a2c18, 0x000000001119403e }, + /* 
x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ + { 0x00000000f2d9f7e4, 0x000000001c3261aa }, + /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ + { 0x000000016cf1f9c8, 0x000000014e37a634 }, + /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ + { 0x000000010af9279a, 0x0000000073786c0c }, + /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ + { 0x0000000004f101e8, 0x000000011dc037f8 }, + /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ + { 0x0000000070bcf184, 0x0000000031433dfc }, + /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ + { 0x000000000a8de642, 0x000000009cde8348 }, + /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ + { 0x0000000062ea130c, 0x0000000038d3c2a6 }, + /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ + { 0x00000001eb31cbb2, 0x000000011b25f260 }, + /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ + { 0x0000000170783448, 0x000000001629e6f0 }, + /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ + { 0x00000001a684b4c6, 0x0000000160838b4c }, + /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ + { 0x00000000253ca5b4, 0x000000007a44011c }, + /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ + { 0x0000000057b4b1e2, 0x00000000226f417a }, + /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ + { 0x00000000b6bd084c, 0x0000000045eb2eb4 }, + /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ + { 0x0000000123c2d592, 0x000000014459d70c }, + /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ + { 0x00000000159dafce, 0x00000001d406ed82 }, + /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ + { 0x0000000127e1a64e, 0x0000000160c8e1a8 }, + /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ + { 0x0000000056860754, 0x0000000027ba8098 }, + /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ + { 0x00000001e661aae8, 0x000000006d92d018 }, + /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ + { 0x00000000f82c6166, 0x000000012ed7e3f2 }, + /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ + { 0x00000000c4f9c7ae, 0x000000002dc87788 }, + /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ + { 0x0000000074203d20, 0x0000000018240bb8 }, + /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ + { 0x0000000198173052, 0x000000001ad38158 }, + /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ + { 0x00000001ce8aba54, 0x00000001396b78f2 }, + /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ + { 0x00000001850d5d94, 0x000000011a681334 }, + /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ + { 0x00000001d609239c, 0x000000012104732e }, + /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ + { 0x000000001595f048, 0x00000000a140d90c }, + /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ + { 0x0000000042ccee08, 0x00000001b7215eda }, + /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ + { 0x000000010a389d74, 0x00000001aaf1df3c }, + /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ + { 0x000000012a840da6, 0x0000000029d15b8a }, + /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ + { 0x000000001d181c0c, 0x00000000f1a96922 }, + /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ + { 0x0000000068b7d1f6, 0x00000001ac80d03c }, + /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ + { 0x000000005b0f14fc, 0x000000000f11d56a }, + /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ + { 0x0000000179e9e730, 0x00000001f1c022a2 }, + /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ + { 0x00000001ce1368d6, 0x0000000173d00ae2 }, + /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ 
+ { 0x0000000112c3a84c, 0x00000001d4ffe4ac }, + /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ + { 0x00000000de940fee, 0x000000016edc5ae4 }, + /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ + { 0x00000000fe896b7e, 0x00000001f1a02140 }, + /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ + { 0x00000001f797431c, 0x00000000ca0b28a0 }, + /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ + { 0x0000000053e989ba, 0x00000001928e30a2 }, + /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ + { 0x000000003920cd16, 0x0000000097b1b002 }, + /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ + { 0x00000001e6f579b8, 0x00000000b15bf906 }, + /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ + { 0x000000007493cb0a, 0x00000000411c5d52 }, + /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ + { 0x00000001bdd376d8, 0x00000001c36f3300 }, + /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ + { 0x000000016badfee6, 0x00000001119227e0 }, + /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ + { 0x0000000071de5c58, 0x00000000114d4702 }, + /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ + { 0x00000000453f317c, 0x00000000458b5b98 }, + /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ + { 0x0000000121675cce, 0x000000012e31fb8e }, + /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ + { 0x00000001f409ee92, 0x000000005cf619d8 }, + /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ + { 0x00000000f36b9c88, 0x0000000063f4d8b2 }, + /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ + { 0x0000000036b398f4, 0x000000004138dc8a }, + /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ + { 0x00000001748f9adc, 0x00000001d29ee8e0 }, + /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ + { 0x00000001be94ec00, 0x000000006a08ace8 }, + /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ + { 0x00000000b74370d6, 0x0000000127d42010 }, + /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ + { 0x00000001174d0b98, 0x0000000019d76b62 }, + /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ + { 0x00000000befc06a4, 0x00000001b1471f6e }, + /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ + { 0x00000001ae125288, 0x00000001f64c19cc }, + /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ + { 0x0000000095c19b34, 0x00000000003c0ea0 }, + /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ + { 0x00000001a78496f2, 0x000000014d73abf6 }, + /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ + { 0x00000001ac5390a0, 0x00000001620eb844 }, + /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ + { 0x000000002a80ed6e, 0x0000000147655048 }, + /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ + { 0x00000001fa9b0128, 0x0000000067b5077e }, + /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ + { 0x00000001ea94929e, 0x0000000010ffe206 }, + /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ + { 0x0000000125f4305c, 0x000000000fee8f1e }, + /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ + { 0x00000001471e2002, 0x00000001da26fbae }, + /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ + { 0x0000000132d2253a, 0x00000001b3a8bd88 }, + /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ + { 0x00000000f26b3592, 0x00000000e8f3898e }, + /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ + { 0x00000000bc8b67b0, 0x00000000b0d0d28c }, + /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ + { 0x000000013a826ef2, 0x0000000030f2a798 }, + /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ + { 0x0000000081482c84, 0x000000000fba1002 }, + /* 
x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ + { 0x00000000e77307c2, 0x00000000bdb9bd72 }, + /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ + { 0x00000000d4a07ec8, 0x0000000075d3bf5a }, + /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ + { 0x0000000017102100, 0x00000000ef1f98a0 }, + /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ + { 0x00000000db406486, 0x00000000689c7602 }, + /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ + { 0x0000000192db7f88, 0x000000016d5fa5fe }, + /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ + { 0x000000018bf67b1e, 0x00000001d0d2b9ca }, + /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ + { 0x000000007c09163e, 0x0000000041e7b470 }, + /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ + { 0x000000000adac060, 0x00000001cbb6495e }, + /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ + { 0x00000000bd8316ae, 0x000000010052a0b0 }, + /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ + { 0x000000019f09ab54, 0x00000001d8effb5c }, + /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ + { 0x0000000125155542, 0x00000001d969853c }, + /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ + { 0x000000018fdb5882, 0x00000000523ccce2 }, + /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ + { 0x00000000e794b3f4, 0x000000001e2436bc }, + /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ + { 0x000000016f9bb022, 0x00000000ddd1c3a2 }, + /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ + { 0x00000000290c9978, 0x0000000019fcfe38 }, + /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ + { 0x0000000083c0f350, 0x00000001ce95db64 }, + /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ + { 0x0000000173ea6628, 0x00000000af582806 }, + /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ + { 0x00000001c8b4e00a, 0x00000001006388f6 }, + /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ + { 0x00000000de95d6aa, 0x0000000179eca00a }, + /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ + { 0x000000010b7f7248, 0x0000000122410a6a }, + /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ + { 0x00000001326e3a06, 0x000000004288e87c }, + /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ + { 0x00000000bb62c2e6, 0x000000016c5490da }, + /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ + { 0x0000000156a4b2c2, 0x00000000d1c71f6e }, + /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ + { 0x000000011dfe763a, 0x00000001b4ce08a6 }, + /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ + { 0x000000007bcca8e2, 0x00000001466ba60c }, + /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ + { 0x0000000186118faa, 0x00000001f6c488a4 }, + /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ + { 0x0000000111a65a88, 0x000000013bfb0682 }, + /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ + { 0x000000003565e1c4, 0x00000000690e9e54 }, + /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ + { 0x000000012ed02a82, 0x00000000281346b6 }, + /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ + { 0x00000000c486ecfc, 0x0000000156464024 }, + /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ + { 0x0000000001b951b2, 0x000000016063a8dc }, + /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ + { 0x0000000048143916, 0x0000000116a66362 }, + /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ + { 0x00000001dc2ae124, 0x000000017e8aa4d2 }, + /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ + { 0x00000001416c58d6, 0x00000001728eb10c }, + /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ 
+ { 0x00000000a479744a, 0x00000001b08fd7fa }, + /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ + { 0x0000000096ca3a26, 0x00000001092a16e8 }, + /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ + { 0x00000000ff223d4e, 0x00000000a505637c }, + /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ + { 0x000000010e84da42, 0x00000000d94869b2 }, + /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ + { 0x00000001b61ba3d0, 0x00000001c8b203ae }, + /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ + { 0x00000000680f2de8, 0x000000005704aea0 }, + /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ + { 0x000000008772a9a8, 0x000000012e295fa2 }, + /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ + { 0x0000000155f295bc, 0x000000011d0908bc }, + /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ + { 0x00000000595f9282, 0x0000000193ed97ea }, + /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ + { 0x0000000164b1c25a, 0x000000013a0f1c52 }, + /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ + { 0x00000000fbd67c50, 0x000000010c2c40c0 }, + /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ + { 0x0000000096076268, 0x00000000ff6fac3e }, + /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ + { 0x00000001d288e4cc, 0x000000017b3609c0 }, + /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ + { 0x00000001eaac1bdc, 0x0000000088c8c922 }, + /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ + { 0x00000001f1ea39e2, 0x00000001751baae6 }, + /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ + { 0x00000001eb6506fc, 0x0000000107952972 }, + /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ + { 0x000000010f806ffe, 0x0000000162b00abe }, + /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ + { 0x000000010408481e, 0x000000000d7b404c }, + /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ + { 0x0000000188260534, 0x00000000763b13d4 }, + /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ + { 0x0000000058fc73e0, 0x00000000f6dc22d8 }, + /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ + { 0x00000000391c59b8, 0x000000007daae060 }, + /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ + { 0x000000018b638400, 0x000000013359ab7c }, + /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ + { 0x000000011738f5c4, 0x000000008add438a }, + /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ + { 0x000000008cf7c6da, 0x00000001edbefdea }, + /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ + { 0x00000001ef97fb16, 0x000000004104e0f8 }, + /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ + { 0x0000000102130e20, 0x00000000b48a8222 }, + /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ + { 0x00000000db968898, 0x00000001bcb46844 }, + /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ + { 0x00000000b5047b5e, 0x000000013293ce0a }, + /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ + { 0x000000010b90fdb2, 0x00000001710d0844 }, + /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ + { 0x000000004834a32e, 0x0000000117907f6e }, + /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ + { 0x0000000059c8f2b0, 0x0000000087ddf93e }, + /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ + { 0x0000000122cec508, 0x000000005970e9b0 }, + /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ + { 0x000000000a330cda, 0x0000000185b2b7d0 }, + /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ + { 0x000000014a47148c, 0x00000001dcee0efc }, + /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ + { 0x0000000042c61cb8, 0x0000000030da2722 }, + /* x^93184 mod 
p(x)` << 1, x^93248 mod p(x)` << 1 */ + { 0x0000000012fe6960, 0x000000012f925a18 }, + /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ + { 0x00000000dbda2c20, 0x00000000dd2e357c }, + /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ + { 0x000000011122410c, 0x00000000071c80de }, + /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ + { 0x00000000977b2070, 0x000000011513140a }, + /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ + { 0x000000014050438e, 0x00000001df876e8e }, + /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ + { 0x0000000147c840e8, 0x000000015f81d6ce }, + /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ + { 0x00000001cc7c88ce, 0x000000019dd94dbe }, + /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ + { 0x00000001476b35a4, 0x00000001373d206e }, + /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ + { 0x000000013d52d508, 0x00000000668ccade }, + /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ + { 0x000000008e4be32e, 0x00000001b192d268 }, + /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ + { 0x00000000024120fe, 0x00000000e30f3a78 }, + /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ + { 0x00000000ddecddb4, 0x000000010ef1f7bc }, + /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ + { 0x00000000d4d403bc, 0x00000001f5ac7380 }, + /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ + { 0x00000001734b89aa, 0x000000011822ea70 }, + /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ + { 0x000000010e7a58d6, 0x00000000c3a33848 }, + /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ + { 0x00000001f9f04e9c, 0x00000001bd151c24 }, + /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ + { 0x00000000b692225e, 0x0000000056002d76 }, + /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ + { 0x000000019b8d3f3e, 0x000000014657c4f4 }, + /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ + { 0x00000001a874f11e, 0x0000000113742d7c }, + /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ + { 0x000000010d5a4254, 0x000000019c5920ba }, + /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ + { 0x00000000bbb2f5d6, 0x000000005216d2d6 }, + /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ + { 0x0000000179cc0e36, 0x0000000136f5ad8a }, + /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ + { 0x00000001dca1da4a, 0x000000018b07beb6 }, + /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ + { 0x00000000feb1a192, 0x00000000db1e93b0 }, + /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ + { 0x00000000d1eeedd6, 0x000000000b96fa3a }, + /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ + { 0x000000008fad9bb4, 0x00000001d9968af0 }, + /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ + { 0x00000001884938e4, 0x000000000e4a77a2 }, + /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ + { 0x00000001bc2e9bc0, 0x00000000508c2ac8 }, + /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ + { 0x00000001f9658a68, 0x0000000021572a80 }, + /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ + { 0x000000001b9224fc, 0x00000001b859daf2 }, + /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ + { 0x0000000055b2fb84, 0x000000016f788474 }, + /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ + { 0x000000018b090348, 0x00000001b438810e }, + /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ + { 0x000000011ccbd5ea, 0x0000000095ddc6f2 }, + /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ + { 0x0000000007ae47f8, 0x00000001d977c20c }, + /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ + { 0x0000000172acbec0, 0x00000000ebedb99a }, + /* x^57344 mod p(x)` << 1, x^57408 
mod p(x)` << 1 */ + { 0x00000001c6e3ff20, 0x00000001df9e9e92 }, + /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ + { 0x00000000e1b38744, 0x00000001a4a3f952 }, + /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ + { 0x00000000791585b2, 0x00000000e2f51220 }, + /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ + { 0x00000000ac53b894, 0x000000004aa01f3e }, + /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ + { 0x00000001ed5f2cf4, 0x00000000b3e90a58 }, + /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ + { 0x00000001df48b2e0, 0x000000000c9ca2aa }, + /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ + { 0x00000000049c1c62, 0x0000000151682316 }, + /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ + { 0x000000017c460c12, 0x0000000036fce78c }, + /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ + { 0x000000015be4da7e, 0x000000009037dc10 }, + /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ + { 0x000000010f38f668, 0x00000000d3298582 }, + /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ + { 0x0000000039f40a00, 0x00000001b42e8ad6 }, + /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ + { 0x00000000bd4c10c4, 0x00000000142a9838 }, + /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ + { 0x0000000042db1d98, 0x0000000109c7f190 }, + /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ + { 0x00000001c905bae6, 0x0000000056ff9310 }, + /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ + { 0x00000000069d40ea, 0x00000001594513aa }, + /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ + { 0x000000008e4fbad0, 0x00000001e3b5b1e8 }, + /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ + { 0x0000000047bedd46, 0x000000011dd5fc08 }, + /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ + { 0x0000000026396bf8, 0x00000001675f0cc2 }, + /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ + { 0x00000000379beb92, 0x00000000d1c8dd44 }, + /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ + { 0x000000000abae54a, 0x0000000115ebd3d8 }, + /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ + { 0x0000000007e6a128, 0x00000001ecbd0dac }, + /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ + { 0x000000000ade29d2, 0x00000000cdf67af2 }, + /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ + { 0x00000000f974c45c, 0x000000004c01ff4c }, + /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ + { 0x00000000e77ac60a, 0x00000000f2d8657e }, + /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ + { 0x0000000145895816, 0x000000006bae74c4 }, + /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ + { 0x0000000038e362be, 0x0000000152af8aa0 }, + /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ + { 0x000000007f991a64, 0x0000000004663802 }, + /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ + { 0x00000000fa366d3a, 0x00000001ab2f5afc }, + /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ + { 0x00000001a2bb34f0, 0x0000000074a4ebd4 }, + /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ + { 0x0000000028a9981e, 0x00000001d7ab3a4c }, + /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ + { 0x00000001dbc672be, 0x00000001a8da60c6 }, + /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ + { 0x00000000b04d77f6, 0x000000013cf63820 }, + /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ + { 0x0000000124400d96, 0x00000000bec12e1e }, + /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ + { 0x000000014ca4b414, 0x00000001c6368010 }, + /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ + { 0x000000012fe2c938, 0x00000001e6e78758 }, + /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ + 
{ 0x00000001faed01e6, 0x000000008d7f2b3c }, + /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ + { 0x000000007e80ecfe, 0x000000016b4a156e }, + /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ + { 0x0000000098daee94, 0x00000001c63cfeb6 }, + /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ + { 0x000000010a04edea, 0x000000015f902670 }, + /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ + { 0x00000001c00b4524, 0x00000001cd5de11e }, + /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ + { 0x0000000170296550, 0x000000001acaec54 }, + /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ + { 0x0000000181afaa48, 0x000000002bd0ca78 }, + /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ + { 0x0000000185a31ffa, 0x0000000032d63d5c }, + /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ + { 0x000000002469f608, 0x000000001c6d4e4c }, + /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ + { 0x000000006980102a, 0x0000000106a60b92 }, + /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ + { 0x0000000111ea9ca8, 0x00000000d3855e12 }, + /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ + { 0x00000001bd1d29ce, 0x00000000e3125636 }, + /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ + { 0x00000001b34b9580, 0x000000009e8f7ea4 }, + /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ + { 0x000000003076054e, 0x00000001c82e562c }, + /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ + { 0x000000012a608ea4, 0x00000000ca9f09ce }, + /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ + { 0x00000000784d05fe, 0x00000000c63764e6 }, + /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ + { 0x000000016ef0d82a, 0x0000000168d2e49e }, + /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ + { 0x0000000075bda454, 0x00000000e986c148 }, + /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ + { 0x000000003dc0a1c4, 0x00000000cfb65894 }, + /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ + { 0x00000000e9a5d8be, 0x0000000111cadee4 }, + /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ + { 0x00000001609bc4b4, 0x0000000171fb63ce } +#else /* __LITTLE_ENDIAN__ */ + /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ + { 0x00000000b6ca9e20, 0x000000009c37c408 }, + /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */ + { 0x00000000350249a8, 0x00000001b51df26c }, + /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ + { 0x00000001862dac54, 0x000000000724b9d0 }, + /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ + { 0x00000001d87fb48c, 0x00000001c00532fe }, + /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ + { 0x00000001f39b699e, 0x00000000f05a9362 }, + /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ + { 0x0000000101da11b4, 0x00000001e1007970 }, + /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ + { 0x00000001cab571e0, 0x00000000a57366ee }, + /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ + { 0x00000000c7020cfe, 0x0000000192011284 }, + /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ + { 0x00000000cdaed1ae, 0x0000000162716d9a }, + /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ + { 0x00000001e804effc, 0x00000000cd97ecde }, + /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ + { 0x0000000077c3ea3a, 0x0000000058812bc0 }, + /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ + { 0x0000000068df31b4, 0x0000000088b8c12e }, + /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ + { 0x00000000b059b6c2, 0x00000001230b234c }, + /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ + { 0x0000000145fb8ed8, 0x00000001120b416e }, + /* x^246784 mod p(x)` << 1, 
x^246848 mod p(x)` << 1 */ + { 0x00000000cbc09168, 0x00000001974aecb0 }, + /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ + { 0x000000005ceeedc2, 0x000000008ee3f226 }, + /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ + { 0x0000000047d74e86, 0x00000001089aba9a }, + /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ + { 0x00000001407e9e22, 0x0000000065113872 }, + /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ + { 0x00000001da967bda, 0x000000005c07ec10 }, + /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ + { 0x000000006c898368, 0x0000000187590924 }, + /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ + { 0x00000000f2d14c98, 0x00000000e35da7c6 }, + /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ + { 0x00000001993c6ad4, 0x000000000415855a }, + /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ + { 0x000000014683d1ac, 0x0000000073617758 }, + /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ + { 0x00000001a7c93e6c, 0x0000000176021d28 }, + /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ + { 0x000000010211e90a, 0x00000001c358fd0a }, + /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ + { 0x000000001119403e, 0x00000001ff7a2c18 }, + /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ + { 0x000000001c3261aa, 0x00000000f2d9f7e4 }, + /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ + { 0x000000014e37a634, 0x000000016cf1f9c8 }, + /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ + { 0x0000000073786c0c, 0x000000010af9279a }, + /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ + { 0x000000011dc037f8, 0x0000000004f101e8 }, + /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ + { 0x0000000031433dfc, 0x0000000070bcf184 }, + /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ + { 0x000000009cde8348, 0x000000000a8de642 }, + /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ + { 0x0000000038d3c2a6, 0x0000000062ea130c }, + /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ + { 0x000000011b25f260, 0x00000001eb31cbb2 }, + /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ + { 0x000000001629e6f0, 0x0000000170783448 }, + /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ + { 0x0000000160838b4c, 0x00000001a684b4c6 }, + /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ + { 0x000000007a44011c, 0x00000000253ca5b4 }, + /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ + { 0x00000000226f417a, 0x0000000057b4b1e2 }, + /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ + { 0x0000000045eb2eb4, 0x00000000b6bd084c }, + /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ + { 0x000000014459d70c, 0x0000000123c2d592 }, + /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ + { 0x00000001d406ed82, 0x00000000159dafce }, + /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ + { 0x0000000160c8e1a8, 0x0000000127e1a64e }, + /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ + { 0x0000000027ba8098, 0x0000000056860754 }, + /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ + { 0x000000006d92d018, 0x00000001e661aae8 }, + /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ + { 0x000000012ed7e3f2, 0x00000000f82c6166 }, + /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ + { 0x000000002dc87788, 0x00000000c4f9c7ae }, + /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ + { 0x0000000018240bb8, 0x0000000074203d20 }, + /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ + { 0x000000001ad38158, 0x0000000198173052 }, + /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ + { 0x00000001396b78f2, 
0x00000001ce8aba54 }, + /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ + { 0x000000011a681334, 0x00000001850d5d94 }, + /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ + { 0x000000012104732e, 0x00000001d609239c }, + /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ + { 0x00000000a140d90c, 0x000000001595f048 }, + /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ + { 0x00000001b7215eda, 0x0000000042ccee08 }, + /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ + { 0x00000001aaf1df3c, 0x000000010a389d74 }, + /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ + { 0x0000000029d15b8a, 0x000000012a840da6 }, + /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ + { 0x00000000f1a96922, 0x000000001d181c0c }, + /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ + { 0x00000001ac80d03c, 0x0000000068b7d1f6 }, + /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ + { 0x000000000f11d56a, 0x000000005b0f14fc }, + /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ + { 0x00000001f1c022a2, 0x0000000179e9e730 }, + /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ + { 0x0000000173d00ae2, 0x00000001ce1368d6 }, + /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ + { 0x00000001d4ffe4ac, 0x0000000112c3a84c }, + /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ + { 0x000000016edc5ae4, 0x00000000de940fee }, + /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ + { 0x00000001f1a02140, 0x00000000fe896b7e }, + /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ + { 0x00000000ca0b28a0, 0x00000001f797431c }, + /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ + { 0x00000001928e30a2, 0x0000000053e989ba }, + /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ + { 0x0000000097b1b002, 0x000000003920cd16 }, + /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ + { 0x00000000b15bf906, 0x00000001e6f579b8 }, + /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ + { 0x00000000411c5d52, 0x000000007493cb0a }, + /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ + { 0x00000001c36f3300, 0x00000001bdd376d8 }, + /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ + { 0x00000001119227e0, 0x000000016badfee6 }, + /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ + { 0x00000000114d4702, 0x0000000071de5c58 }, + /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ + { 0x00000000458b5b98, 0x00000000453f317c }, + /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ + { 0x000000012e31fb8e, 0x0000000121675cce }, + /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ + { 0x000000005cf619d8, 0x00000001f409ee92 }, + /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ + { 0x0000000063f4d8b2, 0x00000000f36b9c88 }, + /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ + { 0x000000004138dc8a, 0x0000000036b398f4 }, + /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ + { 0x00000001d29ee8e0, 0x00000001748f9adc }, + /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ + { 0x000000006a08ace8, 0x00000001be94ec00 }, + /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ + { 0x0000000127d42010, 0x00000000b74370d6 }, + /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ + { 0x0000000019d76b62, 0x00000001174d0b98 }, + /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ + { 0x00000001b1471f6e, 0x00000000befc06a4 }, + /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ + { 0x00000001f64c19cc, 0x00000001ae125288 }, + /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ + { 0x00000000003c0ea0, 0x0000000095c19b34 }, + /* x^176128 mod p(x)` << 1, 
x^176192 mod p(x)` << 1 */ + { 0x000000014d73abf6, 0x00000001a78496f2 }, + /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ + { 0x00000001620eb844, 0x00000001ac5390a0 }, + /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ + { 0x0000000147655048, 0x000000002a80ed6e }, + /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ + { 0x0000000067b5077e, 0x00000001fa9b0128 }, + /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ + { 0x0000000010ffe206, 0x00000001ea94929e }, + /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ + { 0x000000000fee8f1e, 0x0000000125f4305c }, + /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ + { 0x00000001da26fbae, 0x00000001471e2002 }, + /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ + { 0x00000001b3a8bd88, 0x0000000132d2253a }, + /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ + { 0x00000000e8f3898e, 0x00000000f26b3592 }, + /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ + { 0x00000000b0d0d28c, 0x00000000bc8b67b0 }, + /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ + { 0x0000000030f2a798, 0x000000013a826ef2 }, + /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ + { 0x000000000fba1002, 0x0000000081482c84 }, + /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ + { 0x00000000bdb9bd72, 0x00000000e77307c2 }, + /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ + { 0x0000000075d3bf5a, 0x00000000d4a07ec8 }, + /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ + { 0x00000000ef1f98a0, 0x0000000017102100 }, + /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ + { 0x00000000689c7602, 0x00000000db406486 }, + /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ + { 0x000000016d5fa5fe, 0x0000000192db7f88 }, + /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ + { 0x00000001d0d2b9ca, 0x000000018bf67b1e }, + /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ + { 0x0000000041e7b470, 0x000000007c09163e }, + /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ + { 0x00000001cbb6495e, 0x000000000adac060 }, + /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ + { 0x000000010052a0b0, 0x00000000bd8316ae }, + /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ + { 0x00000001d8effb5c, 0x000000019f09ab54 }, + /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ + { 0x00000001d969853c, 0x0000000125155542 }, + /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ + { 0x00000000523ccce2, 0x000000018fdb5882 }, + /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ + { 0x000000001e2436bc, 0x00000000e794b3f4 }, + /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ + { 0x00000000ddd1c3a2, 0x000000016f9bb022 }, + /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ + { 0x0000000019fcfe38, 0x00000000290c9978 }, + /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ + { 0x00000001ce95db64, 0x0000000083c0f350 }, + /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ + { 0x00000000af582806, 0x0000000173ea6628 }, + /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ + { 0x00000001006388f6, 0x00000001c8b4e00a }, + /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ + { 0x0000000179eca00a, 0x00000000de95d6aa }, + /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ + { 0x0000000122410a6a, 0x000000010b7f7248 }, + /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ + { 0x000000004288e87c, 0x00000001326e3a06 }, + /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ + { 0x000000016c5490da, 0x00000000bb62c2e6 }, + /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ + { 0x00000000d1c71f6e, 
0x0000000156a4b2c2 }, + /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ + { 0x00000001b4ce08a6, 0x000000011dfe763a }, + /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ + { 0x00000001466ba60c, 0x000000007bcca8e2 }, + /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ + { 0x00000001f6c488a4, 0x0000000186118faa }, + /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ + { 0x000000013bfb0682, 0x0000000111a65a88 }, + /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ + { 0x00000000690e9e54, 0x000000003565e1c4 }, + /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ + { 0x00000000281346b6, 0x000000012ed02a82 }, + /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ + { 0x0000000156464024, 0x00000000c486ecfc }, + /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ + { 0x000000016063a8dc, 0x0000000001b951b2 }, + /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ + { 0x0000000116a66362, 0x0000000048143916 }, + /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ + { 0x000000017e8aa4d2, 0x00000001dc2ae124 }, + /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ + { 0x00000001728eb10c, 0x00000001416c58d6 }, + /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ + { 0x00000001b08fd7fa, 0x00000000a479744a }, + /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ + { 0x00000001092a16e8, 0x0000000096ca3a26 }, + /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ + { 0x00000000a505637c, 0x00000000ff223d4e }, + /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ + { 0x00000000d94869b2, 0x000000010e84da42 }, + /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ + { 0x00000001c8b203ae, 0x00000001b61ba3d0 }, + /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ + { 0x000000005704aea0, 0x00000000680f2de8 }, + /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ + { 0x000000012e295fa2, 0x000000008772a9a8 }, + /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ + { 0x000000011d0908bc, 0x0000000155f295bc }, + /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ + { 0x0000000193ed97ea, 0x00000000595f9282 }, + /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ + { 0x000000013a0f1c52, 0x0000000164b1c25a }, + /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ + { 0x000000010c2c40c0, 0x00000000fbd67c50 }, + /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ + { 0x00000000ff6fac3e, 0x0000000096076268 }, + /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ + { 0x000000017b3609c0, 0x00000001d288e4cc }, + /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ + { 0x0000000088c8c922, 0x00000001eaac1bdc }, + /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ + { 0x00000001751baae6, 0x00000001f1ea39e2 }, + /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ + { 0x0000000107952972, 0x00000001eb6506fc }, + /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ + { 0x0000000162b00abe, 0x000000010f806ffe }, + /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ + { 0x000000000d7b404c, 0x000000010408481e }, + /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ + { 0x00000000763b13d4, 0x0000000188260534 }, + /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ + { 0x00000000f6dc22d8, 0x0000000058fc73e0 }, + /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ + { 0x000000007daae060, 0x00000000391c59b8 }, + /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ + { 0x000000013359ab7c, 0x000000018b638400 }, + /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ + { 0x000000008add438a, 0x000000011738f5c4 }, + /* x^105472 mod p(x)` << 1, 
x^105536 mod p(x)` << 1 */ + { 0x00000001edbefdea, 0x000000008cf7c6da }, + /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ + { 0x000000004104e0f8, 0x00000001ef97fb16 }, + /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ + { 0x00000000b48a8222, 0x0000000102130e20 }, + /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ + { 0x00000001bcb46844, 0x00000000db968898 }, + /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ + { 0x000000013293ce0a, 0x00000000b5047b5e }, + /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ + { 0x00000001710d0844, 0x000000010b90fdb2 }, + /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ + { 0x0000000117907f6e, 0x000000004834a32e }, + /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ + { 0x0000000087ddf93e, 0x0000000059c8f2b0 }, + /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ + { 0x000000005970e9b0, 0x0000000122cec508 }, + /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ + { 0x0000000185b2b7d0, 0x000000000a330cda }, + /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ + { 0x00000001dcee0efc, 0x000000014a47148c }, + /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ + { 0x0000000030da2722, 0x0000000042c61cb8 }, + /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ + { 0x000000012f925a18, 0x0000000012fe6960 }, + /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ + { 0x00000000dd2e357c, 0x00000000dbda2c20 }, + /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ + { 0x00000000071c80de, 0x000000011122410c }, + /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ + { 0x000000011513140a, 0x00000000977b2070 }, + /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ + { 0x00000001df876e8e, 0x000000014050438e }, + /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ + { 0x000000015f81d6ce, 0x0000000147c840e8 }, + /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ + { 0x000000019dd94dbe, 0x00000001cc7c88ce }, + /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ + { 0x00000001373d206e, 0x00000001476b35a4 }, + /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ + { 0x00000000668ccade, 0x000000013d52d508 }, + /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ + { 0x00000001b192d268, 0x000000008e4be32e }, + /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ + { 0x00000000e30f3a78, 0x00000000024120fe }, + /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ + { 0x000000010ef1f7bc, 0x00000000ddecddb4 }, + /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ + { 0x00000001f5ac7380, 0x00000000d4d403bc }, + /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ + { 0x000000011822ea70, 0x00000001734b89aa }, + /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ + { 0x00000000c3a33848, 0x000000010e7a58d6 }, + /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ + { 0x00000001bd151c24, 0x00000001f9f04e9c }, + /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ + { 0x0000000056002d76, 0x00000000b692225e }, + /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ + { 0x000000014657c4f4, 0x000000019b8d3f3e }, + /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ + { 0x0000000113742d7c, 0x00000001a874f11e }, + /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ + { 0x000000019c5920ba, 0x000000010d5a4254 }, + /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ + { 0x000000005216d2d6, 0x00000000bbb2f5d6 }, + /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ + { 0x0000000136f5ad8a, 0x0000000179cc0e36 }, + /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ + { 0x000000018b07beb6, 0x00000001dca1da4a }, + /* x^69632 mod p(x)` << 1, x^69696 
mod p(x)` << 1 */ + { 0x00000000db1e93b0, 0x00000000feb1a192 }, + /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ + { 0x000000000b96fa3a, 0x00000000d1eeedd6 }, + /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ + { 0x00000001d9968af0, 0x000000008fad9bb4 }, + /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ + { 0x000000000e4a77a2, 0x00000001884938e4 }, + /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ + { 0x00000000508c2ac8, 0x00000001bc2e9bc0 }, + /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ + { 0x0000000021572a80, 0x00000001f9658a68 }, + /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ + { 0x00000001b859daf2, 0x000000001b9224fc }, + /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ + { 0x000000016f788474, 0x0000000055b2fb84 }, + /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ + { 0x00000001b438810e, 0x000000018b090348 }, + /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ + { 0x0000000095ddc6f2, 0x000000011ccbd5ea }, + /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ + { 0x00000001d977c20c, 0x0000000007ae47f8 }, + /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ + { 0x00000000ebedb99a, 0x0000000172acbec0 }, + /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ + { 0x00000001df9e9e92, 0x00000001c6e3ff20 }, + /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ + { 0x00000001a4a3f952, 0x00000000e1b38744 }, + /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ + { 0x00000000e2f51220, 0x00000000791585b2 }, + /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ + { 0x000000004aa01f3e, 0x00000000ac53b894 }, + /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ + { 0x00000000b3e90a58, 0x00000001ed5f2cf4 }, + /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ + { 0x000000000c9ca2aa, 0x00000001df48b2e0 }, + /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ + { 0x0000000151682316, 0x00000000049c1c62 }, + /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ + { 0x0000000036fce78c, 0x000000017c460c12 }, + /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ + { 0x000000009037dc10, 0x000000015be4da7e }, + /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ + { 0x00000000d3298582, 0x000000010f38f668 }, + /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ + { 0x00000001b42e8ad6, 0x0000000039f40a00 }, + /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ + { 0x00000000142a9838, 0x00000000bd4c10c4 }, + /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ + { 0x0000000109c7f190, 0x0000000042db1d98 }, + /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ + { 0x0000000056ff9310, 0x00000001c905bae6 }, + /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ + { 0x00000001594513aa, 0x00000000069d40ea }, + /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ + { 0x00000001e3b5b1e8, 0x000000008e4fbad0 }, + /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ + { 0x000000011dd5fc08, 0x0000000047bedd46 }, + /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ + { 0x00000001675f0cc2, 0x0000000026396bf8 }, + /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ + { 0x00000000d1c8dd44, 0x00000000379beb92 }, + /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ + { 0x0000000115ebd3d8, 0x000000000abae54a }, + /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ + { 0x00000001ecbd0dac, 0x0000000007e6a128 }, + /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ + { 0x00000000cdf67af2, 0x000000000ade29d2 }, + /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ + { 0x000000004c01ff4c, 0x00000000f974c45c }, + /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ + 
{ 0x00000000f2d8657e, 0x00000000e77ac60a }, + /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ + { 0x000000006bae74c4, 0x0000000145895816 }, + /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ + { 0x0000000152af8aa0, 0x0000000038e362be }, + /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ + { 0x0000000004663802, 0x000000007f991a64 }, + /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ + { 0x00000001ab2f5afc, 0x00000000fa366d3a }, + /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ + { 0x0000000074a4ebd4, 0x00000001a2bb34f0 }, + /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ + { 0x00000001d7ab3a4c, 0x0000000028a9981e }, + /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ + { 0x00000001a8da60c6, 0x00000001dbc672be }, + /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ + { 0x000000013cf63820, 0x00000000b04d77f6 }, + /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ + { 0x00000000bec12e1e, 0x0000000124400d96 }, + /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ + { 0x00000001c6368010, 0x000000014ca4b414 }, + /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ + { 0x00000001e6e78758, 0x000000012fe2c938 }, + /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ + { 0x000000008d7f2b3c, 0x00000001faed01e6 }, + /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ + { 0x000000016b4a156e, 0x000000007e80ecfe }, + /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ + { 0x00000001c63cfeb6, 0x0000000098daee94 }, + /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ + { 0x000000015f902670, 0x000000010a04edea }, + /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ + { 0x00000001cd5de11e, 0x00000001c00b4524 }, + /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ + { 0x000000001acaec54, 0x0000000170296550 }, + /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ + { 0x000000002bd0ca78, 0x0000000181afaa48 }, + /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ + { 0x0000000032d63d5c, 0x0000000185a31ffa }, + /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ + { 0x000000001c6d4e4c, 0x000000002469f608 }, + /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ + { 0x0000000106a60b92, 0x000000006980102a }, + /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ + { 0x00000000d3855e12, 0x0000000111ea9ca8 }, + /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ + { 0x00000000e3125636, 0x00000001bd1d29ce }, + /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ + { 0x000000009e8f7ea4, 0x00000001b34b9580 }, + /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ + { 0x00000001c82e562c, 0x000000003076054e }, + /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ + { 0x00000000ca9f09ce, 0x000000012a608ea4 }, + /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ + { 0x00000000c63764e6, 0x00000000784d05fe }, + /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ + { 0x0000000168d2e49e, 0x000000016ef0d82a }, + /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ + { 0x00000000e986c148, 0x0000000075bda454 }, + /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ + { 0x00000000cfb65894, 0x000000003dc0a1c4 }, + /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ + { 0x0000000111cadee4, 0x00000000e9a5d8be }, + /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ + { 0x0000000171fb63ce, 0x00000001609bc4b4 } +#endif /* __LITTLE_ENDIAN__ */ + }; + +/* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */ + +static const __vector unsigned long long vcrc_short_const[16] + __attribute__((aligned (16))) = { +#ifdef __LITTLE_ENDIAN__ + /* x^1952 mod p(x) , 
x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x) */ + { 0x5cf015c388e56f72, 0x7fec2963e5bf8048 }, + /* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x) */ + { 0x963a18920246e2e6, 0x38e888d4844752a9 }, + /* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x) */ + { 0x419a441956993a31, 0x42316c00730206ad }, + /* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x) */ + { 0x924752ba2b830011, 0x543d5c543e65ddf9 }, + /* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x) */ + { 0x55bd7f9518e4a304, 0x78e87aaf56767c92 }, + /* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod p(x) */ + { 0x6d76739fe0553f1e, 0x8f68fcec1903da7f }, + /* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x) */ + { 0xc133722b1fe0b5c3, 0x3f4840246791d588 }, + /* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x) */ + { 0x64b67ee0e55ef1f3, 0x34c96751b04de25a }, + /* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x) */ + { 0x069db049b8fdb1e7, 0x156c8e180b4a395b }, + /* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , x^896 mod p(x) */ + { 0xa11bfaf3c9e90b9e, 0xe0b99ccbe661f7be }, + /* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x) */ + { 0x817cdc5119b29a35, 0x041d37768cd75659 }, + /* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x) */ + { 0x1ce9d94b36c41f1c, 0x3a0777818cfaa965 }, + /* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x) */ + { 0x4f256efcb82be955, 0x0e148e8252377a55 }, + /* x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x) */ + { 0xec1631edb2dea967, 0x9c25531d19e65dde }, + /* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x) */ + { 0x5d27e147510ac59a, 0x790606ff9957c0a6 }, + /* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x) */ + { 0xa66805eb18b8ea18, 0x82f63b786ea2d55c } +#else /* __LITTLE_ENDIAN__ */ + /* x^1952 mod p(x) , x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x) */ + { 0x7fec2963e5bf8048, 0x5cf015c388e56f72 }, + /* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x) */ + { 0x38e888d4844752a9, 0x963a18920246e2e6 }, + /* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x) */ + { 0x42316c00730206ad, 0x419a441956993a31 }, + /* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x) */ + { 0x543d5c543e65ddf9, 0x924752ba2b830011 }, + /* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x) */ + { 0x78e87aaf56767c92, 0x55bd7f9518e4a304 }, + /* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod p(x) */ + { 0x8f68fcec1903da7f, 0x6d76739fe0553f1e }, + /* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x) */ + { 0x3f4840246791d588, 0xc133722b1fe0b5c3 }, + /* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x) */ + { 0x34c96751b04de25a, 0x64b67ee0e55ef1f3 }, + /* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x) */ + { 0x156c8e180b4a395b, 0x069db049b8fdb1e7 }, + /* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , x^896 mod p(x) */ + { 0xe0b99ccbe661f7be, 0xa11bfaf3c9e90b9e }, + /* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x) */ + { 0x041d37768cd75659, 0x817cdc5119b29a35 }, + /* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x) */ + { 0x3a0777818cfaa965, 0x1ce9d94b36c41f1c }, + /* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x) */ + { 0x0e148e8252377a55, 0x4f256efcb82be955 }, + /* 
x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x) */
+	{ 0x9c25531d19e65dde, 0xec1631edb2dea967 },
+	/* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x) */
+	{ 0x790606ff9957c0a6, 0x5d27e147510ac59a },
+	/* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x) */
+	{ 0x82f63b786ea2d55c, 0xa66805eb18b8ea18 }
+#endif /* __LITTLE_ENDIAN__ */
+	};
+
+/* Barrett constants */
+/* 33 bit reflected Barrett constant m - (4^32)/n */
+
+static const __vector unsigned long long v_Barrett_const[2]
+	__attribute__((aligned (16))) = {
+	/* x^64 div p(x) */
+#ifdef __LITTLE_ENDIAN__
+	{ 0x00000000dea713f1, 0x0000000000000000 },
+	{ 0x0000000105ec76f1, 0x0000000000000000 }
+#else /* __LITTLE_ENDIAN__ */
+	{ 0x0000000000000000, 0x00000000dea713f1 },
+	{ 0x0000000000000000, 0x0000000105ec76f1 }
+#endif /* __LITTLE_ENDIAN__ */
+	};
+#endif /* POWER8_INTRINSICS */
+
+#endif /* __ASSEMBLER__ */
diff --git a/contrib/crc32-vpmsum-cmake/vec_crc32.h b/contrib/crc32-vpmsum-cmake/vec_crc32.h
new file mode 100644
index 00000000000..0ef13616b34
--- /dev/null
+++ b/contrib/crc32-vpmsum-cmake/vec_crc32.h
@@ -0,0 +1,29 @@
+#ifndef VEC_CRC32
+#define VEC_CRC32
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+unsigned int crc32_vpmsum(unsigned int crc, const unsigned char *p, unsigned long len);
+
+static inline uint32_t crc32_ppc(uint64_t crc, unsigned char const *buffer, size_t len)
+{
+    unsigned char *emptybuffer;
+    if (!buffer) {
+        emptybuffer = (unsigned char *)malloc(len);
+        bzero(emptybuffer, len);
+        crc = crc32_vpmsum(crc, emptybuffer, len);
+        free(emptybuffer);
+    } else {
+        crc = crc32_vpmsum(crc, buffer, (unsigned long)len);
+    }
+    return crc;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d8a7dba72ac..1bc1151b90b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -364,6 +364,10 @@ if (TARGET ch_contrib::crc32_s390x)
     target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::crc32_s390x)
 endif()
 
+if (TARGET ch_contrib::crc32-vpmsum)
+    target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::crc32-vpmsum)
+    endif()
+
 dbms_target_link_libraries(PUBLIC ch_contrib::abseil_swiss_tables)
 target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::abseil_swiss_tables)
 
diff --git a/src/Common/HashTable/Hash.h b/src/Common/HashTable/Hash.h
index 01758c1b9fb..c7342d061d8 100644
--- a/src/Common/HashTable/Hash.h
+++ b/src/Common/HashTable/Hash.h
@@ -48,6 +48,10 @@ inline DB::UInt64 intHash64(DB::UInt64 x)
 #include
 #endif
 
+#if (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#include "vec_crc32.h"
+#endif
+
 #if defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
 #include
 
@@ -89,6 +93,8 @@ inline DB::UInt64 intHashCRC32(DB::UInt64 x)
     return __crc32cd(-1U, x);
 #elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
     return s390x_crc32(-1U, x)
+#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+    return crc32_ppc(-1U, reinterpret_cast(&x), sizeof(x));
 #else
     /// On other platforms we do not have CRC32. NOTE This can be confusing.
    /// NOTE: consider using intHash32()
@@ -103,6 +109,8 @@ inline DB::UInt64 intHashCRC32(DB::UInt64 x, DB::UInt64 updated_value)
     return __crc32cd(static_cast(updated_value), x);
 #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
     return s390x_crc32(updated_value, x);
+#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+    return crc32_ppc(updated_value, reinterpret_cast(&x), sizeof(x));
 #else
     /// On other platforms we do not have CRC32. NOTE This can be confusing.
     return intHash64(x) ^ updated_value;
diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt
index e9810e918b4..45543f57b37 100644
--- a/src/Functions/CMakeLists.txt
+++ b/src/Functions/CMakeLists.txt
@@ -86,6 +86,10 @@ if (TARGET ch_contrib::rapidjson)
     list (APPEND PRIVATE_LIBS ch_contrib::rapidjson)
 endif()
 
+if (TARGET ch_contrib::crc32-vpmsum)
+    list (APPEND PUBLIC_LIBS ch_contrib::crc32-vpmsum)
+endif()
+
 add_subdirectory(GatherUtils)
 list (APPEND PRIVATE_LIBS clickhouse_functions_gatherutils)
 
diff --git a/src/Functions/FunctionsStringHash.cpp b/src/Functions/FunctionsStringHash.cpp
index 174acebe979..bf0b7463a5d 100644
--- a/src/Functions/FunctionsStringHash.cpp
+++ b/src/Functions/FunctionsStringHash.cpp
@@ -14,6 +14,10 @@
 #include
 
+#if (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#include "vec_crc32.h"
+#endif
+
 namespace DB
 {
 
@@ -38,6 +42,8 @@ struct Hash
         return __crc32cd(static_cast(crc), val);
 #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
         return s390x_crc32(crc, val);
+#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+        return crc32_ppc(crc, reinterpret_cast(&val), sizeof(val));
 #else
         throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED);
 #endif
@@ -51,6 +57,8 @@ struct Hash
         return __crc32cw(crc, val);
 #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
         return s390x_crc32_u32(crc, val);
+#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+        return crc32_ppc(crc, reinterpret_cast(&val), sizeof(val));
 #else
         throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED);
 #endif
@@ -64,6 +72,8 @@ struct Hash
         return __crc32ch(crc, val);
 #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
         return s390x_crc32_u16(crc, val);
+#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+        return crc32_ppc(crc, reinterpret_cast(&val), sizeof(val));
 #else
         throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED);
 #endif
@@ -77,6 +87,8 @@ struct Hash
         return __crc32cb(crc, val);
 #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
         return s390x_crc32_u8(crc, val);
+#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+        return crc32_ppc(crc, reinterpret_cast(&val), sizeof(val));
 #else
         throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED);
 #endif
diff --git a/src/Functions/FunctionsStringSimilarity.cpp b/src/Functions/FunctionsStringSimilarity.cpp
index 802aafc2042..87aa0f4b3f7 100644
--- a/src/Functions/FunctionsStringSimilarity.cpp
+++ b/src/Functions/FunctionsStringSimilarity.cpp
@@ -24,6 +24,10 @@
 # include
 #endif
 
+#if (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#include "vec_crc32.h" +#endif + namespace DB { /** Distance function implementation. @@ -72,6 +76,8 @@ struct NgramDistanceImpl return __crc32cd(code_points[2], combined) & 0xFFFFu; #elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ return s390x_crc32(code_points[2], combined) & 0xFFFFu; +#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return crc32_ppc(code_points[2], reinterpret_cast(&combined), sizeof(combined)) & 0xFFFFu; #else return (intHashCRC32(combined) ^ intHashCRC32(code_points[2])) & 0xFFFFu; #endif diff --git a/tests/queries/0_stateless/01016_simhash_minhash.ppc64le.reference b/tests/queries/0_stateless/01016_simhash_minhash.ppc64le.reference new file mode 100644 index 00000000000..2acad33320b --- /dev/null +++ b/tests/queries/0_stateless/01016_simhash_minhash.ppc64le.reference @@ -0,0 +1,148 @@ +18446744073709551615 +1737075136 +1737075136 +4018781633 +4018781633 +1846985414 +1846985414 +1846985414 +1846985414 +(10693559443859979498,10693559443859979498) +(12279482788274235946,6436413987527322272) +(12279482788274235946,6436413987527322272) +(13257488272755813409,6436413987527322272) +(13257488272755813409,6436413987527322272) +(13762864994695140861,13762864994695140861) +(13762864994695140861,13762864994695140861) +(13762864994695140861,13762864994695140861) +(13762864994695140861,13762864994695140861) +3023525975 +3040303199 +3023509591 +3023510623 +3040303191 +3040303191 +3023510615 +3023510615 +1999952988 +926211140 +1999699532 +1999683148 +1999952988 +926211140 +1999699532 +1999683148 +(16071125717475221203,9592059329600248798) +(16071125717475221203,1914899959549098907) +(16071125717475221203,7986182634218042944) +(16071125717475221203,7986182634218042944) +(16071125717475221203,9592059329600248798) +(16071125717475221203,1914899959549098907) +(16071125717475221203,7986182634218042944) +(16071125717475221203,7986182634218042944) +(10576877560263640956,4278250516018530743) +(16211512098526494023,11479872370566432466) +(13515070557027359649,17725505493832406849) +(12589381623326290380,575343713614534202) +(10576877560263640956,4278250516018530743) +(16211512098526494023,11479872370566432466) +(13515070557027359649,17725505493832406849) +(12589381623326290380,575343713614534202) +uniqExact 6 +ngramSimHash +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 1211135069 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1546679389 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 2293265501 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 3392173149 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3627054169 +ngramSimHashCaseInsensitive +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 2291168349 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 3358618717 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3425727581 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3627054429 +ngramSimHashUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 1211135069 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1546679389 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. 
SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 2284876893 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 3459282013 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3694163037 +ngramSimHashCaseInsensitiveUTF8 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 2291168349 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. 
Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 3358618717 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3425727581 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3627054429 +wordShingleSimHash +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 10637533 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 171136201 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. 
SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 209864029 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 413353165 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 413353677 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 418595033 +wordShingleSimHashCaseInsensitive +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. 
Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 218252892 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 1218592985 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1613919433 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2080524225 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2088912577 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2094163657 +wordShingleSimHashUTF8 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 10637533 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 171136201 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 209864029 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 413353165 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 413353677 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 418595033 +wordShingleSimHashCaseInsensitiveUTF8 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 218252892 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 1218592985 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1613919433 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2080524225 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2088912577 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2094163657 +ngramMinHash +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (2793448378579182412,5526633106516004292) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (8530889421347045182,5150364204263408031) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (8992738078100405992,5526633106516004292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (15193387305258759701,5526633106516004292) +ngramMinHashCaseInsensitive +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (2793448378579182412,5526633106516004292) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. 
SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (8530889421347045182,5150364204263408031) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (8992738078100405992,5526633106516004292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (15193387305258759701,5526633106516004292) +ngramMinHashUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (2793448378579182412,5526633106516004292) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (8530889421347045182,5150364204263408031) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 
1 (8992738078100405992,5526633106516004292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (15193387305258759701,5526633106516004292) +ngramMinHashCaseInsensitiveUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (2793448378579182412,5526633106516004292) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (8530889421347045182,5150364204263408031) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (8992738078100405992,5526633106516004292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (15193387305258759701,5526633106516004292) +wordShingleMinHash +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (3409292695558556998,3242671779450421938) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. 
It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (11981468198903037199,5500630346333489583) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 (12852656749419794093,678630951345180105) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (13105381013738345838,410122209669519134) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (13105381013738345838,3365040177160857031) +wordShingleMinHashCaseInsensitive +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (712181695272576370,125062659592971094) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 (3404326999173181417,12067981913120463876) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (13918035273694643957,5500630346333489583) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (14132553626943388792,12467125901844798869) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (14132553626943388792,17567683680214055861) +wordShingleMinHashUTF8 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (3409292695558556998,3242671779450421938) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (11981468198903037199,5500630346333489583) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 (12852656749419794093,678630951345180105) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (13105381013738345838,410122209669519134) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (13105381013738345838,3365040177160857031) +wordShingleMinHashCaseInsensitiveUTF8 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (712181695272576370,125062659592971094) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 (3404326999173181417,12067981913120463876) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (13918035273694643957,5500630346333489583) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (14132553626943388792,12467125901844798869) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (14132553626943388792,17567683680214055861) +code 69: Second argument (shingle size) of function wordShingleSimHash cannot be greater then 25: While processing wordShingleSimHash('foobar', 9223372036854775807)None +code 69: Second argument (shingle size) of function wordShingleSimHash cannot be greater then 25: While processing wordShingleSimHash('foobar', 1001)None +code 69: Second argument (shingle size) of function wordShingleSimHash cannot be zero: While processing wordShingleSimHash('foobar', 0)None diff --git a/tests/queries/0_stateless/01016_simhash_minhash.python b/tests/queries/0_stateless/01016_simhash_minhash.python new file mode 100644 index 00000000000..1d6eae456c1 --- /dev/null +++ b/tests/queries/0_stateless/01016_simhash_minhash.python @@ -0,0 +1,394 @@ +#!/usr/bin/env python3 +import os +import socket +import sys +from scipy import stats +import pandas as pd +import numpy as np +import shutil +import platform + +import uuid + +CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1') +CLICKHOUSE_PORT = int(os.environ.get('CLICKHOUSE_PORT_TCP', '9000')) +CLICKHOUSE_DATABASE = os.environ.get('CLICKHOUSE_DATABASE', 'default') + + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, 'helpers')) + +from pure_http_client import ClickHouseClient + +if platform.machine() == "ppc64le": + shutil.copyfile(CURDIR + "/01016_simhash_minhash.ppc64le.reference", CURDIR + "/01016_simhash_minhash.reference") +elif platform.machine() == "x86_64": + shutil.copyfile(CURDIR + "/01016_simhash_minhash.x86_64.reference", CURDIR + "/01016_simhash_minhash.reference") + +def writeVarUInt(x, ba): + for _ in range(0, 9): + + byte = x & 0x7F + if x > 0x7F: + byte |= 0x80 + + ba.append(byte) + + x >>= 7 + if x == 0: + return + +def writeStringBinary(s, ba): + b = bytes(s, 'utf-8') + writeVarUInt(len(s), ba) + ba.extend(b) + +def readStrict(s, size=1): + res = bytearray() + while size: + cur = s.recv(size) + # if not cur: + # raise RuntimeError("Socket is closed") + size -= len(cur) + res.extend(cur) + + return res + +def readUInt(s, size=1): + res = readStrict(s, size) + val = 0 + for i in range(len(res)): + val += res[i] << (i * 8) + return val + +def readUInt8(s): + return readUInt(s) + +def readUInt16(s): + return readUInt(s, 2) + +def readUInt32(s): + return readUInt(s, 4) + +def readUInt64(s): + return readUInt(s, 8) + +def readVarUInt(s): + x = 0 + for i in 
range(9): + byte = readStrict(s)[0] + x |= (byte & 0x7F) << (7 * i) + + if not byte & 0x80: + return x + + return x + +def readStringBinary(s): + size = readVarUInt(s) + s = readStrict(s, size) + return s.decode('utf-8') + +def sendHello(s): + ba = bytearray() + writeVarUInt(0, ba) # Hello + writeStringBinary('simple native protocol', ba) + writeVarUInt(21, ba) + writeVarUInt(9, ba) + writeVarUInt(54449, ba) + writeStringBinary('default', ba) # database + writeStringBinary('default', ba) # user + writeStringBinary('', ba) # pwd + s.sendall(ba) + + +def receiveHello(s): + p_type = readVarUInt(s) + assert (p_type == 0) # Hello + server_name = readStringBinary(s) + # print("Server name: ", server_name) + server_version_major = readVarUInt(s) + # print("Major: ", server_version_major) + server_version_minor = readVarUInt(s) + # print("Minor: ", server_version_minor) + server_revision = readVarUInt(s) + # print("Revision: ", server_revision) + server_timezone = readStringBinary(s) + # print("Timezone: ", server_timezone) + server_display_name = readStringBinary(s) + # print("Display name: ", server_display_name) + server_version_patch = readVarUInt(s) + # print("Version patch: ", server_version_patch) + +def serializeClientInfo(ba, query_id): + writeStringBinary('default', ba) # initial_user + writeStringBinary(query_id, ba) # initial_query_id + writeStringBinary('127.0.0.1:9000', ba) # initial_address + ba.extend([0] * 8) # initial_query_start_time_microseconds + ba.append(1) # TCP + writeStringBinary('os_user', ba) # os_user + writeStringBinary('client_hostname', ba) # client_hostname + writeStringBinary('client_name', ba) # client_name + writeVarUInt(21, ba) + writeVarUInt(9, ba) + writeVarUInt(54449, ba) + writeStringBinary('', ba) # quota_key + writeVarUInt(0, ba) # distributed_depth + writeVarUInt(1, ba) # client_version_patch + ba.append(0) # No telemetry + +def sendQuery(s, query): + ba = bytearray() + query_id = uuid.uuid4().hex + writeVarUInt(1, ba) # query + writeStringBinary(query_id, ba) + + ba.append(1) # INITIAL_QUERY + + # client info + serializeClientInfo(ba, query_id) + + writeStringBinary('', ba) # No settings + writeStringBinary('', ba) # No interserver secret + writeVarUInt(2, ba) # Stage - Complete + ba.append(0) # No compression + writeStringBinary(query, ba) # query, finally + s.sendall(ba) + +def serializeBlockInfo(ba): + writeVarUInt(1, ba) # 1 + ba.append(0) # is_overflows + writeVarUInt(2, ba) # 2 + writeVarUInt(0, ba) # 0 + ba.extend([0] * 4) # bucket_num + +def sendEmptyBlock(s): + ba = bytearray() + writeVarUInt(2, ba) # Data + writeStringBinary('', ba) + serializeBlockInfo(ba) + writeVarUInt(0, ba) # rows + writeVarUInt(0, ba) # columns + s.sendall(ba) + +def assertPacket(packet, expected): + assert(packet == expected), packet + +def readException(s): + code = readUInt32(s) + name = readStringBinary(s) + text = readStringBinary(s) + readStringBinary(s) # trace + assertPacket(readUInt8(s), 0) # has_nested + sys.stdout.write("code {}: {}".format(code, text.replace('DB::Exception:', ''))) + + +def test(): + client = ClickHouseClient() + + res = client.query("SELECT ngramSimHash('')") + sys.stdout.write(res) + res=client.query("SELECT ngramSimHash('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT ngramSimHashCaseInsensitive('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT ngramSimHashUTF8('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT ngramSimHashCaseInsensitiveUTF8('what a cute cat.')") + 
sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHash('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashCaseInsensitive('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashUTF8('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashCaseInsensitiveUTF8('what a cute cat.')") + sys.stdout.write(res) + + res = client.query("SELECT ngramMinHash('')") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHash('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashCaseInsensitive('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashUTF8('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashCaseInsensitiveUTF8('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHash('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashCaseInsensitive('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashUTF8('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashCaseInsensitiveUTF8('what a cute cat.')") + sys.stdout.write(res) + + client.query("DROP TABLE IF EXISTS defaults") + client.query("CREATE TABLE defaults(s String) ENGINE = Memory()") + client.query("INSERT INTO defaults values ('It is the latest occurrence of the Southeast European haze, the issue that occurs in constant intensity during every wet season. It has mainly been caused by forest fires resulting from illegal slash-and-burn clearing performed on behalf of the palm oil industry in Kazakhstan, principally on the islands, which then spread quickly in the dry season.') ('It is the latest occurrence of the Southeast Asian haze, the issue that occurs in constant intensity during every wet season. 
It has mainly been caused by forest fires resulting from illegal slash-and-burn clearing performed on behalf of the palm oil industry in Kazakhstan, principally on the islands, which then spread quickly in the dry season.')") + + res = client.query("SELECT ngramSimHash(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramSimHashCaseInsensitive(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramSimHashUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramSimHashCaseInsensitiveUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHash(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashCaseInsensitive(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashCaseInsensitiveUTF8(s) FROM defaults") + sys.stdout.write(res) + + res = client.query("SELECT ngramMinHash(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashCaseInsensitive(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashCaseInsensitiveUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHash(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashCaseInsensitive(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashCaseInsensitiveUTF8(s) FROM defaults") + sys.stdout.write(res) + + client.query("TRUNCATE TABLE defaults") + client.query("INSERT INTO defaults SELECT arrayJoin(splitByString('\n\n', 'ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems.\n\nClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system''s read and write availability.\nClickHouse is simple and works out of the box. 
It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system''s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. 
All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.'))") + + res = client.query("SELECT 'uniqExact', uniqExact(s) FROM defaults") + sys.stdout.write(res) + + res = client.query("SELECT 'ngramSimHash'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHash(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramSimHashCaseInsensitive'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramSimHashUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramSimHashCaseInsensitiveUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleSimHash'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHash(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleSimHashCaseInsensitive'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleSimHashUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleSimHashCaseInsensitiveUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + + res = client.query("SELECT 'ngramMinHash'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHash(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramMinHashCaseInsensitive'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramMinHashUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramMinHashCaseInsensitiveUTF8'") + 
sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleMinHash'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHash(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleMinHashCaseInsensitive'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleMinHashUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleMinHashCaseInsensitiveUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + + wordShingleSimHashInvalidArg1() + + wordShingleSimHashInvalidArg2() + + wordShingleSimHashInvalidArg3() + #client.query("DROP TABLE defaults") + +def wordShingleSimHashInvalidArg1(): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(30) + s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) + sendHello(s) + receiveHello(s) + sendQuery(s, "SELECT wordShingleSimHash('foobar', 9223372036854775807)") + + # Fin block + sendEmptyBlock(s) + + + assertPacket(readVarUInt(s), 2) + print(readException(s)) + s.close() + + +def wordShingleSimHashInvalidArg2(): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(30) + s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) + sendHello(s) + receiveHello(s) + sendQuery(s, "SELECT wordShingleSimHash('foobar', 1001)") + + # Fin block + sendEmptyBlock(s) + + assertPacket(readVarUInt(s), 2) + print(readException(s)) + s.close() + + +def wordShingleSimHashInvalidArg3(): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(30) + s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) + sendHello(s) + receiveHello(s) + sendQuery(s, "SELECT wordShingleSimHash('foobar', 0)") + + # Fin block + sendEmptyBlock(s) + + assertPacket(readVarUInt(s), 2) + print(readException(s)) + s.close() + +if __name__ == "__main__": + test() + #wordShingleSimHashInvalidArg1() diff --git a/tests/queries/0_stateless/01016_simhash_minhash.sh b/tests/queries/0_stateless/01016_simhash_minhash.sh new file mode 100755 index 00000000000..94bac7efacb --- /dev/null +++ b/tests/queries/0_stateless/01016_simhash_minhash.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +python3 "$CURDIR"/01016_simhash_minhash.python + diff --git a/tests/queries/0_stateless/01016_simhash_minhash.sql b/tests/queries/0_stateless/01016_simhash_minhash.sql deleted file mode 100644 index 1e77b487851..00000000000 --- a/tests/queries/0_stateless/01016_simhash_minhash.sql +++ /dev/null @@ -1,115 +0,0 @@ -SELECT ngramSimHash(''); -SELECT ngramSimHash('what a cute cat.'); -SELECT ngramSimHashCaseInsensitive('what a cute cat.'); -SELECT ngramSimHashUTF8('what a cute cat.'); -SELECT ngramSimHashCaseInsensitiveUTF8('what a cute cat.'); -SELECT wordShingleSimHash('what a cute cat.'); -SELECT wordShingleSimHashCaseInsensitive('what a cute cat.'); -SELECT wordShingleSimHashUTF8('what a cute cat.'); -SELECT wordShingleSimHashCaseInsensitiveUTF8('what a cute cat.'); - -SELECT ngramMinHash(''); -SELECT ngramMinHash('what a cute cat.'); -SELECT ngramMinHashCaseInsensitive('what a cute cat.'); -SELECT ngramMinHashUTF8('what a cute cat.'); -SELECT ngramMinHashCaseInsensitiveUTF8('what a cute cat.'); -SELECT wordShingleMinHash('what a cute cat.'); -SELECT wordShingleMinHashCaseInsensitive('what a cute cat.'); -SELECT wordShingleMinHashUTF8('what a cute cat.'); -SELECT wordShingleMinHashCaseInsensitiveUTF8('what a cute cat.'); - -DROP TABLE IF EXISTS defaults; -CREATE TABLE defaults -( - s String -)ENGINE = Memory(); - -INSERT INTO defaults values ('It is the latest occurrence of the Southeast European haze, the issue that occurs in constant intensity during every wet season. It has mainly been caused by forest fires resulting from illegal slash-and-burn clearing performed on behalf of the palm oil industry in Kazakhstan, principally on the islands, which then spread quickly in the dry season.') ('It is the latest occurrence of the Southeast Asian haze, the issue that occurs in constant intensity during every wet season. It has mainly been caused by forest fires resulting from illegal slash-and-burn clearing performed on behalf of the palm oil industry in Kazakhstan, principally on the islands, which then spread quickly in the dry season.'); - -SELECT ngramSimHash(s) FROM defaults; -SELECT ngramSimHashCaseInsensitive(s) FROM defaults; -SELECT ngramSimHashUTF8(s) FROM defaults; -SELECT ngramSimHashCaseInsensitiveUTF8(s) FROM defaults; -SELECT wordShingleSimHash(s) FROM defaults; -SELECT wordShingleSimHashCaseInsensitive(s) FROM defaults; -SELECT wordShingleSimHashUTF8(s) FROM defaults; -SELECT wordShingleSimHashCaseInsensitiveUTF8(s) FROM defaults; - -SELECT ngramMinHash(s) FROM defaults; -SELECT ngramMinHashCaseInsensitive(s) FROM defaults; -SELECT ngramMinHashUTF8(s) FROM defaults; -SELECT ngramMinHashCaseInsensitiveUTF8(s) FROM defaults; -SELECT wordShingleMinHash(s) FROM defaults; -SELECT wordShingleMinHashCaseInsensitive(s) FROM defaults; -SELECT wordShingleMinHashUTF8(s) FROM defaults; -SELECT wordShingleMinHashCaseInsensitiveUTF8(s) FROM defaults; - -TRUNCATE TABLE defaults; -INSERT INTO defaults SELECT arrayJoin(splitByString('\n\n', -'ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency. -ClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. 
Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes. -ClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. - -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. -ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system''s read and write availability. -ClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. - -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. -ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system''s read / write availability. -ClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. - -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. -ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system. -ClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. - -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. -ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system. -ClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. - -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. -ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system. -ClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.' -)); - -SELECT 'uniqExact', uniqExact(s) FROM defaults; - - -SELECT 'ngramSimHash'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHash(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'ngramSimHashCaseInsensitive'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'ngramSimHashUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'ngramSimHashCaseInsensitiveUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleSimHash'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHash(s, 2) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleSimHashCaseInsensitive'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitive(s, 2) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleSimHashUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashUTF8(s, 2) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleSimHashCaseInsensitiveUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitiveUTF8(s, 2) as h FROM defaults GROUP BY h ORDER BY h; - -SELECT 'ngramMinHash'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHash(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'ngramMinHashCaseInsensitive'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'ngramMinHashUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashUTF8(s) as h 
FROM defaults GROUP BY h ORDER BY h; -SELECT 'ngramMinHashCaseInsensitiveUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleMinHash'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHash(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleMinHashCaseInsensitive'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitive(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleMinHashUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashUTF8(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleMinHashCaseInsensitiveUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitiveUTF8(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h; - -SELECT wordShingleSimHash('foobar', 9223372036854775807); -- { serverError 69 } -SELECT wordShingleSimHash('foobar', 1001); -- { serverError 69 } -SELECT wordShingleSimHash('foobar', 0); -- { serverError 69 } - -DROP TABLE defaults; diff --git a/tests/queries/0_stateless/01016_simhash_minhash.reference b/tests/queries/0_stateless/01016_simhash_minhash.x86_64.reference similarity index 100% rename from tests/queries/0_stateless/01016_simhash_minhash.reference rename to tests/queries/0_stateless/01016_simhash_minhash.x86_64.reference From 25c94dfa8300520555ef21b5aa8ca76f7a5a574d Mon Sep 17 00:00:00 2001 From: MeenaRenganathan22 Date: Tue, 10 Jan 2023 20:56:16 -0800 Subject: [PATCH 165/262] Changes to support the CRC32 in PowerPC to address the WeakHash collision issue. Update the reference to support the hash values based on the specific platform --- .gitmodules | 3 + contrib/CMakeLists.txt | 1 + contrib/crc32-vpmsum | 1 + contrib/crc32-vpmsum-cmake/CMakeLists.txt | 12 + contrib/crc32-vpmsum-cmake/README.md | 8 + contrib/crc32-vpmsum-cmake/crc32_constants.h | 1206 +++++++++++++++++ contrib/crc32-vpmsum-cmake/vec_crc32.h | 29 + src/CMakeLists.txt | 4 + src/Common/HashTable/Hash.h | 8 + src/Functions/CMakeLists.txt | 4 + src/Functions/FunctionsStringHash.cpp | 12 + src/Functions/FunctionsStringSimilarity.cpp | 6 + .../01016_simhash_minhash.ppc64le.reference | 148 ++ .../0_stateless/01016_simhash_minhash.python | 394 ++++++ .../0_stateless/01016_simhash_minhash.sh | 8 + .../0_stateless/01016_simhash_minhash.sql | 115 -- ...=> 01016_simhash_minhash.x86_64.reference} | 0 17 files changed, 1844 insertions(+), 115 deletions(-) create mode 160000 contrib/crc32-vpmsum create mode 100644 contrib/crc32-vpmsum-cmake/CMakeLists.txt create mode 100644 contrib/crc32-vpmsum-cmake/README.md create mode 100644 contrib/crc32-vpmsum-cmake/crc32_constants.h create mode 100644 contrib/crc32-vpmsum-cmake/vec_crc32.h create mode 100644 tests/queries/0_stateless/01016_simhash_minhash.ppc64le.reference create mode 100644 tests/queries/0_stateless/01016_simhash_minhash.python create mode 100755 tests/queries/0_stateless/01016_simhash_minhash.sh delete mode 100644 tests/queries/0_stateless/01016_simhash_minhash.sql rename tests/queries/0_stateless/{01016_simhash_minhash.reference => 01016_simhash_minhash.x86_64.reference} (100%) diff --git a/.gitmodules b/.gitmodules index 26824cb57ff..b4673f113b7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -327,3 +327,6 @@ [submodule "contrib/aws-s2n-tls"] path = contrib/aws-s2n-tls url = https://github.com/ClickHouse/s2n-tls 
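The commit above adds a CRC32 implementation for PowerPC (ppc64le) based on the crc32-vpmsum contrib so that the WeakHash/CRC32-based hashes line up with the CRC32C produced by the SSE4.2 intrinsics on x86_64. A minimal sketch of that kind of per-platform dispatch, assuming a `crc32_vpmsum(crc, buf, len)` entry point exposed by the contrib's `vec_crc32.h` (the actual integration lives in src/Common/HashTable/Hash.h and is not reproduced here), could look like this:

```c
/* Illustrative sketch only: per-platform CRC32C dispatch.
 * The crc32_vpmsum name and signature are assumptions about contrib/crc32-vpmsum;
 * _mm_crc32_u8 is the standard SSE4.2 intrinsic. */
#include <stddef.h>
#include <stdint.h>

#if defined(__x86_64__)
#include <nmmintrin.h>   /* SSE4.2 CRC32 intrinsics */
#elif defined(__powerpc64__)
#include "vec_crc32.h"   /* assumed to declare crc32_vpmsum(crc, buf, len) */
#endif

static inline uint32_t crc32c_bytes(uint32_t crc, const uint8_t * data, size_t len)
{
#if defined(__x86_64__)
    /* Byte-at-a-time for brevity; real code processes wider words. */
    for (size_t i = 0; i < len; ++i)
        crc = _mm_crc32_u8(crc, data[i]);
    return crc;
#elif defined(__powerpc64__)
    /* Assumption: crc32_vpmsum uses the same CRC32C polynomial (0x11EDC6F41). */
    return crc32_vpmsum(crc, (unsigned char *) data, (unsigned long) len);
#else
    /* Portable fallback omitted in this sketch. */
    (void) data;
    (void) len;
    return crc;
#endif
}
```

This is also why the diffstat splits the test expectations into `01016_simhash_minhash.x86_64.reference` and `01016_simhash_minhash.ppc64le.reference`: the expected hash values are platform-specific.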
+[submodule "contrib/crc32-vpmsum"]
+ path = contrib/crc32-vpmsum
+ url = https://github.com/antonblanchard/crc32-vpmsum.git
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index 27b4a7ddb5c..f5d1315cc02 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -55,6 +55,7 @@ else ()
 endif ()
 add_contrib (miniselect-cmake miniselect)
 add_contrib (pdqsort-cmake pdqsort)
+add_contrib (crc32-vpmsum-cmake crc32-vpmsum)
 add_contrib (sparsehash-c11-cmake sparsehash-c11)
 add_contrib (abseil-cpp-cmake abseil-cpp)
 add_contrib (magic-enum-cmake magic_enum)
diff --git a/contrib/crc32-vpmsum b/contrib/crc32-vpmsum
new file mode 160000
index 00000000000..45215543938
--- /dev/null
+++ b/contrib/crc32-vpmsum
@@ -0,0 +1 @@
+Subproject commit 452155439389311fc7d143621eaf56a258e02476
diff --git a/contrib/crc32-vpmsum-cmake/CMakeLists.txt b/contrib/crc32-vpmsum-cmake/CMakeLists.txt
new file mode 100644
index 00000000000..bb7d5618410
--- /dev/null
+++ b/contrib/crc32-vpmsum-cmake/CMakeLists.txt
@@ -0,0 +1,12 @@
+if (NOT ARCH_PPC64LE)
+    message(STATUS "crc32-vpmsum library is only supported on ppc64le")
+    return()
+endif()
+
+SET(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/crc32-vpmsum")
+
+add_library(_crc32-vpmsum
+    "${LIBRARY_DIR}/vec_crc32.c"
+    )
+target_include_directories(_crc32-vpmsum SYSTEM BEFORE PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
+add_library(ch_contrib::crc32-vpmsum ALIAS _crc32-vpmsum)
diff --git a/contrib/crc32-vpmsum-cmake/README.md b/contrib/crc32-vpmsum-cmake/README.md
new file mode 100644
index 00000000000..9ea8133e331
--- /dev/null
+++ b/contrib/crc32-vpmsum-cmake/README.md
@@ -0,0 +1,8 @@
+# To Generate crc32_constants.h
+
+- Run the make file in the `../crc32-vpmsum` directory using the following options and CRC polynomial. These options should use the same polynomial and order as the Intel intrinsic functions
+```bash
+make crc32_constants.h CRC="0x11EDC6F41" OPTIONS="-x -r -c"
+```
+- Move the generated `crc32_constants.h` into this directory
+- To understand more about this, go here: https://masterchef2209.wordpress.com/2020/06/17/guide-to-intel-sse4-2-crc-intrinisics-implementation-for-simde/
diff --git a/contrib/crc32-vpmsum-cmake/crc32_constants.h b/contrib/crc32-vpmsum-cmake/crc32_constants.h
new file mode 100644
index 00000000000..aea525c9038
--- /dev/null
+++ b/contrib/crc32-vpmsum-cmake/crc32_constants.h
@@ -0,0 +1,1206 @@
+/*
+*
+* THIS FILE IS GENERATED WITH ./crc32_constants -x -r -c 0x11EDC6F41
+* This is from https://github.com/antonblanchard/crc32-vpmsum/
+* DO NOT MODIFY IT MANUALLY!
+* +*/ + +#define CRC 0x1edc6f41 +#define CRC_XOR +#define REFLECT +#define MAX_SIZE 32768 + +#ifndef __ASSEMBLER__ +#ifdef CRC_TABLE +static const unsigned int crc_table[] = { + 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, + 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, + 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, + 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, + 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, + 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, + 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, + 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, + 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, + 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, + 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, + 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, + 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, + 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a, + 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, + 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, + 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, + 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, + 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, + 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198, + 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, + 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, + 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, + 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, + 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, + 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, + 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, + 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, + 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, + 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, + 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, + 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829, + 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, + 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, + 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, + 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, + 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, + 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc, + 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, + 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, + 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, + 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, + 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, + 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982, + 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, + 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, + 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, + 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, + 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, + 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, + 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, + 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, + 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, + 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, + 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, + 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, + 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, + 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, + 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, + 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, + 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, + 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e, + 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, + 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,}; + +#endif /* CRC_TABLE */ +#ifdef POWER8_INTRINSICS + +/* Constants */ + +/* Reduce 262144 kbits to 1024 bits */ +static const __vector unsigned long long vcrc_const[255] + 
__attribute__((aligned (16))) = { +#ifdef __LITTLE_ENDIAN__ + /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ + { 0x000000009c37c408, 0x00000000b6ca9e20 }, + /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */ + { 0x00000001b51df26c, 0x00000000350249a8 }, + /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ + { 0x000000000724b9d0, 0x00000001862dac54 }, + /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ + { 0x00000001c00532fe, 0x00000001d87fb48c }, + /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ + { 0x00000000f05a9362, 0x00000001f39b699e }, + /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ + { 0x00000001e1007970, 0x0000000101da11b4 }, + /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ + { 0x00000000a57366ee, 0x00000001cab571e0 }, + /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ + { 0x0000000192011284, 0x00000000c7020cfe }, + /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ + { 0x0000000162716d9a, 0x00000000cdaed1ae }, + /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ + { 0x00000000cd97ecde, 0x00000001e804effc }, + /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ + { 0x0000000058812bc0, 0x0000000077c3ea3a }, + /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ + { 0x0000000088b8c12e, 0x0000000068df31b4 }, + /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ + { 0x00000001230b234c, 0x00000000b059b6c2 }, + /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ + { 0x00000001120b416e, 0x0000000145fb8ed8 }, + /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ + { 0x00000001974aecb0, 0x00000000cbc09168 }, + /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ + { 0x000000008ee3f226, 0x000000005ceeedc2 }, + /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ + { 0x00000001089aba9a, 0x0000000047d74e86 }, + /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ + { 0x0000000065113872, 0x00000001407e9e22 }, + /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ + { 0x000000005c07ec10, 0x00000001da967bda }, + /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ + { 0x0000000187590924, 0x000000006c898368 }, + /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ + { 0x00000000e35da7c6, 0x00000000f2d14c98 }, + /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ + { 0x000000000415855a, 0x00000001993c6ad4 }, + /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ + { 0x0000000073617758, 0x000000014683d1ac }, + /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ + { 0x0000000176021d28, 0x00000001a7c93e6c }, + /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ + { 0x00000001c358fd0a, 0x000000010211e90a }, + /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ + { 0x00000001ff7a2c18, 0x000000001119403e }, + /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ + { 0x00000000f2d9f7e4, 0x000000001c3261aa }, + /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ + { 0x000000016cf1f9c8, 0x000000014e37a634 }, + /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ + { 0x000000010af9279a, 0x0000000073786c0c }, + /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ + { 0x0000000004f101e8, 0x000000011dc037f8 }, + /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ + { 0x0000000070bcf184, 0x0000000031433dfc }, + /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ + { 0x000000000a8de642, 0x000000009cde8348 }, + /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ + { 0x0000000062ea130c, 0x0000000038d3c2a6 }, + /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ + { 0x00000001eb31cbb2, 
0x000000011b25f260 }, + /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ + { 0x0000000170783448, 0x000000001629e6f0 }, + /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ + { 0x00000001a684b4c6, 0x0000000160838b4c }, + /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ + { 0x00000000253ca5b4, 0x000000007a44011c }, + /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ + { 0x0000000057b4b1e2, 0x00000000226f417a }, + /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ + { 0x00000000b6bd084c, 0x0000000045eb2eb4 }, + /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ + { 0x0000000123c2d592, 0x000000014459d70c }, + /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ + { 0x00000000159dafce, 0x00000001d406ed82 }, + /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ + { 0x0000000127e1a64e, 0x0000000160c8e1a8 }, + /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ + { 0x0000000056860754, 0x0000000027ba8098 }, + /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ + { 0x00000001e661aae8, 0x000000006d92d018 }, + /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ + { 0x00000000f82c6166, 0x000000012ed7e3f2 }, + /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ + { 0x00000000c4f9c7ae, 0x000000002dc87788 }, + /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ + { 0x0000000074203d20, 0x0000000018240bb8 }, + /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ + { 0x0000000198173052, 0x000000001ad38158 }, + /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ + { 0x00000001ce8aba54, 0x00000001396b78f2 }, + /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ + { 0x00000001850d5d94, 0x000000011a681334 }, + /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ + { 0x00000001d609239c, 0x000000012104732e }, + /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ + { 0x000000001595f048, 0x00000000a140d90c }, + /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ + { 0x0000000042ccee08, 0x00000001b7215eda }, + /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ + { 0x000000010a389d74, 0x00000001aaf1df3c }, + /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ + { 0x000000012a840da6, 0x0000000029d15b8a }, + /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ + { 0x000000001d181c0c, 0x00000000f1a96922 }, + /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ + { 0x0000000068b7d1f6, 0x00000001ac80d03c }, + /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ + { 0x000000005b0f14fc, 0x000000000f11d56a }, + /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ + { 0x0000000179e9e730, 0x00000001f1c022a2 }, + /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ + { 0x00000001ce1368d6, 0x0000000173d00ae2 }, + /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ + { 0x0000000112c3a84c, 0x00000001d4ffe4ac }, + /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ + { 0x00000000de940fee, 0x000000016edc5ae4 }, + /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ + { 0x00000000fe896b7e, 0x00000001f1a02140 }, + /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ + { 0x00000001f797431c, 0x00000000ca0b28a0 }, + /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ + { 0x0000000053e989ba, 0x00000001928e30a2 }, + /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ + { 0x000000003920cd16, 0x0000000097b1b002 }, + /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ + { 0x00000001e6f579b8, 0x00000000b15bf906 }, + /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ + { 0x000000007493cb0a, 0x00000000411c5d52 }, + /* x^191488 mod p(x)` << 1, 
x^191552 mod p(x)` << 1 */ + { 0x00000001bdd376d8, 0x00000001c36f3300 }, + /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ + { 0x000000016badfee6, 0x00000001119227e0 }, + /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ + { 0x0000000071de5c58, 0x00000000114d4702 }, + /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ + { 0x00000000453f317c, 0x00000000458b5b98 }, + /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ + { 0x0000000121675cce, 0x000000012e31fb8e }, + /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ + { 0x00000001f409ee92, 0x000000005cf619d8 }, + /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ + { 0x00000000f36b9c88, 0x0000000063f4d8b2 }, + /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ + { 0x0000000036b398f4, 0x000000004138dc8a }, + /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ + { 0x00000001748f9adc, 0x00000001d29ee8e0 }, + /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ + { 0x00000001be94ec00, 0x000000006a08ace8 }, + /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ + { 0x00000000b74370d6, 0x0000000127d42010 }, + /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ + { 0x00000001174d0b98, 0x0000000019d76b62 }, + /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ + { 0x00000000befc06a4, 0x00000001b1471f6e }, + /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ + { 0x00000001ae125288, 0x00000001f64c19cc }, + /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ + { 0x0000000095c19b34, 0x00000000003c0ea0 }, + /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ + { 0x00000001a78496f2, 0x000000014d73abf6 }, + /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ + { 0x00000001ac5390a0, 0x00000001620eb844 }, + /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ + { 0x000000002a80ed6e, 0x0000000147655048 }, + /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ + { 0x00000001fa9b0128, 0x0000000067b5077e }, + /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ + { 0x00000001ea94929e, 0x0000000010ffe206 }, + /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ + { 0x0000000125f4305c, 0x000000000fee8f1e }, + /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ + { 0x00000001471e2002, 0x00000001da26fbae }, + /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ + { 0x0000000132d2253a, 0x00000001b3a8bd88 }, + /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ + { 0x00000000f26b3592, 0x00000000e8f3898e }, + /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ + { 0x00000000bc8b67b0, 0x00000000b0d0d28c }, + /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ + { 0x000000013a826ef2, 0x0000000030f2a798 }, + /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ + { 0x0000000081482c84, 0x000000000fba1002 }, + /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ + { 0x00000000e77307c2, 0x00000000bdb9bd72 }, + /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ + { 0x00000000d4a07ec8, 0x0000000075d3bf5a }, + /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ + { 0x0000000017102100, 0x00000000ef1f98a0 }, + /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ + { 0x00000000db406486, 0x00000000689c7602 }, + /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ + { 0x0000000192db7f88, 0x000000016d5fa5fe }, + /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ + { 0x000000018bf67b1e, 0x00000001d0d2b9ca }, + /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ + { 0x000000007c09163e, 0x0000000041e7b470 }, + /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ + { 0x000000000adac060, 
0x00000001cbb6495e }, + /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ + { 0x00000000bd8316ae, 0x000000010052a0b0 }, + /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ + { 0x000000019f09ab54, 0x00000001d8effb5c }, + /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ + { 0x0000000125155542, 0x00000001d969853c }, + /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ + { 0x000000018fdb5882, 0x00000000523ccce2 }, + /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ + { 0x00000000e794b3f4, 0x000000001e2436bc }, + /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ + { 0x000000016f9bb022, 0x00000000ddd1c3a2 }, + /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ + { 0x00000000290c9978, 0x0000000019fcfe38 }, + /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ + { 0x0000000083c0f350, 0x00000001ce95db64 }, + /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ + { 0x0000000173ea6628, 0x00000000af582806 }, + /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ + { 0x00000001c8b4e00a, 0x00000001006388f6 }, + /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ + { 0x00000000de95d6aa, 0x0000000179eca00a }, + /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ + { 0x000000010b7f7248, 0x0000000122410a6a }, + /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ + { 0x00000001326e3a06, 0x000000004288e87c }, + /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ + { 0x00000000bb62c2e6, 0x000000016c5490da }, + /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ + { 0x0000000156a4b2c2, 0x00000000d1c71f6e }, + /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ + { 0x000000011dfe763a, 0x00000001b4ce08a6 }, + /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ + { 0x000000007bcca8e2, 0x00000001466ba60c }, + /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ + { 0x0000000186118faa, 0x00000001f6c488a4 }, + /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ + { 0x0000000111a65a88, 0x000000013bfb0682 }, + /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ + { 0x000000003565e1c4, 0x00000000690e9e54 }, + /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ + { 0x000000012ed02a82, 0x00000000281346b6 }, + /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ + { 0x00000000c486ecfc, 0x0000000156464024 }, + /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ + { 0x0000000001b951b2, 0x000000016063a8dc }, + /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ + { 0x0000000048143916, 0x0000000116a66362 }, + /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ + { 0x00000001dc2ae124, 0x000000017e8aa4d2 }, + /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ + { 0x00000001416c58d6, 0x00000001728eb10c }, + /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ + { 0x00000000a479744a, 0x00000001b08fd7fa }, + /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ + { 0x0000000096ca3a26, 0x00000001092a16e8 }, + /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ + { 0x00000000ff223d4e, 0x00000000a505637c }, + /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ + { 0x000000010e84da42, 0x00000000d94869b2 }, + /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ + { 0x00000001b61ba3d0, 0x00000001c8b203ae }, + /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ + { 0x00000000680f2de8, 0x000000005704aea0 }, + /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ + { 0x000000008772a9a8, 0x000000012e295fa2 }, + /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ + { 0x0000000155f295bc, 0x000000011d0908bc }, + /* x^120832 mod p(x)` << 1, 
x^120896 mod p(x)` << 1 */ + { 0x00000000595f9282, 0x0000000193ed97ea }, + /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ + { 0x0000000164b1c25a, 0x000000013a0f1c52 }, + /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ + { 0x00000000fbd67c50, 0x000000010c2c40c0 }, + /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ + { 0x0000000096076268, 0x00000000ff6fac3e }, + /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ + { 0x00000001d288e4cc, 0x000000017b3609c0 }, + /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ + { 0x00000001eaac1bdc, 0x0000000088c8c922 }, + /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ + { 0x00000001f1ea39e2, 0x00000001751baae6 }, + /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ + { 0x00000001eb6506fc, 0x0000000107952972 }, + /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ + { 0x000000010f806ffe, 0x0000000162b00abe }, + /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ + { 0x000000010408481e, 0x000000000d7b404c }, + /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ + { 0x0000000188260534, 0x00000000763b13d4 }, + /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ + { 0x0000000058fc73e0, 0x00000000f6dc22d8 }, + /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ + { 0x00000000391c59b8, 0x000000007daae060 }, + /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ + { 0x000000018b638400, 0x000000013359ab7c }, + /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ + { 0x000000011738f5c4, 0x000000008add438a }, + /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ + { 0x000000008cf7c6da, 0x00000001edbefdea }, + /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ + { 0x00000001ef97fb16, 0x000000004104e0f8 }, + /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ + { 0x0000000102130e20, 0x00000000b48a8222 }, + /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ + { 0x00000000db968898, 0x00000001bcb46844 }, + /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ + { 0x00000000b5047b5e, 0x000000013293ce0a }, + /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ + { 0x000000010b90fdb2, 0x00000001710d0844 }, + /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ + { 0x000000004834a32e, 0x0000000117907f6e }, + /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ + { 0x0000000059c8f2b0, 0x0000000087ddf93e }, + /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ + { 0x0000000122cec508, 0x000000005970e9b0 }, + /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ + { 0x000000000a330cda, 0x0000000185b2b7d0 }, + /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ + { 0x000000014a47148c, 0x00000001dcee0efc }, + /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ + { 0x0000000042c61cb8, 0x0000000030da2722 }, + /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ + { 0x0000000012fe6960, 0x000000012f925a18 }, + /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ + { 0x00000000dbda2c20, 0x00000000dd2e357c }, + /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ + { 0x000000011122410c, 0x00000000071c80de }, + /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ + { 0x00000000977b2070, 0x000000011513140a }, + /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ + { 0x000000014050438e, 0x00000001df876e8e }, + /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ + { 0x0000000147c840e8, 0x000000015f81d6ce }, + /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ + { 0x00000001cc7c88ce, 0x000000019dd94dbe }, + /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ + { 0x00000001476b35a4, 0x00000001373d206e }, + /* 
x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ + { 0x000000013d52d508, 0x00000000668ccade }, + /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ + { 0x000000008e4be32e, 0x00000001b192d268 }, + /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ + { 0x00000000024120fe, 0x00000000e30f3a78 }, + /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ + { 0x00000000ddecddb4, 0x000000010ef1f7bc }, + /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ + { 0x00000000d4d403bc, 0x00000001f5ac7380 }, + /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ + { 0x00000001734b89aa, 0x000000011822ea70 }, + /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ + { 0x000000010e7a58d6, 0x00000000c3a33848 }, + /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ + { 0x00000001f9f04e9c, 0x00000001bd151c24 }, + /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ + { 0x00000000b692225e, 0x0000000056002d76 }, + /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ + { 0x000000019b8d3f3e, 0x000000014657c4f4 }, + /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ + { 0x00000001a874f11e, 0x0000000113742d7c }, + /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ + { 0x000000010d5a4254, 0x000000019c5920ba }, + /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ + { 0x00000000bbb2f5d6, 0x000000005216d2d6 }, + /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ + { 0x0000000179cc0e36, 0x0000000136f5ad8a }, + /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ + { 0x00000001dca1da4a, 0x000000018b07beb6 }, + /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ + { 0x00000000feb1a192, 0x00000000db1e93b0 }, + /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ + { 0x00000000d1eeedd6, 0x000000000b96fa3a }, + /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ + { 0x000000008fad9bb4, 0x00000001d9968af0 }, + /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ + { 0x00000001884938e4, 0x000000000e4a77a2 }, + /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ + { 0x00000001bc2e9bc0, 0x00000000508c2ac8 }, + /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ + { 0x00000001f9658a68, 0x0000000021572a80 }, + /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ + { 0x000000001b9224fc, 0x00000001b859daf2 }, + /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ + { 0x0000000055b2fb84, 0x000000016f788474 }, + /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ + { 0x000000018b090348, 0x00000001b438810e }, + /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ + { 0x000000011ccbd5ea, 0x0000000095ddc6f2 }, + /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ + { 0x0000000007ae47f8, 0x00000001d977c20c }, + /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ + { 0x0000000172acbec0, 0x00000000ebedb99a }, + /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ + { 0x00000001c6e3ff20, 0x00000001df9e9e92 }, + /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ + { 0x00000000e1b38744, 0x00000001a4a3f952 }, + /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ + { 0x00000000791585b2, 0x00000000e2f51220 }, + /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ + { 0x00000000ac53b894, 0x000000004aa01f3e }, + /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ + { 0x00000001ed5f2cf4, 0x00000000b3e90a58 }, + /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ + { 0x00000001df48b2e0, 0x000000000c9ca2aa }, + /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ + { 0x00000000049c1c62, 0x0000000151682316 }, + /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ + { 0x000000017c460c12, 0x0000000036fce78c }, + /* x^49152 mod p(x)` 
<< 1, x^49216 mod p(x)` << 1 */ + { 0x000000015be4da7e, 0x000000009037dc10 }, + /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ + { 0x000000010f38f668, 0x00000000d3298582 }, + /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ + { 0x0000000039f40a00, 0x00000001b42e8ad6 }, + /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ + { 0x00000000bd4c10c4, 0x00000000142a9838 }, + /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ + { 0x0000000042db1d98, 0x0000000109c7f190 }, + /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ + { 0x00000001c905bae6, 0x0000000056ff9310 }, + /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ + { 0x00000000069d40ea, 0x00000001594513aa }, + /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ + { 0x000000008e4fbad0, 0x00000001e3b5b1e8 }, + /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ + { 0x0000000047bedd46, 0x000000011dd5fc08 }, + /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ + { 0x0000000026396bf8, 0x00000001675f0cc2 }, + /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ + { 0x00000000379beb92, 0x00000000d1c8dd44 }, + /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ + { 0x000000000abae54a, 0x0000000115ebd3d8 }, + /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ + { 0x0000000007e6a128, 0x00000001ecbd0dac }, + /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ + { 0x000000000ade29d2, 0x00000000cdf67af2 }, + /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ + { 0x00000000f974c45c, 0x000000004c01ff4c }, + /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ + { 0x00000000e77ac60a, 0x00000000f2d8657e }, + /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ + { 0x0000000145895816, 0x000000006bae74c4 }, + /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ + { 0x0000000038e362be, 0x0000000152af8aa0 }, + /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ + { 0x000000007f991a64, 0x0000000004663802 }, + /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ + { 0x00000000fa366d3a, 0x00000001ab2f5afc }, + /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ + { 0x00000001a2bb34f0, 0x0000000074a4ebd4 }, + /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ + { 0x0000000028a9981e, 0x00000001d7ab3a4c }, + /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ + { 0x00000001dbc672be, 0x00000001a8da60c6 }, + /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ + { 0x00000000b04d77f6, 0x000000013cf63820 }, + /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ + { 0x0000000124400d96, 0x00000000bec12e1e }, + /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ + { 0x000000014ca4b414, 0x00000001c6368010 }, + /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ + { 0x000000012fe2c938, 0x00000001e6e78758 }, + /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ + { 0x00000001faed01e6, 0x000000008d7f2b3c }, + /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ + { 0x000000007e80ecfe, 0x000000016b4a156e }, + /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ + { 0x0000000098daee94, 0x00000001c63cfeb6 }, + /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ + { 0x000000010a04edea, 0x000000015f902670 }, + /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ + { 0x00000001c00b4524, 0x00000001cd5de11e }, + /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ + { 0x0000000170296550, 0x000000001acaec54 }, + /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ + { 0x0000000181afaa48, 0x000000002bd0ca78 }, + /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ + { 0x0000000185a31ffa, 0x0000000032d63d5c }, + /* x^13312 mod p(x)` << 1, x^13376 mod 
p(x)` << 1 */ + { 0x000000002469f608, 0x000000001c6d4e4c }, + /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ + { 0x000000006980102a, 0x0000000106a60b92 }, + /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ + { 0x0000000111ea9ca8, 0x00000000d3855e12 }, + /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ + { 0x00000001bd1d29ce, 0x00000000e3125636 }, + /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ + { 0x00000001b34b9580, 0x000000009e8f7ea4 }, + /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ + { 0x000000003076054e, 0x00000001c82e562c }, + /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ + { 0x000000012a608ea4, 0x00000000ca9f09ce }, + /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ + { 0x00000000784d05fe, 0x00000000c63764e6 }, + /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ + { 0x000000016ef0d82a, 0x0000000168d2e49e }, + /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ + { 0x0000000075bda454, 0x00000000e986c148 }, + /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ + { 0x000000003dc0a1c4, 0x00000000cfb65894 }, + /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ + { 0x00000000e9a5d8be, 0x0000000111cadee4 }, + /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ + { 0x00000001609bc4b4, 0x0000000171fb63ce } +#else /* __LITTLE_ENDIAN__ */ + /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ + { 0x00000000b6ca9e20, 0x000000009c37c408 }, + /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */ + { 0x00000000350249a8, 0x00000001b51df26c }, + /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ + { 0x00000001862dac54, 0x000000000724b9d0 }, + /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ + { 0x00000001d87fb48c, 0x00000001c00532fe }, + /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ + { 0x00000001f39b699e, 0x00000000f05a9362 }, + /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ + { 0x0000000101da11b4, 0x00000001e1007970 }, + /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ + { 0x00000001cab571e0, 0x00000000a57366ee }, + /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ + { 0x00000000c7020cfe, 0x0000000192011284 }, + /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ + { 0x00000000cdaed1ae, 0x0000000162716d9a }, + /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ + { 0x00000001e804effc, 0x00000000cd97ecde }, + /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ + { 0x0000000077c3ea3a, 0x0000000058812bc0 }, + /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ + { 0x0000000068df31b4, 0x0000000088b8c12e }, + /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ + { 0x00000000b059b6c2, 0x00000001230b234c }, + /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ + { 0x0000000145fb8ed8, 0x00000001120b416e }, + /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ + { 0x00000000cbc09168, 0x00000001974aecb0 }, + /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ + { 0x000000005ceeedc2, 0x000000008ee3f226 }, + /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ + { 0x0000000047d74e86, 0x00000001089aba9a }, + /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ + { 0x00000001407e9e22, 0x0000000065113872 }, + /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ + { 0x00000001da967bda, 0x000000005c07ec10 }, + /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ + { 0x000000006c898368, 0x0000000187590924 }, + /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ + { 0x00000000f2d14c98, 0x00000000e35da7c6 }, + /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ + { 0x00000001993c6ad4, 0x000000000415855a }, + /* 
x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ + { 0x000000014683d1ac, 0x0000000073617758 }, + /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ + { 0x00000001a7c93e6c, 0x0000000176021d28 }, + /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ + { 0x000000010211e90a, 0x00000001c358fd0a }, + /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ + { 0x000000001119403e, 0x00000001ff7a2c18 }, + /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ + { 0x000000001c3261aa, 0x00000000f2d9f7e4 }, + /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ + { 0x000000014e37a634, 0x000000016cf1f9c8 }, + /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ + { 0x0000000073786c0c, 0x000000010af9279a }, + /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ + { 0x000000011dc037f8, 0x0000000004f101e8 }, + /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ + { 0x0000000031433dfc, 0x0000000070bcf184 }, + /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ + { 0x000000009cde8348, 0x000000000a8de642 }, + /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ + { 0x0000000038d3c2a6, 0x0000000062ea130c }, + /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ + { 0x000000011b25f260, 0x00000001eb31cbb2 }, + /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ + { 0x000000001629e6f0, 0x0000000170783448 }, + /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ + { 0x0000000160838b4c, 0x00000001a684b4c6 }, + /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ + { 0x000000007a44011c, 0x00000000253ca5b4 }, + /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ + { 0x00000000226f417a, 0x0000000057b4b1e2 }, + /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ + { 0x0000000045eb2eb4, 0x00000000b6bd084c }, + /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ + { 0x000000014459d70c, 0x0000000123c2d592 }, + /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ + { 0x00000001d406ed82, 0x00000000159dafce }, + /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ + { 0x0000000160c8e1a8, 0x0000000127e1a64e }, + /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ + { 0x0000000027ba8098, 0x0000000056860754 }, + /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ + { 0x000000006d92d018, 0x00000001e661aae8 }, + /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ + { 0x000000012ed7e3f2, 0x00000000f82c6166 }, + /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ + { 0x000000002dc87788, 0x00000000c4f9c7ae }, + /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ + { 0x0000000018240bb8, 0x0000000074203d20 }, + /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ + { 0x000000001ad38158, 0x0000000198173052 }, + /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ + { 0x00000001396b78f2, 0x00000001ce8aba54 }, + /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ + { 0x000000011a681334, 0x00000001850d5d94 }, + /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ + { 0x000000012104732e, 0x00000001d609239c }, + /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ + { 0x00000000a140d90c, 0x000000001595f048 }, + /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ + { 0x00000001b7215eda, 0x0000000042ccee08 }, + /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ + { 0x00000001aaf1df3c, 0x000000010a389d74 }, + /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ + { 0x0000000029d15b8a, 0x000000012a840da6 }, + /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ + { 0x00000000f1a96922, 0x000000001d181c0c }, + /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ 
+ { 0x00000001ac80d03c, 0x0000000068b7d1f6 }, + /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ + { 0x000000000f11d56a, 0x000000005b0f14fc }, + /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ + { 0x00000001f1c022a2, 0x0000000179e9e730 }, + /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ + { 0x0000000173d00ae2, 0x00000001ce1368d6 }, + /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ + { 0x00000001d4ffe4ac, 0x0000000112c3a84c }, + /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ + { 0x000000016edc5ae4, 0x00000000de940fee }, + /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ + { 0x00000001f1a02140, 0x00000000fe896b7e }, + /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ + { 0x00000000ca0b28a0, 0x00000001f797431c }, + /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ + { 0x00000001928e30a2, 0x0000000053e989ba }, + /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ + { 0x0000000097b1b002, 0x000000003920cd16 }, + /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ + { 0x00000000b15bf906, 0x00000001e6f579b8 }, + /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ + { 0x00000000411c5d52, 0x000000007493cb0a }, + /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ + { 0x00000001c36f3300, 0x00000001bdd376d8 }, + /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ + { 0x00000001119227e0, 0x000000016badfee6 }, + /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ + { 0x00000000114d4702, 0x0000000071de5c58 }, + /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ + { 0x00000000458b5b98, 0x00000000453f317c }, + /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ + { 0x000000012e31fb8e, 0x0000000121675cce }, + /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ + { 0x000000005cf619d8, 0x00000001f409ee92 }, + /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ + { 0x0000000063f4d8b2, 0x00000000f36b9c88 }, + /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ + { 0x000000004138dc8a, 0x0000000036b398f4 }, + /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ + { 0x00000001d29ee8e0, 0x00000001748f9adc }, + /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ + { 0x000000006a08ace8, 0x00000001be94ec00 }, + /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ + { 0x0000000127d42010, 0x00000000b74370d6 }, + /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ + { 0x0000000019d76b62, 0x00000001174d0b98 }, + /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ + { 0x00000001b1471f6e, 0x00000000befc06a4 }, + /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ + { 0x00000001f64c19cc, 0x00000001ae125288 }, + /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ + { 0x00000000003c0ea0, 0x0000000095c19b34 }, + /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ + { 0x000000014d73abf6, 0x00000001a78496f2 }, + /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ + { 0x00000001620eb844, 0x00000001ac5390a0 }, + /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ + { 0x0000000147655048, 0x000000002a80ed6e }, + /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ + { 0x0000000067b5077e, 0x00000001fa9b0128 }, + /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ + { 0x0000000010ffe206, 0x00000001ea94929e }, + /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ + { 0x000000000fee8f1e, 0x0000000125f4305c }, + /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ + { 0x00000001da26fbae, 0x00000001471e2002 }, + /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ + { 0x00000001b3a8bd88, 0x0000000132d2253a }, + /* 
x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ + { 0x00000000e8f3898e, 0x00000000f26b3592 }, + /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ + { 0x00000000b0d0d28c, 0x00000000bc8b67b0 }, + /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ + { 0x0000000030f2a798, 0x000000013a826ef2 }, + /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ + { 0x000000000fba1002, 0x0000000081482c84 }, + /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ + { 0x00000000bdb9bd72, 0x00000000e77307c2 }, + /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ + { 0x0000000075d3bf5a, 0x00000000d4a07ec8 }, + /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ + { 0x00000000ef1f98a0, 0x0000000017102100 }, + /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ + { 0x00000000689c7602, 0x00000000db406486 }, + /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ + { 0x000000016d5fa5fe, 0x0000000192db7f88 }, + /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ + { 0x00000001d0d2b9ca, 0x000000018bf67b1e }, + /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ + { 0x0000000041e7b470, 0x000000007c09163e }, + /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ + { 0x00000001cbb6495e, 0x000000000adac060 }, + /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ + { 0x000000010052a0b0, 0x00000000bd8316ae }, + /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ + { 0x00000001d8effb5c, 0x000000019f09ab54 }, + /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ + { 0x00000001d969853c, 0x0000000125155542 }, + /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ + { 0x00000000523ccce2, 0x000000018fdb5882 }, + /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ + { 0x000000001e2436bc, 0x00000000e794b3f4 }, + /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ + { 0x00000000ddd1c3a2, 0x000000016f9bb022 }, + /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ + { 0x0000000019fcfe38, 0x00000000290c9978 }, + /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ + { 0x00000001ce95db64, 0x0000000083c0f350 }, + /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ + { 0x00000000af582806, 0x0000000173ea6628 }, + /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ + { 0x00000001006388f6, 0x00000001c8b4e00a }, + /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ + { 0x0000000179eca00a, 0x00000000de95d6aa }, + /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ + { 0x0000000122410a6a, 0x000000010b7f7248 }, + /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ + { 0x000000004288e87c, 0x00000001326e3a06 }, + /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ + { 0x000000016c5490da, 0x00000000bb62c2e6 }, + /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ + { 0x00000000d1c71f6e, 0x0000000156a4b2c2 }, + /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ + { 0x00000001b4ce08a6, 0x000000011dfe763a }, + /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ + { 0x00000001466ba60c, 0x000000007bcca8e2 }, + /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ + { 0x00000001f6c488a4, 0x0000000186118faa }, + /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ + { 0x000000013bfb0682, 0x0000000111a65a88 }, + /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ + { 0x00000000690e9e54, 0x000000003565e1c4 }, + /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ + { 0x00000000281346b6, 0x000000012ed02a82 }, + /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ + { 0x0000000156464024, 0x00000000c486ecfc }, + /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ 
+ { 0x000000016063a8dc, 0x0000000001b951b2 }, + /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ + { 0x0000000116a66362, 0x0000000048143916 }, + /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ + { 0x000000017e8aa4d2, 0x00000001dc2ae124 }, + /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ + { 0x00000001728eb10c, 0x00000001416c58d6 }, + /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ + { 0x00000001b08fd7fa, 0x00000000a479744a }, + /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ + { 0x00000001092a16e8, 0x0000000096ca3a26 }, + /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ + { 0x00000000a505637c, 0x00000000ff223d4e }, + /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ + { 0x00000000d94869b2, 0x000000010e84da42 }, + /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ + { 0x00000001c8b203ae, 0x00000001b61ba3d0 }, + /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ + { 0x000000005704aea0, 0x00000000680f2de8 }, + /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ + { 0x000000012e295fa2, 0x000000008772a9a8 }, + /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ + { 0x000000011d0908bc, 0x0000000155f295bc }, + /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ + { 0x0000000193ed97ea, 0x00000000595f9282 }, + /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ + { 0x000000013a0f1c52, 0x0000000164b1c25a }, + /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ + { 0x000000010c2c40c0, 0x00000000fbd67c50 }, + /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ + { 0x00000000ff6fac3e, 0x0000000096076268 }, + /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ + { 0x000000017b3609c0, 0x00000001d288e4cc }, + /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ + { 0x0000000088c8c922, 0x00000001eaac1bdc }, + /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ + { 0x00000001751baae6, 0x00000001f1ea39e2 }, + /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ + { 0x0000000107952972, 0x00000001eb6506fc }, + /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ + { 0x0000000162b00abe, 0x000000010f806ffe }, + /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ + { 0x000000000d7b404c, 0x000000010408481e }, + /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ + { 0x00000000763b13d4, 0x0000000188260534 }, + /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ + { 0x00000000f6dc22d8, 0x0000000058fc73e0 }, + /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ + { 0x000000007daae060, 0x00000000391c59b8 }, + /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ + { 0x000000013359ab7c, 0x000000018b638400 }, + /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ + { 0x000000008add438a, 0x000000011738f5c4 }, + /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ + { 0x00000001edbefdea, 0x000000008cf7c6da }, + /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ + { 0x000000004104e0f8, 0x00000001ef97fb16 }, + /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ + { 0x00000000b48a8222, 0x0000000102130e20 }, + /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ + { 0x00000001bcb46844, 0x00000000db968898 }, + /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ + { 0x000000013293ce0a, 0x00000000b5047b5e }, + /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ + { 0x00000001710d0844, 0x000000010b90fdb2 }, + /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ + { 0x0000000117907f6e, 0x000000004834a32e }, + /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ + { 0x0000000087ddf93e, 0x0000000059c8f2b0 }, + /* 
x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ + { 0x000000005970e9b0, 0x0000000122cec508 }, + /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ + { 0x0000000185b2b7d0, 0x000000000a330cda }, + /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ + { 0x00000001dcee0efc, 0x000000014a47148c }, + /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ + { 0x0000000030da2722, 0x0000000042c61cb8 }, + /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ + { 0x000000012f925a18, 0x0000000012fe6960 }, + /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ + { 0x00000000dd2e357c, 0x00000000dbda2c20 }, + /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ + { 0x00000000071c80de, 0x000000011122410c }, + /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ + { 0x000000011513140a, 0x00000000977b2070 }, + /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ + { 0x00000001df876e8e, 0x000000014050438e }, + /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ + { 0x000000015f81d6ce, 0x0000000147c840e8 }, + /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ + { 0x000000019dd94dbe, 0x00000001cc7c88ce }, + /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ + { 0x00000001373d206e, 0x00000001476b35a4 }, + /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ + { 0x00000000668ccade, 0x000000013d52d508 }, + /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ + { 0x00000001b192d268, 0x000000008e4be32e }, + /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ + { 0x00000000e30f3a78, 0x00000000024120fe }, + /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ + { 0x000000010ef1f7bc, 0x00000000ddecddb4 }, + /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ + { 0x00000001f5ac7380, 0x00000000d4d403bc }, + /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ + { 0x000000011822ea70, 0x00000001734b89aa }, + /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ + { 0x00000000c3a33848, 0x000000010e7a58d6 }, + /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ + { 0x00000001bd151c24, 0x00000001f9f04e9c }, + /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ + { 0x0000000056002d76, 0x00000000b692225e }, + /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ + { 0x000000014657c4f4, 0x000000019b8d3f3e }, + /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ + { 0x0000000113742d7c, 0x00000001a874f11e }, + /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ + { 0x000000019c5920ba, 0x000000010d5a4254 }, + /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ + { 0x000000005216d2d6, 0x00000000bbb2f5d6 }, + /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ + { 0x0000000136f5ad8a, 0x0000000179cc0e36 }, + /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ + { 0x000000018b07beb6, 0x00000001dca1da4a }, + /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ + { 0x00000000db1e93b0, 0x00000000feb1a192 }, + /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ + { 0x000000000b96fa3a, 0x00000000d1eeedd6 }, + /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ + { 0x00000001d9968af0, 0x000000008fad9bb4 }, + /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ + { 0x000000000e4a77a2, 0x00000001884938e4 }, + /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ + { 0x00000000508c2ac8, 0x00000001bc2e9bc0 }, + /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ + { 0x0000000021572a80, 0x00000001f9658a68 }, + /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ + { 0x00000001b859daf2, 0x000000001b9224fc }, + /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ + { 0x000000016f788474, 0x0000000055b2fb84 }, + /* x^61440 mod p(x)` 
<< 1, x^61504 mod p(x)` << 1 */ + { 0x00000001b438810e, 0x000000018b090348 }, + /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ + { 0x0000000095ddc6f2, 0x000000011ccbd5ea }, + /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ + { 0x00000001d977c20c, 0x0000000007ae47f8 }, + /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ + { 0x00000000ebedb99a, 0x0000000172acbec0 }, + /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ + { 0x00000001df9e9e92, 0x00000001c6e3ff20 }, + /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ + { 0x00000001a4a3f952, 0x00000000e1b38744 }, + /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ + { 0x00000000e2f51220, 0x00000000791585b2 }, + /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ + { 0x000000004aa01f3e, 0x00000000ac53b894 }, + /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ + { 0x00000000b3e90a58, 0x00000001ed5f2cf4 }, + /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ + { 0x000000000c9ca2aa, 0x00000001df48b2e0 }, + /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ + { 0x0000000151682316, 0x00000000049c1c62 }, + /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ + { 0x0000000036fce78c, 0x000000017c460c12 }, + /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ + { 0x000000009037dc10, 0x000000015be4da7e }, + /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ + { 0x00000000d3298582, 0x000000010f38f668 }, + /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ + { 0x00000001b42e8ad6, 0x0000000039f40a00 }, + /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ + { 0x00000000142a9838, 0x00000000bd4c10c4 }, + /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ + { 0x0000000109c7f190, 0x0000000042db1d98 }, + /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ + { 0x0000000056ff9310, 0x00000001c905bae6 }, + /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ + { 0x00000001594513aa, 0x00000000069d40ea }, + /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ + { 0x00000001e3b5b1e8, 0x000000008e4fbad0 }, + /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ + { 0x000000011dd5fc08, 0x0000000047bedd46 }, + /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ + { 0x00000001675f0cc2, 0x0000000026396bf8 }, + /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ + { 0x00000000d1c8dd44, 0x00000000379beb92 }, + /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ + { 0x0000000115ebd3d8, 0x000000000abae54a }, + /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ + { 0x00000001ecbd0dac, 0x0000000007e6a128 }, + /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ + { 0x00000000cdf67af2, 0x000000000ade29d2 }, + /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ + { 0x000000004c01ff4c, 0x00000000f974c45c }, + /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ + { 0x00000000f2d8657e, 0x00000000e77ac60a }, + /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ + { 0x000000006bae74c4, 0x0000000145895816 }, + /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ + { 0x0000000152af8aa0, 0x0000000038e362be }, + /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ + { 0x0000000004663802, 0x000000007f991a64 }, + /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ + { 0x00000001ab2f5afc, 0x00000000fa366d3a }, + /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ + { 0x0000000074a4ebd4, 0x00000001a2bb34f0 }, + /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ + { 0x00000001d7ab3a4c, 0x0000000028a9981e }, + /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ + { 0x00000001a8da60c6, 0x00000001dbc672be }, + /* x^25600 mod p(x)` << 1, x^25664 mod 
p(x)` << 1 */ + { 0x000000013cf63820, 0x00000000b04d77f6 }, + /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ + { 0x00000000bec12e1e, 0x0000000124400d96 }, + /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ + { 0x00000001c6368010, 0x000000014ca4b414 }, + /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ + { 0x00000001e6e78758, 0x000000012fe2c938 }, + /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ + { 0x000000008d7f2b3c, 0x00000001faed01e6 }, + /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ + { 0x000000016b4a156e, 0x000000007e80ecfe }, + /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ + { 0x00000001c63cfeb6, 0x0000000098daee94 }, + /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ + { 0x000000015f902670, 0x000000010a04edea }, + /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ + { 0x00000001cd5de11e, 0x00000001c00b4524 }, + /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ + { 0x000000001acaec54, 0x0000000170296550 }, + /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ + { 0x000000002bd0ca78, 0x0000000181afaa48 }, + /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ + { 0x0000000032d63d5c, 0x0000000185a31ffa }, + /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ + { 0x000000001c6d4e4c, 0x000000002469f608 }, + /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ + { 0x0000000106a60b92, 0x000000006980102a }, + /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ + { 0x00000000d3855e12, 0x0000000111ea9ca8 }, + /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ + { 0x00000000e3125636, 0x00000001bd1d29ce }, + /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ + { 0x000000009e8f7ea4, 0x00000001b34b9580 }, + /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ + { 0x00000001c82e562c, 0x000000003076054e }, + /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ + { 0x00000000ca9f09ce, 0x000000012a608ea4 }, + /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ + { 0x00000000c63764e6, 0x00000000784d05fe }, + /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ + { 0x0000000168d2e49e, 0x000000016ef0d82a }, + /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ + { 0x00000000e986c148, 0x0000000075bda454 }, + /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ + { 0x00000000cfb65894, 0x000000003dc0a1c4 }, + /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ + { 0x0000000111cadee4, 0x00000000e9a5d8be }, + /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ + { 0x0000000171fb63ce, 0x00000001609bc4b4 } +#endif /* __LITTLE_ENDIAN__ */ + }; + +/* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */ + +static const __vector unsigned long long vcrc_short_const[16] + __attribute__((aligned (16))) = { +#ifdef __LITTLE_ENDIAN__ + /* x^1952 mod p(x) , x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x) */ + { 0x5cf015c388e56f72, 0x7fec2963e5bf8048 }, + /* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x) */ + { 0x963a18920246e2e6, 0x38e888d4844752a9 }, + /* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x) */ + { 0x419a441956993a31, 0x42316c00730206ad }, + /* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x) */ + { 0x924752ba2b830011, 0x543d5c543e65ddf9 }, + /* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x) */ + { 0x55bd7f9518e4a304, 0x78e87aaf56767c92 }, + /* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod p(x) */ + { 0x6d76739fe0553f1e, 0x8f68fcec1903da7f }, + /* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x) 
*/ + { 0xc133722b1fe0b5c3, 0x3f4840246791d588 }, + /* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x) */ + { 0x64b67ee0e55ef1f3, 0x34c96751b04de25a }, + /* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x) */ + { 0x069db049b8fdb1e7, 0x156c8e180b4a395b }, + /* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , x^896 mod p(x) */ + { 0xa11bfaf3c9e90b9e, 0xe0b99ccbe661f7be }, + /* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x) */ + { 0x817cdc5119b29a35, 0x041d37768cd75659 }, + /* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x) */ + { 0x1ce9d94b36c41f1c, 0x3a0777818cfaa965 }, + /* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x) */ + { 0x4f256efcb82be955, 0x0e148e8252377a55 }, + /* x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x) */ + { 0xec1631edb2dea967, 0x9c25531d19e65dde }, + /* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x) */ + { 0x5d27e147510ac59a, 0x790606ff9957c0a6 }, + /* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x) */ + { 0xa66805eb18b8ea18, 0x82f63b786ea2d55c } +#else /* __LITTLE_ENDIAN__ */ + /* x^1952 mod p(x) , x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x) */ + { 0x7fec2963e5bf8048, 0x5cf015c388e56f72 }, + /* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x) */ + { 0x38e888d4844752a9, 0x963a18920246e2e6 }, + /* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x) */ + { 0x42316c00730206ad, 0x419a441956993a31 }, + /* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x) */ + { 0x543d5c543e65ddf9, 0x924752ba2b830011 }, + /* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x) */ + { 0x78e87aaf56767c92, 0x55bd7f9518e4a304 }, + /* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod p(x) */ + { 0x8f68fcec1903da7f, 0x6d76739fe0553f1e }, + /* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x) */ + { 0x3f4840246791d588, 0xc133722b1fe0b5c3 }, + /* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x) */ + { 0x34c96751b04de25a, 0x64b67ee0e55ef1f3 }, + /* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x) */ + { 0x156c8e180b4a395b, 0x069db049b8fdb1e7 }, + /* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , x^896 mod p(x) */ + { 0xe0b99ccbe661f7be, 0xa11bfaf3c9e90b9e }, + /* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x) */ + { 0x041d37768cd75659, 0x817cdc5119b29a35 }, + /* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x) */ + { 0x3a0777818cfaa965, 0x1ce9d94b36c41f1c }, + /* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x) */ + { 0x0e148e8252377a55, 0x4f256efcb82be955 }, + /* x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x) */ + { 0x9c25531d19e65dde, 0xec1631edb2dea967 }, + /* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x) */ + { 0x790606ff9957c0a6, 0x5d27e147510ac59a }, + /* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x) */ + { 0x82f63b786ea2d55c, 0xa66805eb18b8ea18 } +#endif /* __LITTLE_ENDIAN__ */ + }; + +/* Barrett constants */ +/* 33 bit reflected Barrett constant m - (4^32)/n */ + +static const __vector unsigned long long v_Barrett_const[2] + __attribute__((aligned (16))) = { + /* x^64 div p(x) */ +#ifdef __LITTLE_ENDIAN__ + { 0x00000000dea713f1, 0x0000000000000000 }, + { 0x0000000105ec76f1, 0x0000000000000000 } +#else /* __LITTLE_ENDIAN__ */ + { 0x0000000000000000, 0x00000000dea713f1 }, + { 
0x0000000000000000, 0x0000000105ec76f1 } +#endif /* __LITTLE_ENDIAN__ */ + }; +#endif /* POWER8_INTRINSICS */ + +#endif /* __ASSEMBLER__ */ diff --git a/contrib/crc32-vpmsum-cmake/vec_crc32.h b/contrib/crc32-vpmsum-cmake/vec_crc32.h new file mode 100644 index 00000000000..0ef13616b34 --- /dev/null +++ b/contrib/crc32-vpmsum-cmake/vec_crc32.h @@ -0,0 +1,29 @@ +#ifndef VEC_CRC32 +#define VEC_CRC32 + + +#ifdef __cplusplus +extern "C" { +#endif + +unsigned int crc32_vpmsum(unsigned int crc, const unsigned char *p, unsigned long len); + +static inline uint32_t crc32_ppc(uint64_t crc, unsigned char const *buffer, size_t len) +{ + unsigned char *emptybuffer; + if (!buffer) { + emptybuffer = (unsigned char *)malloc(len); + bzero(emptybuffer, len); + crc = crc32_vpmsum(crc, emptybuffer, len); + free(emptybuffer); + } else { + crc = crc32_vpmsum(crc, buffer, (unsigned long)len); + } + return crc; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d8a7dba72ac..1bc1151b90b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -364,6 +364,10 @@ if (TARGET ch_contrib::crc32_s390x) target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::crc32_s390x) endif() +if (TARGET ch_contrib::crc32-vpmsum) + target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::crc32-vpmsum) +endif() + dbms_target_link_libraries(PUBLIC ch_contrib::abseil_swiss_tables) target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::abseil_swiss_tables) diff --git a/src/Common/HashTable/Hash.h b/src/Common/HashTable/Hash.h index 01758c1b9fb..c7342d061d8 100644 --- a/src/Common/HashTable/Hash.h +++ b/src/Common/HashTable/Hash.h @@ -48,6 +48,10 @@ inline DB::UInt64 intHash64(DB::UInt64 x) #include #endif +#if (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#include "vec_crc32.h" +#endif + #if defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ #include @@ -89,6 +93,8 @@ inline DB::UInt64 intHashCRC32(DB::UInt64 x) return __crc32cd(-1U, x); #elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ return s390x_crc32(-1U, x); +#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return crc32_ppc(-1U, reinterpret_cast<const unsigned char *>(&x), sizeof(x)); #else /// On other platforms we do not have CRC32. NOTE This can be confusing. /// NOTE: consider using intHash32() @@ -103,6 +109,8 @@ inline DB::UInt64 intHashCRC32(DB::UInt64 x, DB::UInt64 updated_value) return __crc32cd(static_cast<UInt32>(updated_value), x); #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ return s390x_crc32(updated_value, x); +#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return crc32_ppc(updated_value, reinterpret_cast<const unsigned char *>(&x), sizeof(x)); #else /// On other platforms we do not have CRC32. NOTE This can be confusing.
return intHash64(x) ^ updated_value; diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index e9810e918b4..45543f57b37 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -86,6 +86,10 @@ if (TARGET ch_contrib::rapidjson) list (APPEND PRIVATE_LIBS ch_contrib::rapidjson) endif() +if (TARGET ch_contrib::crc32-vpmsum) + list (APPEND PUBLIC_LIBS ch_contrib::crc32-vpmsum) +endif() + add_subdirectory(GatherUtils) list (APPEND PRIVATE_LIBS clickhouse_functions_gatherutils) diff --git a/src/Functions/FunctionsStringHash.cpp b/src/Functions/FunctionsStringHash.cpp index 174acebe979..bf0b7463a5d 100644 --- a/src/Functions/FunctionsStringHash.cpp +++ b/src/Functions/FunctionsStringHash.cpp @@ -14,6 +14,10 @@ #include +#if (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#include "vec_crc32.h" +#endif + namespace DB { @@ -38,6 +42,8 @@ struct Hash return __crc32cd(static_cast<UInt32>(crc), val); #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ return s390x_crc32(crc, val); +#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return crc32_ppc(crc, reinterpret_cast<const unsigned char *>(&val), sizeof(val)); #else throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED); #endif @@ -51,6 +57,8 @@ struct Hash return __crc32cw(crc, val); #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ return s390x_crc32_u32(crc, val); +#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return crc32_ppc(crc, reinterpret_cast<const unsigned char *>(&val), sizeof(val)); #else throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED); #endif @@ -64,6 +72,8 @@ struct Hash return __crc32ch(crc, val); #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ return s390x_crc32_u16(crc, val); +#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return crc32_ppc(crc, reinterpret_cast<const unsigned char *>(&val), sizeof(val)); #else throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED); #endif @@ -77,6 +87,8 @@ struct Hash return __crc32cb(crc, val); #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ return s390x_crc32_u8(crc, val); +#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return crc32_ppc(crc, reinterpret_cast<const unsigned char *>(&val), sizeof(val)); #else throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED); #endif diff --git a/src/Functions/FunctionsStringSimilarity.cpp b/src/Functions/FunctionsStringSimilarity.cpp index 802aafc2042..87aa0f4b3f7 100644 --- a/src/Functions/FunctionsStringSimilarity.cpp +++ b/src/Functions/FunctionsStringSimilarity.cpp @@ -24,6 +24,10 @@ # include #endif +#if (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#include "vec_crc32.h" +#endif + namespace DB { /** Distance function implementation.
@@ -72,6 +76,8 @@ struct NgramDistanceImpl return __crc32cd(code_points[2], combined) & 0xFFFFu; #elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ return s390x_crc32(code_points[2], combined) & 0xFFFFu; +#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return crc32_ppc(code_points[2], reinterpret_cast(&combined), sizeof(combined)) & 0xFFFFu; #else return (intHashCRC32(combined) ^ intHashCRC32(code_points[2])) & 0xFFFFu; #endif diff --git a/tests/queries/0_stateless/01016_simhash_minhash.ppc64le.reference b/tests/queries/0_stateless/01016_simhash_minhash.ppc64le.reference new file mode 100644 index 00000000000..2acad33320b --- /dev/null +++ b/tests/queries/0_stateless/01016_simhash_minhash.ppc64le.reference @@ -0,0 +1,148 @@ +18446744073709551615 +1737075136 +1737075136 +4018781633 +4018781633 +1846985414 +1846985414 +1846985414 +1846985414 +(10693559443859979498,10693559443859979498) +(12279482788274235946,6436413987527322272) +(12279482788274235946,6436413987527322272) +(13257488272755813409,6436413987527322272) +(13257488272755813409,6436413987527322272) +(13762864994695140861,13762864994695140861) +(13762864994695140861,13762864994695140861) +(13762864994695140861,13762864994695140861) +(13762864994695140861,13762864994695140861) +3023525975 +3040303199 +3023509591 +3023510623 +3040303191 +3040303191 +3023510615 +3023510615 +1999952988 +926211140 +1999699532 +1999683148 +1999952988 +926211140 +1999699532 +1999683148 +(16071125717475221203,9592059329600248798) +(16071125717475221203,1914899959549098907) +(16071125717475221203,7986182634218042944) +(16071125717475221203,7986182634218042944) +(16071125717475221203,9592059329600248798) +(16071125717475221203,1914899959549098907) +(16071125717475221203,7986182634218042944) +(16071125717475221203,7986182634218042944) +(10576877560263640956,4278250516018530743) +(16211512098526494023,11479872370566432466) +(13515070557027359649,17725505493832406849) +(12589381623326290380,575343713614534202) +(10576877560263640956,4278250516018530743) +(16211512098526494023,11479872370566432466) +(13515070557027359649,17725505493832406849) +(12589381623326290380,575343713614534202) +uniqExact 6 +ngramSimHash +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 1211135069 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1546679389 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 2293265501 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 3392173149 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3627054169 +ngramSimHashCaseInsensitive +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 2291168349 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 3358618717 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3425727581 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3627054429 +ngramSimHashUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 1211135069 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1546679389 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. 
SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 2284876893 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 3459282013 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3694163037 +ngramSimHashCaseInsensitiveUTF8 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 2291168349 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. 
Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 3358618717 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3425727581 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3627054429 +wordShingleSimHash +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 10637533 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 171136201 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. 
SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 209864029 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 413353165 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 413353677 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 418595033 +wordShingleSimHashCaseInsensitive +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. 
Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 218252892 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 1218592985 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1613919433 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2080524225 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2088912577 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2094163657 +wordShingleSimHashUTF8 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 10637533 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 171136201 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 209864029 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 413353165 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 413353677 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 418595033 +wordShingleSimHashCaseInsensitiveUTF8 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 218252892 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 1218592985 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1613919433 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2080524225 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2088912577 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2094163657 +ngramMinHash +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (2793448378579182412,5526633106516004292) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (8530889421347045182,5150364204263408031) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (8992738078100405992,5526633106516004292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (15193387305258759701,5526633106516004292) +ngramMinHashCaseInsensitive +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (2793448378579182412,5526633106516004292) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. 
SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (8530889421347045182,5150364204263408031) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (8992738078100405992,5526633106516004292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (15193387305258759701,5526633106516004292) +ngramMinHashUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (2793448378579182412,5526633106516004292) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (8530889421347045182,5150364204263408031) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 
1 (8992738078100405992,5526633106516004292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (15193387305258759701,5526633106516004292) +ngramMinHashCaseInsensitiveUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (2793448378579182412,5526633106516004292) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (8530889421347045182,5150364204263408031) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (8992738078100405992,5526633106516004292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (15193387305258759701,5526633106516004292) +wordShingleMinHash +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (3409292695558556998,3242671779450421938) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. 
It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (11981468198903037199,5500630346333489583) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 (12852656749419794093,678630951345180105) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (13105381013738345838,410122209669519134) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (13105381013738345838,3365040177160857031) +wordShingleMinHashCaseInsensitive +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (712181695272576370,125062659592971094) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 (3404326999173181417,12067981913120463876) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (13918035273694643957,5500630346333489583) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (14132553626943388792,12467125901844798869) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (14132553626943388792,17567683680214055861) +wordShingleMinHashUTF8 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (3409292695558556998,3242671779450421938) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (11981468198903037199,5500630346333489583) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 (12852656749419794093,678630951345180105) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (13105381013738345838,410122209669519134) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (13105381013738345838,3365040177160857031) +wordShingleMinHashCaseInsensitiveUTF8 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (712181695272576370,125062659592971094) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 (3404326999173181417,12067981913120463876) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (13918035273694643957,5500630346333489583) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (14132553626943388792,12467125901844798869) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (14132553626943388792,17567683680214055861) +code 69: Second argument (shingle size) of function wordShingleSimHash cannot be greater then 25: While processing wordShingleSimHash('foobar', 9223372036854775807)None +code 69: Second argument (shingle size) of function wordShingleSimHash cannot be greater then 25: While processing wordShingleSimHash('foobar', 1001)None +code 69: Second argument (shingle size) of function wordShingleSimHash cannot be zero: While processing wordShingleSimHash('foobar', 0)None diff --git a/tests/queries/0_stateless/01016_simhash_minhash.python b/tests/queries/0_stateless/01016_simhash_minhash.python new file mode 100644 index 00000000000..1d6eae456c1 --- /dev/null +++ b/tests/queries/0_stateless/01016_simhash_minhash.python @@ -0,0 +1,394 @@ +#!/usr/bin/env python3 +import os +import socket +import sys +from scipy import stats +import pandas as pd +import numpy as np +import shutil +import platform + +import uuid + +CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1') +CLICKHOUSE_PORT = int(os.environ.get('CLICKHOUSE_PORT_TCP', '900000')) +CLICKHOUSE_DATABASE = os.environ.get('CLICKHOUSE_DATABASE', 'default') + + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, 'helpers')) + +from pure_http_client import ClickHouseClient + +if platform.machine() == "ppc64le": + shutil.copyfile(CURDIR + "/01016_simhash_minhash.ppc64le.reference", CURDIR + "/01016_simhash_minhash.reference") +elif platform.machine() == "x86_64" : + shutil.copyfile(CURDIR + "/01016_simhash_minhash.x86_64.reference", CURDIR + "/01016_simhash_minhash.reference") + +def writeVarUInt(x, ba): + for _ in range(0, 9): + + byte = x & 0x7F + if x > 0x7F: + byte |= 0x80 + + ba.append(byte) + + x >>= 7 + if x == 0: + return + +def writeStringBinary(s, ba): + b = bytes(s, 'utf-8') + writeVarUInt(len(s), ba) + ba.extend(b) + +def readStrict(s, size = 1): + res = bytearray() + while size: + cur = s.recv(size) + # if not res: + # raise "Socket is closed" + size -= len(cur) + res.extend(cur) + + return res + +def readUInt(s, size=1): + res = readStrict(s, size) + val = 0 + for i in range(len(res)): + val += res[i] << (i * 8) + return val + +def readUInt8(s): + return readUInt(s) + +def readUInt16(s): + return readUInt(s, 2) + +def readUInt32(s): + return readUInt(s, 4) + +def readUInt64(s): + return readUInt(s, 8) + +def readVarUInt(s): + x = 0 + for i in 
range(9):
+        byte = readStrict(s)[0]
+        x |= (byte & 0x7F) << (7 * i)
+
+        if not byte & 0x80:
+            return x
+
+    return x
+
+def readStringBinary(s):
+    size = readVarUInt(s)
+    s = readStrict(s, size)
+    return s.decode('utf-8')
+
+def sendHello(s):
+    ba = bytearray()
+    writeVarUInt(0, ba) # Hello
+    writeStringBinary('simple native protocol', ba)
+    writeVarUInt(21, ba)
+    writeVarUInt(9, ba)
+    writeVarUInt(54449, ba)
+    writeStringBinary('default', ba) # database
+    writeStringBinary('default', ba) # user
+    writeStringBinary('', ba) # pwd
+    s.sendall(ba)
+
+
+def receiveHello(s):
+    p_type = readVarUInt(s)
+    assert (p_type == 0) # Hello
+    server_name = readStringBinary(s)
+    # print("Server name: ", server_name)
+    server_version_major = readVarUInt(s)
+    # print("Major: ", server_version_major)
+    server_version_minor = readVarUInt(s)
+    # print("Minor: ", server_version_minor)
+    server_revision = readVarUInt(s)
+    # print("Revision: ", server_revision)
+    server_timezone = readStringBinary(s)
+    # print("Timezone: ", server_timezone)
+    server_display_name = readStringBinary(s)
+    # print("Display name: ", server_display_name)
+    server_version_patch = readVarUInt(s)
+    # print("Version patch: ", server_version_patch)
+
+def serializeClientInfo(ba, query_id):
+    writeStringBinary('default', ba) # initial_user
+    writeStringBinary(query_id, ba) # initial_query_id
+    writeStringBinary('127.0.0.1:9000', ba) # initial_address
+    ba.extend([0] * 8) # initial_query_start_time_microseconds
+    ba.append(1) # TCP
+    writeStringBinary('os_user', ba) # os_user
+    writeStringBinary('client_hostname', ba) # client_hostname
+    writeStringBinary('client_name', ba) # client_name
+    writeVarUInt(21, ba)
+    writeVarUInt(9, ba)
+    writeVarUInt(54449, ba)
+    writeStringBinary('', ba) # quota_key
+    writeVarUInt(0, ba) # distributed_depth
+    writeVarUInt(1, ba) # client_version_patch
+    ba.append(0) # No telemetry
+
+def sendQuery(s, query):
+    ba = bytearray()
+    query_id = uuid.uuid4().hex
+    writeVarUInt(1, ba) # query
+    writeStringBinary(query_id, ba)
+
+    ba.append(1) # INITIAL_QUERY
+
+    # client info
+    serializeClientInfo(ba, query_id)
+
+    writeStringBinary('', ba) # No settings
+    writeStringBinary('', ba) # No interserver secret
+    writeVarUInt(2, ba) # Stage - Complete
+    ba.append(0) # No compression
+    writeStringBinary(query, ba) # query, finally
+    s.sendall(ba)
+
+def serializeBlockInfo(ba):
+    writeVarUInt(1, ba) # 1
+    ba.append(0) # is_overflows
+    writeVarUInt(2, ba) # 2
+    writeVarUInt(0, ba) # 0
+    ba.extend([0] * 4) # bucket_num
+
+def sendEmptyBlock(s):
+    ba = bytearray()
+    writeVarUInt(2, ba) # Data
+    writeStringBinary('', ba)
+    serializeBlockInfo(ba)
+    writeVarUInt(0, ba) # rows
+    writeVarUInt(0, ba) # columns
+    s.sendall(ba)
+
+def assertPacket(packet, expected):
+    assert(packet == expected), packet
+
+def readException(s):
+    code = readUInt32(s)
+    name = readStringBinary(s)
+    text = readStringBinary(s)
+    readStringBinary(s) # trace
+    assertPacket(readUInt8(s), 0) # has_nested
+    sys.stdout.write("code {}: {}".format(code, text.replace('DB::Exception:', '')))
+
+
+def test():
+    client = ClickHouseClient()
+
+    res = client.query("SELECT ngramSimHash('')")
+    sys.stdout.write(res)
+    res = client.query("SELECT ngramSimHash('what a cute cat.')")
+    sys.stdout.write(res)
+    res = client.query("SELECT ngramSimHashCaseInsensitive('what a cute cat.')")
+    sys.stdout.write(res)
+    res = client.query("SELECT ngramSimHashUTF8('what a cute cat.')")
+    sys.stdout.write(res)
+    res = client.query("SELECT ngramSimHashCaseInsensitiveUTF8('what a cute cat.')")
+
sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHash('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashCaseInsensitive('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashUTF8('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashCaseInsensitiveUTF8('what a cute cat.')") + sys.stdout.write(res) + + res = client.query("SELECT ngramMinHash('')") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHash('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashCaseInsensitive('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashUTF8('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashCaseInsensitiveUTF8('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHash('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashCaseInsensitive('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashUTF8('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashCaseInsensitiveUTF8('what a cute cat.')") + sys.stdout.write(res) + + client.query("DROP TABLE IF EXISTS defaults") + client.query("CREATE TABLE defaults(s String) ENGINE = Memory()") + client.query("INSERT INTO defaults values ('It is the latest occurrence of the Southeast European haze, the issue that occurs in constant intensity during every wet season. It has mainly been caused by forest fires resulting from illegal slash-and-burn clearing performed on behalf of the palm oil industry in Kazakhstan, principally on the islands, which then spread quickly in the dry season.') ('It is the latest occurrence of the Southeast Asian haze, the issue that occurs in constant intensity during every wet season. 
It has mainly been caused by forest fires resulting from illegal slash-and-burn clearing performed on behalf of the palm oil industry in Kazakhstan, principally on the islands, which then spread quickly in the dry season.')") + + res = client.query("SELECT ngramSimHash(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramSimHashCaseInsensitive(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramSimHashUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramSimHashCaseInsensitiveUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHash(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashCaseInsensitive(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashCaseInsensitiveUTF8(s) FROM defaults") + sys.stdout.write(res) + + res = client.query("SELECT ngramMinHash(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashCaseInsensitive(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashCaseInsensitiveUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHash(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashCaseInsensitive(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashCaseInsensitiveUTF8(s) FROM defaults") + sys.stdout.write(res) + + client.query("TRUNCATE TABLE defaults") + client.query("INSERT INTO defaults SELECT arrayJoin(splitByString('\n\n', 'ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems.\n\nClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system''s read and write availability.\nClickHouse is simple and works out of the box. 
It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system''s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. 
All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.'))") + + res = client.query("SELECT 'uniqExact', uniqExact(s) FROM defaults") + sys.stdout.write(res) + + res = client.query("SELECT 'ngramSimHash'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHash(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramSimHashCaseInsensitive'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramSimHashUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramSimHashCaseInsensitiveUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleSimHash'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHash(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleSimHashCaseInsensitive'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleSimHashUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleSimHashCaseInsensitiveUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + + res = client.query("SELECT 'ngramMinHash'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHash(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramMinHashCaseInsensitive'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramMinHashUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramMinHashCaseInsensitiveUTF8'") + 
sys.stdout.write(res)
+    res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h")
+    sys.stdout.write(res)
+    res = client.query("SELECT 'wordShingleMinHash'")
+    sys.stdout.write(res)
+    res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHash(s) as h FROM defaults GROUP BY h ORDER BY h")
+    sys.stdout.write(res)
+    res = client.query("SELECT 'wordShingleMinHashCaseInsensitive'")
+    sys.stdout.write(res)
+    res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h")
+    sys.stdout.write(res)
+    res = client.query("SELECT 'wordShingleMinHashUTF8'")
+    sys.stdout.write(res)
+    res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h")
+    sys.stdout.write(res)
+    res = client.query("SELECT 'wordShingleMinHashCaseInsensitiveUTF8'")
+    sys.stdout.write(res)
+    res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h")
+    sys.stdout.write(res)
+
+    wordShingleSimHashInvalidArg1()
+
+    wordShingleSimHashInvalidArg2()
+
+    wordShingleSimHashInvalidArg3()
+    #client.query("DROP TABLE defaults")
+
+def wordShingleSimHashInvalidArg1():
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        s.settimeout(30)
+        s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT))
+        sendHello(s)
+        receiveHello(s)
+        sendQuery(s, "SELECT wordShingleSimHash('foobar', 9223372036854775807)")
+
+        # Fin block
+        sendEmptyBlock(s)
+
+
+        assertPacket(readVarUInt(s), 2)
+        print(readException(s))
+        s.close()
+
+
+def wordShingleSimHashInvalidArg2():
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        s.settimeout(30)
+        s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT))
+        sendHello(s)
+        receiveHello(s)
+        sendQuery(s, "SELECT wordShingleSimHash('foobar', 1001)")
+
+        # Fin block
+        sendEmptyBlock(s)
+
+        assertPacket(readVarUInt(s), 2)
+        print(readException(s))
+        s.close()
+
+
+def wordShingleSimHashInvalidArg3():
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        s.settimeout(30)
+        s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT))
+        sendHello(s)
+        receiveHello(s)
+        sendQuery(s, "SELECT wordShingleSimHash('foobar', 0)")
+
+        # Fin block
+        sendEmptyBlock(s)
+
+        assertPacket(readVarUInt(s), 2)
+        print(readException(s))
+        s.close()
+
+if __name__ == "__main__":
+    test()
+    #wordShingleSimHashInvalidArg1()
diff --git a/tests/queries/0_stateless/01016_simhash_minhash.sh b/tests/queries/0_stateless/01016_simhash_minhash.sh
new file mode 100755
index 00000000000..94bac7efacb
--- /dev/null
+++ b/tests/queries/0_stateless/01016_simhash_minhash.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. 
"$CURDIR"/../shell_config.sh + +python3 "$CURDIR"/01016_simhash_minhash.python + diff --git a/tests/queries/0_stateless/01016_simhash_minhash.sql b/tests/queries/0_stateless/01016_simhash_minhash.sql deleted file mode 100644 index 1e77b487851..00000000000 --- a/tests/queries/0_stateless/01016_simhash_minhash.sql +++ /dev/null @@ -1,115 +0,0 @@ -SELECT ngramSimHash(''); -SELECT ngramSimHash('what a cute cat.'); -SELECT ngramSimHashCaseInsensitive('what a cute cat.'); -SELECT ngramSimHashUTF8('what a cute cat.'); -SELECT ngramSimHashCaseInsensitiveUTF8('what a cute cat.'); -SELECT wordShingleSimHash('what a cute cat.'); -SELECT wordShingleSimHashCaseInsensitive('what a cute cat.'); -SELECT wordShingleSimHashUTF8('what a cute cat.'); -SELECT wordShingleSimHashCaseInsensitiveUTF8('what a cute cat.'); - -SELECT ngramMinHash(''); -SELECT ngramMinHash('what a cute cat.'); -SELECT ngramMinHashCaseInsensitive('what a cute cat.'); -SELECT ngramMinHashUTF8('what a cute cat.'); -SELECT ngramMinHashCaseInsensitiveUTF8('what a cute cat.'); -SELECT wordShingleMinHash('what a cute cat.'); -SELECT wordShingleMinHashCaseInsensitive('what a cute cat.'); -SELECT wordShingleMinHashUTF8('what a cute cat.'); -SELECT wordShingleMinHashCaseInsensitiveUTF8('what a cute cat.'); - -DROP TABLE IF EXISTS defaults; -CREATE TABLE defaults -( - s String -)ENGINE = Memory(); - -INSERT INTO defaults values ('It is the latest occurrence of the Southeast European haze, the issue that occurs in constant intensity during every wet season. It has mainly been caused by forest fires resulting from illegal slash-and-burn clearing performed on behalf of the palm oil industry in Kazakhstan, principally on the islands, which then spread quickly in the dry season.') ('It is the latest occurrence of the Southeast Asian haze, the issue that occurs in constant intensity during every wet season. It has mainly been caused by forest fires resulting from illegal slash-and-burn clearing performed on behalf of the palm oil industry in Kazakhstan, principally on the islands, which then spread quickly in the dry season.'); - -SELECT ngramSimHash(s) FROM defaults; -SELECT ngramSimHashCaseInsensitive(s) FROM defaults; -SELECT ngramSimHashUTF8(s) FROM defaults; -SELECT ngramSimHashCaseInsensitiveUTF8(s) FROM defaults; -SELECT wordShingleSimHash(s) FROM defaults; -SELECT wordShingleSimHashCaseInsensitive(s) FROM defaults; -SELECT wordShingleSimHashUTF8(s) FROM defaults; -SELECT wordShingleSimHashCaseInsensitiveUTF8(s) FROM defaults; - -SELECT ngramMinHash(s) FROM defaults; -SELECT ngramMinHashCaseInsensitive(s) FROM defaults; -SELECT ngramMinHashUTF8(s) FROM defaults; -SELECT ngramMinHashCaseInsensitiveUTF8(s) FROM defaults; -SELECT wordShingleMinHash(s) FROM defaults; -SELECT wordShingleMinHashCaseInsensitive(s) FROM defaults; -SELECT wordShingleMinHashUTF8(s) FROM defaults; -SELECT wordShingleMinHashCaseInsensitiveUTF8(s) FROM defaults; - -TRUNCATE TABLE defaults; -INSERT INTO defaults SELECT arrayJoin(splitByString('\n\n', -'ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency. -ClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. 
Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes. -ClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. - -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. -ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system''s read and write availability. -ClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. - -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. -ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system''s read / write availability. -ClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. - -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. -ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system. -ClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. - -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. -ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system. -ClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. - -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. -ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system. -ClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.' -)); - -SELECT 'uniqExact', uniqExact(s) FROM defaults; - - -SELECT 'ngramSimHash'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHash(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'ngramSimHashCaseInsensitive'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'ngramSimHashUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'ngramSimHashCaseInsensitiveUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleSimHash'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHash(s, 2) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleSimHashCaseInsensitive'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitive(s, 2) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleSimHashUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashUTF8(s, 2) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleSimHashCaseInsensitiveUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitiveUTF8(s, 2) as h FROM defaults GROUP BY h ORDER BY h; - -SELECT 'ngramMinHash'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHash(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'ngramMinHashCaseInsensitive'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'ngramMinHashUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashUTF8(s) as h 
FROM defaults GROUP BY h ORDER BY h;
-SELECT 'ngramMinHashCaseInsensitiveUTF8';
-SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h;
-SELECT 'wordShingleMinHash';
-SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHash(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h;
-SELECT 'wordShingleMinHashCaseInsensitive';
-SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitive(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h;
-SELECT 'wordShingleMinHashUTF8';
-SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashUTF8(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h;
-SELECT 'wordShingleMinHashCaseInsensitiveUTF8';
-SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitiveUTF8(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h;
-
-SELECT wordShingleSimHash('foobar', 9223372036854775807); -- { serverError 69 }
-SELECT wordShingleSimHash('foobar', 1001); -- { serverError 69 }
-SELECT wordShingleSimHash('foobar', 0); -- { serverError 69 }
-
-DROP TABLE defaults;
diff --git a/tests/queries/0_stateless/01016_simhash_minhash.reference b/tests/queries/0_stateless/01016_simhash_minhash.x86_64.reference
similarity index 100%
rename from tests/queries/0_stateless/01016_simhash_minhash.reference
rename to tests/queries/0_stateless/01016_simhash_minhash.x86_64.reference

From fe1df7aabe81cf64722a4087d8f3b6c598c7eeca Mon Sep 17 00:00:00 2001
From: MeenaRenganathan22
Date: Tue, 10 Jan 2023 22:00:53 -0800
Subject: [PATCH 166/262] Updated the reference files

---
 .../01016_simhash_minhash.reference        | 152 ++++++++++++++++++
 .../01016_simhash_minhash.x86_64.reference |  91 ++++++-----
 2 files changed, 203 insertions(+), 40 deletions(-)
 create mode 100644 tests/queries/0_stateless/01016_simhash_minhash.reference

diff --git a/tests/queries/0_stateless/01016_simhash_minhash.reference b/tests/queries/0_stateless/01016_simhash_minhash.reference
new file mode 100644
index 00000000000..9d3ff35efb0
--- /dev/null
+++ b/tests/queries/0_stateless/01016_simhash_minhash.reference
@@ -0,0 +1,152 @@
+18446744073709551615
+130877626
+130877626
+2414681787
+2414681787
+3795742796
+3795742796
+3795742796
+3795742796
+(10693559443859979498,10693559443859979498)
+(12862934800683464900,12912608544812513109)
+(12862934800683464900,12912608544812513109)
+(5701637312405877447,12912608544812513109)
+(5701637312405877447,12912608544812513109)
+(17357047205102710216,17357047205102710216)
+(17357047205102710216,17357047205102710216)
+(17357047205102710216,17357047205102710216)
+(17357047205102710216,17357047205102710216)
+3562273581
+3579050789
+3562257197
+3562258213
+3579050797
+3579050757
+3562258221
+3562258181
+3004171816
+2584740395
+437257770
+2651981610
+3004171816
+2584740395
+437257770
+2651981610
+(17614245890954671019,12771214424940442770)
+(17614245890954671019,12771214424940442770)
+(7128473921279637957,12771214424940442770)
+(7128473921279637957,12771214424940442770)
+(17614245890954671019,12771214424940442770)
+(17614245890954671019,12771214424940442770)
+(7128473921279637957,12771214424940442770)
+(7128473921279637957,12771214424940442770)
+(14260447771268573594,5578182242585518316)
+(14260447771268573594,16377939020851853906)
+(4363920713808688881,5013693163726625177)
+(14260447771268573594,3863279269132177973)
+(14260447771268573594,5578182242585518316)
+(14260447771268573594,16377939020851853906)
+(4363920713808688881,5013693163726625177) +(14260447771268573594,3863279269132177973) +uniqExact 6 +ngramSimHash +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 676648743 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1012193063 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. 
Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 2857686823 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3092567843 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3906262823 +ngramSimHashCaseInsensitive +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 2824132391 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2891240999 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3092567591 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3908359975 +ngramSimHashUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 676648743 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1012193063 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 2924795687 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3159676711 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3897874215 +ngramSimHashCaseInsensitiveUTF8 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 2824132391 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2891241255 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3092567591 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3906262823 +wordShingleSimHash +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 163730020 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. 
All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1863866568 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2066765888 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2131775692 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2132302028 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 2133610504 +wordShingleSimHashCaseInsensitive +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 769814628 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1851412545 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 1983533133 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2121947213 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2132430916 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2134530116 +wordShingleSimHashUTF8 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. 
Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 163730020 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1863866568 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2066765888 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2131775692 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2132302028 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 2133610504 +wordShingleSimHashCaseInsensitiveUTF8 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 769814628 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1851412545 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 1983533133 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2121947213 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2132430916 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2134530116 +ngramMinHash +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (4388091710993602029,17613327300639166679) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,17443426065825246292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (7962672159337006560,17443426065825246292) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,17443426065825246292) +ngramMinHashCaseInsensitive +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. 
It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (4388091710993602029,17613327300639166679) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,8535005350590298790) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (7962672159337006560,8535005350590298790) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,8535005350590298790) +ngramMinHashUTF8 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (4388091710993602029,17613327300639166679) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. 
It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,17443426065825246292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (7962672159337006560,17443426065825246292) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,17443426065825246292) +ngramMinHashCaseInsensitiveUTF8 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (4388091710993602029,17613327300639166679) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. 
It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,8535005350590298790) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (7962672159337006560,8535005350590298790) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,8535005350590298790) +wordShingleMinHash +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (2737777099019241270,12203217272515755130) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,5291917846812693075) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,8290914314000593271) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,13404711269494939830) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (9049684948427678934,525844926417235186) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (13984163398937596233,5291917846812693075) +wordShingleMinHashCaseInsensitive +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5915334596853187377,304181940976393091) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5915334596853187377,2742255228205943790) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5915334596853187377,4737570281654602452) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (8339553084913780125,304181940976393091) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (16416045251850351268,9014309695588044244) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (18035669763176492916,17383752913124421136) +wordShingleMinHashUTF8 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (2737777099019241270,12203217272515755130) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,5291917846812693075) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,8290914314000593271) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,13404711269494939830) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (9049684948427678934,525844926417235186) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (13984163398937596233,5291917846812693075) +wordShingleMinHashCaseInsensitiveUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5915334596853187377,304181940976393091) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5915334596853187377,2742255228205943790) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5915334596853187377,4737570281654602452) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (8339553084913780125,304181940976393091) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (16416045251850351268,9014309695588044244) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 
1 (18035669763176492916,17383752913124421136)
+code 69: Second argument (shingle size) of function wordShingleSimHash cannot be greater then 25: While processing wordShingleSimHash('foobar', 9223372036854775807)None
+code 69: Second argument (shingle size) of function wordShingleSimHash cannot be greater then 25: While processing wordShingleSimHash('foobar', 1001)None
+code 69: Second argument (shingle size) of function wordShingleSimHash cannot be zero: While processing wordShingleSimHash('foobar', 0)None
diff --git a/tests/queries/0_stateless/01016_simhash_minhash.x86_64.reference b/tests/queries/0_stateless/01016_simhash_minhash.x86_64.reference
index d4fdcfea6a5..9d3ff35efb0 100644
--- a/tests/queries/0_stateless/01016_simhash_minhash.x86_64.reference
+++ b/tests/queries/0_stateless/01016_simhash_minhash.x86_64.reference
@@ -72,33 +72,33 @@ ClickHouse makes full use of all available hardware to process each request as q
 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3092567591
 ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3906262823
 wordShingleSimHash
-ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. 
It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 404215014 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 404215270 -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 425963587 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 563598566 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 857724390 -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 991679910 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 163730020 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1863866568 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2066765888 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2131775692 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2132302028 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. 
Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 2133610504 wordShingleSimHashCaseInsensitive -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 420713958 -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 421737795 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 429118950 -ClickHouse makes full use of all available hardware to process every request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 959182215 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 964941252 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 965465540 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. 
It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 769814628 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1851412545 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 1983533133 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2121947213 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2132430916 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2134530116 wordShingleSimHashUTF8 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 404215014 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 404215270 -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 425963587 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 563598566 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 857724390 -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. 
Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 991679910 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 163730020 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1863866568 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2066765888 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2131775692 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2132302028 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 2133610504 wordShingleSimHashCaseInsensitiveUTF8 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. 
It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 420713958 -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 421737795 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 429118950 -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 959182215 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 964941252 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 965465540 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 769814628 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1851412545 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 1983533133 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2121947213 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2132430916 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2134530116 ngramMinHash ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (4388091710993602029,17613327300639166679) ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,17443426065825246292) @@ -120,22 +120,33 @@ ClickHouse makes full use of all available hardware to process each request as q ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (7962672159337006560,8535005350590298790) ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,8535005350590298790) wordShingleMinHash -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (5044918525503962090,12338022931991160906) -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (16224204290372720939,13975393268888698430) -ClickHouse makes full use of all available hardware to process every request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (18148981179837829400,6048943706095721476) -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (18148981179837829400,14581416672396321264) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (2737777099019241270,12203217272515755130) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. 
It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,5291917846812693075) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,8290914314000593271) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,13404711269494939830) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (9049684948427678934,525844926417235186) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (13984163398937596233,5291917846812693075) wordShingleMinHashCaseInsensitive -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (5044918525503962090,3381836163833256482) -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (15504011608613565061,6048943706095721476) -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (15504011608613565061,14581416672396321264) -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (16224204290372720939,13975393268888698430) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5915334596853187377,304181940976393091) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5915334596853187377,2742255228205943790) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5915334596853187377,4737570281654602452) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (8339553084913780125,304181940976393091) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (16416045251850351268,9014309695588044244) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (18035669763176492916,17383752913124421136) wordShingleMinHashUTF8 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (5044918525503962090,12338022931991160906) -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (16224204290372720939,13975393268888698430) -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (18148981179837829400,6048943706095721476) -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (18148981179837829400,14581416672396321264) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (2737777099019241270,12203217272515755130) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,5291917846812693075) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. 
All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,8290914314000593271) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,13404711269494939830) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (9049684948427678934,525844926417235186) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 
1 (13984163398937596233,5291917846812693075) wordShingleMinHashCaseInsensitiveUTF8 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (5044918525503962090,3381836163833256482) -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. 
Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (15504011608613565061,6048943706095721476) -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (15504011608613565061,14581416672396321264) -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (16224204290372720939,13975393268888698430) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 
1 (5915334596853187377,304181940976393091) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5915334596853187377,2742255228205943790) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5915334596853187377,4737570281654602452) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (8339553084913780125,304181940976393091) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. 
Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (16416045251850351268,9014309695588044244) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (18035669763176492916,17383752913124421136) +code 69: Second argument (shingle size) of function wordShingleSimHash cannot be greater then 25: While processing wordShingleSimHash('foobar', 9223372036854775807)None +code 69: Second argument (shingle size) of function wordShingleSimHash cannot be greater then 25: While processing wordShingleSimHash('foobar', 1001)None +code 69: Second argument (shingle size) of function wordShingleSimHash cannot be zero: While processing wordShingleSimHash('foobar', 0)None From af0c3d751fb602a807dc51716c30ff9fbb0d1999 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 11 Jan 2023 14:32:28 +0800 Subject: [PATCH 167/262] fix uts --- .../0_stateless/00921_datetime64_compatibility_long.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference b/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference index 8d28a69ff3d..8a168ed0e9e 100644 --- a/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference +++ b/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference @@ -28,7 +28,7 @@ SELECT toDayOfMonth(N, \'Asia/Istanbul\') "UInt8",16 "UInt8",16 ------------------------------------------ -SELECT toDayOfWeek(N, \'Asia/Istanbul\') +SELECT toDayOfWeek(N, 0, \'Asia/Istanbul\') "UInt8",1 "UInt8",1 "UInt8",1 From f871949d8513e712234ee3358ccb176fb99432ae Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Tue, 10 Jan 2023 12:02:33 +0100 Subject: [PATCH 168/262] Try to fix flaky test_create_user_and_login/test.py::test_login_as_dropped_user_xml --- tests/integration/test_create_user_and_login/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_create_user_and_login/test.py b/tests/integration/test_create_user_and_login/test.py index 1b59089fa11..372fd549b3f 100644 --- a/tests/integration/test_create_user_and_login/test.py +++ b/tests/integration/test_create_user_and_login/test.py 
@@ -80,7 +80,7 @@ EOF""", ["bash", "-c", "rm /etc/clickhouse-server/users.d/user_c.xml"] ) - expected_errors = ["no user with such name", "not found in user directories"] + expected_errors = ["no user with such name", "not found in user directories", "User has been dropped"] while True: out, err = instance.query_and_get_answer_with_error("SELECT 1", user="C") found_error = [ From 6d6e803cfbfe61fb33b382bdd9f6e006199d6f63 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 10 Jan 2023 11:09:34 +0000 Subject: [PATCH 169/262] Automatic style fix --- tests/integration/test_create_user_and_login/test.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_create_user_and_login/test.py b/tests/integration/test_create_user_and_login/test.py index 372fd549b3f..b60ec65cb7b 100644 --- a/tests/integration/test_create_user_and_login/test.py +++ b/tests/integration/test_create_user_and_login/test.py @@ -80,7 +80,11 @@ EOF""", ["bash", "-c", "rm /etc/clickhouse-server/users.d/user_c.xml"] ) - expected_errors = ["no user with such name", "not found in user directories", "User has been dropped"] + expected_errors = [ + "no user with such name", + "not found in user directories", + "User has been dropped", + ] while True: out, err = instance.query_and_get_answer_with_error("SELECT 1", user="C") found_error = [ From 82271d6c4bff3b6bcdf09638227a012213d1cbba Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 7 Dec 2022 16:05:06 +0100 Subject: [PATCH 170/262] Analyzer SumIfToCountIfPass crash fix --- src/Analyzer/Passes/SumIfToCountIfPass.cpp | 6 +++--- .../02497_analyzer_sum_if_count_if_pass_crash_fix.reference | 1 + .../02497_analyzer_sum_if_count_if_pass_crash_fix.sql | 4 ++++ 3 files changed, 8 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02497_analyzer_sum_if_count_if_pass_crash_fix.reference create mode 100644 tests/queries/0_stateless/02497_analyzer_sum_if_count_if_pass_crash_fix.sql diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index 879eb4d4a8d..27717fccd78 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -77,7 +77,7 @@ public: if (!nested_function || nested_function->getFunctionName() != "if") return; - auto & nested_if_function_arguments_nodes = nested_function->getArguments().getNodes(); + const auto & nested_if_function_arguments_nodes = nested_function->getArguments().getNodes(); if (nested_if_function_arguments_nodes.size() != 3) return; @@ -101,7 +101,7 @@ public: /// Rewrite `sum(if(cond, 1, 0))` into `countIf(cond)`. 
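/// E.g. `SELECT sum(if((number % 2) = 0, 1, 0)) FROM numbers(100)` is planned as `SELECT countIf((number % 2) = 0) FROM numbers(100)`; the condition argument is copied rather than moved out of the nested `if`, see the 02497 test added below.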
if (if_true_condition_value == 1 && if_false_condition_value == 0) { - function_node_arguments_nodes[0] = std::move(nested_if_function_arguments_nodes[0]); + function_node_arguments_nodes[0] = nested_if_function_arguments_nodes[0]; function_node_arguments_nodes.resize(1); resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType()); @@ -120,7 +120,7 @@ public: auto not_function = std::make_shared("not"); auto & not_function_arguments = not_function->getArguments().getNodes(); - not_function_arguments.push_back(std::move(nested_if_function_arguments_nodes[0])); + not_function_arguments.push_back(nested_if_function_arguments_nodes[0]); not_function->resolveAsFunction(FunctionFactory::instance().get("not", context)->build(not_function->getArgumentColumns())); diff --git a/tests/queries/0_stateless/02497_analyzer_sum_if_count_if_pass_crash_fix.reference b/tests/queries/0_stateless/02497_analyzer_sum_if_count_if_pass_crash_fix.reference new file mode 100644 index 00000000000..cf534567c6f --- /dev/null +++ b/tests/queries/0_stateless/02497_analyzer_sum_if_count_if_pass_crash_fix.reference @@ -0,0 +1 @@ +50 50 50 1 0 diff --git a/tests/queries/0_stateless/02497_analyzer_sum_if_count_if_pass_crash_fix.sql b/tests/queries/0_stateless/02497_analyzer_sum_if_count_if_pass_crash_fix.sql new file mode 100644 index 00000000000..51522565014 --- /dev/null +++ b/tests/queries/0_stateless/02497_analyzer_sum_if_count_if_pass_crash_fix.sql @@ -0,0 +1,4 @@ +SET allow_experimental_analyzer = 1; +SET optimize_rewrite_sum_if_to_count_if = 1; + +SELECT sum(if((number % 2) = 0 AS cond_expr, 1 AS one_expr, 0 AS zero_expr) AS if_expr), sum(cond_expr), sum(if_expr), one_expr, zero_expr FROM numbers(100); From eac7a07f3f8c0076a119c27ef2301326024e964a Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 7 Dec 2022 16:15:51 +0100 Subject: [PATCH 171/262] Analyzer AggregateFunctionsArithmeticOperationsPass fix --- ...egateFunctionsArithmericOperationsPass.cpp | 26 +++++++++++-------- ...s_arithmetic_operations_pass_fix.reference | 1 + ...nctions_arithmetic_operations_pass_fix.sql | 14 ++++++++++ 3 files changed, 30 insertions(+), 11 deletions(-) create mode 100644 tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.reference create mode 100644 tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.sql diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index 01072e0b3fc..f1566f9639b 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -73,7 +73,7 @@ public: if (!inner_function_node) return; - auto & inner_function_arguments_nodes = inner_function_node->getArguments().getNodes(); + const auto & inner_function_arguments_nodes = inner_function_node->getArguments().getNodes(); if (inner_function_arguments_nodes.size() != 2) return; @@ -121,11 +121,13 @@ public: } resolveAggregateFunctionNode(*aggregate_function_node, inner_function_arguments_nodes[1], lower_function_name); - auto inner_function = aggregate_function_arguments_nodes[0]; - auto inner_function_right_argument = std::move(inner_function_arguments_nodes[1]); - aggregate_function_arguments_nodes = {inner_function_right_argument}; - inner_function_arguments_nodes[1] = node; - node = std::move(inner_function); + auto inner_function_clone = 
inner_function_node->clone(); + auto & inner_function_clone_arguments = inner_function_clone->as().getArguments(); + auto & inner_function_clone_arguments_nodes = inner_function_clone_arguments.getNodes(); + auto inner_function_clone_right_argument = inner_function_clone_arguments_nodes[1]; + aggregate_function_arguments_nodes = {inner_function_clone_right_argument}; + inner_function_clone_arguments_nodes[1] = node; + node = std::move(inner_function_clone); } else if (right_argument_constant_node) { @@ -138,11 +140,13 @@ public: } resolveAggregateFunctionNode(*aggregate_function_node, inner_function_arguments_nodes[0], function_name_if_constant_is_negative); - auto inner_function = aggregate_function_arguments_nodes[0]; - auto inner_function_left_argument = std::move(inner_function_arguments_nodes[0]); - aggregate_function_arguments_nodes = {inner_function_left_argument}; - inner_function_arguments_nodes[0] = node; - node = std::move(inner_function); + auto inner_function_clone = inner_function_node->clone(); + auto & inner_function_clone_arguments = inner_function_clone->as().getArguments(); + auto & inner_function_clone_arguments_nodes = inner_function_clone_arguments.getNodes(); + auto inner_function_clone_left_argument = inner_function_clone_arguments_nodes[0]; + aggregate_function_arguments_nodes = {inner_function_clone_left_argument}; + inner_function_clone_arguments_nodes[0] = node; + node = std::move(inner_function_clone); } } diff --git a/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.reference b/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.reference new file mode 100644 index 00000000000..4f9430ef608 --- /dev/null +++ b/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.reference @@ -0,0 +1 @@ +4 2 diff --git a/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.sql b/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.sql new file mode 100644 index 00000000000..e3e508e17be --- /dev/null +++ b/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.sql @@ -0,0 +1,14 @@ +SET allow_experimental_analyzer = 1; +SET optimize_arithmetic_operations_in_aggregate_functions = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value UInt64 +) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test_table VALUES (1, 1); +INSERT INTO test_table VALUES (1, 1); + +SELECT sum((2 * id) as func), func FROM test_table GROUP BY id; From 1420c4b85278737afed41ee8d1cdb50a0b68755e Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 11 Jan 2023 10:44:59 +0100 Subject: [PATCH 172/262] Use logging instead of printing --- tests/ci/get_previous_release_tag.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/ci/get_previous_release_tag.py b/tests/ci/get_previous_release_tag.py index 6551ba80ecd..aa84169611c 100755 --- a/tests/ci/get_previous_release_tag.py +++ b/tests/ci/get_previous_release_tag.py @@ -9,6 +9,8 @@ CLICKHOUSE_TAGS_URL = "https://api.github.com/repos/ClickHouse/ClickHouse/tags" CLICKHOUSE_PACKAGE_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-common-static_{version}_amd64.deb" VERSION_PATTERN = r"(v(?:\d+\.)?(?:\d+\.)?(?:\d+\.)?\d+-[a-zA-Z]*)" +logger = logging.getLogger(__name__) + class Version: def __init__(self, version): @@ -61,10 +63,10 @@ def find_previous_release(server_version, releases): ): return True, release else: - print( - "The tag {version}-{type} exists but the package is not yet available on GitHub".format( - version=release.version, type=release.type - ) + logger.debug( + "The tag %s-%s exists but the package is not yet available on GitHub", + release.version, + release.type, ) return False, None From 1dc9fe6f5ae97e8d6be2ca2fab6aebba299143e6 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 11 Jan 2023 10:45:51 +0100 Subject: [PATCH 173/262] Fix the timeout, remove wrong parameters --- tests/ci/get_previous_release_tag.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/ci/get_previous_release_tag.py b/tests/ci/get_previous_release_tag.py index aa84169611c..579035bd943 100755 --- a/tests/ci/get_previous_release_tag.py +++ b/tests/ci/get_previous_release_tag.py @@ -54,10 +54,7 @@ def find_previous_release(server_version, releases): CLICKHOUSE_PACKAGE_URL.format( version=release.version, type=release.type ), - total=10, - read=10, - connect=10, - backoff_factor=0.3, + timeout=10, ).status_code != 404 ): From f1947b94def69a99960a13becf10a269f0798327 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 11 Jan 2023 11:10:06 +0100 Subject: [PATCH 174/262] Fixed tests --- .../Passes/AggregateFunctionsArithmericOperationsPass.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index f1566f9639b..33ecf549363 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -119,13 +119,13 @@ public: { lower_function_name = function_name_if_constant_is_negative; } - resolveAggregateFunctionNode(*aggregate_function_node, inner_function_arguments_nodes[1], lower_function_name); auto inner_function_clone = inner_function_node->clone(); auto & inner_function_clone_arguments = inner_function_clone->as().getArguments(); auto & inner_function_clone_arguments_nodes = inner_function_clone_arguments.getNodes(); auto inner_function_clone_right_argument = inner_function_clone_arguments_nodes[1]; aggregate_function_arguments_nodes = {inner_function_clone_right_argument}; + resolveAggregateFunctionNode(*aggregate_function_node, inner_function_clone_right_argument, lower_function_name); inner_function_clone_arguments_nodes[1] = node; node = std::move(inner_function_clone); } @@ -138,20 +138,20 @@ public: { lower_function_name = function_name_if_constant_is_negative; } - resolveAggregateFunctionNode(*aggregate_function_node, 
inner_function_arguments_nodes[0], function_name_if_constant_is_negative); auto inner_function_clone = inner_function_node->clone(); auto & inner_function_clone_arguments = inner_function_clone->as().getArguments(); auto & inner_function_clone_arguments_nodes = inner_function_clone_arguments.getNodes(); auto inner_function_clone_left_argument = inner_function_clone_arguments_nodes[0]; aggregate_function_arguments_nodes = {inner_function_clone_left_argument}; + resolveAggregateFunctionNode(*aggregate_function_node, inner_function_clone_left_argument, function_name_if_constant_is_negative); inner_function_clone_arguments_nodes[0] = node; node = std::move(inner_function_clone); } } private: - static inline void resolveAggregateFunctionNode(FunctionNode & function_node, QueryTreeNodePtr & argument, const String & aggregate_function_name) + static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name) { auto function_aggregate_function = function_node.getAggregateFunction(); From 77f0724629c16a2bf425ed7e82b5e2c432335ed3 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Wed, 11 Jan 2023 10:28:30 +0000 Subject: [PATCH 175/262] Fix flaky test_tcp_handler_interserver_listen_host --- .../test_tcp_handler_interserver_listen_host/test_case.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py b/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py index 44df1c369cf..e792d0867f6 100644 --- a/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py +++ b/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py @@ -29,6 +29,12 @@ node_without_interserver_listen_host = cluster.add_instance( def start_cluster(): try: cluster.start() + cluster.wait_for_url( + f"http://{INTERSERVER_LISTEN_HOST}:{INTERSERVER_HTTP_PORT}" + ) + cluster.wait_for_url( + f"http://{node_without_interserver_listen_host.ip_address}:8123" + ) yield cluster finally: From a881a61e748fe7f728817d26a88bbe146520125c Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 23 Dec 2022 11:30:22 +0000 Subject: [PATCH 176/262] Set pipeline type in join step description --- src/Planner/PlannerJoinTree.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 3584c9d4caa..a0e8b9c5f7a 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -586,6 +586,7 @@ QueryPlan buildQueryPlanForJoinNode(QueryTreeNodePtr join_tree_node, size_t max_block_size = query_context->getSettingsRef().max_block_size; size_t max_streams = query_context->getSettingsRef().max_threads; + JoinPipelineType join_pipeline_type = join_algorithm->pipelineType(); auto join_step = std::make_unique( left_plan.getCurrentDataStream(), right_plan.getCurrentDataStream(), @@ -594,7 +595,7 @@ QueryPlan buildQueryPlanForJoinNode(QueryTreeNodePtr join_tree_node, max_streams, false /*optimize_read_in_order*/); - join_step->setStepDescription(fmt::format("JOIN {}", JoinPipelineType::FillRightFirst)); + join_step->setStepDescription(fmt::format("JOIN {}", join_pipeline_type)); std::vector plans; plans.emplace_back(std::make_unique(std::move(left_plan))); From 0b86deb58527c347482892d91ead2a7041c60c92 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 23 Dec 2022 19:33:46 +0000 Subject: [PATCH 177/262] [wip] drop unused columns after 
join on/using --- src/Planner/Planner.cpp | 5 ++++- src/Planner/PlannerJoinTree.cpp | 21 ++++++++++++++++++--- src/Planner/PlannerJoinTree.h | 3 +++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index a0e8c4687c6..fc8dafd1b49 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -64,6 +64,7 @@ #include #include #include +#include namespace DB { @@ -374,7 +375,9 @@ void Planner::buildQueryPlanIfNeeded() collectSets(query_tree, *planner_context); - query_plan = buildQueryPlanForJoinTreeNode(query_node.getJoinTree(), select_query_info, select_query_options, planner_context); + auto top_level_identifiers = collectUsedIdentifiers(query_tree, planner_context); + + query_plan = buildQueryPlanForJoinTreeNode(query_node.getJoinTree(), select_query_info, select_query_options, top_level_identifiers, planner_context); auto expression_analysis_result = buildExpressionAnalysisResult(query_tree, query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(), planner_context); if (expression_analysis_result.hasWhere()) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index a0e8b9c5f7a..5f1b27bb1cc 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -33,6 +33,7 @@ #include #include +#include #include #include #include @@ -207,19 +208,25 @@ QueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression, QueryPlan buildQueryPlanForJoinNode(QueryTreeNodePtr join_tree_node, SelectQueryInfo & select_query_info, const SelectQueryOptions & select_query_options, + const ColumnIdentifierSet & outer_scope_columns, PlannerContextPtr & planner_context) { auto & join_node = join_tree_node->as(); + ColumnIdentifierSet current_scope_columns = outer_scope_columns; + collectUsedIdentifiers(join_tree_node, planner_context, current_scope_columns); + auto left_plan = buildQueryPlanForJoinTreeNode(join_node.getLeftTableExpression(), select_query_info, select_query_options, + current_scope_columns, planner_context); auto left_plan_output_columns = left_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); auto right_plan = buildQueryPlanForJoinTreeNode(join_node.getRightTableExpression(), select_query_info, select_query_options, + current_scope_columns, planner_context); auto right_plan_output_columns = right_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); @@ -610,8 +617,13 @@ QueryPlan buildQueryPlanForJoinNode(QueryTreeNodePtr join_tree_node, for (auto & output : drop_unused_columns_after_join_actions_dag->getOutputs()) { - if (updated_outputs_names.contains(output->result_name) || !planner_context->getGlobalPlannerContext()->hasColumnIdentifier(output->result_name)) + const auto & global_planner_context = planner_context->getGlobalPlannerContext(); + if (updated_outputs_names.contains(output->result_name) + || !global_planner_context->hasColumnIdentifier(output->result_name) + || !outer_scope_columns.contains(output->result_name)) + { continue; + } updated_outputs.push_back(output); updated_outputs_names.insert(output->result_name); @@ -629,6 +641,7 @@ QueryPlan buildQueryPlanForJoinNode(QueryTreeNodePtr join_tree_node, QueryPlan buildQueryPlanForArrayJoinNode(QueryTreeNodePtr table_expression, SelectQueryInfo & select_query_info, const SelectQueryOptions & select_query_options, + const ColumnIdentifierSet & outer_scope_columns, PlannerContextPtr & planner_context) { auto & array_join_node = table_expression->as(); @@ -636,6 +649,7 @@ 
QueryPlan buildQueryPlanForArrayJoinNode(QueryTreeNodePtr table_expression, auto plan = buildQueryPlanForJoinTreeNode(array_join_node.getTableExpression(), select_query_info, select_query_options, + outer_scope_columns, planner_context); auto plan_output_columns = plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); @@ -675,6 +689,7 @@ QueryPlan buildQueryPlanForArrayJoinNode(QueryTreeNodePtr table_expression, QueryPlan buildQueryPlanForJoinTreeNode(QueryTreeNodePtr join_tree_node, SelectQueryInfo & select_query_info, const SelectQueryOptions & select_query_options, + const ColumnIdentifierSet & outer_scope_columns, PlannerContextPtr & planner_context) { auto join_tree_node_type = join_tree_node->getNodeType(); @@ -693,11 +708,11 @@ QueryPlan buildQueryPlanForJoinTreeNode(QueryTreeNodePtr join_tree_node, } case QueryTreeNodeType::JOIN: { - return buildQueryPlanForJoinNode(join_tree_node, select_query_info, select_query_options, planner_context); + return buildQueryPlanForJoinNode(join_tree_node, select_query_info, select_query_options, outer_scope_columns, planner_context); } case QueryTreeNodeType::ARRAY_JOIN: { - return buildQueryPlanForArrayJoinNode(join_tree_node, select_query_info, select_query_options, planner_context); + return buildQueryPlanForArrayJoinNode(join_tree_node, select_query_info, select_query_options, outer_scope_columns, planner_context); } default: { diff --git a/src/Planner/PlannerJoinTree.h b/src/Planner/PlannerJoinTree.h index c93b71e0df1..9d83bf62fc1 100644 --- a/src/Planner/PlannerJoinTree.h +++ b/src/Planner/PlannerJoinTree.h @@ -11,10 +11,13 @@ namespace DB { +using ColumnIdentifierSet = std::unordered_set; + /// Build query plan for query JOIN TREE node QueryPlan buildQueryPlanForJoinTreeNode(QueryTreeNodePtr join_tree_node, SelectQueryInfo & select_query_info, const SelectQueryOptions & select_query_options, + const ColumnIdentifierSet & outer_scope_columns, PlannerContextPtr & planner_context); } From f3702e9279b82409af62be9df20ab8f62e638b73 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 23 Dec 2022 19:34:56 +0000 Subject: [PATCH 178/262] add missing src/Planner/CollectUsedIndetifiers --- src/Planner/CollectUsedIndetifiers.cpp | 66 ++++++++++++++++++++++++++ src/Planner/CollectUsedIndetifiers.h | 17 +++++++ 2 files changed, 83 insertions(+) create mode 100644 src/Planner/CollectUsedIndetifiers.cpp create mode 100644 src/Planner/CollectUsedIndetifiers.h diff --git a/src/Planner/CollectUsedIndetifiers.cpp b/src/Planner/CollectUsedIndetifiers.cpp new file mode 100644 index 00000000000..f475bc586f3 --- /dev/null +++ b/src/Planner/CollectUsedIndetifiers.cpp @@ -0,0 +1,66 @@ +#include + +#include +#include + +#include + +namespace DB +{ + +namespace +{ + +class CollectUsedIdentifiersVisitor : public InDepthQueryTreeVisitor +{ +public: + + explicit CollectUsedIdentifiersVisitor(const PlannerContextPtr & planner_context_, ColumnIdentifierSet & used_identifiers_) + : used_identifiers(used_identifiers_) + , planner_context(planner_context_) + {} + + bool needChildVisit(VisitQueryTreeNodeType & parent [[maybe_unused]], VisitQueryTreeNodeType & child [[maybe_unused]]) + { + const auto & node_type = child->getNodeType(); + return node_type != QueryTreeNodeType::TABLE + && node_type != QueryTreeNodeType::TABLE_FUNCTION + && node_type != QueryTreeNodeType::QUERY + && node_type != QueryTreeNodeType::UNION + && node_type != QueryTreeNodeType::JOIN + && node_type != QueryTreeNodeType::ARRAY_JOIN; + } + + void visitImpl(const QueryTreeNodePtr & node) + { + if 
(node->getNodeType() != QueryTreeNodeType::COLUMN) + return; + + const auto * column_ident = planner_context->getColumnNodeIdentifierOrNull(node); + if (!column_ident) + return; + + used_identifiers.insert(*column_ident); + } + + ColumnIdentifierSet & used_identifiers; + const PlannerContextPtr & planner_context; +}; + +} + +void collectUsedIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context, ColumnIdentifierSet & out) +{ + CollectUsedIdentifiersVisitor visitor(planner_context, out); + visitor.visit(node); +} + +ColumnIdentifierSet collectUsedIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context) +{ + ColumnIdentifierSet out; + collectUsedIdentifiers(node, planner_context, out); + return out; +} + +} + diff --git a/src/Planner/CollectUsedIndetifiers.h b/src/Planner/CollectUsedIndetifiers.h new file mode 100644 index 00000000000..06c50d41e59 --- /dev/null +++ b/src/Planner/CollectUsedIndetifiers.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +using ColumnIdentifierSet = std::unordered_set; + +ColumnIdentifierSet collectUsedIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context); +void collectUsedIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context, ColumnIdentifierSet & out); + + +} + From 6c5b4458cb000f0411cdf5ab617edc17b82f88a8 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 27 Dec 2022 11:08:56 +0000 Subject: [PATCH 179/262] Add test, comments, rename --- ...fiers.cpp => CollectColumnIndetifiers.cpp} | 14 +- src/Planner/CollectColumnIndetifiers.h | 22 +++ src/Planner/CollectUsedIndetifiers.h | 17 --- src/Planner/Planner.cpp | 4 +- src/Planner/PlannerJoinTree.cpp | 4 +- .../02514_analyzer_drop_join_on.reference | 141 ++++++++++++++++++ .../02514_analyzer_drop_join_on.sql | 46 ++++++ 7 files changed, 220 insertions(+), 28 deletions(-) rename src/Planner/{CollectUsedIndetifiers.cpp => CollectColumnIndetifiers.cpp} (64%) create mode 100644 src/Planner/CollectColumnIndetifiers.h delete mode 100644 src/Planner/CollectUsedIndetifiers.h create mode 100644 tests/queries/0_stateless/02514_analyzer_drop_join_on.reference create mode 100644 tests/queries/0_stateless/02514_analyzer_drop_join_on.sql diff --git a/src/Planner/CollectUsedIndetifiers.cpp b/src/Planner/CollectColumnIndetifiers.cpp similarity index 64% rename from src/Planner/CollectUsedIndetifiers.cpp rename to src/Planner/CollectColumnIndetifiers.cpp index f475bc586f3..13a53067a00 100644 --- a/src/Planner/CollectUsedIndetifiers.cpp +++ b/src/Planner/CollectColumnIndetifiers.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -11,11 +11,11 @@ namespace DB namespace { -class CollectUsedIdentifiersVisitor : public InDepthQueryTreeVisitor +class CollectTopLevelColumnIdentifiersVisitor : public InDepthQueryTreeVisitor { public: - explicit CollectUsedIdentifiersVisitor(const PlannerContextPtr & planner_context_, ColumnIdentifierSet & used_identifiers_) + explicit CollectTopLevelColumnIdentifiersVisitor(const PlannerContextPtr & planner_context_, ColumnIdentifierSet & used_identifiers_) : used_identifiers(used_identifiers_) , planner_context(planner_context_) {} @@ -49,16 +49,16 @@ public: } -void collectUsedIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context, ColumnIdentifierSet & out) +void collectTopLevelColumnIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context, ColumnIdentifierSet & out) { - 
CollectUsedIdentifiersVisitor visitor(planner_context, out); + CollectTopLevelColumnIdentifiersVisitor visitor(planner_context, out); visitor.visit(node); } -ColumnIdentifierSet collectUsedIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context) +ColumnIdentifierSet collectTopLevelColumnIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context) { ColumnIdentifierSet out; - collectUsedIdentifiers(node, planner_context, out); + collectTopLevelColumnIdentifiers(node, planner_context, out); return out; } diff --git a/src/Planner/CollectColumnIndetifiers.h b/src/Planner/CollectColumnIndetifiers.h new file mode 100644 index 00000000000..8c84908ee6b --- /dev/null +++ b/src/Planner/CollectColumnIndetifiers.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +using ColumnIdentifierSet = std::unordered_set; + +/// Collect all top level column identifiers from query tree node. +/// Top level column identifiers are in the SELECT list or GROUP BY/ORDER BY/WHERE/HAVING clause, but not in child nodes of join tree. +/// For example, in the following query: +/// SELECT sum(b) FROM (SELECT x AS a, y AS b FROM t) AS t1 JOIN t2 ON t1.a = t2.key GROUP BY t2.y +/// The top level column identifiers are: `t1.b`, `t2.y` +ColumnIdentifierSet collectTopLevelColumnIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context); + +void collectTopLevelColumnIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context, ColumnIdentifierSet & out); + +} + diff --git a/src/Planner/CollectUsedIndetifiers.h b/src/Planner/CollectUsedIndetifiers.h deleted file mode 100644 index 06c50d41e59..00000000000 --- a/src/Planner/CollectUsedIndetifiers.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include -#include -#include - -namespace DB -{ - -using ColumnIdentifierSet = std::unordered_set; - -ColumnIdentifierSet collectUsedIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context); -void collectUsedIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context, ColumnIdentifierSet & out); - - -} - diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index fc8dafd1b49..ea14d29bd6f 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -64,7 +64,7 @@ #include #include #include -#include +#include namespace DB { @@ -375,7 +375,7 @@ void Planner::buildQueryPlanIfNeeded() collectSets(query_tree, *planner_context); - auto top_level_identifiers = collectUsedIdentifiers(query_tree, planner_context); + auto top_level_identifiers = collectTopLevelColumnIdentifiers(query_tree, planner_context); query_plan = buildQueryPlanForJoinTreeNode(query_node.getJoinTree(), select_query_info, select_query_options, top_level_identifiers, planner_context); auto expression_analysis_result = buildExpressionAnalysisResult(query_tree, query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(), planner_context); diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 5f1b27bb1cc..37c542d0494 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include #include @@ -214,7 +214,7 @@ QueryPlan buildQueryPlanForJoinNode(QueryTreeNodePtr join_tree_node, auto & join_node = join_tree_node->as(); ColumnIdentifierSet current_scope_columns = outer_scope_columns; - collectUsedIdentifiers(join_tree_node, planner_context, 
current_scope_columns); + collectTopLevelColumnIdentifiers(join_tree_node, planner_context, current_scope_columns); auto left_plan = buildQueryPlanForJoinTreeNode(join_node.getLeftTableExpression(), select_query_info, diff --git a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference new file mode 100644 index 00000000000..100b2fc42bf --- /dev/null +++ b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference @@ -0,0 +1,141 @@ +Expression ((Project names + Projection)) +Header: count() UInt64 + Aggregating + Header: default.a.a2_4 String + count() UInt64 + Expression ((Before GROUP BY + DROP unused columns after JOIN)) + Header: default.a.a2_4 String + Join (JOIN FillRightFirst) + Header: default.a.a2_4 String + default.c.c1_2 UInt64 + default.d.d1_3 UInt64 + Expression ((JOIN actions + DROP unused columns after JOIN)) + Header: default.a.a2_4 String + default.c.c1_2 UInt64 + Join (JOIN FillRightFirst) + Header: default.a.a2_4 String + default.b.b1_0 UInt64 + default.c.c1_2 UInt64 + Expression ((JOIN actions + DROP unused columns after JOIN)) + Header: default.a.a2_4 String + default.b.b1_0 UInt64 + Join (JOIN FillRightFirst) + Header: default.a.a2_4 String + default.a.a1_1 UInt64 + default.b.b1_0 UInt64 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: default.a.a2_4 String + default.a.a1_1 UInt64 + ReadFromStorage (Memory) + Header: a2 String + a1 UInt64 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: default.b.b1_0 UInt64 + ReadFromStorage (Memory) + Header: b1 UInt64 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: default.c.c1_2 UInt64 + ReadFromStorage (Memory) + Header: c1 UInt64 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: default.d.d1_3 UInt64 + ReadFromStorage (Memory) + Header: d1 UInt64 +Expression ((Project names + (Projection + DROP unused columns after JOIN))) +Header: a2 String + d2 String + Join (JOIN FillRightFirst) + Header: default.a.k_2 UInt64 + default.a.a2_0 String + default.d.d2_1 String + default.d.k_5 UInt64 + Expression (DROP unused columns after JOIN) + Header: default.a.k_2 UInt64 + default.a.a2_0 String + Join (JOIN FillRightFirst) + Header: default.a.k_2 UInt64 + default.a.a2_0 String + default.c.k_4 UInt64 + Expression (DROP unused columns after JOIN) + Header: default.a.k_2 UInt64 + default.a.a2_0 String + Join (JOIN FillRightFirst) + Header: default.a.k_2 UInt64 + default.a.a2_0 String + default.b.k_3 UInt64 + Expression (Change column names to column identifiers) + Header: default.a.k_2 UInt64 + default.a.a2_0 String + ReadFromStorage (Memory) + Header: k UInt64 + a2 String + Expression (Change column names to column identifiers) + Header: default.b.k_3 UInt64 + ReadFromStorage (Memory) + Header: k UInt64 + Expression (Change column names to column identifiers) + Header: default.c.k_4 UInt64 + ReadFromStorage (Memory) + Header: k UInt64 + Expression (Change column names to column identifiers) + Header: default.d.k_5 UInt64 + default.d.d2_1 String + ReadFromStorage (Memory) + Header: k UInt64 + d2 String +Expression (Project names) +Header: bx String + Sorting (Sorting for ORDER BY) + Header: default.a.a2_6 String + b.bx_0 String + Expression ((Before ORDER BY + (Projection + ))) + Header: default.a.a2_6 String + b.bx_0 String + Join (JOIN FillRightFirst) + Header: default.a.a2_6 String + b.bx_0 String + default.c.c2_5 String + 
default.c.c1_3 UInt64 + d.d1_4 UInt64 + Filter (( + (JOIN actions + DROP unused columns after JOIN))) + Header: default.a.a2_6 String + b.bx_0 String + default.c.c2_5 String + default.c.c1_3 UInt64 + Join (JOIN FillRightFirst) + Header: default.a.a2_6 String + b.bx_0 String + b.b1_1 UInt64 + default.c.c2_5 String + default.c.c1_3 UInt64 + Expression ((JOIN actions + DROP unused columns after JOIN)) + Header: default.a.a2_6 String + b.bx_0 String + b.b1_1 UInt64 + Join (JOIN FillRightFirst) + Header: default.a.a2_6 String + default.a.a1_2 UInt64 + b.bx_0 String + b.b1_1 UInt64 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: default.a.a2_6 String + default.a.a1_2 UInt64 + ReadFromStorage (Memory) + Header: a2 String + a1 UInt64 + Expression ((JOIN actions + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))) + Header: b.b1_1 UInt64 + b.bx_0 String + ReadFromStorage (Memory) + Header: b2 String + b1 UInt64 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: default.c.c2_5 String + default.c.c1_3 UInt64 + ReadFromStorage (Memory) + Header: c2 String + c1 UInt64 + Expression ((JOIN actions + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))) + Header: d.d1_4 UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 diff --git a/tests/queries/0_stateless/02514_analyzer_drop_join_on.sql b/tests/queries/0_stateless/02514_analyzer_drop_join_on.sql new file mode 100644 index 00000000000..576e68c2289 --- /dev/null +++ b/tests/queries/0_stateless/02514_analyzer_drop_join_on.sql @@ -0,0 +1,46 @@ +DROP TABLE IF EXISTS a; +DROP TABLE IF EXISTS b; +DROP TABLE IF EXISTS c; +DROP TABLE IF EXISTS d; + +CREATE TABLE a (k UInt64, a1 UInt64, a2 String) ENGINE = Memory; +INSERT INTO a VALUES (1, 1, 'a'), (2, 2, 'b'), (3, 3, 'c'); + +CREATE TABLE b (k UInt64, b1 UInt64, b2 String) ENGINE = Memory; +INSERT INTO b VALUES (1, 1, 'a'), (2, 2, 'b'), (3, 3, 'c'); + +CREATE TABLE c (k UInt64, c1 UInt64, c2 String) ENGINE = Memory; +INSERT INTO c VALUES (1, 1, 'a'), (2, 2, 'b'), (3, 3, 'c'); + +CREATE TABLE d (k UInt64, d1 UInt64, d2 String) ENGINE = Memory; +INSERT INTO d VALUES (1, 1, 'a'), (2, 2, 'b'), (3, 3, 'c'); + +SET allow_experimental_analyzer = 1; + +EXPLAIN PLAN header = 1 +SELECT count() +FROM a +JOIN b ON b.b1 = a.a1 +JOIN c ON c.c1 = b.b1 +JOIN d ON d.d1 = c.c1 +GROUP BY a.a2 +; + +EXPLAIN PLAN header = 1 +SELECT a.a2, d.d2 FROM a JOIN b USING (k) JOIN c USING (k) JOIN d USING (k) +; + +EXPLAIN PLAN header = 1 +SELECT b.bx +FROM a +JOIN (SELECT b1, b2 || 'x' AS bx FROM b ) AS b ON b.b1 = a.a1 +JOIN c ON c.c1 = b.b1 +JOIN (SELECT number AS d1 from numbers(10)) AS d ON d.d1 = c.c1 +WHERE c.c2 != '' +ORDER BY a.a2 +; + +DROP TABLE IF EXISTS a; +DROP TABLE IF EXISTS b; +DROP TABLE IF EXISTS c; +DROP TABLE IF EXISTS d; From eed2a295245b5b8229815a8f606609cbee322a81 Mon Sep 17 00:00:00 2001 From: Vladimir C Date: Thu, 29 Dec 2022 11:21:29 +0100 Subject: [PATCH 180/262] Fix style --- src/Planner/CollectColumnIndetifiers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Planner/CollectColumnIndetifiers.cpp b/src/Planner/CollectColumnIndetifiers.cpp index 13a53067a00..50e89658f9d 100644 --- a/src/Planner/CollectColumnIndetifiers.cpp +++ b/src/Planner/CollectColumnIndetifiers.cpp @@ -20,7 +20,7 @@ public: , planner_context(planner_context_) {} - bool 
needChildVisit(VisitQueryTreeNodeType & parent [[maybe_unused]], VisitQueryTreeNodeType & child [[maybe_unused]]) + static bool needChildVisit(VisitQueryTreeNodeType &, VisitQueryTreeNodeType & child) { const auto & node_type = child->getNodeType(); return node_type != QueryTreeNodeType::TABLE From 4dd628cd86577f7f0640fd5b50d0f9040fad1f88 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 11 Jan 2023 12:05:31 +0100 Subject: [PATCH 181/262] Clean trash from changelog for v22.3.16.1190-lts --- docs/changelogs/v22.3.16.1190-lts.md | 159 --------------------------- 1 file changed, 159 deletions(-) diff --git a/docs/changelogs/v22.3.16.1190-lts.md b/docs/changelogs/v22.3.16.1190-lts.md index 1b22d9a88be..a43d34551ca 100644 --- a/docs/changelogs/v22.3.16.1190-lts.md +++ b/docs/changelogs/v22.3.16.1190-lts.md @@ -7,186 +7,27 @@ sidebar_label: 2023 ### ClickHouse release v22.3.16.1190-lts (bb4e0934e5a) FIXME as compared to v22.10.1.1877-stable (98ab5a3c189) -#### Backward Incompatible Change -* JSONExtract family of functions will now attempt to coerce to the request type. [#41502](https://github.com/ClickHouse/ClickHouse/pull/41502) ([Márcio Martins](https://github.com/marcioapm)). -* Backported in [#43484](https://github.com/ClickHouse/ClickHouse/issues/43484): Fixed backward incompatibility in (de)serialization of states of `min`, `max`, `any*`, `argMin`, `argMax` aggregate functions with `String` argument. The incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/41431 and affects 22.9, 22.10 and 22.11 branches (fixed since 22.9.6, 22.10.4 and 22.11.2 correspondingly). Some minor releases of 22.3, 22.7 and 22.8 branches are also affected: 22.3.13...22.3.14 (fixed since 22.3.15), 22.8.6...22.8.9 (fixed since 22.8.10), 22.7.6 and newer (will not be fixed in 22.7, we recommend to upgrade from 22.7.* to 22.8.10 or newer). This release note does not concern users that have never used affected versions. Incompatible versions append extra `'\0'` to strings when reading states of the aggregate functions mentioned above. For example, if an older version saved state of `anyState('foobar')` to `state_column` then incompatible version will print `'foobar\0'` on `anyMerge(state_column)`. Also incompatible versions write states of the aggregate functions without trailing `'\0'`. Newer versions (that have the fix) can correctly read data written by all versions including incompatible versions, except one corner case. If an incompatible version saved a state with a string that actually ends with null character, then newer version will trim trailing `'\0'` when reading state of affected aggregate function. For example, if an incompatible version saved state of `anyState('abrac\0dabra\0')` to `state_column` then newer versions will print `'abrac\0dabra'` on `anyMerge(state_column)`. The issue also affects distributed queries when an incompatible version works in a cluster together with older or newer versions. [#43038](https://github.com/ClickHouse/ClickHouse/pull/43038) ([Raúl Marín](https://github.com/Algunenano)). - -#### New Feature -* - Add function `displayName`, closes [#36770](https://github.com/ClickHouse/ClickHouse/issues/36770). [#37681](https://github.com/ClickHouse/ClickHouse/pull/37681) ([hongbin](https://github.com/xlwh)). -* Add Hudi and DeltaLake table engines, read-only, only for tables on S3. [#41054](https://github.com/ClickHouse/ClickHouse/pull/41054) ([Daniil Rubin](https://github.com/rubin-do)). 
-* Add 4LW command `csnp` for manually creating snapshots. Additionally, `lgif` was added to get Raft information for a specific node (e.g. index of last created snapshot, last committed log index). [#41766](https://github.com/ClickHouse/ClickHouse/pull/41766) ([JackyWoo](https://github.com/JackyWoo)). -* Add function ascii like in spark: https://spark.apache.org/docs/latest/api/sql/#ascii. [#42670](https://github.com/ClickHouse/ClickHouse/pull/42670) ([李扬](https://github.com/taiyang-li)). -* Published function `formatReadableDecimalSize`. [#42774](https://github.com/ClickHouse/ClickHouse/pull/42774) ([Alejandro](https://github.com/alexon1234)). - -#### Performance Improvement -* Currently, the only saturable operators are And and Or, and their code paths are affected by this change. [#42214](https://github.com/ClickHouse/ClickHouse/pull/42214) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). -* `match` function can use the index if it's a condition on string prefix. This closes [#37333](https://github.com/ClickHouse/ClickHouse/issues/37333). [#42458](https://github.com/ClickHouse/ClickHouse/pull/42458) ([clarkcaoliu](https://github.com/Clark0)). -* Support parallel parsing for LineAsString input format. This improves performance just slightly. This closes [#42502](https://github.com/ClickHouse/ClickHouse/issues/42502). [#42780](https://github.com/ClickHouse/ClickHouse/pull/42780) ([Kruglov Pavel](https://github.com/Avogar)). -* Keeper performance improvement: improve commit performance for cases when many different nodes have uncommitted states. This should help with cases when a follower node can't sync fast enough. [#42926](https://github.com/ClickHouse/ClickHouse/pull/42926) ([Antonio Andelic](https://github.com/antonio2368)). - #### Improvement -* Support type `Object` inside other types, e.g. `Array(JSON)`. [#36969](https://github.com/ClickHouse/ClickHouse/pull/36969) ([Anton Popov](https://github.com/CurtizJ)). * Backported in [#42527](https://github.com/ClickHouse/ClickHouse/issues/42527): Fix issue with passing MySQL timeouts for MySQL database engine and MySQL table function. Closes [#34168](https://github.com/ClickHouse/ClickHouse/issues/34168)?notification_referrer_id=NT_kwDOAzsV57MzMDMxNjAzNTY5OjU0MjAzODc5. [#40751](https://github.com/ClickHouse/ClickHouse/pull/40751) ([Kseniia Sumarokova](https://github.com/kssenii)). -* ClickHouse Client and ClickHouse Local will show progress by default even in non-interactive mode. If `/dev/tty` is available, the progress will be rendered directly to the terminal, without writing to stderr. It allows to get progress even if stderr is redirected to a file, and the file will not be polluted by terminal escape sequences. The progress can be disabled by `--progress false`. This closes [#32238](https://github.com/ClickHouse/ClickHouse/issues/32238). [#42003](https://github.com/ClickHouse/ClickHouse/pull/42003) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* 1. Add, subtract and negate operations are now available on Intervals. In case when the types of Intervals are different they will be transformed into the Tuple of those types. 2. A tuple of intervals can be added to or subtracted from a Date/DateTime field. 3. Added parsing of Intervals with different types, for example: `INTERVAL '1 HOUR 1 MINUTE 1 SECOND'`. [#42195](https://github.com/ClickHouse/ClickHouse/pull/42195) ([Nikolay Degterinsky](https://github.com/evillique)). -* - Add `notLike` to key condition atom map, so condition like `NOT LIKE 'prefix%'` can use primary index. 
[#42209](https://github.com/ClickHouse/ClickHouse/pull/42209) ([Duc Canh Le](https://github.com/canhld94)). -* Add support for FixedString input to base64 coding functions. [#42285](https://github.com/ClickHouse/ClickHouse/pull/42285) ([ltrk2](https://github.com/ltrk2)). -* Add columns `bytes_on_disk` and `path` to `system.detached_parts`. Closes [#42264](https://github.com/ClickHouse/ClickHouse/issues/42264). [#42303](https://github.com/ClickHouse/ClickHouse/pull/42303) ([chen](https://github.com/xiedeyantu)). -* Added ** glob support for recursive directory traversal to filesystem and S3. resolves [#36316](https://github.com/ClickHouse/ClickHouse/issues/36316). [#42376](https://github.com/ClickHouse/ClickHouse/pull/42376) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Mask passwords and secret keys both in `system.query_log` and `/var/log/clickhouse-server/*.log` and also in error messages. [#42484](https://github.com/ClickHouse/ClickHouse/pull/42484) ([Vitaly Baranov](https://github.com/vitlibar)). -* Add a new variable call `limit` in query_info, indicating whether this query is a limit-trivial query. If so, we will adjust the approximate total rows for later estimation. Closes [#7071](https://github.com/ClickHouse/ClickHouse/issues/7071). [#42580](https://github.com/ClickHouse/ClickHouse/pull/42580) ([Han Fei](https://github.com/hanfei1991)). -* Implement `ATTACH` of `MergeTree` table for `s3_plain` disk (plus some fixes for `s3_plain`). [#42628](https://github.com/ClickHouse/ClickHouse/pull/42628) ([Azat Khuzhin](https://github.com/azat)). -* Fix no progress indication on INSERT FROM INFILE. Closes [#42548](https://github.com/ClickHouse/ClickHouse/issues/42548). [#42634](https://github.com/ClickHouse/ClickHouse/pull/42634) ([chen](https://github.com/xiedeyantu)). -* Add `min_age_to_force_merge_on_partition_only` setting to optimize old parts for the entire partition only. [#42659](https://github.com/ClickHouse/ClickHouse/pull/42659) ([Antonio Andelic](https://github.com/antonio2368)). -* Throttling algorithm changed to token bucket. [#42665](https://github.com/ClickHouse/ClickHouse/pull/42665) ([Sergei Trifonov](https://github.com/serxa)). -* Added new field allow_readonly in system.table_functions to allow using table functions in readonly mode resolves [#42414](https://github.com/ClickHouse/ClickHouse/issues/42414) Implementation: * Added a new field allow_readonly to table system.table_functions. * Updated to use new field allow_readonly to allow using table functions in readonly mode. Testing: * Added a test for filesystem tests/queries/0_stateless/02473_functions_in_readonly_mode.sh Documentation: * Updated the english documentation for Table Functions. [#42708](https://github.com/ClickHouse/ClickHouse/pull/42708) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Allow to use Date32 arguments for formatDateTime and FROM_UNIXTIME functions. [#42737](https://github.com/ClickHouse/ClickHouse/pull/42737) ([Roman Vasin](https://github.com/rvasin)). -* Backported in [#42839](https://github.com/ClickHouse/ClickHouse/issues/42839): Update tzdata to 2022f. Mexico will no longer observe DST except near the US border: https://www.timeanddate.com/news/time/mexico-abolishes-dst-2022.html. Chihuahua moves to year-round UTC-6 on 2022-10-30. Fiji no longer observes DST. See https://github.com/google/cctz/pull/235 and https://bugs.launchpad.net/ubuntu/+source/tzdata/+bug/1995209. 
[#42796](https://github.com/ClickHouse/ClickHouse/pull/42796) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Update tzdata to 2022f. Mexico will no longer observe DST except near the US border: https://www.timeanddate.com/news/time/mexico-abolishes-dst-2022.html. Chihuahua moves to year-round UTC-6 on 2022-10-30. Fiji no longer observes DST. See https://github.com/google/cctz/pull/235 and https://bugs.launchpad.net/ubuntu/+source/tzdata/+bug/1995209. [#42796](https://github.com/ClickHouse/ClickHouse/pull/42796) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add `FailedAsyncInsertQuery` event metric for async inserts. [#42814](https://github.com/ClickHouse/ClickHouse/pull/42814) ([Krzysztof Góralski](https://github.com/kgoralski)). -* Increase the size of upload part exponentially for backup to S3. [#42833](https://github.com/ClickHouse/ClickHouse/pull/42833) ([Vitaly Baranov](https://github.com/vitlibar)). #### Bug Fix * Backported in [#43829](https://github.com/ClickHouse/ClickHouse/issues/43829): Updated normaliser to clone the alias ast. resolves [#42452](https://github.com/ClickHouse/ClickHouse/issues/42452) Implementation: * Updated QueryNormalizer to clone alias ast, when its replaced. Previously just assigning the same leads to exception in LogicalExpressinsOptimizer as it would be the same parent being inserted again. * This bug is not seen with new analyser (allow_experimental_analyzer), so no changes for it. I added a test for the same. [#42827](https://github.com/ClickHouse/ClickHouse/pull/42827) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). #### Build/Testing/Packaging Improvement -* Run SQLancer for each pull request and commit to master. [SQLancer](https://github.com/sqlancer/sqlancer) is an OpenSource fuzzer that focuses on automatic detection of logical bugs. [#42397](https://github.com/ClickHouse/ClickHouse/pull/42397) ([Ilya Yatsishin](https://github.com/qoega)). -* Update to latest zlib-ng. [#42463](https://github.com/ClickHouse/ClickHouse/pull/42463) ([Boris Kuschel](https://github.com/bkuschel)). -* use llvm `l64.lld` in macOS suppress ld warnings, close [#42282](https://github.com/ClickHouse/ClickHouse/issues/42282). [#42470](https://github.com/ClickHouse/ClickHouse/pull/42470) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)). -* * Improve bugfix validation check: fix bug with skipping the check, port separate status in CI, run after check labels and style check. Close [#40349](https://github.com/ClickHouse/ClickHouse/issues/40349). [#42702](https://github.com/ClickHouse/ClickHouse/pull/42702) ([Vladimir C](https://github.com/vdimir)). -* Backported in [#43050](https://github.com/ClickHouse/ClickHouse/issues/43050): Wait for all files are in sync before archiving them in integration tests. [#42891](https://github.com/ClickHouse/ClickHouse/pull/42891) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Wait for all files are in sync before archiving them in integration tests. [#42891](https://github.com/ClickHouse/ClickHouse/pull/42891) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Use https://github.com/matus-chochlik/ctcache for clang-tidy results caching. [#42913](https://github.com/ClickHouse/ClickHouse/pull/42913) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Backported in [#42963](https://github.com/ClickHouse/ClickHouse/issues/42963): Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. The PR fixes it and won't replace the user's files from packages. 
[#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Backported in [#43039](https://github.com/ClickHouse/ClickHouse/issues/43039): Add a CI step to mark commits as ready for release; soft-forbid launching a release script from branches but master. [#43017](https://github.com/ClickHouse/ClickHouse/pull/43017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Add a CI step to mark commits as ready for release; soft-forbid launching a release script from branches but master. [#43017](https://github.com/ClickHouse/ClickHouse/pull/43017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Backported in [#44109](https://github.com/ClickHouse/ClickHouse/issues/44109): Bring sha512 sums back to the building step. [#44017](https://github.com/ClickHouse/ClickHouse/pull/44017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Backported in [#44431](https://github.com/ClickHouse/ClickHouse/issues/44431): Kill stress tests after 2.5h in case of hanging process. [#44214](https://github.com/ClickHouse/ClickHouse/pull/44214) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Backported in [#44557](https://github.com/ClickHouse/ClickHouse/issues/44557): Retry the integration tests on compressing errors. [#44529](https://github.com/ClickHouse/ClickHouse/pull/44529) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). #### Bug Fix (user-visible misbehavior in official stable or prestable release) -* Fix schema inference in s3Cluster and improve in hdfsCluster. [#41979](https://github.com/ClickHouse/ClickHouse/pull/41979) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix retries while reading from http table engines / table function. (retrtiable errors could be retries more times than needed, non-retrialble errors resulted in failed assertion in code). [#42224](https://github.com/ClickHouse/ClickHouse/pull/42224) ([Kseniia Sumarokova](https://github.com/kssenii)). -* A segmentation fault related to DNS & c-ares has been reported. The below error ocurred in multiple threads: ``` 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008088 [ 356 ] {} BaseDaemon: ######################################## 2022-09-28 15:41:19.008,"2022.09.28 15:41:19.008147 [ 356 ] {} BaseDaemon: (version 22.8.5.29 (official build), build id: 92504ACA0B8E2267) (from thread 353) (no query) Received signal Segmentation fault (11)" 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008196 [ 356 ] {} BaseDaemon: Address: 0xf Access: write. Address not mapped to object. 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008216 [ 356 ] {} BaseDaemon: Stack trace: 0x188f8212 0x1626851b 0x1626a69e 0x16269b3f 0x16267eab 0x13cf8284 0x13d24afc 0x13c5217e 0x14ec2495 0x15ba440f 0x15b9d13b 0x15bb2699 0x1891ccb3 0x1891e00d 0x18ae0769 0x18ade022 0x7f76aa985609 0x7f76aa8aa133 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008274 [ 356 ] {} BaseDaemon: 2. Poco::Net::IPAddress::family() const @ 0x188f8212 in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008297 [ 356 ] {} BaseDaemon: 3. ? @ 0x1626851b in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008309 [ 356 ] {} BaseDaemon: 4. ? @ 0x1626a69e in /usr/bin/clickhouse ```. 
[#42234](https://github.com/ClickHouse/ClickHouse/pull/42234) ([Arthur Passos](https://github.com/arthurpassos)). -* Fix `LOGICAL_ERROR` `Arguments of 'plus' have incorrect data types` which may happen in PK analysis (monotonicity check). Fix invalid PK analysis for monotonic binary functions with first constant argument. [#42410](https://github.com/ClickHouse/ClickHouse/pull/42410) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix incorrect key analysis when key types cannot be inside Nullable. This fixes [#42456](https://github.com/ClickHouse/ClickHouse/issues/42456). [#42469](https://github.com/ClickHouse/ClickHouse/pull/42469) ([Amos Bird](https://github.com/amosbird)). -* Fix typo in setting name that led to bad usage of schema inference cache while using setting `input_format_csv_use_best_effort_in_schema_inference`. Closes [#41735](https://github.com/ClickHouse/ClickHouse/issues/41735). [#42536](https://github.com/ClickHouse/ClickHouse/pull/42536) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix create Set with wrong header when data type is LowCardinality. Closes [#42460](https://github.com/ClickHouse/ClickHouse/issues/42460). [#42579](https://github.com/ClickHouse/ClickHouse/pull/42579) ([flynn](https://github.com/ucasfl)). -* `(U)Int128` and `(U)Int256` values are correctly checked in `PREWHERE`. [#42605](https://github.com/ClickHouse/ClickHouse/pull/42605) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix a bug in ParserFunction that could have led to a segmentation fault. [#42724](https://github.com/ClickHouse/ClickHouse/pull/42724) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix truncate table does not hold lock correctly. [#42728](https://github.com/ClickHouse/ClickHouse/pull/42728) ([flynn](https://github.com/ucasfl)). -* Fix possible SIGSEGV for web disks when file does not exists (or `OPTIMIZE TABLE FINAL`, that also can got the same error eventually). [#42767](https://github.com/ClickHouse/ClickHouse/pull/42767) ([Azat Khuzhin](https://github.com/azat)). -* Fix `auth_type` mapping in `system.session_log`, by including `SSL_CERTIFICATE` for the enum values. [#42782](https://github.com/ClickHouse/ClickHouse/pull/42782) ([Miel Donkers](https://github.com/mdonkers)). -* Fix stack-use-after-return under ASAN build in ParserCreateUserQuery. [#42804](https://github.com/ClickHouse/ClickHouse/pull/42804) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix lowerUTF8()/upperUTF8() in case of symbol was in between 16-byte boundary (very frequent case of you have strings > 16 bytes long). [#42812](https://github.com/ClickHouse/ClickHouse/pull/42812) ([Azat Khuzhin](https://github.com/azat)). -* Additional bound check was added to lz4 decompression routine to fix misbehaviour in case of malformed input. [#42868](https://github.com/ClickHouse/ClickHouse/pull/42868) ([Nikita Taranov](https://github.com/nickitat)). -* Fix rare possible hung on query cancellation. [#42874](https://github.com/ClickHouse/ClickHouse/pull/42874) ([Azat Khuzhin](https://github.com/azat)). -* * Fix incorrect saved_block_sample with multiple disjuncts in hash join, close [#42832](https://github.com/ClickHouse/ClickHouse/issues/42832). [#42876](https://github.com/ClickHouse/ClickHouse/pull/42876) ([Vladimir C](https://github.com/vdimir)). -* A null pointer will be generated when select if as from ‘three table join’ , For example, the SQL:. [#42883](https://github.com/ClickHouse/ClickHouse/pull/42883) ([zzsmdfj](https://github.com/zzsmdfj)). 
-* Fix memory sanitizer report in ClusterDiscovery, close [#42763](https://github.com/ClickHouse/ClickHouse/issues/42763). [#42905](https://github.com/ClickHouse/ClickHouse/pull/42905) ([Vladimir C](https://github.com/vdimir)). -* Fix rare NOT_FOUND_COLUMN_IN_BLOCK error when projection is possible to use but there is no projection available. This fixes [#42771](https://github.com/ClickHouse/ClickHouse/issues/42771) . The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/25563. [#42938](https://github.com/ClickHouse/ClickHouse/pull/42938) ([Amos Bird](https://github.com/amosbird)). -* Fix ATTACH TABLE in PostgreSQL database engine if the table contains DATETIME data type. Closes [#42817](https://github.com/ClickHouse/ClickHouse/issues/42817). [#42960](https://github.com/ClickHouse/ClickHouse/pull/42960) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix lambda parsing. Closes [#41848](https://github.com/ClickHouse/ClickHouse/issues/41848). [#42979](https://github.com/ClickHouse/ClickHouse/pull/42979) ([Nikolay Degterinsky](https://github.com/evillique)). * Backported in [#43512](https://github.com/ClickHouse/ClickHouse/issues/43512): - Fix several buffer over-reads. [#43159](https://github.com/ClickHouse/ClickHouse/pull/43159) ([Raúl Marín](https://github.com/Algunenano)). * Backported in [#43750](https://github.com/ClickHouse/ClickHouse/issues/43750): An issue with the following exception has been reported while trying to read a Parquet file from S3 into ClickHouse:. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)). -* Backported in [#43427](https://github.com/ClickHouse/ClickHouse/issues/43427): Fixed queries with `SAMPLE BY` with prewhere optimization on tables using `Merge` engine. [#43315](https://github.com/ClickHouse/ClickHouse/pull/43315) ([Antonio Andelic](https://github.com/antonio2368)). * Backported in [#43616](https://github.com/ClickHouse/ClickHouse/issues/43616): Fix sumMap() for Nullable(Decimal()). [#43414](https://github.com/ClickHouse/ClickHouse/pull/43414) ([Azat Khuzhin](https://github.com/azat)). -* Backported in [#43720](https://github.com/ClickHouse/ClickHouse/issues/43720): Fixed primary key analysis with conditions involving `toString(enum)`. [#43596](https://github.com/ClickHouse/ClickHouse/pull/43596) ([Nikita Taranov](https://github.com/nickitat)). * Backported in [#43885](https://github.com/ClickHouse/ClickHouse/issues/43885): Fixed `ALTER ... RESET SETTING` with `ON CLUSTER`. It could be applied to one replica only. Fixes [#43843](https://github.com/ClickHouse/ClickHouse/issues/43843). [#43848](https://github.com/ClickHouse/ClickHouse/pull/43848) ([Elena Torró](https://github.com/elenatorro)). * Backported in [#44179](https://github.com/ClickHouse/ClickHouse/issues/44179): Fix undefined behavior in the `quantiles` function, which might lead to uninitialized memory. Found by fuzzer. This closes [#44066](https://github.com/ClickHouse/ClickHouse/issues/44066). [#44067](https://github.com/ClickHouse/ClickHouse/pull/44067) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Backported in [#44283](https://github.com/ClickHouse/ClickHouse/issues/44283): Prevent `ReadonlyReplica` metric from having negative values. [#44220](https://github.com/ClickHouse/ClickHouse/pull/44220) ([Antonio Andelic](https://github.com/antonio2368)). -#### Build Improvement - -* ... Add support for format ipv6 on s390x. 
[#42412](https://github.com/ClickHouse/ClickHouse/pull/42412) ([Suzy Wang](https://github.com/SuzyWangIBMer)). - #### NO CL ENTRY -* NO CL ENTRY: 'Revert "Sonar Cloud Workflow"'. [#42725](https://github.com/ClickHouse/ClickHouse/pull/42725) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * NO CL ENTRY: 'Fix multipart upload for large S3 object, backport to 22.3'. [#44217](https://github.com/ClickHouse/ClickHouse/pull/44217) ([ianton-ru](https://github.com/ianton-ru)). -#### NOT FOR CHANGELOG / INSIGNIFICANT - -* Build with libcxx(abi) 15 [#42513](https://github.com/ClickHouse/ClickHouse/pull/42513) ([Robert Schulze](https://github.com/rschu1ze)). -* Sonar Cloud Workflow [#42534](https://github.com/ClickHouse/ClickHouse/pull/42534) ([Julio Jimenez](https://github.com/juliojimenez)). -* Invalid type in where for Merge table (logical error) [#42576](https://github.com/ClickHouse/ClickHouse/pull/42576) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix frequent memory drift message and clarify things in comments [#42582](https://github.com/ClickHouse/ClickHouse/pull/42582) ([Azat Khuzhin](https://github.com/azat)). -* Try to save `IDataPartStorage` interface [#42618](https://github.com/ClickHouse/ClickHouse/pull/42618) ([Anton Popov](https://github.com/CurtizJ)). -* Analyzer change setting into allow_experimental_analyzer [#42649](https://github.com/ClickHouse/ClickHouse/pull/42649) ([Maksim Kita](https://github.com/kitaisreal)). -* Analyzer IQueryTreeNode remove getName method [#42651](https://github.com/ClickHouse/ClickHouse/pull/42651) ([Maksim Kita](https://github.com/kitaisreal)). -* Minor fix iotest_nonblock build [#42658](https://github.com/ClickHouse/ClickHouse/pull/42658) ([Jordi Villar](https://github.com/jrdi)). -* Add tests and doc for some url-related functions [#42664](https://github.com/ClickHouse/ClickHouse/pull/42664) ([Vladimir C](https://github.com/vdimir)). -* Update version_date.tsv and changelogs after v22.10.1.1875-stable [#42676](https://github.com/ClickHouse/ClickHouse/pull/42676) ([robot-clickhouse](https://github.com/robot-clickhouse)). -* Fix error handling in clickhouse_helper.py [#42678](https://github.com/ClickHouse/ClickHouse/pull/42678) ([Ilya Yatsishin](https://github.com/qoega)). -* Fix execution of version_helper.py to use git tweaks [#42679](https://github.com/ClickHouse/ClickHouse/pull/42679) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* MergeTree indexes use RPNBuilderTree [#42681](https://github.com/ClickHouse/ClickHouse/pull/42681) ([Maksim Kita](https://github.com/kitaisreal)). -* Always run `BuilderReport` and `BuilderSpecialReport` in all CI types [#42684](https://github.com/ClickHouse/ClickHouse/pull/42684) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Always run `BuilderReport` and `BuilderSpecialReport` in all CI types [#42684](https://github.com/ClickHouse/ClickHouse/pull/42684) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Update version after release [#42699](https://github.com/ClickHouse/ClickHouse/pull/42699) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Update version_date.tsv and changelogs after v22.10.1.1877-stable [#42700](https://github.com/ClickHouse/ClickHouse/pull/42700) ([robot-clickhouse](https://github.com/robot-clickhouse)). -* OrderByLimitByDuplicateEliminationPass improve performance [#42704](https://github.com/ClickHouse/ClickHouse/pull/42704) ([Maksim Kita](https://github.com/kitaisreal)). 
-* Analyzer improve subqueries representation [#42705](https://github.com/ClickHouse/ClickHouse/pull/42705) ([Maksim Kita](https://github.com/kitaisreal)). -* Update version_date.tsv and changelogs after v22.9.4.32-stable [#42712](https://github.com/ClickHouse/ClickHouse/pull/42712) ([robot-clickhouse](https://github.com/robot-clickhouse)). -* Update version_date.tsv and changelogs after v22.8.7.34-lts [#42713](https://github.com/ClickHouse/ClickHouse/pull/42713) ([robot-clickhouse](https://github.com/robot-clickhouse)). -* Update version_date.tsv and changelogs after v22.7.7.24-stable [#42714](https://github.com/ClickHouse/ClickHouse/pull/42714) ([robot-clickhouse](https://github.com/robot-clickhouse)). -* Move SonarCloud Job to nightly [#42718](https://github.com/ClickHouse/ClickHouse/pull/42718) ([Julio Jimenez](https://github.com/juliojimenez)). -* Update version_date.tsv and changelogs after v22.8.8.3-lts [#42738](https://github.com/ClickHouse/ClickHouse/pull/42738) ([robot-clickhouse](https://github.com/robot-clickhouse)). -* Minor fix implicit cast CaresPTRResolver [#42747](https://github.com/ClickHouse/ClickHouse/pull/42747) ([Jordi Villar](https://github.com/jrdi)). -* Fix build on master [#42752](https://github.com/ClickHouse/ClickHouse/pull/42752) ([Igor Nikonov](https://github.com/devcrafter)). -* Update version_date.tsv and changelogs after v22.3.14.18-lts [#42759](https://github.com/ClickHouse/ClickHouse/pull/42759) ([robot-clickhouse](https://github.com/robot-clickhouse)). -* Fix anchor links [#42760](https://github.com/ClickHouse/ClickHouse/pull/42760) ([Sergei Trifonov](https://github.com/serxa)). -* Update version_date.tsv and changelogs after v22.3.14.23-lts [#42764](https://github.com/ClickHouse/ClickHouse/pull/42764) ([robot-clickhouse](https://github.com/robot-clickhouse)). -* Update README.md [#42783](https://github.com/ClickHouse/ClickHouse/pull/42783) ([Yuko Takagi](https://github.com/yukotakagi)). -* Slightly better code with projections [#42794](https://github.com/ClickHouse/ClickHouse/pull/42794) ([Anton Popov](https://github.com/CurtizJ)). -* Fix some races in MergeTree [#42805](https://github.com/ClickHouse/ClickHouse/pull/42805) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix typo in comments [#42809](https://github.com/ClickHouse/ClickHouse/pull/42809) ([Gabriel](https://github.com/Gabriel39)). -* Fix compilation of LLVM with cmake cache [#42816](https://github.com/ClickHouse/ClickHouse/pull/42816) ([Azat Khuzhin](https://github.com/azat)). -* Fix link in docs [#42821](https://github.com/ClickHouse/ClickHouse/pull/42821) ([Sergei Trifonov](https://github.com/serxa)). -* Link to proper place in docs [#42822](https://github.com/ClickHouse/ClickHouse/pull/42822) ([Sergei Trifonov](https://github.com/serxa)). -* Fix argument type check in AggregateFunctionAnalysisOfVariance [#42823](https://github.com/ClickHouse/ClickHouse/pull/42823) ([Vladimir C](https://github.com/vdimir)). -* Tests/lambda analyzer [#42824](https://github.com/ClickHouse/ClickHouse/pull/42824) ([Denny Crane](https://github.com/den-crane)). -* Fix Missing Quotes - Sonar Nightly [#42831](https://github.com/ClickHouse/ClickHouse/pull/42831) ([Julio Jimenez](https://github.com/juliojimenez)). -* Add exclusions from the Snyk scan [#42834](https://github.com/ClickHouse/ClickHouse/pull/42834) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
-* Fix Missing Env Vars - Sonar Nightly [#42843](https://github.com/ClickHouse/ClickHouse/pull/42843) ([Julio Jimenez](https://github.com/juliojimenez)). -* Fix typo [#42855](https://github.com/ClickHouse/ClickHouse/pull/42855) ([GoGoWen](https://github.com/GoGoWen)). -* Add timezone to 02458_datediff_date32 [#42857](https://github.com/ClickHouse/ClickHouse/pull/42857) ([Vladimir C](https://github.com/vdimir)). -* Adjust cancel and rerun workflow names to the actual [#42862](https://github.com/ClickHouse/ClickHouse/pull/42862) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Analyzer subquery in JOIN TREE with aggregation [#42865](https://github.com/ClickHouse/ClickHouse/pull/42865) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix getauxval for sanitizer builds [#42866](https://github.com/ClickHouse/ClickHouse/pull/42866) ([Amos Bird](https://github.com/amosbird)). -* Update version_date.tsv and changelogs after v22.10.2.11-stable [#42871](https://github.com/ClickHouse/ClickHouse/pull/42871) ([robot-clickhouse](https://github.com/robot-clickhouse)). -* Validate Query Tree in debug [#42879](https://github.com/ClickHouse/ClickHouse/pull/42879) ([Dmitry Novik](https://github.com/novikd)). -* changed type name for s3 plain storage [#42890](https://github.com/ClickHouse/ClickHouse/pull/42890) ([Aleksandr](https://github.com/AVMusorin)). -* Cleanup implementation of regexpReplace(All|One) [#42907](https://github.com/ClickHouse/ClickHouse/pull/42907) ([Robert Schulze](https://github.com/rschu1ze)). -* Do not show status for Bugfix validate check in non bugfix PRs [#42932](https://github.com/ClickHouse/ClickHouse/pull/42932) ([Vladimir C](https://github.com/vdimir)). -* fix(typo): Passible -> Possible [#42933](https://github.com/ClickHouse/ClickHouse/pull/42933) ([Yakko Majuri](https://github.com/yakkomajuri)). -* Pin the cryptography version to not break lambdas [#42934](https://github.com/ClickHouse/ClickHouse/pull/42934) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Fix: bad cast from type DB::ColumnLowCardinality to DB::ColumnString [#42937](https://github.com/ClickHouse/ClickHouse/pull/42937) ([Igor Nikonov](https://github.com/devcrafter)). -* Attach thread pool for loading parts to the query [#42947](https://github.com/ClickHouse/ClickHouse/pull/42947) ([Azat Khuzhin](https://github.com/azat)). -* Fix macOS M1 builds due to sprintf deprecation [#42962](https://github.com/ClickHouse/ClickHouse/pull/42962) ([Jordi Villar](https://github.com/jrdi)). -* Less use of CH-specific bit_cast() [#42968](https://github.com/ClickHouse/ClickHouse/pull/42968) ([Robert Schulze](https://github.com/rschu1ze)). -* Remove some utils [#42972](https://github.com/ClickHouse/ClickHouse/pull/42972) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix a bug in CAST function parser [#42980](https://github.com/ClickHouse/ClickHouse/pull/42980) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix old bug to remove `refs/head` from ref name [#42981](https://github.com/ClickHouse/ClickHouse/pull/42981) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Add debug information to nightly builds [#42997](https://github.com/ClickHouse/ClickHouse/pull/42997) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Add `on: workflow_call` to debug CI [#43000](https://github.com/ClickHouse/ClickHouse/pull/43000) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
-* Simple fixes for restart replica description [#43004](https://github.com/ClickHouse/ClickHouse/pull/43004) ([Igor Nikonov](https://github.com/devcrafter)). -* Cleanup match code [#43006](https://github.com/ClickHouse/ClickHouse/pull/43006) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix TSan errors (correctly ignore _exit interception) [#43009](https://github.com/ClickHouse/ClickHouse/pull/43009) ([Azat Khuzhin](https://github.com/azat)). -* fix bandwidth throttlers initialization order [#43015](https://github.com/ClickHouse/ClickHouse/pull/43015) ([Sergei Trifonov](https://github.com/serxa)). -* Add test for issue [#42520](https://github.com/ClickHouse/ClickHouse/issues/42520) [#43027](https://github.com/ClickHouse/ClickHouse/pull/43027) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix msan warning [#43065](https://github.com/ClickHouse/ClickHouse/pull/43065) ([Raúl Marín](https://github.com/Algunenano)). -* Update SECURITY.md on new stable tags [#43365](https://github.com/ClickHouse/ClickHouse/pull/43365) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Use all parameters with prefixes from ssm [#43467](https://github.com/ClickHouse/ClickHouse/pull/43467) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Temporarily disable `test_hive_query` [#43542](https://github.com/ClickHouse/ClickHouse/pull/43542) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Do not checkout submodules recursively [#43637](https://github.com/ClickHouse/ClickHouse/pull/43637) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Use docker images cache from merged PRs in master and release branches [#43664](https://github.com/ClickHouse/ClickHouse/pull/43664) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Yet another fix for AggregateFunctionMinMaxAny [#43778](https://github.com/ClickHouse/ClickHouse/pull/43778) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix tags workflow [#43942](https://github.com/ClickHouse/ClickHouse/pull/43942) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Assign release PRs [#44055](https://github.com/ClickHouse/ClickHouse/pull/44055) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Fix another bug in AggregateFunctionMinMaxAny [#44091](https://github.com/ClickHouse/ClickHouse/pull/44091) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Bump libdivide (to gain some new optimizations) [#44132](https://github.com/ClickHouse/ClickHouse/pull/44132) ([Azat Khuzhin](https://github.com/azat)). -* Add check for submodules sanity [#44386](https://github.com/ClickHouse/ClickHouse/pull/44386) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Implement a custom central checkout action [#44399](https://github.com/ClickHouse/ClickHouse/pull/44399) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
- From b156209ed6b84b874a31621699e6a61159da2ade Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 11 Jan 2023 14:17:44 +0300 Subject: [PATCH 182/262] Update test.py --- tests/integration/test_storage_rabbitmq/test.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 63b8d1215aa..43c964d9d93 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -1,3 +1,10 @@ +import pytest + +# FIXME This test is too flaky +# https://github.com/ClickHouse/ClickHouse/issues/45160 + +pytestmark = pytest.mark.skip + import json import os.path as p import random @@ -9,7 +16,6 @@ from random import randrange import math import pika -import pytest from google.protobuf.internal.encoder import _VarintBytes from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster, check_rabbitmq_is_available From 07a3967d6bc8b69778a9610a2e145514a9e25316 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 11 Jan 2023 11:20:02 +0000 Subject: [PATCH 183/262] Disable test_ttl_move_memory_usage as too flaky. --- .../test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py b/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py index a1e10cde031..ebdecb2f16c 100644 --- a/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py +++ b/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py @@ -22,6 +22,9 @@ def started_single_node_cluster(): def test_move_and_s3_memory_usage(started_single_node_cluster): + + pytest.skip("Test is too flaky. Disable it for now.") + if small_node.is_built_with_sanitizer() or small_node.is_debug_build(): pytest.skip("Disabled for debug and sanitizers. 
Too slow.") From 1b6e036d46d3f245d75f4db65c6a1f30c4ccf733 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 11 Jan 2023 11:54:28 +0000 Subject: [PATCH 184/262] Review fixes --- ...fiers.cpp => CollectColumnIdentifiers.cpp} | 20 +++++++++---------- ...detifiers.h => CollectColumnIdentifiers.h} | 15 +++++++------- src/Planner/Planner.cpp | 2 +- src/Planner/PlannerJoinTree.cpp | 2 +- src/Planner/PlannerJoinTree.h | 2 -- src/Planner/TableExpressionData.h | 1 + .../02514_analyzer_drop_join_on.reference | 15 ++++++++++++++ .../02514_analyzer_drop_join_on.sql | 17 +++++++--------- 8 files changed, 43 insertions(+), 31 deletions(-) rename src/Planner/{CollectColumnIndetifiers.cpp => CollectColumnIdentifiers.cpp} (71%) rename src/Planner/{CollectColumnIndetifiers.h => CollectColumnIdentifiers.h} (57%) diff --git a/src/Planner/CollectColumnIndetifiers.cpp b/src/Planner/CollectColumnIdentifiers.cpp similarity index 71% rename from src/Planner/CollectColumnIndetifiers.cpp rename to src/Planner/CollectColumnIdentifiers.cpp index 50e89658f9d..f7cdf196ad1 100644 --- a/src/Planner/CollectColumnIndetifiers.cpp +++ b/src/Planner/CollectColumnIdentifiers.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -23,12 +23,12 @@ public: static bool needChildVisit(VisitQueryTreeNodeType &, VisitQueryTreeNodeType & child) { const auto & node_type = child->getNodeType(); - return node_type != QueryTreeNodeType::TABLE - && node_type != QueryTreeNodeType::TABLE_FUNCTION - && node_type != QueryTreeNodeType::QUERY - && node_type != QueryTreeNodeType::UNION - && node_type != QueryTreeNodeType::JOIN - && node_type != QueryTreeNodeType::ARRAY_JOIN; + return node_type != QueryTreeNodeType::TABLE + && node_type != QueryTreeNodeType::TABLE_FUNCTION + && node_type != QueryTreeNodeType::QUERY + && node_type != QueryTreeNodeType::UNION + && node_type != QueryTreeNodeType::JOIN + && node_type != QueryTreeNodeType::ARRAY_JOIN; } void visitImpl(const QueryTreeNodePtr & node) @@ -36,11 +36,11 @@ public: if (node->getNodeType() != QueryTreeNodeType::COLUMN) return; - const auto * column_ident = planner_context->getColumnNodeIdentifierOrNull(node); - if (!column_ident) + const auto * column_identifier = planner_context->getColumnNodeIdentifierOrNull(node); + if (!column_identifier) return; - used_identifiers.insert(*column_ident); + used_identifiers.insert(*column_identifier); } ColumnIdentifierSet & used_identifiers; diff --git a/src/Planner/CollectColumnIndetifiers.h b/src/Planner/CollectColumnIdentifiers.h similarity index 57% rename from src/Planner/CollectColumnIndetifiers.h rename to src/Planner/CollectColumnIdentifiers.h index 8c84908ee6b..b0cad10ba4f 100644 --- a/src/Planner/CollectColumnIndetifiers.h +++ b/src/Planner/CollectColumnIdentifiers.h @@ -7,13 +7,14 @@ namespace DB { -using ColumnIdentifierSet = std::unordered_set; - -/// Collect all top level column identifiers from query tree node. -/// Top level column identifiers are in the SELECT list or GROUP BY/ORDER BY/WHERE/HAVING clause, but not in child nodes of join tree. -/// For example, in the following query: -/// SELECT sum(b) FROM (SELECT x AS a, y AS b FROM t) AS t1 JOIN t2 ON t1.a = t2.key GROUP BY t2.y -/// The top level column identifiers are: `t1.b`, `t2.y` +/** Collect all top level column identifiers from query tree node. + * Top level column identifiers are in the SELECT list or GROUP BY/ORDER BY/WHERE/HAVING clause, but not in child nodes of join tree. 
+ * For example, in the following query: + * SELECT sum(b) FROM (SELECT x AS a, y AS b FROM t) AS t1 JOIN t2 ON t1.a = t2.key GROUP BY t2.y + * The top level column identifiers are: `t1.b`, `t2.y` + * + * There is precondition that table expression data is collected in planner context. + */ ColumnIdentifierSet collectTopLevelColumnIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context); void collectTopLevelColumnIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context, ColumnIdentifierSet & out); diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index ea14d29bd6f..b865e137038 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -64,7 +64,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 37c542d0494..69bf7dd79bb 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Planner/PlannerJoinTree.h b/src/Planner/PlannerJoinTree.h index 9d83bf62fc1..742d6853267 100644 --- a/src/Planner/PlannerJoinTree.h +++ b/src/Planner/PlannerJoinTree.h @@ -11,8 +11,6 @@ namespace DB { -using ColumnIdentifierSet = std::unordered_set; - /// Build query plan for query JOIN TREE node QueryPlan buildQueryPlanForJoinTreeNode(QueryTreeNodePtr join_tree_node, SelectQueryInfo & select_query_info, diff --git a/src/Planner/TableExpressionData.h b/src/Planner/TableExpressionData.h index e737788cebf..6b4a9b4748d 100644 --- a/src/Planner/TableExpressionData.h +++ b/src/Planner/TableExpressionData.h @@ -13,6 +13,7 @@ namespace ErrorCodes using ColumnIdentifier = std::string; using ColumnIdentifiers = std::vector; +using ColumnIdentifierSet = std::unordered_set; /** Table expression data is created for each table expression that take part in query. 
* Table expression data has information about columns that participate in query, their name to identifier mapping, diff --git a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference index 100b2fc42bf..abd49790ced 100644 --- a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference +++ b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference @@ -1,3 +1,8 @@ +-- { echoOn } + +EXPLAIN PLAN header = 1 +SELECT count() FROM a JOIN b ON b.b1 = a.a1 JOIN c ON c.c1 = b.b1 JOIN d ON d.d1 = c.c1 GROUP BY a.a2 +; Expression ((Project names + Projection)) Header: count() UInt64 Aggregating @@ -41,6 +46,9 @@ Header: count() UInt64 Header: default.d.d1_3 UInt64 ReadFromStorage (Memory) Header: d1 UInt64 +EXPLAIN PLAN header = 1 +SELECT a.a2, d.d2 FROM a JOIN b USING (k) JOIN c USING (k) JOIN d USING (k) +; Expression ((Project names + (Projection + DROP unused columns after JOIN))) Header: a2 String d2 String @@ -83,6 +91,13 @@ Header: a2 String ReadFromStorage (Memory) Header: k UInt64 d2 String +EXPLAIN PLAN header = 1 +SELECT b.bx FROM a +JOIN (SELECT b1, b2 || 'x' AS bx FROM b ) AS b ON b.b1 = a.a1 +JOIN c ON c.c1 = b.b1 +JOIN (SELECT number AS d1 from numbers(10)) AS d ON d.d1 = c.c1 +WHERE c.c2 != '' ORDER BY a.a2 +; Expression (Project names) Header: bx String Sorting (Sorting for ORDER BY) diff --git a/tests/queries/0_stateless/02514_analyzer_drop_join_on.sql b/tests/queries/0_stateless/02514_analyzer_drop_join_on.sql index 576e68c2289..2406be13aa8 100644 --- a/tests/queries/0_stateless/02514_analyzer_drop_join_on.sql +++ b/tests/queries/0_stateless/02514_analyzer_drop_join_on.sql @@ -17,13 +17,10 @@ INSERT INTO d VALUES (1, 1, 'a'), (2, 2, 'b'), (3, 3, 'c'); SET allow_experimental_analyzer = 1; +-- { echoOn } + EXPLAIN PLAN header = 1 -SELECT count() -FROM a -JOIN b ON b.b1 = a.a1 -JOIN c ON c.c1 = b.b1 -JOIN d ON d.d1 = c.c1 -GROUP BY a.a2 +SELECT count() FROM a JOIN b ON b.b1 = a.a1 JOIN c ON c.c1 = b.b1 JOIN d ON d.d1 = c.c1 GROUP BY a.a2 ; EXPLAIN PLAN header = 1 @@ -31,15 +28,15 @@ SELECT a.a2, d.d2 FROM a JOIN b USING (k) JOIN c USING (k) JOIN d USING (k) ; EXPLAIN PLAN header = 1 -SELECT b.bx -FROM a +SELECT b.bx FROM a JOIN (SELECT b1, b2 || 'x' AS bx FROM b ) AS b ON b.b1 = a.a1 JOIN c ON c.c1 = b.b1 JOIN (SELECT number AS d1 from numbers(10)) AS d ON d.d1 = c.c1 -WHERE c.c2 != '' -ORDER BY a.a2 +WHERE c.c2 != '' ORDER BY a.a2 ; +-- { echoOff } + DROP TABLE IF EXISTS a; DROP TABLE IF EXISTS b; DROP TABLE IF EXISTS c; From 659fa963655a61f33bef64b944aa70e3f236de75 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Wed, 11 Jan 2023 13:06:38 +0100 Subject: [PATCH 185/262] More logging to facilitate debugging --- src/Processors/Transforms/TTLTransform.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Processors/Transforms/TTLTransform.cpp b/src/Processors/Transforms/TTLTransform.cpp index e79dcb34c41..3250d012d5c 100644 --- a/src/Processors/Transforms/TTLTransform.cpp +++ b/src/Processors/Transforms/TTLTransform.cpp @@ -144,6 +144,8 @@ void TTLTransform::finalize() else LOG_DEBUG(log, "Removed {} rows with expired TTL from part {}", delete_algorithm->getNumberOfRemovedRows(), data_part->name); } + else + LOG_DEBUG(log, "No delete algorithm was applied for part {}", data_part->name); } IProcessor::Status TTLTransform::prepare() From c0f529600d2d17fa111ce875e10bb1557fc1bd74 Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 11 Jan 2023 12:23:33 
+0000 Subject: [PATCH 186/262] fix darwin build --- src/Common/CancelableSharedMutex.h | 3 ++- src/Common/SharedMutex.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Common/CancelableSharedMutex.h b/src/Common/CancelableSharedMutex.h index dfd9631c564..af87b213479 100644 --- a/src/Common/CancelableSharedMutex.h +++ b/src/Common/CancelableSharedMutex.h @@ -1,12 +1,13 @@ #pragma once +#include + #ifdef OS_LINUX /// Because of futex #include #include #include #include -#include // for std::unique_lock and std::shared_lock namespace DB { diff --git a/src/Common/SharedMutex.h b/src/Common/SharedMutex.h index 26c649c6fa8..e0143d4042d 100644 --- a/src/Common/SharedMutex.h +++ b/src/Common/SharedMutex.h @@ -1,11 +1,12 @@ #pragma once +#include + #ifdef OS_LINUX /// Because of futex #include #include #include -#include // for std::unique_lock and std::shared_lock namespace DB { From 22c30ca38c373d21f6d95dc0323dc34a6bfead5d Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 11 Jan 2023 10:59:48 +0100 Subject: [PATCH 187/262] Add typing to get_previous_release_tag.py --- tests/ci/get_previous_release_tag.py | 40 ++++++++++++++++++---------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/tests/ci/get_previous_release_tag.py b/tests/ci/get_previous_release_tag.py index 579035bd943..c6fe6cd5fb5 100755 --- a/tests/ci/get_previous_release_tag.py +++ b/tests/ci/get_previous_release_tag.py @@ -2,21 +2,25 @@ import re import logging +from typing import List, Optional, Tuple import requests # type: ignore CLICKHOUSE_TAGS_URL = "https://api.github.com/repos/ClickHouse/ClickHouse/tags" -CLICKHOUSE_PACKAGE_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-common-static_{version}_amd64.deb" +CLICKHOUSE_PACKAGE_URL = ( + "https://github.com/ClickHouse/ClickHouse/releases/download/" + "v{version}-{type}/clickhouse-common-static_{version}_amd64.deb" +) VERSION_PATTERN = r"(v(?:\d+\.)?(?:\d+\.)?(?:\d+\.)?\d+-[a-zA-Z]*)" logger = logging.getLogger(__name__) class Version: - def __init__(self, version): + def __init__(self, version: str): self.version = version - def __lt__(self, other): + def __lt__(self, other: "Version") -> bool: return list(map(int, self.version.split("."))) < list( map(int, other.version.split(".")) ) @@ -26,7 +30,7 @@ class Version: class ReleaseInfo: - def __init__(self, release_tag): + def __init__(self, release_tag: str): self.version = Version(release_tag[1:].split("-")[0]) self.type = release_tag[1:].split("-")[1] @@ -37,7 +41,9 @@ class ReleaseInfo: return f"ReleaseInfo: {self.version}-{self.type}" -def find_previous_release(server_version, releases): +def find_previous_release( + server_version: Optional[Version], releases: List[ReleaseInfo] +) -> Tuple[bool, Optional[ReleaseInfo]]: releases.sort(key=lambda x: x.version, reverse=True) if server_version is None: @@ -59,21 +65,23 @@ def find_previous_release(server_version, releases): != 404 ): return True, release - else: - logger.debug( - "The tag %s-%s exists but the package is not yet available on GitHub", - release.version, - release.type, - ) + + logger.debug( + "The tag v%s-%s exists but the package is not yet available on GitHub", + release.version, + release.type, + ) return False, None -def get_previous_release(server_version): +def get_previous_release(server_version: Optional[Version]) -> Optional[ReleaseInfo]: page = 1 found = False while not found: - response = requests.get(CLICKHOUSE_TAGS_URL, {"page": page, "per_page": 100}) 
+ response = requests.get( + CLICKHOUSE_TAGS_URL, {"page": page, "per_page": 100}, timeout=10 + ) if not response.ok: raise Exception( "Cannot load the list of tags from github: " + response.reason @@ -94,7 +102,11 @@ def get_previous_release(server_version): return previous_release -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) server_version = Version(input()) print(get_previous_release(server_version)) + + +if __name__ == "__main__": + main() From 21573028ea02b3c0a9cfe6dc2420a0360c2d3731 Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Wed, 11 Jan 2023 07:38:41 -0500 Subject: [PATCH 188/262] Update docs/en/engines/table-engines/integrations/deltalake.md --- docs/en/engines/table-engines/integrations/deltalake.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/deltalake.md b/docs/en/engines/table-engines/integrations/deltalake.md index eb4d8e934a7..251d2fef52e 100644 --- a/docs/en/engines/table-engines/integrations/deltalake.md +++ b/docs/en/engines/table-engines/integrations/deltalake.md @@ -18,7 +18,7 @@ CREATE TABLE deltalake **Engine parameters** -- `path` — Bucket url with path to the existing Delta Lake table. +- `url` — Bucket url with path to the existing Delta Lake table. - `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3). **Example** From 73ef2657dd906974e5a4178e55099faec553ee77 Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Wed, 11 Jan 2023 07:40:10 -0500 Subject: [PATCH 189/262] Update docs/en/engines/table-engines/integrations/hudi.md --- docs/en/engines/table-engines/integrations/hudi.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/hudi.md b/docs/en/engines/table-engines/integrations/hudi.md index 6da1634ba5a..75b1969101b 100644 --- a/docs/en/engines/table-engines/integrations/hudi.md +++ b/docs/en/engines/table-engines/integrations/hudi.md @@ -18,7 +18,7 @@ CREATE TABLE hudi_table **Engine parameters** -- `path` — Bucket url with the path to an existing Hudi table. +- `url` — Bucket url with the path to an existing Hudi table. - `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3). 
**Example** From 367d4fc4bf0cfc992f48ab5297f38da5017efb73 Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Wed, 11 Jan 2023 07:40:52 -0500 Subject: [PATCH 190/262] Update docs/en/sql-reference/table-functions/hudi.md --- docs/en/sql-reference/table-functions/hudi.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/hudi.md b/docs/en/sql-reference/table-functions/hudi.md index c1ccd0cda2f..b8d0724a7b9 100644 --- a/docs/en/sql-reference/table-functions/hudi.md +++ b/docs/en/sql-reference/table-functions/hudi.md @@ -10,7 +10,7 @@ Provides a read-only table-like interface to Apache [Hudi](https://hudi.apache.o ## Syntax ``` sql -hudi(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) +hudi(url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) ``` ## Arguments From d4c4f84161014f9434fc4535cd545b950ba2bd5d Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Wed, 11 Jan 2023 07:41:36 -0500 Subject: [PATCH 191/262] Update docs/en/sql-reference/table-functions/hudi.md --- docs/en/sql-reference/table-functions/hudi.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/hudi.md b/docs/en/sql-reference/table-functions/hudi.md index b8d0724a7b9..5a97b2401b4 100644 --- a/docs/en/sql-reference/table-functions/hudi.md +++ b/docs/en/sql-reference/table-functions/hudi.md @@ -15,7 +15,7 @@ hudi(url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,co ## Arguments -- `path` — Bucket url with the path to an existing Hudi table in S3. +- `url` — Bucket url with the path to an existing Hudi table in S3. - `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. These parameters are optional. If credentials are not specified, they are used from the ClickHouse configuration. For more information see [Using S3 for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3). - `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. 
From 02261b0e2a83bab18dcff08f48246ef122ab1449 Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Wed, 11 Jan 2023 07:42:15 -0500 Subject: [PATCH 192/262] Update docs/en/engines/table-engines/integrations/hudi.md --- docs/en/engines/table-engines/integrations/hudi.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/hudi.md b/docs/en/engines/table-engines/integrations/hudi.md index 75b1969101b..4e335e6c075 100644 --- a/docs/en/engines/table-engines/integrations/hudi.md +++ b/docs/en/engines/table-engines/integrations/hudi.md @@ -13,7 +13,7 @@ Note that the Hudi table must already exist in S3, this command does not take DD ``` sql CREATE TABLE hudi_table - ENGINE = Hudi(path, [aws_access_key_id, aws_secret_access_key,]) + ENGINE = Hudi(url, [aws_access_key_id, aws_secret_access_key,]) ``` **Engine parameters** From 1ddc9c3bb60195db72dac5fb966d7870e9b0dbc8 Mon Sep 17 00:00:00 2001 From: rfraposa Date: Wed, 11 Jan 2023 05:44:51 -0700 Subject: [PATCH 193/262] Update merge-tree-settings.md --- .../settings/merge-tree-settings.md | 103 ++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index ec492605930..ed4ee37fc37 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -643,3 +643,106 @@ Default value: `0` (limit never applied). ``` xml 10 ``` + +## ratio_of_defaults_for_sparse_serialization {#ratio_of_defaults_for_sparse_serialization} + +Minimal ratio of the number of _default_ values to the number of _all_ values in a column. Setting this value causes the column to be stored using sparse serializations. + +If a column is sparse (contains mostly zeros), ClickHouse can encode it in a sparse format and automatically optimize calculations - the data does not require full decompression during queries. To enable this sparse serialization, define the `ratio_of_defaults_for_sparse_serialization` setting to be less than 1.0. If the value is greater than or equal to 1.0 (the default), then the columns will be always written using the normal full serialization. + +Possible values: + +- Float between 0 and 1 to enable sparse serialization +- 1.0 (or greater) if you do not want to use sparse serialization + +Default value: `1.0` (sparse serialization is disabled) + +**Example** + +Notice the `s` column in the following table is an empty string for 95% of the rows. In `my_regular_table` we do not use sparse serialization, and in `my_sparse_table` we set `ratio_of_defaults_for_sparse_serialization` to 0.95: + +```sql +CREATE TABLE my_regular_table +( + `id` UInt64, + `s` String +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO my_regular_table +SELECT + number AS id, + number % 20 = 0 ? toString(number): '' AS s +FROM + numbers(10000000); + + +CREATE TABLE my_sparse_table +( + `id` UInt64, + `s` String +) +ENGINE = MergeTree +ORDER BY id +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.95; + +INSERT INTO my_sparse_table +SELECT + number, + number % 20 = 0 ? 
toString(number): '' +FROM + numbers(10000000); +``` + +Notice the `s` column in `my_sparse_table` uses less storage space on disk: + +```sql +SELECT table, name, data_compressed_bytes, data_uncompressed_bytes FROM system.columns +WHERE table LIKE 'my_%_table'; +``` + +```response +┌─table────────────┬─name─┬─data_compressed_bytes─┬─data_uncompressed_bytes─┐ +│ my_regular_table │ id │ 37790741 │ 75488328 │ +│ my_regular_table │ s │ 2451377 │ 12683106 │ +│ my_sparse_table │ id │ 37790741 │ 75488328 │ +│ my_sparse_table │ s │ 2283454 │ 9855751 │ +└──────────────────┴──────┴───────────────────────┴─────────────────────────┘ +``` + +You can verify if a column is using the sparse encoding by viewing the `serialization_kind` column of the `system.parts_columns` table: + +```sql +SELECT column, serialization_kind FROM system.parts_columns +WHERE table LIKE 'my_sparse_table'; +``` + +You can see which parts of `s` were stored using the sparse serialization: + +```response +┌─column─┬─serialization_kind─┐ +│ id │ Default │ +│ s │ Default │ +│ id │ Default │ +│ s │ Default │ +│ id │ Default │ +│ s │ Sparse │ +│ id │ Default │ +│ s │ Sparse │ +│ id │ Default │ +│ s │ Sparse │ +│ id │ Default │ +│ s │ Sparse │ +│ id │ Default │ +│ s │ Sparse │ +│ id │ Default │ +│ s │ Sparse │ +│ id │ Default │ +│ s │ Sparse │ +│ id │ Default │ +│ s │ Sparse │ +│ id │ Default │ +│ s │ Sparse │ +└────────┴────────────────────┘ +``` \ No newline at end of file From 1d002e45a904dc945d0696798fd293a3d65b4625 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 11 Jan 2023 12:49:01 +0000 Subject: [PATCH 194/262] Fix test & review comments --- src/Storages/MergeTree/MergeTreeData.cpp | 21 ++++++++++++------- src/Storages/MergeTree/MergeTreeSettings.h | 2 +- ...21_incorrect_dealy_for_insert_bug_44902.sh | 4 ++-- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 47fc02165b6..5830e0145bc 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3870,27 +3870,32 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const Contex { size_t parts_over_threshold = 0; size_t allowed_parts_over_threshold = 1; - if (active_parts_over_threshold >= outdated_parts_over_threshold) + const bool use_active_parts_threshold = (active_parts_over_threshold >= outdated_parts_over_threshold); + if (use_active_parts_threshold) { - parts_over_threshold = active_parts_over_threshold; + parts_over_threshold = active_parts_over_threshold; allowed_parts_over_threshold = active_parts_to_throw_insert - active_parts_to_delay_insert; } else { parts_over_threshold = outdated_parts_over_threshold; - allowed_parts_over_threshold = outdated_parts_over_threshold; + allowed_parts_over_threshold = outdated_parts_over_threshold; /// if throw threshold is not set, will use max delay if (settings->inactive_parts_to_throw_insert > 0) allowed_parts_over_threshold = settings->inactive_parts_to_throw_insert - settings->inactive_parts_to_delay_insert; } - chassert(allowed_parts_over_threshold > 0 && parts_over_threshold <= allowed_parts_over_threshold); + if (allowed_parts_over_threshold == 0 || parts_over_threshold > allowed_parts_over_threshold) [[unlikely]] + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Incorrect calculation of {} parts over threshold: allowed_parts_over_threshold={}, parts_over_threshold={}", + (use_active_parts_threshold ? 
"active" : "inactive"), + allowed_parts_over_threshold, + parts_over_threshold); const UInt64 max_delay_milliseconds = (settings->max_delay_to_insert > 0 ? settings->max_delay_to_insert * 1000 : 1000); double delay_factor = static_cast(parts_over_threshold) / allowed_parts_over_threshold; - UInt64 min_delay_milliseconds = settings->min_delay_to_insert_ms; - /// min() as a save guard here - delay_milliseconds = std::max( - min_delay_milliseconds, std::min(max_delay_milliseconds, static_cast(max_delay_milliseconds * delay_factor))); + const UInt64 min_delay_milliseconds = settings->min_delay_to_insert_ms; + delay_milliseconds = std::max(min_delay_milliseconds, static_cast(max_delay_milliseconds * delay_factor)); } ProfileEvents::increment(ProfileEvents::DelayedInserts); diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 0b8188f67c7..d1f957740e2 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -68,7 +68,7 @@ struct Settings; M(Bool, remove_rolled_back_parts_immediately, 1, "Setting for an incomplete experimental feature.", 0) \ \ /** Inserts settings. */ \ - M(UInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table.", 0) \ + M(UInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table. Disabled if set to 0", 0) \ M(UInt64, inactive_parts_to_delay_insert, 0, "If table contains at least that many inactive parts in single partition, artificially slow down insert into table.", 0) \ M(UInt64, parts_to_throw_insert, 300, "If more than this number active parts in single partition, throw 'Too many parts ...' exception.", 0) \ M(UInt64, inactive_parts_to_throw_insert, 0, "If more than this number inactive parts in single partition, throw 'Too many inactive parts ...' 
exception.", 0) \ diff --git a/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.sh b/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.sh index 6cbd77b262a..5f91ef19a5a 100755 --- a/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.sh +++ b/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.sh @@ -15,10 +15,10 @@ for i in {0..4} do query_id="${CLICKHOUSE_DATABASE}_02521_${i}_$RANDOM$RANDOM" $CLICKHOUSE_CLIENT --query_id="$query_id" -q "INSERT INTO test_02521_insert_delay SELECT number, toString(number) FROM numbers(${i}, 1)" - $CLICKHOUSE_CLIENT -q "system flush logs" + $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" $CLICKHOUSE_CLIENT --param_query_id="$query_id" -q "select ProfileEvents['DelayedInsertsMilliseconds'] as delay from system.query_log where event_date >= yesterday() and query_id = {query_id:String} order by delay desc limit 1" done -$CLICKHOUSE_CLIENT -q "INSERT INTO test_02521_insert_delay VALUES(0, 'This query throws error')" 2>&1 | grep -o 'TOO_MANY_PARTS' +$CLICKHOUSE_CLIENT -q "INSERT INTO test_02521_insert_delay VALUES(0, 'This query throws error')" 2>&1 | grep -o 'TOO_MANY_PARTS' | head -n 1 $CLICKHOUSE_CLIENT -q "DROP TABLE test_02521_insert_delay" From 6e9669cfaebf4a0a8c8c5a15f9ead146833627e1 Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Wed, 11 Jan 2023 07:53:37 -0500 Subject: [PATCH 195/262] Apply suggestions from code review --- docs/en/engines/table-engines/integrations/deltalake.md | 2 +- docs/en/sql-reference/table-functions/deltalake.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/deltalake.md b/docs/en/engines/table-engines/integrations/deltalake.md index 251d2fef52e..83526ac944d 100644 --- a/docs/en/engines/table-engines/integrations/deltalake.md +++ b/docs/en/engines/table-engines/integrations/deltalake.md @@ -13,7 +13,7 @@ Note that the Delta Lake table must already exist in S3, this command does not t ``` sql CREATE TABLE deltalake - ENGINE = DeltaLake(path, [aws_access_key_id, aws_secret_access_key,]) + ENGINE = DeltaLake(url, [aws_access_key_id, aws_secret_access_key,]) ``` **Engine parameters** diff --git a/docs/en/sql-reference/table-functions/deltalake.md b/docs/en/sql-reference/table-functions/deltalake.md index 10e7c20e17a..f1cc4659a2a 100644 --- a/docs/en/sql-reference/table-functions/deltalake.md +++ b/docs/en/sql-reference/table-functions/deltalake.md @@ -10,12 +10,12 @@ Provides a read-only table-like interface to [Delta Lake](https://github.com/del ## Syntax ``` sql -deltaLake(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) +deltaLake(url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) ``` ## Arguments -- `path` — Bucket url with path to existing Delta Lake table in S3. +- `url` — Bucket url with path to existing Delta Lake table in S3. - `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. These parameters are optional. If credentials are not specified, they are used from the ClickHouse configuration. For more information see [Using S3 for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3). - `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file. - `structure` — Structure of the table. 
Format `'column1_name column1_type, column2_name column2_type, ...'`. From 764abb641089bb220038704673377b55531fc1d7 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 11 Jan 2023 15:55:18 +0300 Subject: [PATCH 196/262] try to fix flaky test_ttl_move_memory_usage --- .../test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py b/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py index a1e10cde031..9d53b7c048b 100644 --- a/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py +++ b/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py @@ -48,7 +48,9 @@ def test_move_and_s3_memory_usage(started_single_node_cluster): ) small_node.query("system flush logs") max_usage = small_node.query( - "select max(CurrentMetric_MemoryTracking) from system.metric_log" + """select max(m.val - am.val * 4096) from + (select toStartOfMinute(event_time) as time, max(CurrentMetric_MemoryTracking) as val from system.metric_log group by time) as m join + (select toStartOfMinute(event_time) as time, min(value) as val from system.asynchronous_metric_log where metric='jemalloc.arenas.all.pdirty' group by time) as am using time""" ) # 3G limit is a big one. However, we can hit it anyway with parallel s3 writes enabled. # Also actual value can be bigger because of memory drift. From a79f6d19fa0d8cad4a38255f7ae547e1bfee02bf Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 11 Jan 2023 13:04:05 +0000 Subject: [PATCH 197/262] add docs for `system.moves` table --- docs/en/operations/system-tables/moves.md | 42 +++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 docs/en/operations/system-tables/moves.md diff --git a/docs/en/operations/system-tables/moves.md b/docs/en/operations/system-tables/moves.md new file mode 100644 index 00000000000..e790946a15f --- /dev/null +++ b/docs/en/operations/system-tables/moves.md @@ -0,0 +1,42 @@ +--- +slug: /en/operations/system-tables/moves +--- +# moves + +The table contains information about in-progress [data part moves](/docs/en/sql-reference/statements/alter/partition#move-partitionpart) of [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) tables. Each data part movement is represented by a single row. + +Columns: + +- `database` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the database. + +- `table` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the table containing moving data part. + +- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — Time elapsed (in seconds) since data part movement started. + +- `target_disk_name` ([String](disks.md)) — Name of [disk](/docs/en/operations/system-tables/disks/) to which the data part is moving. + +- `target_disk_path` ([String](disks.md)) — Path to the mount point of the [disk](/docs/en/operations/system-tables/disks/) in the file system. + +- `part_name` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the data part being moved. + +- `part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Data part size. + +- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Identifier of a thread performing the movement. 
+ +**Example** + +```sql +SELECT * FROM system.moves +``` + +```text +┌─database─┬─table─┬─────elapsed─┬─target_disk_name─┬─target_disk_path─┬─part_name─┬─part_size─┬─thread_id─┐ +│ default │ test2 │ 1.668056039 │ s3 │ ./disks/s3/ │ all_3_3_0 │ 136 │ 296146 │ +└──────────┴───────┴─────────────┴──────────────────┴──────────────────┴───────────┴───────────┴───────────┘ +``` + +**See Also** + +- [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) table engine +- [Using Multiple Block Devices for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree#table_engine-mergetree-multiple-volumes) +- [ALTER TABLE ... MOVE PART](/docs/en/sql-reference/statements/alter/partition#move-partitionpart) command From 4767147745787180647243cafd76b939ed09dc25 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 11 Jan 2023 08:23:44 -0500 Subject: [PATCH 198/262] format query response --- docs/en/operations/system-tables/moves.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/system-tables/moves.md b/docs/en/operations/system-tables/moves.md index e790946a15f..54f07540507 100644 --- a/docs/en/operations/system-tables/moves.md +++ b/docs/en/operations/system-tables/moves.md @@ -29,7 +29,7 @@ Columns: SELECT * FROM system.moves ``` -```text +```response ┌─database─┬─table─┬─────elapsed─┬─target_disk_name─┬─target_disk_path─┬─part_name─┬─part_size─┬─thread_id─┐ │ default │ test2 │ 1.668056039 │ s3 │ ./disks/s3/ │ all_3_3_0 │ 136 │ 296146 │ └──────────┴───────┴─────────────┴──────────────────┴──────────────────┴───────────┴───────────┴───────────┘ From 8d099a44172837e8c54c149d3d811322c757132c Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 11 Jan 2023 13:43:51 +0000 Subject: [PATCH 199/262] make more SQL queries copyable from docs in one click --- docs/en/interfaces/formats.md | 19 ++--- docs/en/operations/system-tables/disks.md | 2 +- .../system-tables/merge_tree_settings.md | 2 +- docs/en/operations/system-tables/numbers.md | 2 +- .../en/operations/system-tables/numbers_mt.md | 2 +- docs/en/operations/system-tables/one.md | 2 +- docs/en/operations/system-tables/processes.md | 2 +- .../sql-reference/table-functions/format.md | 5 +- .../sql-reference/table-functions/format.md | 5 +- .../mergetree-family/summingmergetree.md | 4 +- docs/zh/operations/system-tables/disks.md | 2 +- .../system-tables/merge_tree_settings.md | 2 +- docs/zh/operations/system-tables/numbers.md | 2 +- docs/zh/operations/system-tables/one.md | 2 +- docs/zh/sql-reference/data-types/array.md | 69 +++++++-------- docs/zh/sql-reference/data-types/enum.md | 85 ++++++++++++------- .../data-types/special-data-types/nothing.md | 8 +- docs/zh/sql-reference/data-types/tuple.md | 36 ++++---- .../functions/functions-for-nulls.md | 85 ++++++++++--------- .../functions/other-functions.md | 75 ++++++++-------- .../sql-reference/functions/uuid-functions.md | 22 +++-- docs/zh/sql-reference/operators/index.md | 24 ++---- .../sql-reference/table-functions/format.md | 5 +- 23 files changed, 235 insertions(+), 227 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index c78b34e0b0d..75ef0ac3cc0 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1265,7 +1265,7 @@ For input it uses the following correspondence between BSON types and ClickHouse | `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md) | | `\x12` int64 | 
[Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | -Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert BSON int32 value into ClickHouse UInt8). +Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert BSON int32 value into ClickHouse UInt8). Big integers and decimals (Int128/UInt128/Int256/UInt256/Decimal128/Decimal256) can be parsed from BSON Binary value with `\x00` binary subtype. In this case this format will validate that the size of binary data equals the size of expected value. Note: this format don't work properly on Big-Endian platforms. @@ -2319,25 +2319,22 @@ INSERT INTO `test2` VALUES (1),(2),(3); Queries: ```sql -:) desc file(dump.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2' - -DESCRIBE TABLE file(dump.sql, MySQLDump) -SETTINGS input_format_mysql_dump_table_name = 'test2' - -Query id: 25e66c89-e10a-42a8-9b42-1ee8bbbde5ef +DESCRIBE TABLE file(dump.sql, MySQLDump) SETTINGS input_format_mysql_dump_table_name = 'test2' +``` +```text ┌─name─┬─type────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ │ x │ Nullable(Int32) │ │ │ │ │ │ └──────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` -:) select * from file(dump.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2' - +```sql SELECT * FROM file(dump.sql, MySQLDump) SETTINGS input_format_mysql_dump_table_name = 'test2' +``` -Query id: 17d59664-ebce-4053-bb79-d46a516fb590 - +```text ┌─x─┐ │ 1 │ │ 2 │ diff --git a/docs/en/operations/system-tables/disks.md b/docs/en/operations/system-tables/disks.md index 4096a8c765c..d492e42c2ec 100644 --- a/docs/en/operations/system-tables/disks.md +++ b/docs/en/operations/system-tables/disks.md @@ -17,7 +17,7 @@ Columns: **Example** ```sql -:) SELECT * FROM system.disks; +SELECT * FROM system.disks; ``` ```text diff --git a/docs/en/operations/system-tables/merge_tree_settings.md b/docs/en/operations/system-tables/merge_tree_settings.md index 9f8006d77a7..a05d4abccda 100644 --- a/docs/en/operations/system-tables/merge_tree_settings.md +++ b/docs/en/operations/system-tables/merge_tree_settings.md @@ -15,7 +15,7 @@ Columns: **Example** ```sql -:) SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical; +SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical; ``` ```text diff --git a/docs/en/operations/system-tables/numbers.md b/docs/en/operations/system-tables/numbers.md index f2204dbf0ba..0dc001ebb6f 100644 --- a/docs/en/operations/system-tables/numbers.md +++ b/docs/en/operations/system-tables/numbers.md @@ -12,7 +12,7 @@ Reads from this table are not parallelized. **Example** ```sql -:) SELECT * FROM system.numbers LIMIT 10; +SELECT * FROM system.numbers LIMIT 10; ``` ```text diff --git a/docs/en/operations/system-tables/numbers_mt.md b/docs/en/operations/system-tables/numbers_mt.md index deb7be7dc68..cc461b29ad0 100644 --- a/docs/en/operations/system-tables/numbers_mt.md +++ b/docs/en/operations/system-tables/numbers_mt.md @@ -10,7 +10,7 @@ Used for tests. 
**Example** ```sql -:) SELECT * FROM system.numbers_mt LIMIT 10; +SELECT * FROM system.numbers_mt LIMIT 10; ``` ```text diff --git a/docs/en/operations/system-tables/one.md b/docs/en/operations/system-tables/one.md index d71c82f5e94..ee2907a6d6d 100644 --- a/docs/en/operations/system-tables/one.md +++ b/docs/en/operations/system-tables/one.md @@ -12,7 +12,7 @@ This is similar to the `DUAL` table found in other DBMSs. **Example** ```sql -:) SELECT * FROM system.one LIMIT 10; +SELECT * FROM system.one LIMIT 10; ``` ```text diff --git a/docs/en/operations/system-tables/processes.md b/docs/en/operations/system-tables/processes.md index 2cf15a9bb2b..95c46f551ef 100644 --- a/docs/en/operations/system-tables/processes.md +++ b/docs/en/operations/system-tables/processes.md @@ -20,7 +20,7 @@ Columns: - `is_all_data_sent` (Int8) – Was all data sent to the client (in other words query had been finished on the server). ```sql -:) SELECT * FROM system.processes LIMIT 10 FORMAT Vertical; +SELECT * FROM system.processes LIMIT 10 FORMAT Vertical; ``` ```text diff --git a/docs/en/sql-reference/table-functions/format.md b/docs/en/sql-reference/table-functions/format.md index 78b67a47d4e..4a0ee58d758 100644 --- a/docs/en/sql-reference/table-functions/format.md +++ b/docs/en/sql-reference/table-functions/format.md @@ -27,7 +27,7 @@ A table with data parsed from `data` argument according specified format and ext **Query:** ``` sql -:) select * from format(JSONEachRow, +SELECT * FROM format(JSONEachRow, $$ {"a": "Hello", "b": 111} {"a": "World", "b": 123} @@ -49,8 +49,7 @@ $$) **Query:** ```sql - -:) desc format(JSONEachRow, +DESC format(JSONEachRow, $$ {"a": "Hello", "b": 111} {"a": "World", "b": 123} diff --git a/docs/ru/sql-reference/table-functions/format.md b/docs/ru/sql-reference/table-functions/format.md index 5dc463e5b27..a91b4ca2b1e 100644 --- a/docs/ru/sql-reference/table-functions/format.md +++ b/docs/ru/sql-reference/table-functions/format.md @@ -27,7 +27,7 @@ A table with data parsed from `data` argument according specified format and ext **Query:** ``` sql -:) select * from format(JSONEachRow, +SELECT * FROM format(JSONEachRow, $$ {"a": "Hello", "b": 111} {"a": "World", "b": 123} @@ -49,8 +49,7 @@ $$) **Query:** ```sql - -:) desc format(JSONEachRow, +DESC format(JSONEachRow, $$ {"a": "Hello", "b": 111} {"a": "World", "b": 123} diff --git a/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md index 620a56006db..f59d327b4ae 100644 --- a/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md @@ -69,7 +69,9 @@ ORDER BY key 向其中插入数据: - :) INSERT INTO summtt Values(1,1),(1,2),(2,1) +``` sql +INSERT INTO summtt Values(1,1),(1,2),(2,1) +``` ClickHouse可能不会完整的汇总所有行([见下文](#data-processing)),因此我们在查询中使用了聚合函数 `sum` 和 `GROUP BY` 子句。 diff --git a/docs/zh/operations/system-tables/disks.md b/docs/zh/operations/system-tables/disks.md index 36f7e8de4f1..0e774632074 100644 --- a/docs/zh/operations/system-tables/disks.md +++ b/docs/zh/operations/system-tables/disks.md @@ -16,7 +16,7 @@ slug: /zh/operations/system-tables/disks **示例** ```sql -:) SELECT * FROM system.disks; +SELECT * FROM system.disks; ``` ```text diff --git a/docs/zh/operations/system-tables/merge_tree_settings.md b/docs/zh/operations/system-tables/merge_tree_settings.md index c3c424c01fe..c2bdcd14d24 100644 --- a/docs/zh/operations/system-tables/merge_tree_settings.md +++ 
b/docs/zh/operations/system-tables/merge_tree_settings.md @@ -16,7 +16,7 @@ slug: /zh/operations/system-tables/merge_tree_settings **示例** ```sql -:) SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical; +SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical; ``` ```text diff --git a/docs/zh/operations/system-tables/numbers.md b/docs/zh/operations/system-tables/numbers.md index f3db66f365b..8cb92351ae7 100644 --- a/docs/zh/operations/system-tables/numbers.md +++ b/docs/zh/operations/system-tables/numbers.md @@ -12,7 +12,7 @@ slug: /zh/operations/system-tables/numbers **示例** ```sql -:) SELECT * FROM system.numbers LIMIT 10; +SELECT * FROM system.numbers LIMIT 10; ``` ```text diff --git a/docs/zh/operations/system-tables/one.md b/docs/zh/operations/system-tables/one.md index 6929b1b4245..2e44a9bd89a 100644 --- a/docs/zh/operations/system-tables/one.md +++ b/docs/zh/operations/system-tables/one.md @@ -12,7 +12,7 @@ slug: /zh/operations/system-tables/one **示例** ```sql -:) SELECT * FROM system.one LIMIT 10; +SELECT * FROM system.one LIMIT 10; ``` ```text diff --git a/docs/zh/sql-reference/data-types/array.md b/docs/zh/sql-reference/data-types/array.md index e2f18a42de8..0c38eb86004 100644 --- a/docs/zh/sql-reference/data-types/array.md +++ b/docs/zh/sql-reference/data-types/array.md @@ -19,29 +19,25 @@ slug: /zh/sql-reference/data-types/array 创建数组示例: - :) SELECT array(1, 2) AS x, toTypeName(x) +```sql +SELECT array(1, 2) AS x, toTypeName(x) +``` - SELECT - [1, 2] AS x, - toTypeName(x) +```text +┌─x─────┬─toTypeName(array(1, 2))─┐ +│ [1,2] │ Array(UInt8) │ +└───────┴─────────────────────────┘ +``` - ┌─x─────┬─toTypeName(array(1, 2))─┐ - │ [1,2] │ Array(UInt8) │ - └───────┴─────────────────────────┘ +``` sql +SELECT [1, 2] AS x, toTypeName(x) +``` - 1 rows in set. Elapsed: 0.002 sec. - - :) SELECT [1, 2] AS x, toTypeName(x) - - SELECT - [1, 2] AS x, - toTypeName(x) - - ┌─x─────┬─toTypeName([1, 2])─┐ - │ [1,2] │ Array(UInt8) │ - └───────┴────────────────────┘ - - 1 rows in set. Elapsed: 0.002 sec. +``` text +┌─x─────┬─toTypeName([1, 2])─┐ +│ [1,2] │ Array(UInt8) │ +└───────┴────────────────────┘ +``` ## 使用数据类型 {#shi-yong-shu-ju-lei-xing} @@ -50,26 +46,23 @@ ClickHouse会自动检测数组元素,并根据元素计算出存储这些元素 如果 ClickHouse 无法确定数据类型,它将产生异常。当尝试同时创建一个包含字符串和数字的数组时会发生这种情况 (`SELECT array(1, 'a')`)。 自动数据类型检测示例: +```sql +SELECT array(1, 2, NULL) AS x, toTypeName(x) +``` - :) SELECT array(1, 2, NULL) AS x, toTypeName(x) - - SELECT - [1, 2, NULL] AS x, - toTypeName(x) - - ┌─x──────────┬─toTypeName(array(1, 2, NULL))─┐ - │ [1,2,NULL] │ Array(Nullable(UInt8)) │ - └────────────┴───────────────────────────────┘ - - 1 rows in set. Elapsed: 0.002 sec. +```text +┌─x──────────┬─toTypeName(array(1, 2, NULL))─┐ +│ [1,2,NULL] │ Array(Nullable(UInt8)) │ +└────────────┴───────────────────────────────┘ +``` 如果您尝试创建不兼容的数据类型数组,ClickHouse 将引发异常: - :) SELECT array(1, 'a') +```sql +SELECT array(1, 'a') +``` - SELECT [1, 'a'] - - Received exception from server (version 1.1.54388): - Code: 386. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: There is no supertype for types UInt8, String because some of them are String/FixedString and some of them are not. - - 0 rows in set. Elapsed: 0.246 sec. +```text +Received exception from server (version 1.1.54388): +Code: 386. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: There is no supertype for types UInt8, String because some of them are String/FixedString and some of them are not. 
+``` diff --git a/docs/zh/sql-reference/data-types/enum.md b/docs/zh/sql-reference/data-types/enum.md index 0cf8a02d76b..9832df3da02 100644 --- a/docs/zh/sql-reference/data-types/enum.md +++ b/docs/zh/sql-reference/data-types/enum.md @@ -20,49 +20,64 @@ slug: /zh/sql-reference/data-types/enum 这个 `x` 列只能存储类型定义中列出的值:`'hello'`或`'world'`。如果您尝试保存任何其他值,ClickHouse 抛出异常。 - :) INSERT INTO t_enum VALUES ('hello'), ('world'), ('hello') +```sql +INSERT INTO t_enum VALUES ('hello'), ('world'), ('hello') +``` - INSERT INTO t_enum VALUES +```text +Ok. - Ok. +3 rows in set. Elapsed: 0.002 sec. +``` - 3 rows in set. Elapsed: 0.002 sec. +```sql +INSERT INTO t_enum VALUES('a') +``` - :) insert into t_enum values('a') - - INSERT INTO t_enum VALUES - - - Exception on client: - Code: 49. DB::Exception: Unknown element 'a' for type Enum8('hello' = 1, 'world' = 2) +```text +Exception on client: +Code: 49. DB::Exception: Unknown element 'a' for type Enum8('hello' = 1, 'world' = 2) +``` 当您从表中查询数据时,ClickHouse 从 `Enum` 中输出字符串值。 - SELECT * FROM t_enum +```sql +SELECT * FROM t_enum +``` - ┌─x─────┐ - │ hello │ - │ world │ - │ hello │ - └───────┘ +```text +┌─x─────┐ +│ hello │ +│ world │ +│ hello │ +└───────┘ +``` 如果需要看到对应行的数值,则必须将 `Enum` 值转换为整数类型。 - SELECT CAST(x, 'Int8') FROM t_enum +```sql +SELECT CAST(x, 'Int8') FROM t_enum +``` - ┌─CAST(x, 'Int8')─┐ - │ 1 │ - │ 2 │ - │ 1 │ - └─────────────────┘ +```text +┌─CAST(x, 'Int8')─┐ +│ 1 │ +│ 2 │ +│ 1 │ +└─────────────────┘ +``` 在查询中创建枚举值,您还需要使用 `CAST`。 - SELECT toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)')) +```sql +SELECT toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)')) +``` - ┌─toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)'))─┐ - │ Enum8('a' = 1, 'b' = 2) │ - └──────────────────────────────────────────────────────┘ +```text +┌─toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)'))─┐ +│ Enum8('a' = 1, 'b' = 2) │ +└──────────────────────────────────────────────────────┘ +``` ## 规则及用法 {#gui-ze-ji-yong-fa} @@ -72,15 +87,19 @@ slug: /zh/sql-reference/data-types/enum `Enum` 包含在 [可为空](nullable.md) 类型中。因此,如果您使用此查询创建一个表 - CREATE TABLE t_enum_nullable - ( - x Nullable( Enum8('hello' = 1, 'world' = 2) ) - ) - ENGINE = TinyLog +```sql +CREATE TABLE t_enum_nullable +( + x Nullable( Enum8('hello' = 1, 'world' = 2) ) +) +ENGINE = TinyLog +``` 不仅可以存储 `'hello'` 和 `'world'` ,还可以存储 `NULL`。 - INSERT INTO t_enum_nullable Values('hello'),('world'),(NULL) +```sql +INSERT INTO t_enum_nullable Values('hello'),('world'),(NULL) +``` 在内存中,`Enum` 列的存储方式与相应数值的 `Int8` 或 `Int16` 相同。 diff --git a/docs/zh/sql-reference/data-types/special-data-types/nothing.md b/docs/zh/sql-reference/data-types/special-data-types/nothing.md index 2b10934f566..e123622edf6 100644 --- a/docs/zh/sql-reference/data-types/special-data-types/nothing.md +++ b/docs/zh/sql-reference/data-types/special-data-types/nothing.md @@ -9,11 +9,11 @@ slug: /zh/sql-reference/data-types/special-data-types/nothing `Nothing` 类型也可以用来表示空数组: -``` bash -:) SELECT toTypeName(array()) - -SELECT toTypeName([]) +```sql +SELECT toTypeName(array()) +``` +```text ┌─toTypeName(array())─┐ │ Array(Nothing) │ └─────────────────────┘ diff --git a/docs/zh/sql-reference/data-types/tuple.md b/docs/zh/sql-reference/data-types/tuple.md index e991fa7145a..905a872da24 100644 --- a/docs/zh/sql-reference/data-types/tuple.md +++ b/docs/zh/sql-reference/data-types/tuple.md @@ -17,17 +17,15 @@ slug: /zh/sql-reference/data-types/tuple 创建元组的示例: - :) SELECT tuple(1,'a') AS x, toTypeName(x) +```sql +SELECT tuple(1,'a') AS x, toTypeName(x) +``` - SELECT - (1, 'a') 
AS x, - toTypeName(x) - - ┌─x───────┬─toTypeName(tuple(1, 'a'))─┐ - │ (1,'a') │ Tuple(UInt8, String) │ - └─────────┴───────────────────────────┘ - - 1 rows in set. Elapsed: 0.021 sec. +```text +┌─x───────┬─toTypeName(tuple(1, 'a'))─┐ +│ (1,'a') │ Tuple(UInt8, String) │ +└─────────┴───────────────────────────┘ +``` ## 元组中的数据类型 {#yuan-zu-zhong-de-shu-ju-lei-xing} @@ -35,14 +33,12 @@ slug: /zh/sql-reference/data-types/tuple 自动数据类型检测示例: - SELECT tuple(1, NULL) AS x, toTypeName(x) +```sql +SELECT tuple(1, NULL) AS x, toTypeName(x) +``` - SELECT - (1, NULL) AS x, - toTypeName(x) - - ┌─x────────┬─toTypeName(tuple(1, NULL))──────┐ - │ (1,NULL) │ Tuple(UInt8, Nullable(Nothing)) │ - └──────────┴─────────────────────────────────┘ - - 1 rows in set. Elapsed: 0.002 sec. +```text +┌─x────────┬─toTypeName(tuple(1, NULL))──────┐ +│ (1,NULL) │ Tuple(UInt8, Nullable(Nothing)) │ +└──────────┴─────────────────────────────────┘ +``` diff --git a/docs/zh/sql-reference/functions/functions-for-nulls.md b/docs/zh/sql-reference/functions/functions-for-nulls.md index 1ae53f5ddc1..9ecf39e56c5 100644 --- a/docs/zh/sql-reference/functions/functions-for-nulls.md +++ b/docs/zh/sql-reference/functions/functions-for-nulls.md @@ -22,24 +22,24 @@ slug: /zh/sql-reference/functions/functions-for-nulls 存在以下内容的表 - ┌─x─┬────y─┐ - │ 1 │ ᴺᵁᴸᴸ │ - │ 2 │ 3 │ - └───┴──────┘ +```text +┌─x─┬────y─┐ +│ 1 │ ᴺᵁᴸᴸ │ +│ 2 │ 3 │ +└───┴──────┘ +``` 对其进行查询 - :) SELECT x FROM t_null WHERE isNull(y) +```sql +SELECT x FROM t_null WHERE isNull(y) +``` - SELECT x - FROM t_null - WHERE isNull(y) - - ┌─x─┐ - │ 1 │ - └───┘ - - 1 rows in set. Elapsed: 0.010 sec. +```text +┌─x─┐ +│ 1 │ +└───┘ +``` ## isNotNull {#isnotnull} @@ -60,24 +60,24 @@ slug: /zh/sql-reference/functions/functions-for-nulls 存在以下内容的表 - ┌─x─┬────y─┐ - │ 1 │ ᴺᵁᴸᴸ │ - │ 2 │ 3 │ - └───┴──────┘ +```text +┌─x─┬────y─┐ +│ 1 │ ᴺᵁᴸᴸ │ +│ 2 │ 3 │ +└───┴──────┘ +``` 对其进行查询 - :) SELECT x FROM t_null WHERE isNotNull(y) +```sql +SELECT x FROM t_null WHERE isNotNull(y) +``` - SELECT x - FROM t_null - WHERE isNotNull(y) - - ┌─x─┐ - │ 2 │ - └───┘ - - 1 rows in set. Elapsed: 0.010 sec. +```text +┌─x─┐ +│ 2 │ +└───┘ +``` ## 合并 {#coalesce} @@ -98,26 +98,27 @@ slug: /zh/sql-reference/functions/functions-for-nulls 考虑可以指定多种联系客户的方式的联系人列表。 - ┌─name─────┬─mail─┬─phone─────┬──icq─┐ - │ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │ 123 │ - │ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ - └──────────┴──────┴───────────┴──────┘ +```text +┌─name─────┬─mail─┬─phone─────┬──icq─┐ +│ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │ 123 │ +│ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ +└──────────┴──────┴───────────┴──────┘ +``` `mail`和`phone`字段是String类型,但`icq`字段是`UInt32`,所以它需要转换为`String`。 从联系人列表中获取客户的第一个可用联系方式: - :) SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook +```sql +SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook +``` - SELECT coalesce(mail, phone, CAST(icq, 'Nullable(String)')) - FROM aBook - - ┌─name─────┬─coalesce(mail, phone, CAST(icq, 'Nullable(String)'))─┐ - │ client 1 │ 123-45-67 │ - │ client 2 │ ᴺᵁᴸᴸ │ - └──────────┴──────────────────────────────────────────────────────┘ - - 2 rows in set. Elapsed: 0.006 sec. 
+```text +┌─name─────┬─coalesce(mail, phone, CAST(icq, 'Nullable(String)'))─┐ +│ client 1 │ 123-45-67 │ +│ client 2 │ ᴺᵁᴸᴸ │ +└──────────┴──────────────────────────────────────────────────────┘ +``` ## ifNull {#ifnull} diff --git a/docs/zh/sql-reference/functions/other-functions.md b/docs/zh/sql-reference/functions/other-functions.md index 07acf8fdfe0..a5c67e94921 100644 --- a/docs/zh/sql-reference/functions/other-functions.md +++ b/docs/zh/sql-reference/functions/other-functions.md @@ -398,23 +398,25 @@ FROM **`toTypeName ' 与 ' toColumnTypeName`的区别示例** - :) select toTypeName(cast('2018-01-01 01:02:03' AS DateTime)) +```sql +SELECT toTypeName(CAST('2018-01-01 01:02:03', 'DateTime')) +``` - SELECT toTypeName(CAST('2018-01-01 01:02:03', 'DateTime')) +```text +┌─toTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐ +│ DateTime │ +└─────────────────────────────────────────────────────┘ +``` - ┌─toTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐ - │ DateTime │ - └─────────────────────────────────────────────────────┘ +```sql +SELECT toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime')) +``` - 1 rows in set. Elapsed: 0.008 sec. - - :) select toColumnTypeName(cast('2018-01-01 01:02:03' AS DateTime)) - - SELECT toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime')) - - ┌─toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐ - │ Const(UInt32) │ - └───────────────────────────────────────────────────────────┘ +```text +┌─toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐ +│ Const(UInt32) │ +└───────────────────────────────────────────────────────────┘ +``` 该示例显示`DateTime`数据类型作为`Const(UInt32)`存储在内存中。 @@ -460,26 +462,25 @@ FROM **示例** - :) SELECT defaultValueOfArgumentType( CAST(1 AS Int8) ) +```sql +SELECT defaultValueOfArgumentType(CAST(1, 'Int8')) +``` - SELECT defaultValueOfArgumentType(CAST(1, 'Int8')) +```text +┌─defaultValueOfArgumentType(CAST(1, 'Int8'))─┐ +│ 0 │ +└─────────────────────────────────────────────┘ +``` - ┌─defaultValueOfArgumentType(CAST(1, 'Int8'))─┐ - │ 0 │ - └─────────────────────────────────────────────┘ - - 1 rows in set. Elapsed: 0.002 sec. - - :) SELECT defaultValueOfArgumentType( CAST(1 AS Nullable(Int8) ) ) - - SELECT defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)')) - - ┌─defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)'))─┐ - │ ᴺᵁᴸᴸ │ - └───────────────────────────────────────────────────────┘ - - 1 rows in set. Elapsed: 0.002 sec. 
+```sql +SELECT defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)')) +``` +```text +┌─defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)'))─┐ +│ ᴺᵁᴸᴸ │ +└───────────────────────────────────────────────────────┘ +``` ## indexHint {#indexhint} 输出符合索引选择范围内的所有数据,同时不实用参数中的表达式进行过滤。 @@ -506,9 +507,11 @@ SELECT count() FROM ontime 对该表进行如下的查询: +```sql +SELECT FlightDate AS k, count() FROM ontime GROUP BY k ORDER BY k ``` -:) SELECT FlightDate AS k, count() FROM ontime GROUP BY k ORDER BY k +```text SELECT FlightDate AS k, count() @@ -530,9 +533,11 @@ ORDER BY k ASC 在这个查询中,由于没有使用索引,所以ClickHouse将处理整个表的所有数据(`Processed 4.28 million rows`)。使用下面的查询尝试使用索引进行查询: +```sql +SELECT FlightDate AS k, count() FROM ontime WHERE k = '2017-09-15' GROUP BY k ORDER BY k ``` -:) SELECT FlightDate AS k, count() FROM ontime WHERE k = '2017-09-15' GROUP BY k ORDER BY k +```text SELECT FlightDate AS k, count() @@ -552,9 +557,11 @@ ORDER BY k ASC 现在将表达式`k = '2017-09-15'`传递给`indexHint`函数: +```sql +SELECT FlightDate AS k, count() FROM ontime WHERE indexHint(k = '2017-09-15') GROUP BY k ORDER BY k ``` -:) SELECT FlightDate AS k, count() FROM ontime WHERE indexHint(k = '2017-09-15') GROUP BY k ORDER BY k +```text SELECT FlightDate AS k, count() diff --git a/docs/zh/sql-reference/functions/uuid-functions.md b/docs/zh/sql-reference/functions/uuid-functions.md index 8ee65dd52d0..e635fd4fba8 100644 --- a/docs/zh/sql-reference/functions/uuid-functions.md +++ b/docs/zh/sql-reference/functions/uuid-functions.md @@ -21,13 +21,13 @@ UUID类型的值。 此示例演示如何在表中创建UUID类型的列,并对其写入数据。 -``` sql -:) CREATE TABLE t_uuid (x UUID) ENGINE=TinyLog - -:) INSERT INTO t_uuid SELECT generateUUIDv4() - -:) SELECT * FROM t_uuid +```sql +CREATE TABLE t_uuid (x UUID) ENGINE=TinyLog +INSERT INTO t_uuid SELECT generateUUIDv4() +SELECT * FROM t_uuid +``` +```text ┌────────────────────────────────────x─┐ │ f4bf890f-f9dc-4332-ad5c-0c18e73f28e9 │ └──────────────────────────────────────┘ @@ -47,9 +47,11 @@ UUID类型的值 **使用示例** -``` sql -:) SELECT toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0') AS uuid +```sql +SELECT toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0') AS uuid +``` +```text ┌─────────────────────────────────uuid─┐ │ 61f0c404-5cb3-11e7-907b-a6006ad3dba0 │ └──────────────────────────────────────┘ @@ -70,10 +72,12 @@ UUIDStringToNum(String) **使用示例** ``` sql -:) SELECT +SELECT '612f3c40-5d3b-217e-707b-6a546a3d7b29' AS uuid, UUIDStringToNum(uuid) AS bytes +``` +```text ┌─uuid─────────────────────────────────┬─bytes────────────┐ │ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │ a/<@];!~p{jTj={) │ └──────────────────────────────────────┴──────────────────┘ diff --git a/docs/zh/sql-reference/operators/index.md b/docs/zh/sql-reference/operators/index.md index 7e0bd9a9cfb..8544f9f5a91 100644 --- a/docs/zh/sql-reference/operators/index.md +++ b/docs/zh/sql-reference/operators/index.md @@ -226,18 +226,14 @@ ClickHouse 支持 `IS NULL` 和 `IS NOT NULL` 。 -``` bash -:) SELECT x+100 FROM t_null WHERE y IS NULL - -SELECT x + 100 -FROM t_null -WHERE isNull(y) +``` sql +SELECT x+100 FROM t_null WHERE y IS NULL +``` +``` text ┌─plus(x, 100)─┐ │ 101 │ └──────────────┘ - -1 rows in set. Elapsed: 0.002 sec. ``` ### IS NOT NULL {#is-not-null} @@ -249,16 +245,12 @@ WHERE isNull(y) -``` bash -:) SELECT * FROM t_null WHERE y IS NOT NULL - -SELECT * -FROM t_null -WHERE isNotNull(y) +``` sql +SELECT * FROM t_null WHERE y IS NOT NULL +``` +``` text ┌─x─┬─y─┐ │ 2 │ 3 │ └───┴───┘ - -1 rows in set. Elapsed: 0.002 sec. 
``` diff --git a/docs/zh/sql-reference/table-functions/format.md b/docs/zh/sql-reference/table-functions/format.md index ea2087fde5e..bc017ccc3c7 100644 --- a/docs/zh/sql-reference/table-functions/format.md +++ b/docs/zh/sql-reference/table-functions/format.md @@ -27,7 +27,7 @@ A table with data parsed from `data` argument according specified format and ext **Query:** ``` sql -:) select * from format(JSONEachRow, +SELECT * FROM format(JSONEachRow, $$ {"a": "Hello", "b": 111} {"a": "World", "b": 123} @@ -49,8 +49,7 @@ $$) **Query:** ```sql - -:) desc format(JSONEachRow, +DESC format(JSONEachRow, $$ {"a": "Hello", "b": 111} {"a": "World", "b": 123} From ed63f88cd12b7824147634b9dbb4f58c394e35f3 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 11 Jan 2023 14:54:26 +0100 Subject: [PATCH 200/262] Improve README.md for clickhouse-com-content --- docs/tools/release.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/tools/release.sh b/docs/tools/release.sh index 67499631baa..c198f488822 100755 --- a/docs/tools/release.sh +++ b/docs/tools/release.sh @@ -25,7 +25,10 @@ then # Add files. cp -R "${BUILD_DIR}"/* . echo -n "${BASE_DOMAIN}" > CNAME - echo -n "" > README.md + cat > README.md << 'EOF' +## This repo is the source for https://content.clickhouse.com +It's built in [the action](https://github.com/ClickHouse/ClickHouse/blob/master/.github/workflows/docs_release.yml) in the DocsRelease job. +EOF echo -n "" > ".nojekyll" cp "${BASE_DIR}/../../LICENSE" . git add ./* From fe8f373aa6af3c710eca067192834e522e9e71d8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 11 Jan 2023 15:20:18 +0100 Subject: [PATCH 201/262] Less stupid tests --- contrib/azure | 2 +- src/Disks/tests/gtest_azure_xml_reader.cpp | 10 +++---- src/Disks/tests/gtest_disk.cpp | 34 +++++----------------- src/Disks/tests/gtest_disk.h | 8 ----- src/Storages/tests/gtest_storage_log.cpp | 10 ++----- 5 files changed, 15 insertions(+), 49 deletions(-) diff --git a/contrib/azure b/contrib/azure index 000f7ee8fd2..0d2a6b84021 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ -Subproject commit 000f7ee8fd22fa69e5ddb8fd6fd36b12c7a1bc2f +Subproject commit 0d2a6b840215fdfb1733287f3dc236d46ee0f268 diff --git a/src/Disks/tests/gtest_azure_xml_reader.cpp b/src/Disks/tests/gtest_azure_xml_reader.cpp index 3caf34f938a..b3c14e7b8bd 100644 --- a/src/Disks/tests/gtest_azure_xml_reader.cpp +++ b/src/Disks/tests/gtest_azure_xml_reader.cpp @@ -16,12 +16,10 @@ TEST(AzureXMLWrapper, TestLeak) { std::string str = "world"; - { - Azure::Storage::_internal::XmlReader reader(str.c_str(), str.length()); - reader.Read(); - Azure::Storage::_internal::XmlReader reader2(std::move(reader)); - Azure::Storage::_internal::XmlReader reader3 = std::move(reader2); - } + Azure::Storage::_internal::XmlReader reader(str.c_str(), str.length()); + reader.Read(); + Azure::Storage::_internal::XmlReader reader2(std::move(reader)); + Azure::Storage::_internal::XmlReader reader3 = std::move(reader2); } #endif diff --git a/src/Disks/tests/gtest_disk.cpp b/src/Disks/tests/gtest_disk.cpp index 8a24873c5ed..2b9db7e5ea2 100644 --- a/src/Disks/tests/gtest_disk.cpp +++ b/src/Disks/tests/gtest_disk.cpp @@ -7,49 +7,29 @@ namespace fs = std::filesystem; -template -DB::DiskPtr createDisk(); - - -template <> -DB::DiskPtr createDisk() +DB::DiskPtr createDisk() { fs::create_directory("tmp/"); return std::make_shared("local_disk", "tmp/", 0); } - -template void destroyDisk(DB::DiskPtr & disk) -{ - disk.reset(); -} - - -template <> -void 
destroyDisk(DB::DiskPtr & disk) { disk.reset(); fs::remove_all("tmp/"); } - -template class DiskTest : public testing::Test { public: - void SetUp() override { disk = createDisk(); } - void TearDown() override { destroyDisk(disk); } + void SetUp() override { disk = createDisk(); } + void TearDown() override { destroyDisk(disk); } DB::DiskPtr disk; }; -using DiskImplementations = testing::Types; -TYPED_TEST_SUITE(DiskTest, DiskImplementations); - - -TYPED_TEST(DiskTest, createDirectories) +TEST_F(DiskTest, createDirectories) { this->disk->createDirectories("test_dir1/"); EXPECT_TRUE(this->disk->isDirectory("test_dir1/")); @@ -59,7 +39,7 @@ TYPED_TEST(DiskTest, createDirectories) } -TYPED_TEST(DiskTest, writeFile) +TEST_F(DiskTest, writeFile) { { std::unique_ptr out = this->disk->writeFile("test_file"); @@ -77,7 +57,7 @@ TYPED_TEST(DiskTest, writeFile) } -TYPED_TEST(DiskTest, readFile) +TEST_F(DiskTest, readFile) { { std::unique_ptr out = this->disk->writeFile("test_file"); @@ -112,7 +92,7 @@ TYPED_TEST(DiskTest, readFile) } -TYPED_TEST(DiskTest, iterateDirectory) +TEST_F(DiskTest, iterateDirectory) { this->disk->createDirectories("test_dir/nested_dir/"); diff --git a/src/Disks/tests/gtest_disk.h b/src/Disks/tests/gtest_disk.h index 07a1269bb2e..3f0e84f3961 100644 --- a/src/Disks/tests/gtest_disk.h +++ b/src/Disks/tests/gtest_disk.h @@ -3,14 +3,6 @@ #include #include -template DB::DiskPtr createDisk(); -template <> -DB::DiskPtr createDisk(); - -template void destroyDisk(DB::DiskPtr & disk); - -template <> -void destroyDisk(DB::DiskPtr & disk); diff --git a/src/Storages/tests/gtest_storage_log.cpp b/src/Storages/tests/gtest_storage_log.cpp index c9613f1512d..b63de6a66ef 100644 --- a/src/Storages/tests/gtest_storage_log.cpp +++ b/src/Storages/tests/gtest_storage_log.cpp @@ -39,21 +39,20 @@ DB::StoragePtr createStorage(DB::DiskPtr & disk) return table; } -template class StorageLogTest : public testing::Test { public: void SetUp() override { - disk = createDisk(); + disk = createDisk(); table = createStorage(disk); } void TearDown() override { table->flushAndShutdown(); - destroyDisk(disk); + destroyDisk(disk); } const DB::DiskPtr & getDisk() { return disk; } @@ -65,9 +64,6 @@ private: }; -using DiskImplementations = testing::Types; -TYPED_TEST_SUITE(StorageLogTest, DiskImplementations); - // Returns data written to table in Values format. 
std::string writeData(int rows, DB::StoragePtr & table, const DB::ContextPtr context) { @@ -153,7 +149,7 @@ std::string readData(DB::StoragePtr & table, const DB::ContextPtr context) return out_buf.str(); } -TYPED_TEST(StorageLogTest, testReadWrite) +TEST_F(StorageLogTest, testReadWrite) { using namespace DB; const auto & context_holder = getContext(); From e581a56ed0d8620fc1266e7e2487a2150435f47d Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 11 Jan 2023 15:30:48 +0100 Subject: [PATCH 202/262] Better --- contrib/azure | 2 +- src/Disks/tests/gtest_azure_xml_reader.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/azure b/contrib/azure index 0d2a6b84021..3b857189b40 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ -Subproject commit 0d2a6b840215fdfb1733287f3dc236d46ee0f268 +Subproject commit 3b857189b401e68f34c3cd164f5b270887c76b86 diff --git a/src/Disks/tests/gtest_azure_xml_reader.cpp b/src/Disks/tests/gtest_azure_xml_reader.cpp index b3c14e7b8bd..8cb352ad2f7 100644 --- a/src/Disks/tests/gtest_azure_xml_reader.cpp +++ b/src/Disks/tests/gtest_azure_xml_reader.cpp @@ -17,9 +17,9 @@ TEST(AzureXMLWrapper, TestLeak) std::string str = "world"; Azure::Storage::_internal::XmlReader reader(str.c_str(), str.length()); - reader.Read(); Azure::Storage::_internal::XmlReader reader2(std::move(reader)); Azure::Storage::_internal::XmlReader reader3 = std::move(reader2); + reader3.Read(); } #endif From b3fc6a970625f26d60b1199caf45a4ff25ceab9a Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 11 Jan 2023 15:43:57 +0100 Subject: [PATCH 203/262] Clean out unused code for old site/docs building system --- docs/tools/build.py | 120 ++++++++++++++++++----------- docs/tools/make_links.sh | 22 ------ docs/tools/mdx_clickhouse.py | 142 ----------------------------------- docs/tools/redirects.py | 53 ------------- docs/tools/requirements.txt | 29 ------- docs/tools/util.py | 136 --------------------------------- docs/tools/website.py | 63 ---------------- 7 files changed, 77 insertions(+), 488 deletions(-) delete mode 100755 docs/tools/make_links.sh delete mode 100755 docs/tools/mdx_clickhouse.py delete mode 100644 docs/tools/redirects.py delete mode 100644 docs/tools/util.py delete mode 100644 docs/tools/website.py diff --git a/docs/tools/build.py b/docs/tools/build.py index 7f78af5e203..5653a9b949d 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -1,45 +1,96 @@ #!/usr/bin/env python3 +from pathlib import Path import argparse import logging -import os import shutil -import subprocess import sys import livereload -import redirects -import website + +def write_redirect_html(output_path: Path, to_url: str) -> None: + output_dir = output_path.parent + output_dir.mkdir(parents=True, exist_ok=True) + output_path.write_text( + f""" + + + + + + + Page Redirection + + + If you are not redirected automatically, follow this link. 
+ +""" + ) -def build(args): - if os.path.exists(args.output_dir): +def build_static_redirects(output_dir: Path): + for static_redirect in [ + ("benchmark.html", "/benchmark/dbms/"), + ("benchmark_hardware.html", "/benchmark/hardware/"), + ( + "tutorial.html", + "/docs/en/getting_started/tutorial/", + ), + ( + "reference_en.html", + "/docs/en/single/", + ), + ( + "reference_ru.html", + "/docs/ru/single/", + ), + ( + "docs/index.html", + "/docs/en/", + ), + ]: + write_redirect_html(output_dir / static_redirect[0], static_redirect[1]) + + +def build(root_dir: Path, output_dir: Path): + if output_dir.exists(): shutil.rmtree(args.output_dir) - if not args.skip_website: - website.build_website(args) - redirects.build_static_redirects(args) + (output_dir / "data").mkdir(parents=True) + + logging.info("Building website") + + # This file can be requested to check for available ClickHouse releases. + shutil.copy2( + root_dir / "utils" / "list-versions" / "version_date.tsv", + output_dir / "data" / "version_date.tsv", + ) + + # This file can be requested to install ClickHouse. + shutil.copy2( + root_dir / "docs" / "_includes" / "install" / "universal.sh", + output_dir / "data" / "install.sh", + ) + + build_static_redirects(output_dir) if __name__ == "__main__": - os.chdir(os.path.join(os.path.dirname(__file__), "..")) + root_dir = Path(__file__).parent.parent.parent + docs_dir = root_dir / "docs" - # A root path to ClickHouse source code. - src_dir = ".." - - website_dir = os.path.join(src_dir, "website") - - arg_parser = argparse.ArgumentParser() - arg_parser.add_argument("--lang", default="en,ru,zh,ja") - arg_parser.add_argument("--theme-dir", default=website_dir) - arg_parser.add_argument("--website-dir", default=website_dir) - arg_parser.add_argument("--src-dir", default=src_dir) - arg_parser.add_argument("--output-dir", default="build") - arg_parser.add_argument("--nav-limit", type=int, default="0") - arg_parser.add_argument("--skip-multi-page", action="store_true") - arg_parser.add_argument("--skip-website", action="store_true") - arg_parser.add_argument("--htmlproofer", action="store_true") + arg_parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + arg_parser.add_argument( + "--output-dir", + type=Path, + default=docs_dir / "build", + help="path to the output dir", + ) arg_parser.add_argument("--livereload", type=int, default="0") arg_parser.add_argument("--verbose", action="store_true") @@ -49,26 +100,9 @@ if __name__ == "__main__": level=logging.DEBUG if args.verbose else logging.INFO, stream=sys.stderr ) - logging.getLogger("MARKDOWN").setLevel(logging.INFO) - - args.rev = ( - subprocess.check_output("git rev-parse HEAD", shell=True) - .decode("utf-8") - .strip() - ) - args.rev_short = ( - subprocess.check_output("git rev-parse --short HEAD", shell=True) - .decode("utf-8") - .strip() - ) - args.rev_url = f"https://github.com/ClickHouse/ClickHouse/commit/{args.rev}" - - build(args) + build(root_dir, args.output_dir) if args.livereload: - new_args = [arg for arg in sys.argv if not arg.startswith("--livereload")] - new_args = sys.executable + " " + " ".join(new_args) - server = livereload.Server() server.serve(root=args.output_dir, host="0.0.0.0", port=args.livereload) sys.exit(0) diff --git a/docs/tools/make_links.sh b/docs/tools/make_links.sh deleted file mode 100755 index 801086178bf..00000000000 --- a/docs/tools/make_links.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -# Fixes missing documentation in other languages -# by putting relative 
symbolic links to the original doc file. - -BASE_DIR=$(dirname $(readlink -f $0)) - -function do_make_links() -{ - set -x - langs=(en zh ru ja) - src_file="$1" - for lang in "${langs[@]}" - do - dst_file="${src_file/\/en\///${lang}/}" - mkdir -p $(dirname "${dst_file}") - ln -sr "${src_file}" "${dst_file}" 2>/dev/null - done -} - -export -f do_make_links -find "${BASE_DIR}/../en" -iname '*.md' -exec /bin/bash -c 'do_make_links "{}"' \; diff --git a/docs/tools/mdx_clickhouse.py b/docs/tools/mdx_clickhouse.py deleted file mode 100755 index bce9f215759..00000000000 --- a/docs/tools/mdx_clickhouse.py +++ /dev/null @@ -1,142 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import datetime -import os -import subprocess - -import jinja2 -import markdown.inlinepatterns -import markdown.extensions -import markdown.util -import macros.plugin - -import slugify as slugify_impl - - -def slugify(value, separator): - return slugify_impl.slugify( - value, separator=separator, word_boundary=True, save_order=True - ) - - -MARKDOWN_EXTENSIONS = [ - "mdx_clickhouse", - "admonition", - "attr_list", - "def_list", - "codehilite", - "nl2br", - "sane_lists", - "pymdownx.details", - "pymdownx.magiclink", - "pymdownx.superfences", - "extra", - {"toc": {"permalink": True, "slugify": slugify}}, -] - - -class ClickHouseLinkMixin(object): - def handleMatch(self, m, data): - try: - el, start, end = super(ClickHouseLinkMixin, self).handleMatch(m, data) - except IndexError: - return - - if el is not None: - href = el.get("href") or "" - is_external = href.startswith("http:") or href.startswith("https:") - if is_external: - if not href.startswith("https://clickhouse.com"): - el.set("rel", "external nofollow noreferrer") - return el, start, end - - -class ClickHouseAutolinkPattern( - ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor -): - pass - - -class ClickHouseLinkPattern( - ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor -): - pass - - -class ClickHousePreprocessor(markdown.util.Processor): - def run(self, lines): - for line in lines: - if "" not in line: - yield line - - -class ClickHouseMarkdown(markdown.extensions.Extension): - def extendMarkdown(self, md, md_globals): - md.preprocessors["clickhouse"] = ClickHousePreprocessor() - md.inlinePatterns["link"] = ClickHouseLinkPattern( - markdown.inlinepatterns.LINK_RE, md - ) - md.inlinePatterns["autolink"] = ClickHouseAutolinkPattern( - markdown.inlinepatterns.AUTOLINK_RE, md - ) - - -def makeExtension(**kwargs): - return ClickHouseMarkdown(**kwargs) - - -def get_translations(dirname, lang): - import babel.support - - return babel.support.Translations.load(dirname=dirname, locales=[lang, "en"]) - - -class PatchedMacrosPlugin(macros.plugin.MacrosPlugin): - disabled = False - - def on_config(self, config): - super(PatchedMacrosPlugin, self).on_config(config) - self.env.comment_start_string = "{##" - self.env.comment_end_string = "##}" - self.env.loader = jinja2.FileSystemLoader( - [ - os.path.join(config.data["site_dir"]), - os.path.join(config.data["extra"]["includes_dir"]), - ] - ) - - def on_env(self, env, config, files): - import util - - env.add_extension("jinja2.ext.i18n") - dirname = os.path.join(config.data["theme"].dirs[0], "locale") - lang = config.data["theme"]["language"] - env.install_gettext_translations(get_translations(dirname, lang), newstyle=True) - util.init_jinja2_filters(env) - return env - - def render(self, markdown): - if not self.disabled: - return self.render_impl(markdown) - else: - return markdown 
- - def on_page_markdown(self, markdown, page, config, files): - markdown = super(PatchedMacrosPlugin, self).on_page_markdown( - markdown, page, config, files - ) - - if os.path.islink(page.file.abs_src_path): - lang = config.data["theme"]["language"] - page.canonical_url = page.canonical_url.replace(f"/{lang}/", "/en/", 1) - - return markdown - - def render_impl(self, markdown): - md_template = self.env.from_string(markdown) - return md_template.render(**self.variables) - - -macros.plugin.MacrosPlugin = PatchedMacrosPlugin diff --git a/docs/tools/redirects.py b/docs/tools/redirects.py deleted file mode 100644 index 1b5490a040f..00000000000 --- a/docs/tools/redirects.py +++ /dev/null @@ -1,53 +0,0 @@ -import os - - -def write_redirect_html(out_path, to_url): - out_dir = os.path.dirname(out_path) - try: - os.makedirs(out_dir) - except OSError: - pass - with open(out_path, "w") as f: - f.write( - f""" - - - - - - - Page Redirection - - - If you are not redirected automatically, follow this link. - -""" - ) - - -def build_static_redirects(args): - for static_redirect in [ - ("benchmark.html", "/benchmark/dbms/"), - ("benchmark_hardware.html", "/benchmark/hardware/"), - ( - "tutorial.html", - "/docs/en/getting_started/tutorial/", - ), - ( - "reference_en.html", - "/docs/en/single/", - ), - ( - "reference_ru.html", - "/docs/ru/single/", - ), - ( - "docs/index.html", - "/docs/en/", - ), - ]: - write_redirect_html( - os.path.join(args.output_dir, static_redirect[0]), static_redirect[1] - ) diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index afd6b1a889d..0e0f7c6d044 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -1,30 +1 @@ -Babel==2.9.1 -Jinja2==3.0.3 -Markdown==3.3.2 -MarkupSafe==2.1.1 -PyYAML==6.0 -Pygments>=2.12.0 -beautifulsoup4==4.9.1 -click==7.1.2 -ghp_import==2.1.0 -importlib_metadata==4.11.4 -jinja2-highlight==0.6.1 livereload==2.6.3 -mergedeep==1.3.4 -mkdocs-macros-plugin==0.4.20 -mkdocs-macros-test==0.1.0 -mkdocs-material==8.2.15 -mkdocs==1.3.0 -mkdocs_material_extensions==1.0.3 -packaging==21.3 -pymdown_extensions==9.4 -pyparsing==3.0.9 -python-slugify==4.0.1 -python_dateutil==2.8.2 -pytz==2022.1 -six==1.15.0 -soupsieve==2.3.2 -termcolor==1.1.0 -text_unidecode==1.3 -tornado==6.1 -zipp==3.8.0 diff --git a/docs/tools/util.py b/docs/tools/util.py deleted file mode 100644 index dc9fb640b47..00000000000 --- a/docs/tools/util.py +++ /dev/null @@ -1,136 +0,0 @@ -import collections -import contextlib -import datetime -import multiprocessing -import os -import shutil -import sys -import socket -import tempfile -import threading - -import jinja2 -import yaml - - -@contextlib.contextmanager -def temp_dir(): - path = tempfile.mkdtemp(dir=os.environ.get("TEMP")) - try: - yield path - finally: - shutil.rmtree(path) - - -@contextlib.contextmanager -def cd(new_cwd): - old_cwd = os.getcwd() - os.chdir(new_cwd) - try: - yield - finally: - os.chdir(old_cwd) - - -def get_free_port(): - with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: - s.bind(("", 0)) - s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - return s.getsockname()[1] - - -def run_function_in_parallel(func, args_list, threads=False): - processes = [] - exit_code = 0 - for task in args_list: - cls = threading.Thread if threads else multiprocessing.Process - processes.append(cls(target=func, args=task)) - processes[-1].start() - for process in processes: - process.join() - if not threads: - if process.exitcode and not exit_code: - exit_code = 
process.exitcode - if exit_code: - sys.exit(exit_code) - - -def read_md_file(path): - in_meta = False - meta = {} - meta_text = [] - content = [] - if os.path.exists(path): - with open(path, "r") as f: - for line in f: - if line.startswith("---"): - if in_meta: - in_meta = False - meta = yaml.full_load("".join(meta_text)) - else: - in_meta = True - else: - if in_meta: - meta_text.append(line) - else: - content.append(line) - return meta, "".join(content) - - -def write_md_file(path, meta, content): - dirname = os.path.dirname(path) - if not os.path.exists(dirname): - os.makedirs(dirname) - - with open(path, "w") as f: - if meta: - print("---", file=f) - yaml.dump(meta, f) - print("---", file=f) - if not content.startswith("\n"): - print("", file=f) - f.write(content) - - -def represent_ordereddict(dumper, data): - value = [] - for item_key, item_value in data.items(): - node_key = dumper.represent_data(item_key) - node_value = dumper.represent_data(item_value) - - value.append((node_key, node_value)) - - return yaml.nodes.MappingNode("tag:yaml.org,2002:map", value) - - -yaml.add_representer(collections.OrderedDict, represent_ordereddict) - - -def init_jinja2_filters(env): - import website - - chunk_size = 10240 - env.filters["chunks"] = lambda line: [ - line[i : i + chunk_size] for i in range(0, len(line), chunk_size) - ] - env.filters["to_rfc882"] = lambda d: datetime.datetime.strptime( - d, "%Y-%m-%d" - ).strftime("%a, %d %b %Y %H:%M:%S GMT") - - -def init_jinja2_env(args): - import mdx_clickhouse - - env = jinja2.Environment( - loader=jinja2.FileSystemLoader( - [args.website_dir, os.path.join(args.src_dir, "docs", "_includes")] - ), - extensions=["jinja2.ext.i18n", "jinja2_highlight.HighlightExtension"], - ) - env.extend(jinja2_highlight_cssclass="syntax p-3 my-3") - translations_dir = os.path.join(args.website_dir, "locale") - env.install_gettext_translations( - mdx_clickhouse.get_translations(translations_dir, "en"), newstyle=True - ) - init_jinja2_filters(env) - return env diff --git a/docs/tools/website.py b/docs/tools/website.py deleted file mode 100644 index 2a34458fd29..00000000000 --- a/docs/tools/website.py +++ /dev/null @@ -1,63 +0,0 @@ -import hashlib -import json -import logging -import os -import shutil -import subprocess - -import util - - -def build_website(args): - logging.info("Building website") - env = util.init_jinja2_env(args) - - shutil.copytree( - args.website_dir, - args.output_dir, - ignore=shutil.ignore_patterns( - "*.md", - "*.sh", - "*.css", - "*.json", - "js/*.js", - "build", - "docs", - "public", - "node_modules", - "src", - "templates", - "locale", - ".gitkeep", - ), - ) - - # This file can be requested to check for available ClickHouse releases. - shutil.copy2( - os.path.join(args.src_dir, "utils", "list-versions", "version_date.tsv"), - os.path.join(args.output_dir, "data", "version_date.tsv"), - ) - - # This file can be requested to install ClickHouse. 
- shutil.copy2( - os.path.join(args.src_dir, "docs", "_includes", "install", "universal.sh"), - os.path.join(args.output_dir, "data", "install.sh"), - ) - - for root, _, filenames in os.walk(args.output_dir): - for filename in filenames: - if filename == "main.html": - continue - - path = os.path.join(root, filename) - if not filename.endswith(".html"): - continue - logging.info("Processing %s", path) - with open(path, "rb") as f: - content = f.read().decode("utf-8") - - template = env.from_string(content) - content = template.render(args.__dict__) - - with open(path, "wb") as f: - f.write(content.encode("utf-8")) From d34a755cfb14fed23c00eb86ad22c4102682bec0 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 11 Jan 2023 09:59:23 -0500 Subject: [PATCH 204/262] switch text to response for query blocks --- docs/zh/operations/system-tables/numbers.md | 2 +- docs/zh/operations/system-tables/one.md | 2 +- docs/zh/sql-reference/data-types/array.md | 8 +++---- docs/zh/sql-reference/data-types/enum.md | 10 ++++---- .../data-types/special-data-types/nothing.md | 2 +- docs/zh/sql-reference/data-types/tuple.md | 4 ++-- .../functions/functions-for-nulls.md | 12 +++++----- .../functions/other-functions.md | 23 ++++++++++--------- .../sql-reference/functions/uuid-functions.md | 9 ++++---- docs/zh/sql-reference/operators/index.md | 8 +++---- .../sql-reference/table-functions/format.md | 4 ++-- 11 files changed, 43 insertions(+), 41 deletions(-) diff --git a/docs/zh/operations/system-tables/numbers.md b/docs/zh/operations/system-tables/numbers.md index 8cb92351ae7..801c43f8e91 100644 --- a/docs/zh/operations/system-tables/numbers.md +++ b/docs/zh/operations/system-tables/numbers.md @@ -15,7 +15,7 @@ slug: /zh/operations/system-tables/numbers SELECT * FROM system.numbers LIMIT 10; ``` -```text +```response ┌─number─┐ │ 0 │ │ 1 │ diff --git a/docs/zh/operations/system-tables/one.md b/docs/zh/operations/system-tables/one.md index 2e44a9bd89a..29dd25c5282 100644 --- a/docs/zh/operations/system-tables/one.md +++ b/docs/zh/operations/system-tables/one.md @@ -15,7 +15,7 @@ slug: /zh/operations/system-tables/one SELECT * FROM system.one LIMIT 10; ``` -```text +```response ┌─dummy─┐ │ 0 │ └───────┘ diff --git a/docs/zh/sql-reference/data-types/array.md b/docs/zh/sql-reference/data-types/array.md index 0c38eb86004..46c40b889ad 100644 --- a/docs/zh/sql-reference/data-types/array.md +++ b/docs/zh/sql-reference/data-types/array.md @@ -23,7 +23,7 @@ slug: /zh/sql-reference/data-types/array SELECT array(1, 2) AS x, toTypeName(x) ``` -```text +```response ┌─x─────┬─toTypeName(array(1, 2))─┐ │ [1,2] │ Array(UInt8) │ └───────┴─────────────────────────┘ @@ -33,7 +33,7 @@ SELECT array(1, 2) AS x, toTypeName(x) SELECT [1, 2] AS x, toTypeName(x) ``` -``` text +```response ┌─x─────┬─toTypeName([1, 2])─┐ │ [1,2] │ Array(UInt8) │ └───────┴────────────────────┘ @@ -50,7 +50,7 @@ ClickHouse会自动检测数组元素,并根据元素计算出存储这些元素 SELECT array(1, 2, NULL) AS x, toTypeName(x) ``` -```text +```response ┌─x──────────┬─toTypeName(array(1, 2, NULL))─┐ │ [1,2,NULL] │ Array(Nullable(UInt8)) │ └────────────┴───────────────────────────────┘ @@ -62,7 +62,7 @@ SELECT array(1, 2, NULL) AS x, toTypeName(x) SELECT array(1, 'a') ``` -```text +```response Received exception from server (version 1.1.54388): Code: 386. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: There is no supertype for types UInt8, String because some of them are String/FixedString and some of them are not. 
``` diff --git a/docs/zh/sql-reference/data-types/enum.md b/docs/zh/sql-reference/data-types/enum.md index 9832df3da02..496a4c5a78c 100644 --- a/docs/zh/sql-reference/data-types/enum.md +++ b/docs/zh/sql-reference/data-types/enum.md @@ -24,7 +24,7 @@ slug: /zh/sql-reference/data-types/enum INSERT INTO t_enum VALUES ('hello'), ('world'), ('hello') ``` -```text +```response Ok. 3 rows in set. Elapsed: 0.002 sec. @@ -34,7 +34,7 @@ Ok. INSERT INTO t_enum VALUES('a') ``` -```text +```response Exception on client: Code: 49. DB::Exception: Unknown element 'a' for type Enum8('hello' = 1, 'world' = 2) ``` @@ -45,7 +45,7 @@ Code: 49. DB::Exception: Unknown element 'a' for type Enum8('hello' = 1, 'world' SELECT * FROM t_enum ``` -```text +```response ┌─x─────┐ │ hello │ │ world │ @@ -59,7 +59,7 @@ SELECT * FROM t_enum SELECT CAST(x, 'Int8') FROM t_enum ``` -```text +```response ┌─CAST(x, 'Int8')─┐ │ 1 │ │ 2 │ @@ -73,7 +73,7 @@ SELECT CAST(x, 'Int8') FROM t_enum SELECT toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)')) ``` -```text +```response ┌─toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)'))─┐ │ Enum8('a' = 1, 'b' = 2) │ └──────────────────────────────────────────────────────┘ diff --git a/docs/zh/sql-reference/data-types/special-data-types/nothing.md b/docs/zh/sql-reference/data-types/special-data-types/nothing.md index e123622edf6..19a78cb540e 100644 --- a/docs/zh/sql-reference/data-types/special-data-types/nothing.md +++ b/docs/zh/sql-reference/data-types/special-data-types/nothing.md @@ -13,7 +13,7 @@ slug: /zh/sql-reference/data-types/special-data-types/nothing SELECT toTypeName(array()) ``` -```text +```response ┌─toTypeName(array())─┐ │ Array(Nothing) │ └─────────────────────┘ diff --git a/docs/zh/sql-reference/data-types/tuple.md b/docs/zh/sql-reference/data-types/tuple.md index 905a872da24..004c80ff916 100644 --- a/docs/zh/sql-reference/data-types/tuple.md +++ b/docs/zh/sql-reference/data-types/tuple.md @@ -21,7 +21,7 @@ slug: /zh/sql-reference/data-types/tuple SELECT tuple(1,'a') AS x, toTypeName(x) ``` -```text +```response ┌─x───────┬─toTypeName(tuple(1, 'a'))─┐ │ (1,'a') │ Tuple(UInt8, String) │ └─────────┴───────────────────────────┘ @@ -37,7 +37,7 @@ SELECT tuple(1,'a') AS x, toTypeName(x) SELECT tuple(1, NULL) AS x, toTypeName(x) ``` -```text +```response ┌─x────────┬─toTypeName(tuple(1, NULL))──────┐ │ (1,NULL) │ Tuple(UInt8, Nullable(Nothing)) │ └──────────┴─────────────────────────────────┘ diff --git a/docs/zh/sql-reference/functions/functions-for-nulls.md b/docs/zh/sql-reference/functions/functions-for-nulls.md index 9ecf39e56c5..4dd30970923 100644 --- a/docs/zh/sql-reference/functions/functions-for-nulls.md +++ b/docs/zh/sql-reference/functions/functions-for-nulls.md @@ -22,7 +22,7 @@ slug: /zh/sql-reference/functions/functions-for-nulls 存在以下内容的表 -```text +```response ┌─x─┬────y─┐ │ 1 │ ᴺᵁᴸᴸ │ │ 2 │ 3 │ @@ -35,7 +35,7 @@ slug: /zh/sql-reference/functions/functions-for-nulls SELECT x FROM t_null WHERE isNull(y) ``` -```text +```response ┌─x─┐ │ 1 │ └───┘ @@ -60,7 +60,7 @@ SELECT x FROM t_null WHERE isNull(y) 存在以下内容的表 -```text +```response ┌─x─┬────y─┐ │ 1 │ ᴺᵁᴸᴸ │ │ 2 │ 3 │ @@ -73,7 +73,7 @@ SELECT x FROM t_null WHERE isNull(y) SELECT x FROM t_null WHERE isNotNull(y) ``` -```text +```response ┌─x─┐ │ 2 │ └───┘ @@ -98,7 +98,7 @@ SELECT x FROM t_null WHERE isNotNull(y) 考虑可以指定多种联系客户的方式的联系人列表。 -```text +```response ┌─name─────┬─mail─┬─phone─────┬──icq─┐ │ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │ 123 │ │ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ @@ -113,7 +113,7 @@ SELECT x FROM t_null WHERE 
isNotNull(y) SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook ``` -```text +```response ┌─name─────┬─coalesce(mail, phone, CAST(icq, 'Nullable(String)'))─┐ │ client 1 │ 123-45-67 │ │ client 2 │ ᴺᵁᴸᴸ │ diff --git a/docs/zh/sql-reference/functions/other-functions.md b/docs/zh/sql-reference/functions/other-functions.md index a5c67e94921..2eeaad63694 100644 --- a/docs/zh/sql-reference/functions/other-functions.md +++ b/docs/zh/sql-reference/functions/other-functions.md @@ -33,7 +33,7 @@ slug: /zh/sql-reference/functions/other-functions SELECT 'some/long/path/to/file' AS a, basename(a) ``` -``` text +```response ┌─a──────────────────────┬─basename('some\\long\\path\\to\\file')─┐ │ some\long\path\to\file │ file │ └────────────────────────┴────────────────────────────────────────┘ @@ -43,7 +43,7 @@ SELECT 'some/long/path/to/file' AS a, basename(a) SELECT 'some\\long\\path\\to\\file' AS a, basename(a) ``` -``` text +```response ┌─a──────────────────────┬─basename('some\\long\\path\\to\\file')─┐ │ some\long\path\to\file │ file │ └────────────────────────┴────────────────────────────────────────┘ @@ -53,7 +53,7 @@ SELECT 'some\\long\\path\\to\\file' AS a, basename(a) SELECT 'some-file-name' AS a, basename(a) ``` -``` text +```response ┌─a──────────────┬─basename('some-file-name')─┐ │ some-file-name │ some-file-name │ └────────────────┴────────────────────────────┘ @@ -402,7 +402,7 @@ FROM SELECT toTypeName(CAST('2018-01-01 01:02:03', 'DateTime')) ``` -```text +```response ┌─toTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐ │ DateTime │ └─────────────────────────────────────────────────────┘ @@ -412,7 +412,7 @@ SELECT toTypeName(CAST('2018-01-01 01:02:03', 'DateTime')) SELECT toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime')) ``` -```text +```response ┌─toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐ │ Const(UInt32) │ └───────────────────────────────────────────────────────────┘ @@ -466,7 +466,7 @@ SELECT toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime')) SELECT defaultValueOfArgumentType(CAST(1, 'Int8')) ``` -```text +```response ┌─defaultValueOfArgumentType(CAST(1, 'Int8'))─┐ │ 0 │ └─────────────────────────────────────────────┘ @@ -476,7 +476,7 @@ SELECT defaultValueOfArgumentType(CAST(1, 'Int8')) SELECT defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)')) ``` -```text +```response ┌─defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)'))─┐ │ ᴺᵁᴸᴸ │ └───────────────────────────────────────────────────────┘ @@ -497,7 +497,8 @@ SELECT defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)')) ``` SELECT count() FROM ontime - +``` +```response ┌─count()─┐ │ 4276457 │ └─────────┘ @@ -511,7 +512,7 @@ SELECT count() FROM ontime SELECT FlightDate AS k, count() FROM ontime GROUP BY k ORDER BY k ``` -```text +```response SELECT FlightDate AS k, count() @@ -537,7 +538,7 @@ ORDER BY k ASC SELECT FlightDate AS k, count() FROM ontime WHERE k = '2017-09-15' GROUP BY k ORDER BY k ``` -```text +```response SELECT FlightDate AS k, count() @@ -561,7 +562,7 @@ ORDER BY k ASC SELECT FlightDate AS k, count() FROM ontime WHERE indexHint(k = '2017-09-15') GROUP BY k ORDER BY k ``` -```text +```response SELECT FlightDate AS k, count() diff --git a/docs/zh/sql-reference/functions/uuid-functions.md b/docs/zh/sql-reference/functions/uuid-functions.md index e635fd4fba8..57b75a6c889 100644 --- a/docs/zh/sql-reference/functions/uuid-functions.md +++ b/docs/zh/sql-reference/functions/uuid-functions.md @@ -27,7 +27,7 @@ INSERT INTO t_uuid SELECT generateUUIDv4() SELECT * FROM t_uuid 
``` -```text +```response ┌────────────────────────────────────x─┐ │ f4bf890f-f9dc-4332-ad5c-0c18e73f28e9 │ └──────────────────────────────────────┘ @@ -51,7 +51,7 @@ UUID类型的值 SELECT toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0') AS uuid ``` -```text +```response ┌─────────────────────────────────uuid─┐ │ 61f0c404-5cb3-11e7-907b-a6006ad3dba0 │ └──────────────────────────────────────┘ @@ -77,7 +77,7 @@ SELECT UUIDStringToNum(uuid) AS bytes ``` -```text +```response ┌─uuid─────────────────────────────────┬─bytes────────────┐ │ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │ a/<@];!~p{jTj={) │ └──────────────────────────────────────┴──────────────────┘ @@ -101,7 +101,8 @@ UUIDNumToString(FixedString(16)) SELECT 'a/<@];!~p{jTj={)' AS bytes, UUIDNumToString(toFixedString(bytes, 16)) AS uuid - +``` +```response ┌─bytes────────────┬─uuid─────────────────────────────────┐ │ a/<@];!~p{jTj={) │ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │ └──────────────────┴──────────────────────────────────────┘ diff --git a/docs/zh/sql-reference/operators/index.md b/docs/zh/sql-reference/operators/index.md index 8544f9f5a91..353386903c4 100644 --- a/docs/zh/sql-reference/operators/index.md +++ b/docs/zh/sql-reference/operators/index.md @@ -143,7 +143,7 @@ SELECT FROM test.Orders; ``` -``` text +``` response ┌─OrderYear─┬─OrderMonth─┬─OrderDay─┬─OrderHour─┬─OrderMinute─┬─OrderSecond─┐ │ 2008 │ 10 │ 11 │ 13 │ 23 │ 44 │ └───────────┴────────────┴──────────┴───────────┴─────────────┴─────────────┘ @@ -161,7 +161,7 @@ FROM test.Orders; SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR ``` -``` text +``` response ┌───current_date_time─┬─plus(plus(now(), toIntervalDay(4)), toIntervalHour(3))─┐ │ 2019-10-23 11:16:28 │ 2019-10-27 14:16:28 │ └─────────────────────┴────────────────────────────────────────────────────────┘ @@ -230,7 +230,7 @@ ClickHouse 支持 `IS NULL` 和 `IS NOT NULL` 。 SELECT x+100 FROM t_null WHERE y IS NULL ``` -``` text +``` response ┌─plus(x, 100)─┐ │ 101 │ └──────────────┘ @@ -249,7 +249,7 @@ SELECT x+100 FROM t_null WHERE y IS NULL SELECT * FROM t_null WHERE y IS NOT NULL ``` -``` text +``` response ┌─x─┬─y─┐ │ 2 │ 3 │ └───┴───┘ diff --git a/docs/zh/sql-reference/table-functions/format.md b/docs/zh/sql-reference/table-functions/format.md index bc017ccc3c7..f84d047e599 100644 --- a/docs/zh/sql-reference/table-functions/format.md +++ b/docs/zh/sql-reference/table-functions/format.md @@ -38,7 +38,7 @@ $$) **Result:** -```text +```response ┌───b─┬─a─────┐ │ 111 │ Hello │ │ 123 │ World │ @@ -60,7 +60,7 @@ $$) **Result:** -```text +```response ┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ │ b │ Nullable(Float64) │ │ │ │ │ │ │ a │ Nullable(String) │ │ │ │ │ │ From 7168c217b0bd26ff47fd13a8f0e8f8fcc01b2839 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 11 Jan 2023 10:08:11 -0500 Subject: [PATCH 205/262] switch text to response for query blocks --- docs/en/interfaces/formats.md | 28 +++++++++---------- docs/en/operations/system-tables/disks.md | 2 +- .../system-tables/merge_tree_settings.md | 2 +- docs/en/operations/system-tables/numbers.md | 2 +- .../en/operations/system-tables/numbers_mt.md | 2 +- docs/en/operations/system-tables/one.md | 2 +- docs/en/operations/system-tables/processes.md | 2 +- .../sql-reference/table-functions/format.md | 4 +-- .../sql-reference/table-functions/format.md | 4 +-- 9 files changed, 24 insertions(+), 24 deletions(-) diff --git a/docs/en/interfaces/formats.md 
b/docs/en/interfaces/formats.md index 75ef0ac3cc0..e28c486afca 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -101,7 +101,7 @@ The `TabSeparated` format supports outputting total values (when using WITH TOTA SELECT EventDate, count() AS c FROM test.hits GROUP BY EventDate WITH TOTALS ORDER BY EventDate FORMAT TabSeparated ``` -``` text +``` response 2014-03-17 1406958 2014-03-18 1383658 2014-03-19 1405797 @@ -177,7 +177,7 @@ INSERT INTO nestedt Values ( 1, [1], ['a']) SELECT * FROM nestedt FORMAT TSV ``` -``` text +``` response 1 [1] ['a'] ``` @@ -761,7 +761,7 @@ SELECT * FROM json_as_string; Result: -``` text +``` response ┌─json──────────────────────────────┐ │ {"foo":{"bar":{"x":"y"},"baz":1}} │ │ {} │ @@ -782,7 +782,7 @@ SELECT * FROM json_square_brackets; Result: -```text +```response ┌─field──────────────────────┐ │ {"id": 1, "name": "name1"} │ │ {"id": 2, "name": "name2"} │ @@ -1118,7 +1118,7 @@ When inserting data with `input_format_defaults_for_omitted_fields = 1`, ClickHo Consider the `UserActivity` table as an example: -``` text +``` response ┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐ │ 4324182021466249494 │ 5 │ 146 │ -1 │ │ 4324182021466249494 │ 6 │ 185 │ 1 │ @@ -1127,7 +1127,7 @@ Consider the `UserActivity` table as an example: The query `SELECT * FROM UserActivity FORMAT JSONEachRow` returns: -``` text +``` response {"UserID":"4324182021466249494","PageViews":5,"Duration":146,"Sign":-1} {"UserID":"4324182021466249494","PageViews":6,"Duration":185,"Sign":1} ``` @@ -1171,7 +1171,7 @@ Without this setting, ClickHouse throws an exception. SELECT name, value FROM system.settings WHERE name = 'input_format_import_nested_json' ``` -``` text +``` response ┌─name────────────────────────────┬─value─┐ │ input_format_import_nested_json │ 0 │ └─────────────────────────────────┴───────┘ @@ -1181,7 +1181,7 @@ SELECT name, value FROM system.settings WHERE name = 'input_format_import_nested INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n": {"s": ["abc", "def"], "i": [1, 23]}} ``` -``` text +``` response Code: 117. DB::Exception: Unknown field found while parsing JSONEachRow format: n: (at row 1) ``` @@ -1191,7 +1191,7 @@ INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n": {"s": ["abc", "def"], SELECT * FROM json_each_row_nested ``` -``` text +``` response ┌─n.s───────────┬─n.i────┐ │ ['abc','def'] │ [1,23] │ └───────────────┴────────┘ @@ -1300,7 +1300,7 @@ Example (shown for the [PrettyCompact](#prettycompact) format): SELECT * FROM t_null ``` -``` text +``` response ┌─x─┬────y─┐ │ 1 │ ᴺᵁᴸᴸ │ └───┴──────┘ @@ -1312,7 +1312,7 @@ Rows are not escaped in Pretty\* formats. 
Example is shown for the [PrettyCompac SELECT 'String with \'quotes\' and \t character' AS Escaping_test ``` -``` text +``` response ┌─Escaping_test────────────────────────┐ │ String with 'quotes' and character │ └──────────────────────────────────────┘ @@ -1327,7 +1327,7 @@ The Pretty format supports outputting total values (when using WITH TOTALS) and SELECT EventDate, count() AS c FROM test.hits GROUP BY EventDate WITH TOTALS ORDER BY EventDate FORMAT PrettyCompact ``` -``` text +``` response ┌──EventDate─┬───────c─┐ │ 2014-03-17 │ 1406958 │ │ 2014-03-18 │ 1383658 │ @@ -1488,7 +1488,7 @@ Example: SELECT * FROM t_null FORMAT Vertical ``` -``` text +``` response Row 1: ────── x: 1 @@ -1501,7 +1501,7 @@ Rows are not escaped in Vertical format: SELECT 'string with \'quotes\' and \t with some special \n characters' AS test FORMAT Vertical ``` -``` text +``` response Row 1: ────── test: string with 'quotes' and with some special diff --git a/docs/en/operations/system-tables/disks.md b/docs/en/operations/system-tables/disks.md index d492e42c2ec..a079f3338d2 100644 --- a/docs/en/operations/system-tables/disks.md +++ b/docs/en/operations/system-tables/disks.md @@ -20,7 +20,7 @@ Columns: SELECT * FROM system.disks; ``` -```text +```response ┌─name────┬─path─────────────────┬───free_space─┬──total_space─┬─keep_free_space─┐ │ default │ /var/lib/clickhouse/ │ 276392587264 │ 490652508160 │ 0 │ └─────────┴──────────────────────┴──────────────┴──────────────┴─────────────────┘ diff --git a/docs/en/operations/system-tables/merge_tree_settings.md b/docs/en/operations/system-tables/merge_tree_settings.md index a05d4abccda..672c79e335b 100644 --- a/docs/en/operations/system-tables/merge_tree_settings.md +++ b/docs/en/operations/system-tables/merge_tree_settings.md @@ -18,7 +18,7 @@ Columns: SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical; ``` -```text +```response Row 1: ────── name: index_granularity diff --git a/docs/en/operations/system-tables/numbers.md b/docs/en/operations/system-tables/numbers.md index 0dc001ebb6f..68efeb2ee38 100644 --- a/docs/en/operations/system-tables/numbers.md +++ b/docs/en/operations/system-tables/numbers.md @@ -15,7 +15,7 @@ Reads from this table are not parallelized. SELECT * FROM system.numbers LIMIT 10; ``` -```text +```response ┌─number─┐ │ 0 │ │ 1 │ diff --git a/docs/en/operations/system-tables/numbers_mt.md b/docs/en/operations/system-tables/numbers_mt.md index cc461b29ad0..653a8d43cc9 100644 --- a/docs/en/operations/system-tables/numbers_mt.md +++ b/docs/en/operations/system-tables/numbers_mt.md @@ -13,7 +13,7 @@ Used for tests. SELECT * FROM system.numbers_mt LIMIT 10; ``` -```text +```response ┌─number─┐ │ 0 │ │ 1 │ diff --git a/docs/en/operations/system-tables/one.md b/docs/en/operations/system-tables/one.md index ee2907a6d6d..6d3519f0069 100644 --- a/docs/en/operations/system-tables/one.md +++ b/docs/en/operations/system-tables/one.md @@ -15,7 +15,7 @@ This is similar to the `DUAL` table found in other DBMSs. 
SELECT * FROM system.one LIMIT 10; ``` -```text +```response ┌─dummy─┐ │ 0 │ └───────┘ diff --git a/docs/en/operations/system-tables/processes.md b/docs/en/operations/system-tables/processes.md index 95c46f551ef..76219813ad7 100644 --- a/docs/en/operations/system-tables/processes.md +++ b/docs/en/operations/system-tables/processes.md @@ -23,7 +23,7 @@ Columns: SELECT * FROM system.processes LIMIT 10 FORMAT Vertical; ``` -```text +```response Row 1: ────── is_initial_query: 1 diff --git a/docs/en/sql-reference/table-functions/format.md b/docs/en/sql-reference/table-functions/format.md index 4a0ee58d758..3af48249e3c 100644 --- a/docs/en/sql-reference/table-functions/format.md +++ b/docs/en/sql-reference/table-functions/format.md @@ -38,7 +38,7 @@ $$) **Result:** -```text +```response ┌───b─┬─a─────┐ │ 111 │ Hello │ │ 123 │ World │ @@ -60,7 +60,7 @@ $$) **Result:** -```text +```response ┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ │ b │ Nullable(Float64) │ │ │ │ │ │ │ a │ Nullable(String) │ │ │ │ │ │ diff --git a/docs/ru/sql-reference/table-functions/format.md b/docs/ru/sql-reference/table-functions/format.md index a91b4ca2b1e..204658914e0 100644 --- a/docs/ru/sql-reference/table-functions/format.md +++ b/docs/ru/sql-reference/table-functions/format.md @@ -38,7 +38,7 @@ $$) **Result:** -```text +```response ┌───b─┬─a─────┐ │ 111 │ Hello │ │ 123 │ World │ @@ -60,7 +60,7 @@ $$) **Result:** -```text +```response ┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ │ b │ Nullable(Float64) │ │ │ │ │ │ │ a │ Nullable(String) │ │ │ │ │ │ From 806dd1357ca44bc3995a4af80039567e70aefca2 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 11 Jan 2023 10:13:31 -0500 Subject: [PATCH 206/262] switch text to response for query blocks --- docs/zh/operations/system-tables/merge_tree_settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/operations/system-tables/merge_tree_settings.md b/docs/zh/operations/system-tables/merge_tree_settings.md index c2bdcd14d24..3118d6b7530 100644 --- a/docs/zh/operations/system-tables/merge_tree_settings.md +++ b/docs/zh/operations/system-tables/merge_tree_settings.md @@ -19,7 +19,7 @@ slug: /zh/operations/system-tables/merge_tree_settings SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical; ``` -```text +```response Row 1: ────── name: index_granularity From 807e84da98fea573d90041555d9dcefc182bd454 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 11 Jan 2023 15:44:21 +0100 Subject: [PATCH 207/262] Delete unused website directory --- utils/check-style/check-style | 10 +++++----- website/README.md | 1 - website/data/.gitkeep | 1 - 3 files changed, 5 insertions(+), 7 deletions(-) delete mode 100644 website/README.md delete mode 100644 website/data/.gitkeep diff --git a/utils/check-style/check-style b/utils/check-style/check-style index b5e1a4748a5..5c36d85fc74 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -252,12 +252,12 @@ find $ROOT_PATH/{src,programs,utils} -name '*.h' | while read file; do [[ $(head -n1 $file) != '#pragma once' ]] && echo "File $file must have '#pragma once' in first line"; done # Check for executable bit on non-executable files -find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} '(' -name '*.cpp' -or -name '*.h' -or -name '*.sql' -or -name '*.j2' -or -name '*.xml' -or -name '*.reference' -or -name '*.txt' -or -name '*.md' ')' -and -executable | grep -P '.' && echo "These files should not be executable." +find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} '(' -name '*.cpp' -or -name '*.h' -or -name '*.sql' -or -name '*.j2' -or -name '*.xml' -or -name '*.reference' -or -name '*.txt' -or -name '*.md' ')' -and -executable | grep -P '.' && echo "These files should not be executable." # Check for BOM -find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xEF\xBB\xBF' | grep -P '.' && echo "Files should not have UTF-8 BOM" -find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFF\xFE' | grep -P '.' && echo "Files should not have UTF-16LE BOM" -find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFE\xFF' | grep -P '.' && echo "Files should not have UTF-16BE BOM" +find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xEF\xBB\xBF' | grep -P '.' && echo "Files should not have UTF-8 BOM" +find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFF\xFE' | grep -P '.' && echo "Files should not have UTF-16LE BOM" +find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFE\xFF' | grep -P '.' && echo "Files should not have UTF-16BE BOM" # Too many exclamation marks find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | @@ -336,7 +336,7 @@ for test_case in "${expect_tests[@]}"; do done # Conflict markers -find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | +find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -P '^(<<<<<<<|=======|>>>>>>>)$' | grep -P '.' && echo "Conflict markers are found in files" # Forbid subprocess.check_call(...) 
in integration tests because it does not provide enough information on errors diff --git a/website/README.md b/website/README.md deleted file mode 100644 index 67937044ba0..00000000000 --- a/website/README.md +++ /dev/null @@ -1 +0,0 @@ -# This is not a website diff --git a/website/data/.gitkeep b/website/data/.gitkeep deleted file mode 100644 index 0d540696911..00000000000 --- a/website/data/.gitkeep +++ /dev/null @@ -1 +0,0 @@ -# This directory will contain miscellaneous data files on ClickHouse website \ No newline at end of file From 6499e8e687182f2bebb3f74c7321400edd9964d8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 11 Jan 2023 16:14:32 +0100 Subject: [PATCH 208/262] Calculate only required column in system.detached_parts --- .../System/StorageSystemDetachedParts.cpp | 56 ++++++++++++++----- 1 file changed, 41 insertions(+), 15 deletions(-) diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 1828c5932ad..01c7b7d69e4 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -60,7 +60,7 @@ static UInt64 calculateTotalSizeOnDisk(const DiskPtr & disk, const String & from } Pipe StorageSystemDetachedParts::read( - const Names & /* column_names */, + const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, ContextPtr context, @@ -68,37 +68,63 @@ Pipe StorageSystemDetachedParts::read( const size_t /*max_block_size*/, const size_t /*num_streams*/) { + storage_snapshot->check(column_names); + StoragesInfoStream stream(query_info, context); /// Create the result. Block block = storage_snapshot->metadata->getSampleBlock(); - MutableColumns new_columns = block.cloneEmptyColumns(); + NameSet names_set(column_names.begin(), column_names.end()); + std::vector columns_mask(block.columns()); + Block header; + + for (size_t i = 0; i < block.columns(); ++i) + { + if (names_set.contains(block.getByPosition(i).name)) + { + columns_mask[i] = 1; + header.insert(block.getByPosition(i)); + } + } + + MutableColumns new_columns = header.cloneEmptyColumns(); while (StoragesInfo info = stream.next()) { const auto parts = info.data->getDetachedParts(); for (const auto & p : parts) { - size_t i = 0; + size_t src_index = 0, res_index = 0; String detached_part_path = fs::path(MergeTreeData::DETACHED_DIR_NAME) / p.dir_name; - new_columns[i++]->insert(info.database); - new_columns[i++]->insert(info.table); - new_columns[i++]->insert(p.valid_name ? p.partition_id : Field()); - new_columns[i++]->insert(p.dir_name); - new_columns[i++]->insert(calculateTotalSizeOnDisk(p.disk, fs::path(info.data->getRelativeDataPath()) / detached_part_path)); - new_columns[i++]->insert(p.disk->getName()); - new_columns[i++]->insert((fs::path(info.data->getFullPathOnDisk(p.disk)) / detached_part_path).string()); - new_columns[i++]->insert(p.valid_name ? p.prefix : Field()); - new_columns[i++]->insert(p.valid_name ? p.min_block : Field()); - new_columns[i++]->insert(p.valid_name ? p.max_block : Field()); - new_columns[i++]->insert(p.valid_name ? p.level : Field()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(info.database); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(info.table); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? 
p.partition_id : Field()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.dir_name); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(calculateTotalSizeOnDisk(p.disk, fs::path(info.data->getRelativeDataPath()) / detached_part_path)); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.disk->getName()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert((fs::path(info.data->getFullPathOnDisk(p.disk)) / detached_part_path).string()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? p.prefix : Field()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? p.min_block : Field()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? p.max_block : Field()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? p.level : Field()); } } UInt64 num_rows = new_columns.at(0)->size(); Chunk chunk(std::move(new_columns), num_rows); - return Pipe(std::make_shared(std::move(block), std::move(chunk))); + return Pipe(std::make_shared(std::move(header), std::move(chunk))); } } From 3e9d142066c6591c83a4c73be36f8c3958d70c99 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 11 Jan 2023 16:17:21 +0000 Subject: [PATCH 209/262] Fix wrong column nullability in StoreageJoin --- src/Interpreters/HashJoin.cpp | 12 ++++++------ src/Interpreters/TableJoin.cpp | 10 ---------- src/Storages/StorageJoin.cpp | 16 ++++++++++++++-- src/Storages/StorageJoin.h | 6 +++--- 4 files changed, 23 insertions(+), 21 deletions(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index dc041094381..5ff4f9beb05 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -886,20 +886,20 @@ public: const auto & lhs = lhs_block.getByPosition(i); const auto & rhs = rhs_block.getByPosition(i); if (lhs.name != rhs.name) - throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}]", - lhs_block.dumpStructure(), rhs_block.dumpStructure()); + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}] ({} != {})", + lhs_block.dumpStructure(), rhs_block.dumpStructure(), lhs.name, rhs.name); const auto & ltype = recursiveRemoveLowCardinality(lhs.type); const auto & rtype = recursiveRemoveLowCardinality(rhs.type); if (!ltype->equals(*rtype)) - throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}]", - lhs_block.dumpStructure(), rhs_block.dumpStructure()); + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}] ({} != {})", + lhs_block.dumpStructure(), rhs_block.dumpStructure(), ltype->getName(), rtype->getName()); const auto & lcol = recursiveRemoveLowCardinality(lhs.column); const auto & rcol = recursiveRemoveLowCardinality(rhs.column); if (lcol->getDataType() != rcol->getDataType()) - throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}]", - lhs_block.dumpStructure(), rhs_block.dumpStructure()); + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}] ({} != {})", + lhs_block.dumpStructure(), rhs_block.dumpStructure(), lcol->getDataType(), rcol->getDataType()); } } diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index aa4f821657f..78218ac59a5 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -458,16 +458,6 @@ TableJoin::createConvertingActions( 
LOG_DEBUG(&Poco::Logger::get("TableJoin"), "{} JOIN converting actions: empty", side); return; } - auto format_cols = [](const auto & cols) -> std::string - { - std::vector str_cols; - str_cols.reserve(cols.size()); - for (const auto & col : cols) - str_cols.push_back(fmt::format("'{}': {}", col.name, col.type->getName())); - return fmt::format("[{}]", fmt::join(str_cols, ", ")); - }; - LOG_DEBUG(&Poco::Logger::get("TableJoin"), "{} JOIN converting actions: {} -> {}", - side, format_cols(dag->getRequiredColumns()), format_cols(dag->getResultColumns())); }; log_actions("Left", left_converting_actions); log_actions("Right", right_converting_actions); diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 55f3b889f22..320f05e038f 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -229,11 +229,13 @@ HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr analyzed_join, return join_clone; } - void StorageJoin::insertBlock(const Block & block, ContextPtr context) { + Block block_to_insert = block; + convertRightBlock(block_to_insert); + TableLockHolder holder = tryLockTimedWithContext(rwlock, RWLockImpl::Write, context); - join->addJoinedBlock(block, true); + join->addJoinedBlock(block_to_insert, true); } size_t StorageJoin::getSize(ContextPtr context) const @@ -265,6 +267,16 @@ ColumnWithTypeAndName StorageJoin::joinGet(const Block & block, const Block & bl return join->joinGet(block, block_with_columns_to_add); } +void StorageJoin::convertRightBlock(Block & block) const +{ + bool need_covert = use_nulls && isLeftOrFull(kind); + if (!need_covert) + return; + + for (auto & col : block) + JoinCommon::convertColumnToNullable(col); +} + void registerStorageJoin(StorageFactory & factory) { auto creator_fn = [](const StorageFactory::Arguments & args) diff --git a/src/Storages/StorageJoin.h b/src/Storages/StorageJoin.h index 3d7a9d9b5ec..96afd442c72 100644 --- a/src/Storages/StorageJoin.h +++ b/src/Storages/StorageJoin.h @@ -77,9 +77,7 @@ public: { auto metadata_snapshot = getInMemoryMetadataPtr(); Block block = metadata_snapshot->getSampleBlock(); - if (use_nulls && isLeftOrFull(kind)) - for (auto & col : block) - JoinCommon::convertColumnToNullable(col); + convertRightBlock(block); return block; } @@ -108,6 +106,8 @@ private: void finishInsert() override {} size_t getSize(ContextPtr context) const override; RWLockImpl::LockHolder tryLockTimedWithContext(const RWLock & lock, RWLockImpl::Type type, ContextPtr context) const; + + void convertRightBlock(Block & block) const; }; } From 1f8535ca8357e85a05faf55a7ba1553f5a0f78aa Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 11 Jan 2023 18:49:01 +0100 Subject: [PATCH 210/262] Restart NightlyBuilds if the runner died --- tests/ci/workflow_approve_rerun_lambda/app.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index 9fc4266d9d4..0be93e26c13 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -64,6 +64,7 @@ NEED_RERUN_WORKFLOWS = { "DocsCheck", "DocsReleaseChecks", "MasterCI", + "NightlyBuilds", "PullRequestCI", "ReleaseBranchCI", } From f9240a8f903f0e8e9df399520f5c0dc4a7957586 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 11 Jan 2023 18:47:04 +0000 Subject: [PATCH 211/262] Add 02531_storage_join_null_44940 --- .../02531_storage_join_null_44940.reference | 3 +++ .../02531_storage_join_null_44940.sql | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 tests/queries/0_stateless/02531_storage_join_null_44940.reference create mode 100644 tests/queries/0_stateless/02531_storage_join_null_44940.sql diff --git a/tests/queries/0_stateless/02531_storage_join_null_44940.reference b/tests/queries/0_stateless/02531_storage_join_null_44940.reference new file mode 100644 index 00000000000..b7e40c360c0 --- /dev/null +++ b/tests/queries/0_stateless/02531_storage_join_null_44940.reference @@ -0,0 +1,3 @@ +3 \N 3 +2 2 2 +1 1 1 diff --git a/tests/queries/0_stateless/02531_storage_join_null_44940.sql b/tests/queries/0_stateless/02531_storage_join_null_44940.sql new file mode 100644 index 00000000000..136fc8bbef1 --- /dev/null +++ b/tests/queries/0_stateless/02531_storage_join_null_44940.sql @@ -0,0 +1,18 @@ + +SET allow_suspicious_low_cardinality_types = 1; + +DROP TABLE IF EXISTS t1__fuzz_8; +DROP TABLE IF EXISTS full_join__fuzz_4; + +CREATE TABLE t1__fuzz_8 (`x` LowCardinality(UInt32), `str` Nullable(Int16)) ENGINE = Memory; +INSERT INTO t1__fuzz_8 VALUES (1, 1), (2, 2); + +CREATE TABLE full_join__fuzz_4 (`x` LowCardinality(UInt32), `s` LowCardinality(String)) ENGINE = Join(`ALL`, FULL, x) SETTINGS join_use_nulls = 1; +INSERT INTO full_join__fuzz_4 VALUES (1, '1'), (2, '2'), (3, '3'); + +SET join_use_nulls = 1; + +SELECT * FROM t1__fuzz_8 FULL OUTER JOIN full_join__fuzz_4 USING (x) ORDER BY x DESC, str ASC, s ASC NULLS LAST; + +DROP TABLE IF EXISTS t1__fuzz_8; +DROP TABLE IF EXISTS full_join__fuzz_4; From 9c4ea5a16ba8e844e65e175a4dd76b8618241254 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 11 Jan 2023 18:51:37 +0000 Subject: [PATCH 212/262] fix part ID generation for IP types for backward compatibility --- src/Storages/MergeTree/MergeTreePartition.cpp | 9 +++++---- .../queries/0_stateless/02530_ip_part_id.reference | 2 ++ tests/queries/0_stateless/02530_ip_part_id.sql | 14 ++++++++++++++ 3 files changed, 21 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/02530_ip_part_id.reference create mode 100644 tests/queries/0_stateless/02530_ip_part_id.sql diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index 0fd081a8425..5d4b4853812 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -93,9 +94,7 @@ namespace } void operator() (const IPv6 & x) const { - UInt8 type = Field::Types::IPv6; - hash.update(type); - hash.update(x); + return operator()(String(reinterpret_cast(&x), 16)); } void operator() 
(const Float64 & x) const { @@ -213,7 +212,7 @@ String MergeTreePartition::getID(const Block & partition_key_sample) const bool are_all_integral = true; for (const Field & field : value) { - if (field.getType() != Field::Types::UInt64 && field.getType() != Field::Types::Int64) + if (field.getType() != Field::Types::UInt64 && field.getType() != Field::Types::Int64 && field.getType() != Field::Types::IPv4) { are_all_integral = false; break; @@ -232,6 +231,8 @@ String MergeTreePartition::getID(const Block & partition_key_sample) const if (typeid_cast(partition_key_sample.getByPosition(i).type.get())) result += toString(DateLUT::instance().toNumYYYYMMDD(DayNum(value[i].safeGet()))); + else if (typeid_cast(partition_key_sample.getByPosition(i).type.get())) + result += toString(value[i].get().toUnderType()); else result += applyVisitor(to_string_visitor, value[i]); diff --git a/tests/queries/0_stateless/02530_ip_part_id.reference b/tests/queries/0_stateless/02530_ip_part_id.reference new file mode 100644 index 00000000000..a13e1bafdaa --- /dev/null +++ b/tests/queries/0_stateless/02530_ip_part_id.reference @@ -0,0 +1,2 @@ +1.2.3.4 ::ffff:1.2.3.4 16909060_1_1_0 +1.2.3.4 ::ffff:1.2.3.4 1334d7cc23ffb5a5c0262304b3313426_1_1_0 diff --git a/tests/queries/0_stateless/02530_ip_part_id.sql b/tests/queries/0_stateless/02530_ip_part_id.sql new file mode 100644 index 00000000000..bf704eaa1c2 --- /dev/null +++ b/tests/queries/0_stateless/02530_ip_part_id.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS ip_part_test; + +CREATE TABLE ip_part_test ( ipv4 IPv4, ipv6 IPv6 ) ENGINE = MergeTree PARTITION BY ipv4 ORDER BY ipv4 AS SELECT '1.2.3.4', '::ffff:1.2.3.4'; + +SELECT *, _part FROM ip_part_test; + +DROP TABLE IF EXISTS ip_part_test; + +CREATE TABLE ip_part_test ( ipv4 IPv4, ipv6 IPv6 ) ENGINE = MergeTree PARTITION BY ipv6 ORDER BY ipv6 AS SELECT '1.2.3.4', '::ffff:1.2.3.4'; + +SELECT *, _part FROM ip_part_test; + +DROP TABLE IF EXISTS ip_part_test; + From 8b9d99e2e26b285f0c4545b44b09eecbae62afd8 Mon Sep 17 00:00:00 2001 From: rfraposa Date: Wed, 11 Jan 2023 11:51:53 -0700 Subject: [PATCH 213/262] Update syntax.md --- docs/en/sql-reference/syntax.md | 44 +++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index 362a2cf47bb..368b7d357d0 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -128,6 +128,50 @@ Result: └────────────────────────────┘ ``` +## Defining and Using Query Parameters + +Query parameters can be defined using the syntax `param_name=value`, where `name` is the name of the parameter. Parameters can by defined using the `SET` command, or from the command-line using `--param`. + +To retrieve a query parameter, you specify the name of the parameter along with its data type surrounded by curly braces: + +```sql +{name:datatype} +``` + +For example, the following SQL defines parameters named `a`, `b`, `c` and `d` - each of a different data type: + +```sql +SET param_a = 13, param_b = 'str'; +SET param_c = '2022-08-04 18:30:53'; +SET param_d = '{\'10\': [11, 12], \'13\': [14, 15]}'; + +SELECT + {a: UInt32}, + {b: String}, + {c: DateTime}, + {d: Map(String, Array(UInt8))}; +``` + +If you are using `clickhouse-client`, the parameters are specified as `--param_name=value`. 
For example, the following parameter has the name `message` and it is being retrieved as a `String`: + +```sql +clickhouse-client --param_message='hello' --query="SELECT {message: String}" +``` + +Result: + +```response +hello +``` + +If the query parameter represents the name of a database, table, function or other identifier, use `Identifier` for its type. For example, the following query returns rows from a table named `uk_price_paid`: + +```sql +SET param_mytablename = "uk_price_paid"; +SELECT * FROM {mytablename:Identifier}; +``` + + ## Functions Function calls are written like an identifier with a list of arguments (possibly empty) in round brackets. In contrast to standard SQL, the brackets are required, even for an empty argument list. Example: `now()`. From a389180f42a0b50ab00c99daf0b1d6d77d676690 Mon Sep 17 00:00:00 2001 From: Rich Raposa Date: Wed, 11 Jan 2023 12:05:35 -0700 Subject: [PATCH 214/262] Update syntax.md --- docs/en/sql-reference/syntax.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index 368b7d357d0..ea926d1d8c7 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -143,7 +143,7 @@ For example, the following SQL defines parameters named `a`, `b`, `c` and `d` - ```sql SET param_a = 13, param_b = 'str'; SET param_c = '2022-08-04 18:30:53'; -SET param_d = '{\'10\': [11, 12], \'13\': [14, 15]}'; +SET param_d = {'10': [11, 12], '13': [14, 15]}'; SELECT {a: UInt32}, From f8ac49bb86f69f3ffe3dac5abf03ba517383a0f9 Mon Sep 17 00:00:00 2001 From: Rich Raposa Date: Wed, 11 Jan 2023 12:09:23 -0700 Subject: [PATCH 215/262] Update syntax.md --- docs/en/sql-reference/syntax.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index ea926d1d8c7..045c9777ad7 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -152,6 +152,12 @@ SELECT {d: Map(String, Array(UInt8))}; ``` +Result: + +```response +13 str 2022-08-04 18:30:53 {'10':[11,12],'13':[14,15]} +``` + If you are using `clickhouse-client`, the parameters are specified as `--param_name=value`. For example, the following parameter has the name `message` and it is being retrieved as a `String`: ```sql From 1b94c839d5efc9250bdf298f2bfd0847d8fccf00 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Wed, 11 Jan 2023 21:16:22 +0100 Subject: [PATCH 216/262] Add docs for `SYSTEM RELOAD USERS` --- docs/en/sql-reference/statements/system.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index a806cbd91a5..a82d1447453 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -111,6 +111,10 @@ This will also create system tables even if message queue is empty. Reloads ClickHouse configuration. Used when configuration is stored in ZooKeeper. +## RELOAD USERS + +Reloads all access storages, including: users.xml, local disk access storage, replicated (in ZooKeeper) access storage. Note that `SYSTEM RELOAD CONFIG` will only reload users.xml access storage. 
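A minimal usage sketch for the `RELOAD USERS` statement documented above, assuming the standard `SYSTEM ...` invocation form used elsewhere on that documentation page:

```sql
-- Re-reads every access storage: users.xml, the local disk access storage,
-- and the replicated (ZooKeeper) access storage.
SYSTEM RELOAD USERS;
```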
+ ## SHUTDOWN Normally shuts down ClickHouse (like `service clickhouse-server stop` / `kill {$pid_clickhouse-server}`) From ca367a6a64e9f821f2d6164b97ec68318a155c25 Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Wed, 11 Jan 2023 22:24:01 +0100 Subject: [PATCH 217/262] Add CACHE_INVALIDATOR for sqlancer builds --- docker/test/sqlancer/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/sqlancer/Dockerfile b/docker/test/sqlancer/Dockerfile index 2ebc61e35a9..a2d84c7689f 100644 --- a/docker/test/sqlancer/Dockerfile +++ b/docker/test/sqlancer/Dockerfile @@ -6,6 +6,7 @@ ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list RUN apt-get update --yes && env DEBIAN_FRONTEND=noninteractive apt-get install wget unzip git default-jdk maven python3 --yes --no-install-recommends +ARG CACHE_INVALIDATOR=0 RUN wget https://github.com/sqlancer/sqlancer/archive/master.zip -O /sqlancer.zip RUN mkdir /sqlancer && \ cd /sqlancer && \ From 34871e6934e3b92f1ecdae82c50c092c874c15ac Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 12 Jan 2023 00:26:03 +0300 Subject: [PATCH 218/262] Add a checkbox for documentation --- .github/PULL_REQUEST_TEMPLATE.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index d3bbefe1d65..5d09d3a9ef3 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -19,6 +19,9 @@ tests/ci/run_check.py ... ### Documentation entry for user-facing changes + +- [ ] Documentation is written (mandatory for new features) +
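A minimal sketch of how the `CACHE_INVALIDATOR` build argument introduced in `docker/test/sqlancer/Dockerfile` (PATCH 217 above) is typically exercised. Only the `--build-arg` flag, the argument name and the Dockerfile path come from that change; the image tag is a placeholder.

```bash
# Supplying a changing value for CACHE_INVALIDATOR invalidates the Docker layer cache
# for the RUN instructions that follow the ARG line, so the sqlancer sources are
# downloaded again instead of being reused from a stale cached layer.
# (the image tag below is a placeholder)
docker build \
  --build-arg CACHE_INVALIDATOR="$(date +%s)" \
  --tag sqlancer-test \
  docker/test/sqlancer
```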