From f801772a862505650fb612137a14bf6f32cc3642 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 23 Sep 2022 13:35:22 +0200 Subject: [PATCH 001/262] 40907 Parameterized views as table functions Implementation: * Updated parsers by adding a bool allow_query_parameters while creating ordinary view, which is used in interpreters to allow query parameters in SELECT. * Added a check in ActionsVisitor if multiple parameters have same names while creating parameterised view. * Added bool in StorageView to represent parameterized view. * Updated processing of SELECT with parameter values to check for views and added substitution of values in the query parameters. Testing: * Added a test tests/queries/0_stateless/02428_parameterized_view.sql Documentation: * Updated the english documentation for VIEW. --- .../sql-reference/statements/create/view.md | 16 +++- src/Interpreters/ActionsVisitor.cpp | 25 +++++- src/Interpreters/Context.cpp | 79 ++++++++++++++----- src/Interpreters/ExpressionAnalyzer.cpp | 19 ++++- src/Interpreters/InterpreterSelectQuery.cpp | 3 + src/Interpreters/QueryNormalizer.cpp | 19 ++++- src/Interpreters/QueryNormalizer.h | 3 + src/Parsers/ASTCreateQuery.cpp | 7 ++ src/Parsers/ASTCreateQuery.h | 4 +- src/Parsers/ASTSelectQuery.cpp | 51 ++++++++++++ src/Parsers/ASTSelectQuery.h | 7 +- src/Parsers/ASTSelectWithUnionQuery.cpp | 22 ++++++ src/Parsers/ASTSelectWithUnionQuery.h | 6 ++ src/Parsers/ExpressionListParsers.cpp | 2 +- src/Parsers/ExpressionListParsers.h | 6 ++ src/Parsers/ParserCreateQuery.cpp | 3 + src/Parsers/ParserSelectQuery.cpp | 1 + src/Parsers/ParserSelectQuery.h | 7 ++ src/Parsers/ParserSelectWithUnionQuery.cpp | 2 +- src/Parsers/ParserSelectWithUnionQuery.h | 3 + src/Parsers/ParserUnionQueryElement.cpp | 2 +- src/Parsers/ParserUnionQueryElement.h | 7 ++ src/Storages/StorageView.cpp | 19 ++++- src/Storages/StorageView.h | 6 ++ .../02428_parameterized_view.reference | 3 + .../0_stateless/02428_parameterized_view.sql | 32 ++++++++ 26 files changed, 320 insertions(+), 34 deletions(-) create mode 100644 tests/queries/0_stateless/02428_parameterized_view.reference create mode 100644 tests/queries/0_stateless/02428_parameterized_view.sql diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index da68ca05bbb..a7b3f4ef762 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -36,6 +36,18 @@ This query is fully equivalent to using the subquery: SELECT a, b, c FROM (SELECT ...) ``` +## Parameterized View +This is similar to a normal view, but it can be created with parameters instead of literals and can be used as a table function by substituting the values of the parameters. + +``` sql +CREATE VIEW view AS SELECT * FROM TABLE WHERE Column1={column1:datatype1} and Column2={column2:datatype2} ... +``` +The above creates a view for the table, which can be used as a table function by substituting value1 & value2 as shown below. + +``` sql +SELECT * FROM view(column1=value1, column2=value2 ...) +``` + ## Materialized View ``` sql @@ -74,9 +86,7 @@ To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop This is an experimental feature that may change in backwards-incompatible ways in the future releases. Enable usage of live views and `WATCH` query using [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view) setting. Input the command `set allow_experimental_live_view = 1`. 
::: -```sql -CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... -``` + Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query. diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 54faf37f236..0ebc6857779 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include @@ -742,9 +743,29 @@ std::optional ActionsMatcher::getNameAndTypeFromAST(const ASTPt return NameAndTypePair(child_column_name, node->result_type); if (!data.only_consts) - throw Exception("Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), - ErrorCodes::UNKNOWN_IDENTIFIER); + { + bool has_query_parameter = false; + std::queue astQueue; + astQueue.push(ast); + + while (!astQueue.empty()) + { + auto current = astQueue.front(); + astQueue.pop(); + + if (auto * ast_query_parameter = current->as()) + has_query_parameter = true; + + for (auto astChild : current->children) + astQueue.push(astChild); + } + + if (!has_query_parameter) + throw Exception( + "Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), + ErrorCodes::UNKNOWN_IDENTIFIER); + } return {}; } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 39af21ef027..77c67822aa4 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -93,6 +93,8 @@ #include #include #include +#include +#include #if USE_ROCKSDB #include @@ -136,6 +138,7 @@ namespace ErrorCodes extern const int INVALID_SETTING_VALUE; extern const int UNKNOWN_READ_METHOD; extern const int NOT_IMPLEMENTED; + extern const int UNKNOWN_FUNCTION; } @@ -1129,32 +1132,72 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) if (!res) { - TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); - if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) + try { - const auto & insertion_table = getInsertionTable(); - if (!insertion_table.empty()) + TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); + if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) { - const auto & structure_hint - = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; - table_function_ptr->setStructureHint(structure_hint); + const auto & insertion_table = getInsertionTable(); + if (!insertion_table.empty()) + { + const auto & structure_hint + = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; + table_function_ptr->setStructureHint(structure_hint); + } } - } - res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); + res = table_function_ptr->execute(table_expression, shared_from_this(), 
table_function_ptr->getName()); - /// Since ITableFunction::parseArguments() may change table_expression, i.e.: - /// - /// remote('127.1', system.one) -> remote('127.1', 'system.one'), - /// - auto new_hash = table_expression->getTreeHash(); - if (hash != new_hash) + /// Since ITableFunction::parseArguments() may change table_expression, i.e.: + /// + /// remote('127.1', system.one) -> remote('127.1', 'system.one'), + /// + auto new_hash = table_expression->getTreeHash(); + if (hash != new_hash) + { + key = toString(new_hash.first) + '_' + toString(new_hash.second); + table_function_results[key] = res; + } + + return res; + }catch (DB::Exception &table_function_exception) { - key = toString(new_hash.first) + '_' + toString(new_hash.second); - table_function_results[key] = res; + if (table_function_exception.code() == ErrorCodes::UNKNOWN_FUNCTION) + { + if (auto ast_function = table_expression->as()) + { + try + { + res = DatabaseCatalog::instance().getTable({getCurrentDatabase(), ast_function->name}, getQueryContext()); + if (res.get()->isView() && res->as()->isParameterizedView()) + return res; + else + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Not a parameterized view {}", + ast_function->name); + } + } + catch (DB::Exception &view_exception) + { + if (view_exception.code() == ErrorCodes::UNKNOWN_TABLE) + throw Exception( + ErrorCodes::UNKNOWN_FUNCTION, + "Unknown table function {} OR Unknown parameterized view {}", + table_function_exception.message(), + view_exception.message()); + else + throw; + } + } + else + throw; + } + else + throw; } - return res; } return res; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 9daa42bf499..d56cc47a34b 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1286,6 +1286,9 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, getRootActions(select_query->where(), only_types, step.actions()); + if (select_query->allow_query_parameters && select_query->hasQueryParameters()) + return true; + auto where_column_name = select_query->where()->getColumnName(); step.addRequiredOutput(where_column_name); @@ -1902,10 +1905,15 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( ExpressionActions( before_where, ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample); - auto & column_elem = before_where_sample.getByName(query.where()->getColumnName()); - /// If the filter column is a constant, record it. - if (column_elem.column) - where_constant_filter_description = ConstantFilterDescription(*column_elem.column); + + if (!(query.allow_query_parameters && query.hasQueryParameters())) + { + auto & column_elem + = before_where_sample.getByName(query.where()->getColumnName()); + /// If the filter column is a constant, record it. 
+ if (column_elem.column) + where_constant_filter_description = ConstantFilterDescription(*column_elem.column); + } } } chain.addStep(); @@ -2066,6 +2074,9 @@ void ExpressionAnalysisResult::finalize( ssize_t & having_step_num, const ASTSelectQuery & query) { + if (query.allow_query_parameters && query.hasQueryParameters()) + return; + if (prewhere_step_num >= 0) { const ExpressionActionsChain::Step & step = *chain.steps.at(prewhere_step_num); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index a77882c85d2..88df37fca34 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -500,7 +500,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it. ASTPtr view_table; if (view) + { view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot); + view->replaceQueryParameters(query_ptr, getSelectQuery().getQueryParameterValues()); + } syntax_analyzer_result = TreeRewriter(context).analyzeSelect( query_ptr, diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index 2a8b256c3d1..280904c9aa9 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -122,6 +122,16 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) } } +void QueryNormalizer::visit(ASTQueryParameter & node, const ASTPtr & ast, Data & data) +{ + auto it_alias = data.aliases.find(node.name); + if (it_alias != data.aliases.end()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Same alias used multiple times {} ", backQuote(node.name)); + + data.aliases[node.name] =ast; +} + + void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr &, Data & data) { /// normalize JOIN ON section @@ -142,6 +152,8 @@ static bool needVisitChild(const ASTPtr & child) /// special visitChildren() for ASTSelectQuery void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr &, Data & data) { + data.allow_query_parameters = select.allow_query_parameters; + for (auto & child : select.children) { if (needVisitChild(child)) @@ -257,7 +269,12 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) else if (auto * node_select = ast->as()) visit(*node_select, ast, data); else if (auto * node_param = ast->as()) - throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); + { + if (data.allow_query_parameters) + visit(*node_param, ast, data); + else + throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); + } else if (auto * node_function = ast->as()) if (node_function->parameters) visit(node_function->parameters, data); diff --git a/src/Interpreters/QueryNormalizer.h b/src/Interpreters/QueryNormalizer.h index f532d869789..ffd2c46ca77 100644 --- a/src/Interpreters/QueryNormalizer.h +++ b/src/Interpreters/QueryNormalizer.h @@ -13,6 +13,7 @@ class ASTSelectQuery; class ASTIdentifier; struct ASTTablesInSelectQueryElement; class Context; +class ASTQueryParameter; class QueryNormalizer @@ -52,6 +53,7 @@ public: /// It's Ok to have "c + 1 AS c" in queries, but not in table definition const bool allow_self_aliases; /// for constructs like "SELECT column + 1 AS column" + bool allow_query_parameters; Data(Aliases & aliases_, const NameSet & source_columns_set_, bool ignore_alias_, ExtractedSettings && settings_, bool 
allow_self_aliases_) : aliases(aliases_) @@ -80,6 +82,7 @@ private: static void visit(ASTIdentifier &, ASTPtr &, Data &); static void visit(ASTTablesInSelectQueryElement &, const ASTPtr &, Data &); static void visit(ASTSelectQuery &, const ASTPtr &, Data &); + static void visit(ASTQueryParameter &, const ASTPtr &, Data &); static void visitChildren(IAST * node, Data & data); }; diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index a277960643b..0fd7ca098e1 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -449,4 +449,11 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat } } +bool ASTCreateQuery::isParameterizedView() const +{ + if (is_ordinary_view && select && select->hasQueryParameters()) + return true; + return false; +} + } diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index f3729b1523f..2a6da778211 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -24,7 +24,7 @@ public: IAST * sample_by = nullptr; IAST * ttl_table = nullptr; ASTSetQuery * settings = nullptr; - + bool allow_query_parameters = false; String getID(char) const override { return "Storage definition"; } @@ -120,6 +120,8 @@ public: bool isView() const { return is_ordinary_view || is_materialized_view || is_live_view || is_window_view; } + bool isParameterizedView() const; + QueryKind getQueryKind() const override { return QueryKind::Create; } protected: diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 76849653b4e..b97c3dbc585 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB @@ -474,4 +475,54 @@ void ASTSelectQuery::setFinal() // NOLINT method can be made const tables_element.table_expression->as().final = true; } +bool ASTSelectQuery::hasQueryParameters() const +{ + std::queue queue; + queue.push(this->clone()); + + while (!queue.empty()) + { + auto ast = queue.front(); + queue.pop(); + + if (ast->as()) + return true; + + for (auto child : ast->children) + queue.push(child); + } + return false; +} + +NameToNameMap ASTSelectQuery::getQueryParameterValues() const +{ + NameToNameMap parameter_values; + std::queue queue; + queue.push(this->clone()); + + while (!queue.empty()) + { + auto ast = queue.front(); + queue.pop(); + if (auto expression_list = ast->as()) + { + if (expression_list->children.size() == 2) + { + if (auto identifier = expression_list->children[0]->as()) + { + if (auto literal = expression_list->children[1]->as()) + { + + parameter_values[identifier->name()] = toString(literal->value); + } + } + } + } + for (auto child : ast->children) + queue.push(child); + } + + return parameter_values; +} + } diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 5e3af545f12..e8eed092472 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -2,7 +2,9 @@ #include #include - +#include +#include +#include namespace DB { @@ -88,6 +90,7 @@ public: bool group_by_with_constant_keys = false; bool group_by_with_grouping_sets = false; bool limit_with_ties = false; + bool allow_query_parameters = false; ASTPtr & refSelect() { return getExpression(Expression::SELECT); } ASTPtr & refTables() { return getExpression(Expression::TABLES); } @@ -142,6 +145,8 @@ public: void setFinal(); QueryKind getQueryKind() const override { return QueryKind::Select; } + bool hasQueryParameters() const; + NameToNameMap 
getQueryParameterValues() const; protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index bc413fbe16d..11ac252aee2 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -86,4 +86,26 @@ bool ASTSelectWithUnionQuery::hasNonDefaultUnionMode() const || set_of_modes.contains(SelectUnionMode::EXCEPT_DISTINCT); } +bool ASTSelectWithUnionQuery::hasQueryParameters() const +{ + std::queue queue; + queue.push(this->clone()); + + while (!queue.empty()) + { + auto current = queue.front(); + queue.pop(); + + if (auto * select = current->as()) + { + if (select->hasQueryParameters()) + return true; + } + + for (auto child : current->children) + queue.push(child); + } + return false; +} + } diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index 457a3361b1e..ef8e50c47fd 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -2,6 +2,8 @@ #include #include +#include +#include namespace DB { @@ -31,6 +33,10 @@ public: /// Consider any mode other than ALL as non-default. bool hasNonDefaultUnionMode() const; + + bool hasQueryParameters() const; + + }; } diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 4e88e5c68e6..59b5b8b98cb 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -121,7 +121,7 @@ bool ParserList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserUnionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserUnionQueryElement elem_parser; + ParserUnionQueryElement elem_parser(allow_query_parameters); ParserKeyword s_union_parser("UNION"); ParserKeyword s_all_parser("ALL"); ParserKeyword s_distinct_parser("DISTINCT"); diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 05c7ec946ee..b0f6e66c213 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -84,6 +84,11 @@ private: class ParserUnionList : public IParserBase { public: + ParserUnionList(bool allow_query_parameters_=false) + : allow_query_parameters(allow_query_parameters_) + { + } + template static bool parseUtil(Pos & pos, const ElemFunc & parse_element, const SepFunc & parse_separator) { @@ -108,6 +113,7 @@ public: } auto getUnionModes() const { return union_modes; } + bool allow_query_parameters; protected: const char * getName() const override { return "list of union elements"; } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 08240abe8c6..26dcfd5079f 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -1285,7 +1285,10 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec is_materialized_view = true; } else + { is_ordinary_view = true; + select_p.allow_query_parameters = true; + } if (!s_view.ignore(pos, expected)) return false; diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index cf335270734..61381573421 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -34,6 +34,7 @@ namespace ErrorCodes bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { auto select_query = std::make_shared(); + select_query->allow_query_parameters = 
allow_query_parameters; node = select_query; ParserKeyword s_select("SELECT"); diff --git a/src/Parsers/ParserSelectQuery.h b/src/Parsers/ParserSelectQuery.h index deac25df57d..708b051e046 100644 --- a/src/Parsers/ParserSelectQuery.h +++ b/src/Parsers/ParserSelectQuery.h @@ -9,6 +9,13 @@ namespace DB class ParserSelectQuery : public IParserBase { +public: + ParserSelectQuery(bool allow_query_parameters_=false) + : allow_query_parameters(allow_query_parameters_) + { + } + bool allow_query_parameters; + protected: const char * getName() const override { return "SELECT query"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/ParserSelectWithUnionQuery.cpp b/src/Parsers/ParserSelectWithUnionQuery.cpp index 532a9e20735..39204ee457d 100644 --- a/src/Parsers/ParserSelectWithUnionQuery.cpp +++ b/src/Parsers/ParserSelectWithUnionQuery.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr list_node; - ParserUnionList parser; + ParserUnionList parser(allow_query_parameters); if (!parser.parse(pos, list_node, expected)) return false; diff --git a/src/Parsers/ParserSelectWithUnionQuery.h b/src/Parsers/ParserSelectWithUnionQuery.h index 0bf2946e429..6edf8a8d60e 100644 --- a/src/Parsers/ParserSelectWithUnionQuery.h +++ b/src/Parsers/ParserSelectWithUnionQuery.h @@ -8,6 +8,9 @@ namespace DB class ParserSelectWithUnionQuery : public IParserBase { +public: + bool allow_query_parameters = false; + protected: const char * getName() const override { return "SELECT query, possibly with UNION"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/ParserUnionQueryElement.cpp b/src/Parsers/ParserUnionQueryElement.cpp index efd022e6362..0ddaa323404 100644 --- a/src/Parsers/ParserUnionQueryElement.cpp +++ b/src/Parsers/ParserUnionQueryElement.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserUnionQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (!ParserSubquery().parse(pos, node, expected) && !ParserSelectQuery().parse(pos, node, expected)) + if (!ParserSubquery().parse(pos, node, expected) && !ParserSelectQuery(allow_query_parameters).parse(pos, node, expected)) return false; if (const auto * ast_subquery = node->as()) diff --git a/src/Parsers/ParserUnionQueryElement.h b/src/Parsers/ParserUnionQueryElement.h index 6b63c62c85b..a3fd47c496b 100644 --- a/src/Parsers/ParserUnionQueryElement.h +++ b/src/Parsers/ParserUnionQueryElement.h @@ -9,6 +9,13 @@ namespace DB class ParserUnionQueryElement : public IParserBase { +public: + ParserUnionQueryElement(bool allow_query_parameters_=false) + : allow_query_parameters(allow_query_parameters_) + { + } + bool allow_query_parameters; + protected: const char * getName() const override { return "SELECT query, subquery, possibly with UNION"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index adaf1c4e404..32d0a08777d 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -23,6 +23,8 @@ #include #include +#include + namespace DB { @@ -99,6 +101,7 @@ StorageView::StorageView( SelectQueryDescription description; description.inner_query = query.select->ptr(); + is_parameterized_view = query.isParameterizedView(); storage_metadata.setSelectQuery(description); setInMemoryMetadata(storage_metadata); } @@ -173,6 +176,15 @@ static ASTTableExpression * 
getFirstTableExpression(ASTSelectQuery & select_quer return select_element->table_expression->as(); } +void StorageView::replaceQueryParameters(ASTPtr & outer_query, const NameToNameMap & parameter_values) +{ + if (is_parameterized_view) + { + ReplaceQueryParameterVisitor visitor(parameter_values); + visitor.visit(outer_query); + } +} + void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name) { ASTTableExpression * table_expression = getFirstTableExpression(outer_query); @@ -185,8 +197,11 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ auto table_function_name = table_expression->table_function->as()->name; if (table_function_name == "view" || table_function_name == "viewIfPermitted") table_expression->database_and_table_name = std::make_shared("__view"); - if (table_function_name == "merge") + else if (table_function_name == "merge") table_expression->database_and_table_name = std::make_shared("__merge"); + else + table_expression->database_and_table_name = std::make_shared(table_function_name); + } if (!table_expression->database_and_table_name) throw Exception("Logical error: incorrect table expression", ErrorCodes::LOGICAL_ERROR); @@ -204,6 +219,8 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ for (auto & child : table_expression->children) if (child.get() == view_name.get()) child = view_query; + else if (child.get() && child->as() && child->as()->name == table_expression->table_function->as()->name) + child = view_query; } ASTPtr StorageView::restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name) diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 31c96addd08..297847e83bf 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -19,6 +19,7 @@ public: std::string getName() const override { return "View"; } bool isView() const override { return true; } + bool isParameterizedView() const { return is_parameterized_view; } /// It is passed inside the query and solved at its level. 
bool supportsSampling() const override { return true; } @@ -34,6 +35,8 @@ public: size_t max_block_size, unsigned num_streams) override; + void replaceQueryParameters(ASTPtr & outer_query, const NameToNameMap & parameter_values); + static void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot) { replaceWithSubquery(select_query, metadata_snapshot->getSelectQuery().inner_query->clone(), view_name); @@ -41,6 +44,9 @@ public: static void replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name); static ASTPtr restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name); + +protected: + bool is_parameterized_view; }; } diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference new file mode 100644 index 00000000000..d9afe5ff69c --- /dev/null +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -0,0 +1,3 @@ +20 +50 +10 diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql new file mode 100644 index 00000000000..0153ed95428 --- /dev/null +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -0,0 +1,32 @@ +DROP TABLE IF EXISTS v1; +DROP TABLE IF EXISTS Catalog; + +CREATE TABLE Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; + +INSERT INTO Catalog VALUES ('Pen', 10, 3); +INSERT INTO Catalog VALUES ('Book', 50, 2); +INSERT INTO Catalog VALUES ('Paper', 20, 1); + +CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price={price:UInt64}; +SELECT Price FROM v1(price=20); + +SELECT Price FROM v123(price=20); -- { serverError UNKNOWN_FUNCTION } + +CREATE VIEW v10 AS SELECT * FROM Catalog WHERE Price=10; +SELECT Price FROM v10(price=10); -- { serverError BAD_ARGUMENTS } + + +CREATE VIEW v2 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64}; +SELECT Price FROM v2(price=50,quantity=2); + +SELECT Price FROM v2(price=50); -- { serverError UNKNOWN_QUERY_PARAMETER} + +CREATE VIEW v3 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity=3; +SELECT Price FROM v3(price=10); + +CREATE VIEW v4 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={price:UInt64}; -- {serverError BAD_ARGUMENTS} + +DROP TABLE v1; +DROP TABLE v2; +DROP TABLE v3; +DROP TABLE Catalog; From bb451b2b27f444b98dd2b1def3e360e660f1216e Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 23 Sep 2022 13:35:22 +0200 Subject: [PATCH 002/262] 40907 Parameterized views as table functions Implementation: * Updated parsers by adding a bool allow_query_parameters while creating ordinary view, which is used in interpreters to allow query parameters in SELECT. * Added a check in ActionsVisitor if multiple parameters have same names while creating parameterised view. * Added bool in StorageView to represent parameterized view. * Updated processing of SELECT with parameter values to check for views and added substitution of values in the query parameters. Testing: * Added a test tests/queries/0_stateless/02428_parameterized_view.sql Documentation: * Updated the english documentation for VIEW. 
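As a concrete illustration of what this change set enables, the usage below follows the test added in tests/queries/0_stateless/02428_parameterized_view.sql (the Catalog table, the view v1, and the expected result 20 all come from that test):

``` sql
-- Source table from the test added by this patch.
CREATE TABLE Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory;
INSERT INTO Catalog VALUES ('Pen', 10, 3);
INSERT INTO Catalog VALUES ('Book', 50, 2);
INSERT INTO Catalog VALUES ('Paper', 20, 1);

-- The WHERE clause of the view uses a query parameter instead of a literal.
CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price={price:UInt64};

-- The view is then used as a table function; the supplied value is substituted for {price:UInt64}.
SELECT Price FROM v1(price=20);   -- returns 20
```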
--- .../sql-reference/statements/create/view.md | 16 +++- src/Interpreters/ActionsVisitor.cpp | 25 +++++- src/Interpreters/Context.cpp | 79 ++++++++++++++----- src/Interpreters/ExpressionAnalyzer.cpp | 19 ++++- src/Interpreters/InterpreterSelectQuery.cpp | 3 + src/Interpreters/QueryNormalizer.cpp | 19 ++++- src/Interpreters/QueryNormalizer.h | 3 + src/Parsers/ASTCreateQuery.cpp | 7 ++ src/Parsers/ASTCreateQuery.h | 4 +- src/Parsers/ASTSelectQuery.cpp | 51 ++++++++++++ src/Parsers/ASTSelectQuery.h | 7 +- src/Parsers/ASTSelectWithUnionQuery.cpp | 22 ++++++ src/Parsers/ASTSelectWithUnionQuery.h | 6 ++ src/Parsers/ExpressionListParsers.cpp | 2 +- src/Parsers/ExpressionListParsers.h | 6 ++ src/Parsers/ParserCreateQuery.cpp | 3 + src/Parsers/ParserSelectQuery.cpp | 1 + src/Parsers/ParserSelectQuery.h | 7 ++ src/Parsers/ParserSelectWithUnionQuery.cpp | 2 +- src/Parsers/ParserSelectWithUnionQuery.h | 3 + src/Parsers/ParserUnionQueryElement.cpp | 2 +- src/Parsers/ParserUnionQueryElement.h | 7 ++ src/Storages/StorageView.cpp | 19 ++++- src/Storages/StorageView.h | 6 ++ .../02428_parameterized_view.reference | 3 + .../0_stateless/02428_parameterized_view.sql | 32 ++++++++ 26 files changed, 320 insertions(+), 34 deletions(-) create mode 100644 tests/queries/0_stateless/02428_parameterized_view.reference create mode 100644 tests/queries/0_stateless/02428_parameterized_view.sql diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index da68ca05bbb..a7b3f4ef762 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -36,6 +36,18 @@ This query is fully equivalent to using the subquery: SELECT a, b, c FROM (SELECT ...) ``` +## Parameterized View +This is similar to a normal view, but it can be created with parameters instead of literals and can be used as a table function by substituting the values of the parameters. + +``` sql +CREATE VIEW view AS SELECT * FROM TABLE WHERE Column1={column1:datatype1} and Column2={column2:datatype2} ... +``` +The above creates a view for the table, which can be used as a table function by substituting value1 & value2 as shown below. + +``` sql +SELECT * FROM view(column1=value1, column2=value2 ...) +``` + ## Materialized View ``` sql @@ -74,9 +86,7 @@ To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop This is an experimental feature that may change in backwards-incompatible ways in the future releases. Enable usage of live views and `WATCH` query using [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view) setting. Input the command `set allow_experimental_live_view = 1`. ::: -```sql -CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... -``` + Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query. 
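To make the generic {column:datatype} syntax from the Parameterized View section above concrete, here is a sketch with two typed parameters, based on the Catalog table used in the test shipped with this patch (the view name below is illustrative, not part of the patch):

``` sql
-- Each parameter has a name and a data type; both appear in the view definition.
CREATE VIEW catalog_filtered AS
    SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64};

-- All parameters declared in the view must be supplied when it is used as a table function.
SELECT Price FROM catalog_filtered(price=50, quantity=2);   -- returns 50
```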
diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 54faf37f236..0ebc6857779 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include @@ -742,9 +743,29 @@ std::optional ActionsMatcher::getNameAndTypeFromAST(const ASTPt return NameAndTypePair(child_column_name, node->result_type); if (!data.only_consts) - throw Exception("Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), - ErrorCodes::UNKNOWN_IDENTIFIER); + { + bool has_query_parameter = false; + std::queue astQueue; + astQueue.push(ast); + + while (!astQueue.empty()) + { + auto current = astQueue.front(); + astQueue.pop(); + + if (auto * ast_query_parameter = current->as()) + has_query_parameter = true; + + for (auto astChild : current->children) + astQueue.push(astChild); + } + + if (!has_query_parameter) + throw Exception( + "Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), + ErrorCodes::UNKNOWN_IDENTIFIER); + } return {}; } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index be47338541c..e3276f6006d 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -94,6 +94,8 @@ #include #include #include +#include +#include #if USE_ROCKSDB #include @@ -137,6 +139,7 @@ namespace ErrorCodes extern const int INVALID_SETTING_VALUE; extern const int UNKNOWN_READ_METHOD; extern const int NOT_IMPLEMENTED; + extern const int UNKNOWN_FUNCTION; } @@ -1131,32 +1134,72 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) if (!res) { - TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); - if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) + try { - const auto & insertion_table = getInsertionTable(); - if (!insertion_table.empty()) + TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); + if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) { - const auto & structure_hint - = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; - table_function_ptr->setStructureHint(structure_hint); + const auto & insertion_table = getInsertionTable(); + if (!insertion_table.empty()) + { + const auto & structure_hint + = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; + table_function_ptr->setStructureHint(structure_hint); + } } - } - res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); + res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); - /// Since ITableFunction::parseArguments() may change table_expression, i.e.: - /// - /// remote('127.1', system.one) -> remote('127.1', 'system.one'), - /// - auto new_hash = table_expression->getTreeHash(); - if (hash != new_hash) + /// Since ITableFunction::parseArguments() may change table_expression, i.e.: + /// + /// remote('127.1', system.one) -> remote('127.1', 'system.one'), + /// + auto new_hash = table_expression->getTreeHash(); + if (hash != new_hash) + { + key = toString(new_hash.first) + '_' + toString(new_hash.second); + 
table_function_results[key] = res; + } + + return res; + }catch (DB::Exception &table_function_exception) { - key = toString(new_hash.first) + '_' + toString(new_hash.second); - table_function_results[key] = res; + if (table_function_exception.code() == ErrorCodes::UNKNOWN_FUNCTION) + { + if (auto ast_function = table_expression->as()) + { + try + { + res = DatabaseCatalog::instance().getTable({getCurrentDatabase(), ast_function->name}, getQueryContext()); + if (res.get()->isView() && res->as()->isParameterizedView()) + return res; + else + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Not a parameterized view {}", + ast_function->name); + } + } + catch (DB::Exception &view_exception) + { + if (view_exception.code() == ErrorCodes::UNKNOWN_TABLE) + throw Exception( + ErrorCodes::UNKNOWN_FUNCTION, + "Unknown table function {} OR Unknown parameterized view {}", + table_function_exception.message(), + view_exception.message()); + else + throw; + } + } + else + throw; + } + else + throw; } - return res; } return res; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 9daa42bf499..d56cc47a34b 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1286,6 +1286,9 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, getRootActions(select_query->where(), only_types, step.actions()); + if (select_query->allow_query_parameters && select_query->hasQueryParameters()) + return true; + auto where_column_name = select_query->where()->getColumnName(); step.addRequiredOutput(where_column_name); @@ -1902,10 +1905,15 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( ExpressionActions( before_where, ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample); - auto & column_elem = before_where_sample.getByName(query.where()->getColumnName()); - /// If the filter column is a constant, record it. - if (column_elem.column) - where_constant_filter_description = ConstantFilterDescription(*column_elem.column); + + if (!(query.allow_query_parameters && query.hasQueryParameters())) + { + auto & column_elem + = before_where_sample.getByName(query.where()->getColumnName()); + /// If the filter column is a constant, record it. + if (column_elem.column) + where_constant_filter_description = ConstantFilterDescription(*column_elem.column); + } } } chain.addStep(); @@ -2066,6 +2074,9 @@ void ExpressionAnalysisResult::finalize( ssize_t & having_step_num, const ASTSelectQuery & query) { + if (query.allow_query_parameters && query.hasQueryParameters()) + return; + if (prewhere_step_num >= 0) { const ExpressionActionsChain::Step & step = *chain.steps.at(prewhere_step_num); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index a77882c85d2..88df37fca34 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -500,7 +500,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it. 
ASTPtr view_table; if (view) + { view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot); + view->replaceQueryParameters(query_ptr, getSelectQuery().getQueryParameterValues()); + } syntax_analyzer_result = TreeRewriter(context).analyzeSelect( query_ptr, diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index 2a8b256c3d1..280904c9aa9 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -122,6 +122,16 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) } } +void QueryNormalizer::visit(ASTQueryParameter & node, const ASTPtr & ast, Data & data) +{ + auto it_alias = data.aliases.find(node.name); + if (it_alias != data.aliases.end()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Same alias used multiple times {} ", backQuote(node.name)); + + data.aliases[node.name] =ast; +} + + void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr &, Data & data) { /// normalize JOIN ON section @@ -142,6 +152,8 @@ static bool needVisitChild(const ASTPtr & child) /// special visitChildren() for ASTSelectQuery void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr &, Data & data) { + data.allow_query_parameters = select.allow_query_parameters; + for (auto & child : select.children) { if (needVisitChild(child)) @@ -257,7 +269,12 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) else if (auto * node_select = ast->as()) visit(*node_select, ast, data); else if (auto * node_param = ast->as()) - throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); + { + if (data.allow_query_parameters) + visit(*node_param, ast, data); + else + throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); + } else if (auto * node_function = ast->as()) if (node_function->parameters) visit(node_function->parameters, data); diff --git a/src/Interpreters/QueryNormalizer.h b/src/Interpreters/QueryNormalizer.h index f532d869789..ffd2c46ca77 100644 --- a/src/Interpreters/QueryNormalizer.h +++ b/src/Interpreters/QueryNormalizer.h @@ -13,6 +13,7 @@ class ASTSelectQuery; class ASTIdentifier; struct ASTTablesInSelectQueryElement; class Context; +class ASTQueryParameter; class QueryNormalizer @@ -52,6 +53,7 @@ public: /// It's Ok to have "c + 1 AS c" in queries, but not in table definition const bool allow_self_aliases; /// for constructs like "SELECT column + 1 AS column" + bool allow_query_parameters; Data(Aliases & aliases_, const NameSet & source_columns_set_, bool ignore_alias_, ExtractedSettings && settings_, bool allow_self_aliases_) : aliases(aliases_) @@ -80,6 +82,7 @@ private: static void visit(ASTIdentifier &, ASTPtr &, Data &); static void visit(ASTTablesInSelectQueryElement &, const ASTPtr &, Data &); static void visit(ASTSelectQuery &, const ASTPtr &, Data &); + static void visit(ASTQueryParameter &, const ASTPtr &, Data &); static void visitChildren(IAST * node, Data & data); }; diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index a277960643b..0fd7ca098e1 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -449,4 +449,11 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat } } +bool ASTCreateQuery::isParameterizedView() const +{ + if (is_ordinary_view && select && select->hasQueryParameters()) + return true; + return false; +} + } diff --git 
a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index f3729b1523f..2a6da778211 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -24,7 +24,7 @@ public: IAST * sample_by = nullptr; IAST * ttl_table = nullptr; ASTSetQuery * settings = nullptr; - + bool allow_query_parameters = false; String getID(char) const override { return "Storage definition"; } @@ -120,6 +120,8 @@ public: bool isView() const { return is_ordinary_view || is_materialized_view || is_live_view || is_window_view; } + bool isParameterizedView() const; + QueryKind getQueryKind() const override { return QueryKind::Create; } protected: diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 76849653b4e..b97c3dbc585 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB @@ -474,4 +475,54 @@ void ASTSelectQuery::setFinal() // NOLINT method can be made const tables_element.table_expression->as().final = true; } +bool ASTSelectQuery::hasQueryParameters() const +{ + std::queue queue; + queue.push(this->clone()); + + while (!queue.empty()) + { + auto ast = queue.front(); + queue.pop(); + + if (ast->as()) + return true; + + for (auto child : ast->children) + queue.push(child); + } + return false; +} + +NameToNameMap ASTSelectQuery::getQueryParameterValues() const +{ + NameToNameMap parameter_values; + std::queue queue; + queue.push(this->clone()); + + while (!queue.empty()) + { + auto ast = queue.front(); + queue.pop(); + if (auto expression_list = ast->as()) + { + if (expression_list->children.size() == 2) + { + if (auto identifier = expression_list->children[0]->as()) + { + if (auto literal = expression_list->children[1]->as()) + { + + parameter_values[identifier->name()] = toString(literal->value); + } + } + } + } + for (auto child : ast->children) + queue.push(child); + } + + return parameter_values; +} + } diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 5e3af545f12..e8eed092472 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -2,7 +2,9 @@ #include #include - +#include +#include +#include namespace DB { @@ -88,6 +90,7 @@ public: bool group_by_with_constant_keys = false; bool group_by_with_grouping_sets = false; bool limit_with_ties = false; + bool allow_query_parameters = false; ASTPtr & refSelect() { return getExpression(Expression::SELECT); } ASTPtr & refTables() { return getExpression(Expression::TABLES); } @@ -142,6 +145,8 @@ public: void setFinal(); QueryKind getQueryKind() const override { return QueryKind::Select; } + bool hasQueryParameters() const; + NameToNameMap getQueryParameterValues() const; protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index bc413fbe16d..11ac252aee2 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -86,4 +86,26 @@ bool ASTSelectWithUnionQuery::hasNonDefaultUnionMode() const || set_of_modes.contains(SelectUnionMode::EXCEPT_DISTINCT); } +bool ASTSelectWithUnionQuery::hasQueryParameters() const +{ + std::queue queue; + queue.push(this->clone()); + + while (!queue.empty()) + { + auto current = queue.front(); + queue.pop(); + + if (auto * select = current->as()) + { + if (select->hasQueryParameters()) + return true; + } + + for (auto child : current->children) + 
queue.push(child); + } + return false; +} + } diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index 457a3361b1e..ef8e50c47fd 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -2,6 +2,8 @@ #include #include +#include +#include namespace DB { @@ -31,6 +33,10 @@ public: /// Consider any mode other than ALL as non-default. bool hasNonDefaultUnionMode() const; + + bool hasQueryParameters() const; + + }; } diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 4e88e5c68e6..59b5b8b98cb 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -121,7 +121,7 @@ bool ParserList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserUnionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserUnionQueryElement elem_parser; + ParserUnionQueryElement elem_parser(allow_query_parameters); ParserKeyword s_union_parser("UNION"); ParserKeyword s_all_parser("ALL"); ParserKeyword s_distinct_parser("DISTINCT"); diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 05c7ec946ee..b0f6e66c213 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -84,6 +84,11 @@ private: class ParserUnionList : public IParserBase { public: + ParserUnionList(bool allow_query_parameters_=false) + : allow_query_parameters(allow_query_parameters_) + { + } + template static bool parseUtil(Pos & pos, const ElemFunc & parse_element, const SepFunc & parse_separator) { @@ -108,6 +113,7 @@ public: } auto getUnionModes() const { return union_modes; } + bool allow_query_parameters; protected: const char * getName() const override { return "list of union elements"; } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 08240abe8c6..26dcfd5079f 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -1285,7 +1285,10 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec is_materialized_view = true; } else + { is_ordinary_view = true; + select_p.allow_query_parameters = true; + } if (!s_view.ignore(pos, expected)) return false; diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index cf335270734..61381573421 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -34,6 +34,7 @@ namespace ErrorCodes bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { auto select_query = std::make_shared(); + select_query->allow_query_parameters = allow_query_parameters; node = select_query; ParserKeyword s_select("SELECT"); diff --git a/src/Parsers/ParserSelectQuery.h b/src/Parsers/ParserSelectQuery.h index deac25df57d..708b051e046 100644 --- a/src/Parsers/ParserSelectQuery.h +++ b/src/Parsers/ParserSelectQuery.h @@ -9,6 +9,13 @@ namespace DB class ParserSelectQuery : public IParserBase { +public: + ParserSelectQuery(bool allow_query_parameters_=false) + : allow_query_parameters(allow_query_parameters_) + { + } + bool allow_query_parameters; + protected: const char * getName() const override { return "SELECT query"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/ParserSelectWithUnionQuery.cpp b/src/Parsers/ParserSelectWithUnionQuery.cpp index 532a9e20735..39204ee457d 100644 --- a/src/Parsers/ParserSelectWithUnionQuery.cpp +++ 
b/src/Parsers/ParserSelectWithUnionQuery.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr list_node; - ParserUnionList parser; + ParserUnionList parser(allow_query_parameters); if (!parser.parse(pos, list_node, expected)) return false; diff --git a/src/Parsers/ParserSelectWithUnionQuery.h b/src/Parsers/ParserSelectWithUnionQuery.h index 0bf2946e429..6edf8a8d60e 100644 --- a/src/Parsers/ParserSelectWithUnionQuery.h +++ b/src/Parsers/ParserSelectWithUnionQuery.h @@ -8,6 +8,9 @@ namespace DB class ParserSelectWithUnionQuery : public IParserBase { +public: + bool allow_query_parameters = false; + protected: const char * getName() const override { return "SELECT query, possibly with UNION"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/ParserUnionQueryElement.cpp b/src/Parsers/ParserUnionQueryElement.cpp index efd022e6362..0ddaa323404 100644 --- a/src/Parsers/ParserUnionQueryElement.cpp +++ b/src/Parsers/ParserUnionQueryElement.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserUnionQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (!ParserSubquery().parse(pos, node, expected) && !ParserSelectQuery().parse(pos, node, expected)) + if (!ParserSubquery().parse(pos, node, expected) && !ParserSelectQuery(allow_query_parameters).parse(pos, node, expected)) return false; if (const auto * ast_subquery = node->as()) diff --git a/src/Parsers/ParserUnionQueryElement.h b/src/Parsers/ParserUnionQueryElement.h index 6b63c62c85b..a3fd47c496b 100644 --- a/src/Parsers/ParserUnionQueryElement.h +++ b/src/Parsers/ParserUnionQueryElement.h @@ -9,6 +9,13 @@ namespace DB class ParserUnionQueryElement : public IParserBase { +public: + ParserUnionQueryElement(bool allow_query_parameters_=false) + : allow_query_parameters(allow_query_parameters_) + { + } + bool allow_query_parameters; + protected: const char * getName() const override { return "SELECT query, subquery, possibly with UNION"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index adaf1c4e404..32d0a08777d 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -23,6 +23,8 @@ #include #include +#include + namespace DB { @@ -99,6 +101,7 @@ StorageView::StorageView( SelectQueryDescription description; description.inner_query = query.select->ptr(); + is_parameterized_view = query.isParameterizedView(); storage_metadata.setSelectQuery(description); setInMemoryMetadata(storage_metadata); } @@ -173,6 +176,15 @@ static ASTTableExpression * getFirstTableExpression(ASTSelectQuery & select_quer return select_element->table_expression->as(); } +void StorageView::replaceQueryParameters(ASTPtr & outer_query, const NameToNameMap & parameter_values) +{ + if (is_parameterized_view) + { + ReplaceQueryParameterVisitor visitor(parameter_values); + visitor.visit(outer_query); + } +} + void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name) { ASTTableExpression * table_expression = getFirstTableExpression(outer_query); @@ -185,8 +197,11 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ auto table_function_name = table_expression->table_function->as()->name; if (table_function_name == "view" || table_function_name == "viewIfPermitted") table_expression->database_and_table_name = std::make_shared("__view"); - if 
(table_function_name == "merge") + else if (table_function_name == "merge") table_expression->database_and_table_name = std::make_shared("__merge"); + else + table_expression->database_and_table_name = std::make_shared(table_function_name); + } if (!table_expression->database_and_table_name) throw Exception("Logical error: incorrect table expression", ErrorCodes::LOGICAL_ERROR); @@ -204,6 +219,8 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ for (auto & child : table_expression->children) if (child.get() == view_name.get()) child = view_query; + else if (child.get() && child->as() && child->as()->name == table_expression->table_function->as()->name) + child = view_query; } ASTPtr StorageView::restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name) diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 31c96addd08..297847e83bf 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -19,6 +19,7 @@ public: std::string getName() const override { return "View"; } bool isView() const override { return true; } + bool isParameterizedView() const { return is_parameterized_view; } /// It is passed inside the query and solved at its level. bool supportsSampling() const override { return true; } @@ -34,6 +35,8 @@ public: size_t max_block_size, unsigned num_streams) override; + void replaceQueryParameters(ASTPtr & outer_query, const NameToNameMap & parameter_values); + static void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot) { replaceWithSubquery(select_query, metadata_snapshot->getSelectQuery().inner_query->clone(), view_name); @@ -41,6 +44,9 @@ public: static void replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name); static ASTPtr restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name); + +protected: + bool is_parameterized_view; }; } diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference new file mode 100644 index 00000000000..d9afe5ff69c --- /dev/null +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -0,0 +1,3 @@ +20 +50 +10 diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql new file mode 100644 index 00000000000..0153ed95428 --- /dev/null +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -0,0 +1,32 @@ +DROP TABLE IF EXISTS v1; +DROP TABLE IF EXISTS Catalog; + +CREATE TABLE Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; + +INSERT INTO Catalog VALUES ('Pen', 10, 3); +INSERT INTO Catalog VALUES ('Book', 50, 2); +INSERT INTO Catalog VALUES ('Paper', 20, 1); + +CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price={price:UInt64}; +SELECT Price FROM v1(price=20); + +SELECT Price FROM v123(price=20); -- { serverError UNKNOWN_FUNCTION } + +CREATE VIEW v10 AS SELECT * FROM Catalog WHERE Price=10; +SELECT Price FROM v10(price=10); -- { serverError BAD_ARGUMENTS } + + +CREATE VIEW v2 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64}; +SELECT Price FROM v2(price=50,quantity=2); + +SELECT Price FROM v2(price=50); -- { serverError UNKNOWN_QUERY_PARAMETER} + +CREATE VIEW v3 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity=3; +SELECT Price FROM v3(price=10); + +CREATE VIEW v4 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND 
Quantity={price:UInt64}; -- {serverError BAD_ARGUMENTS} + +DROP TABLE v1; +DROP TABLE v2; +DROP TABLE v3; +DROP TABLE Catalog; From 456baddbc74a1e1d783647ee9779ad676da08171 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Sat, 24 Sep 2022 12:01:18 +0200 Subject: [PATCH 003/262] 40907 Parameterized views as table functions Implementation * Fix for Build fails - updated conversion of Field to String and includes --- src/Parsers/ASTSelectQuery.cpp | 5 ++++- src/Parsers/ASTSelectQuery.h | 3 --- src/Parsers/ASTSelectWithUnionQuery.cpp | 2 ++ src/Parsers/ASTSelectWithUnionQuery.h | 2 -- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index b97c3dbc585..b187dc74f02 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -8,7 +8,10 @@ #include #include #include +#include +#include +#include namespace DB { @@ -513,7 +516,7 @@ NameToNameMap ASTSelectQuery::getQueryParameterValues() const if (auto literal = expression_list->children[1]->as()) { - parameter_values[identifier->name()] = toString(literal->value); + parameter_values[identifier->name()] = convertFieldToString(literal->value); } } } diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index e8eed092472..8ece05808f7 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -2,9 +2,6 @@ #include #include -#include -#include -#include namespace DB { diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index 11ac252aee2..c38e4e2c747 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -3,7 +3,9 @@ #include #include #include +#include +#include #include namespace DB diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index ef8e50c47fd..8d93760426a 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -2,8 +2,6 @@ #include #include -#include -#include namespace DB { From 4d414b69b86875dc72e3a458cb021389d33e476c Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 26 Sep 2022 10:30:28 +0200 Subject: [PATCH 004/262] 40907 Parameterized views as table functions Implementation * Fix for clang-today build fails - updated to use const reference in ASTSelectQuery.cpp --- src/Parsers/ASTSelectQuery.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index b187dc74f02..3be3f6c5c9a 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -507,13 +507,13 @@ NameToNameMap ASTSelectQuery::getQueryParameterValues() const { auto ast = queue.front(); queue.pop(); - if (auto expression_list = ast->as()) + if (auto * expression_list = ast->as()) { if (expression_list->children.size() == 2) { - if (auto identifier = expression_list->children[0]->as()) + if (auto * identifier = expression_list->children[0]->as()) { - if (auto literal = expression_list->children[1]->as()) + if (auto * literal = expression_list->children[1]->as()) { parameter_values[identifier->name()] = convertFieldToString(literal->value); @@ -521,7 +521,7 @@ NameToNameMap ASTSelectQuery::getQueryParameterValues() const } } } - for (auto child : ast->children) + for (const auto & child : ast->children) queue.push(child); } From 4fb1dffb9e04d44e9074d7cc8012d84cc96fc662 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 26 Sep 2022 13:08:54 +0200 Subject: 
[PATCH 005/262] 40907 Parameterized views as table functions Implementation * Fix for clang-today build fails - updated to use const reference in ASTSelectQuery.cpp & ASTSelectWithUnionQuery.cpp --- src/Parsers/ASTSelectQuery.cpp | 2 +- src/Parsers/ASTSelectWithUnionQuery.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 3be3f6c5c9a..7537628b3a5 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -491,7 +491,7 @@ bool ASTSelectQuery::hasQueryParameters() const if (ast->as()) return true; - for (auto child : ast->children) + for (const auto & child : ast->children) queue.push(child); } return false; diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index c38e4e2c747..76fe9582615 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -104,7 +104,7 @@ bool ASTSelectWithUnionQuery::hasQueryParameters() const return true; } - for (auto child : current->children) + for (const auto & child : current->children) queue.push(child); } return false; From 038352bfce1a24c1620067f4f5d2a160e69aea8b Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 23 Sep 2022 13:35:22 +0200 Subject: [PATCH 006/262] 40907 Parameterized views as table functions Implementation: * Updated parsers by adding a bool allow_query_parameters while creating ordinary view, which is used in interpreters to allow query parameters in SELECT. * Added a check in ActionsVisitor if multiple parameters have same names while creating parameterised view. * Added bool in StorageView to represent parameterized view. * Updated processing of SELECT with parameter values to check for views and added substitution of values in the query parameters. Testing: * Added a test tests/queries/0_stateless/02428_parameterized_view.sql Documentation: * Updated the english documentation for VIEW. 
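The implementation points above describe the whole flow: the CREATE VIEW parser keeps `{name:type}` placeholders as query parameters, StorageView remembers that the view is parameterized, and the SELECT path substitutes the supplied values before analysis. As a quick orientation, here is a minimal SQL sketch of that behaviour, reusing the `Catalog` table from the regression test added in this patch; the view name `priced` is only illustrative.

``` sql
-- Same table as tests/queries/0_stateless/02428_parameterized_view.sql.
CREATE TABLE Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory;
INSERT INTO Catalog VALUES ('Pen', 10, 3), ('Book', 50, 2), ('Paper', 20, 1);

-- CREATE VIEW is parsed with allow_query_parameters, so {price:UInt64} is kept
-- as a query parameter instead of being rejected as an unknown identifier.
CREATE VIEW priced AS SELECT * FROM Catalog WHERE Price = {price:UInt64};

-- The view is then usable as a table function: the value 20 is substituted
-- for {price:UInt64} before the SELECT is analyzed.
SELECT Name, Price FROM priced(price=20);   -- returns the 'Paper' row
```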
--- .../sql-reference/statements/create/view.md | 16 +++- src/Interpreters/ActionsVisitor.cpp | 25 +++++- src/Interpreters/Context.cpp | 79 ++++++++++++++----- src/Interpreters/ExpressionAnalyzer.cpp | 19 ++++- src/Interpreters/InterpreterSelectQuery.cpp | 3 + src/Interpreters/QueryNormalizer.cpp | 19 ++++- src/Interpreters/QueryNormalizer.h | 3 + src/Parsers/ASTCreateQuery.cpp | 7 ++ src/Parsers/ASTCreateQuery.h | 4 +- src/Parsers/ASTSelectQuery.cpp | 51 ++++++++++++ src/Parsers/ASTSelectQuery.h | 7 +- src/Parsers/ASTSelectWithUnionQuery.cpp | 22 ++++++ src/Parsers/ASTSelectWithUnionQuery.h | 6 ++ src/Parsers/ExpressionListParsers.cpp | 2 +- src/Parsers/ExpressionListParsers.h | 6 ++ src/Parsers/ParserCreateQuery.cpp | 3 + src/Parsers/ParserSelectQuery.cpp | 1 + src/Parsers/ParserSelectQuery.h | 7 ++ src/Parsers/ParserSelectWithUnionQuery.cpp | 2 +- src/Parsers/ParserSelectWithUnionQuery.h | 3 + src/Parsers/ParserUnionQueryElement.cpp | 2 +- src/Parsers/ParserUnionQueryElement.h | 7 ++ src/Storages/StorageView.cpp | 19 ++++- src/Storages/StorageView.h | 6 ++ .../02428_parameterized_view.reference | 3 + .../0_stateless/02428_parameterized_view.sql | 32 ++++++++ 26 files changed, 320 insertions(+), 34 deletions(-) create mode 100644 tests/queries/0_stateless/02428_parameterized_view.reference create mode 100644 tests/queries/0_stateless/02428_parameterized_view.sql diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index da68ca05bbb..a7b3f4ef762 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -36,6 +36,18 @@ This query is fully equivalent to using the subquery: SELECT a, b, c FROM (SELECT ...) ``` +## Parameterized View +This is similar to normal view but can be created with parameter instead of literals and can be used as table functions by substituting the values of the parametes. + +``` sql +CREATE VIEW view AS SELECT * FROM TABLE WHERE Column1={column1:datatype1} and Column2={column2:datatype2} ... +``` +The above creates a view for table which can be used as table function by substituting value1 & value2 as show below. + +``` sql +SELECT * FROM view(column1=value1, column2=value2 ...) +``` + ## Materialized View ``` sql @@ -74,9 +86,7 @@ To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop This is an experimental feature that may change in backwards-incompatible ways in the future releases. Enable usage of live views and `WATCH` query using [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view) setting. Input the command `set allow_experimental_live_view = 1`. ::: -```sql -CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... -``` + Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query. 
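Beyond the happy path shown in the documentation hunk above, the regression test in this patch also pins down the expected failure modes. The SQL below is a sketch of those cases against the same `Catalog` table; the view names are illustrative, and the expected errors follow the test expectations rather than exact message text.

``` sql
-- A view without parameters cannot be called with them.
CREATE VIEW v_plain AS SELECT * FROM Catalog WHERE Price = 10;
SELECT * FROM v_plain(price=10);                 -- BAD_ARGUMENTS: not a parameterized view

-- Every declared parameter has to be supplied.
CREATE VIEW v_two AS SELECT * FROM Catalog
WHERE Price = {price:UInt64} AND Quantity = {quantity:UInt64};
SELECT * FROM v_two(price=50);                   -- UNKNOWN_QUERY_PARAMETER: quantity missing
SELECT * FROM v_two(price=50, quantity=2);       -- OK

-- Neither a table function nor an existing view.
SELECT * FROM no_such_view(price=20);            -- UNKNOWN_FUNCTION

-- Reusing a parameter name inside one view definition is rejected at CREATE time.
CREATE VIEW v_dup AS SELECT * FROM Catalog
WHERE Price = {price:UInt64} AND Quantity = {price:UInt64};   -- BAD_ARGUMENTS
```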
diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 54faf37f236..0ebc6857779 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -38,6 +38,7 @@ #include #include #include +#include #include @@ -742,9 +743,29 @@ std::optional ActionsMatcher::getNameAndTypeFromAST(const ASTPt return NameAndTypePair(child_column_name, node->result_type); if (!data.only_consts) - throw Exception("Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), - ErrorCodes::UNKNOWN_IDENTIFIER); + { + bool has_query_parameter = false; + std::queue astQueue; + astQueue.push(ast); + + while (!astQueue.empty()) + { + auto current = astQueue.front(); + astQueue.pop(); + + if (auto * ast_query_parameter = current->as()) + has_query_parameter = true; + + for (auto astChild : current->children) + astQueue.push(astChild); + } + + if (!has_query_parameter) + throw Exception( + "Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), + ErrorCodes::UNKNOWN_IDENTIFIER); + } return {}; } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index be47338541c..e3276f6006d 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -94,6 +94,8 @@ #include #include #include +#include +#include #if USE_ROCKSDB #include @@ -137,6 +139,7 @@ namespace ErrorCodes extern const int INVALID_SETTING_VALUE; extern const int UNKNOWN_READ_METHOD; extern const int NOT_IMPLEMENTED; + extern const int UNKNOWN_FUNCTION; } @@ -1131,32 +1134,72 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) if (!res) { - TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); - if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) + try { - const auto & insertion_table = getInsertionTable(); - if (!insertion_table.empty()) + TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); + if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) { - const auto & structure_hint - = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; - table_function_ptr->setStructureHint(structure_hint); + const auto & insertion_table = getInsertionTable(); + if (!insertion_table.empty()) + { + const auto & structure_hint + = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; + table_function_ptr->setStructureHint(structure_hint); + } } - } - res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); + res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); - /// Since ITableFunction::parseArguments() may change table_expression, i.e.: - /// - /// remote('127.1', system.one) -> remote('127.1', 'system.one'), - /// - auto new_hash = table_expression->getTreeHash(); - if (hash != new_hash) + /// Since ITableFunction::parseArguments() may change table_expression, i.e.: + /// + /// remote('127.1', system.one) -> remote('127.1', 'system.one'), + /// + auto new_hash = table_expression->getTreeHash(); + if (hash != new_hash) + { + key = toString(new_hash.first) + '_' + toString(new_hash.second); + 
table_function_results[key] = res; + } + + return res; + }catch (DB::Exception &table_function_exception) { - key = toString(new_hash.first) + '_' + toString(new_hash.second); - table_function_results[key] = res; + if (table_function_exception.code() == ErrorCodes::UNKNOWN_FUNCTION) + { + if (auto ast_function = table_expression->as()) + { + try + { + res = DatabaseCatalog::instance().getTable({getCurrentDatabase(), ast_function->name}, getQueryContext()); + if (res.get()->isView() && res->as()->isParameterizedView()) + return res; + else + { + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "Not a parameterized view {}", + ast_function->name); + } + } + catch (DB::Exception &view_exception) + { + if (view_exception.code() == ErrorCodes::UNKNOWN_TABLE) + throw Exception( + ErrorCodes::UNKNOWN_FUNCTION, + "Unknown table function {} OR Unknown parameterized view {}", + table_function_exception.message(), + view_exception.message()); + else + throw; + } + } + else + throw; + } + else + throw; } - return res; } return res; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 9daa42bf499..d56cc47a34b 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1286,6 +1286,9 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, getRootActions(select_query->where(), only_types, step.actions()); + if (select_query->allow_query_parameters && select_query->hasQueryParameters()) + return true; + auto where_column_name = select_query->where()->getColumnName(); step.addRequiredOutput(where_column_name); @@ -1902,10 +1905,15 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( ExpressionActions( before_where, ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample); - auto & column_elem = before_where_sample.getByName(query.where()->getColumnName()); - /// If the filter column is a constant, record it. - if (column_elem.column) - where_constant_filter_description = ConstantFilterDescription(*column_elem.column); + + if (!(query.allow_query_parameters && query.hasQueryParameters())) + { + auto & column_elem + = before_where_sample.getByName(query.where()->getColumnName()); + /// If the filter column is a constant, record it. + if (column_elem.column) + where_constant_filter_description = ConstantFilterDescription(*column_elem.column); + } } } chain.addStep(); @@ -2066,6 +2074,9 @@ void ExpressionAnalysisResult::finalize( ssize_t & having_step_num, const ASTSelectQuery & query) { + if (query.allow_query_parameters && query.hasQueryParameters()) + return; + if (prewhere_step_num >= 0) { const ExpressionActionsChain::Step & step = *chain.steps.at(prewhere_step_num); diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index a77882c85d2..88df37fca34 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -500,7 +500,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it. 
ASTPtr view_table; if (view) + { view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot); + view->replaceQueryParameters(query_ptr, getSelectQuery().getQueryParameterValues()); + } syntax_analyzer_result = TreeRewriter(context).analyzeSelect( query_ptr, diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index 2a8b256c3d1..280904c9aa9 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -122,6 +122,16 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) } } +void QueryNormalizer::visit(ASTQueryParameter & node, const ASTPtr & ast, Data & data) +{ + auto it_alias = data.aliases.find(node.name); + if (it_alias != data.aliases.end()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Same alias used multiple times {} ", backQuote(node.name)); + + data.aliases[node.name] =ast; +} + + void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr &, Data & data) { /// normalize JOIN ON section @@ -142,6 +152,8 @@ static bool needVisitChild(const ASTPtr & child) /// special visitChildren() for ASTSelectQuery void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr &, Data & data) { + data.allow_query_parameters = select.allow_query_parameters; + for (auto & child : select.children) { if (needVisitChild(child)) @@ -257,7 +269,12 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) else if (auto * node_select = ast->as()) visit(*node_select, ast, data); else if (auto * node_param = ast->as()) - throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); + { + if (data.allow_query_parameters) + visit(*node_param, ast, data); + else + throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); + } else if (auto * node_function = ast->as()) if (node_function->parameters) visit(node_function->parameters, data); diff --git a/src/Interpreters/QueryNormalizer.h b/src/Interpreters/QueryNormalizer.h index f532d869789..ffd2c46ca77 100644 --- a/src/Interpreters/QueryNormalizer.h +++ b/src/Interpreters/QueryNormalizer.h @@ -13,6 +13,7 @@ class ASTSelectQuery; class ASTIdentifier; struct ASTTablesInSelectQueryElement; class Context; +class ASTQueryParameter; class QueryNormalizer @@ -52,6 +53,7 @@ public: /// It's Ok to have "c + 1 AS c" in queries, but not in table definition const bool allow_self_aliases; /// for constructs like "SELECT column + 1 AS column" + bool allow_query_parameters; Data(Aliases & aliases_, const NameSet & source_columns_set_, bool ignore_alias_, ExtractedSettings && settings_, bool allow_self_aliases_) : aliases(aliases_) @@ -80,6 +82,7 @@ private: static void visit(ASTIdentifier &, ASTPtr &, Data &); static void visit(ASTTablesInSelectQueryElement &, const ASTPtr &, Data &); static void visit(ASTSelectQuery &, const ASTPtr &, Data &); + static void visit(ASTQueryParameter &, const ASTPtr &, Data &); static void visitChildren(IAST * node, Data & data); }; diff --git a/src/Parsers/ASTCreateQuery.cpp b/src/Parsers/ASTCreateQuery.cpp index a277960643b..0fd7ca098e1 100644 --- a/src/Parsers/ASTCreateQuery.cpp +++ b/src/Parsers/ASTCreateQuery.cpp @@ -449,4 +449,11 @@ void ASTCreateQuery::formatQueryImpl(const FormatSettings & settings, FormatStat } } +bool ASTCreateQuery::isParameterizedView() const +{ + if (is_ordinary_view && select && select->hasQueryParameters()) + return true; + return false; +} + } diff --git 
a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index f3729b1523f..2a6da778211 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -24,7 +24,7 @@ public: IAST * sample_by = nullptr; IAST * ttl_table = nullptr; ASTSetQuery * settings = nullptr; - + bool allow_query_parameters = false; String getID(char) const override { return "Storage definition"; } @@ -120,6 +120,8 @@ public: bool isView() const { return is_ordinary_view || is_materialized_view || is_live_view || is_window_view; } + bool isParameterizedView() const; + QueryKind getQueryKind() const override { return QueryKind::Create; } protected: diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 76849653b4e..b97c3dbc585 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -7,6 +7,7 @@ #include #include #include +#include namespace DB @@ -474,4 +475,54 @@ void ASTSelectQuery::setFinal() // NOLINT method can be made const tables_element.table_expression->as().final = true; } +bool ASTSelectQuery::hasQueryParameters() const +{ + std::queue queue; + queue.push(this->clone()); + + while (!queue.empty()) + { + auto ast = queue.front(); + queue.pop(); + + if (ast->as()) + return true; + + for (auto child : ast->children) + queue.push(child); + } + return false; +} + +NameToNameMap ASTSelectQuery::getQueryParameterValues() const +{ + NameToNameMap parameter_values; + std::queue queue; + queue.push(this->clone()); + + while (!queue.empty()) + { + auto ast = queue.front(); + queue.pop(); + if (auto expression_list = ast->as()) + { + if (expression_list->children.size() == 2) + { + if (auto identifier = expression_list->children[0]->as()) + { + if (auto literal = expression_list->children[1]->as()) + { + + parameter_values[identifier->name()] = toString(literal->value); + } + } + } + } + for (auto child : ast->children) + queue.push(child); + } + + return parameter_values; +} + } diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 5e3af545f12..e8eed092472 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -2,7 +2,9 @@ #include #include - +#include +#include +#include namespace DB { @@ -88,6 +90,7 @@ public: bool group_by_with_constant_keys = false; bool group_by_with_grouping_sets = false; bool limit_with_ties = false; + bool allow_query_parameters = false; ASTPtr & refSelect() { return getExpression(Expression::SELECT); } ASTPtr & refTables() { return getExpression(Expression::TABLES); } @@ -142,6 +145,8 @@ public: void setFinal(); QueryKind getQueryKind() const override { return QueryKind::Select; } + bool hasQueryParameters() const; + NameToNameMap getQueryParameterValues() const; protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index bc413fbe16d..11ac252aee2 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -86,4 +86,26 @@ bool ASTSelectWithUnionQuery::hasNonDefaultUnionMode() const || set_of_modes.contains(SelectUnionMode::EXCEPT_DISTINCT); } +bool ASTSelectWithUnionQuery::hasQueryParameters() const +{ + std::queue queue; + queue.push(this->clone()); + + while (!queue.empty()) + { + auto current = queue.front(); + queue.pop(); + + if (auto * select = current->as()) + { + if (select->hasQueryParameters()) + return true; + } + + for (auto child : current->children) + 
queue.push(child); + } + return false; +} + } diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index 457a3361b1e..ef8e50c47fd 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -2,6 +2,8 @@ #include #include +#include +#include namespace DB { @@ -31,6 +33,10 @@ public: /// Consider any mode other than ALL as non-default. bool hasNonDefaultUnionMode() const; + + bool hasQueryParameters() const; + + }; } diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 4e88e5c68e6..59b5b8b98cb 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -121,7 +121,7 @@ bool ParserList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserUnionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserUnionQueryElement elem_parser; + ParserUnionQueryElement elem_parser(allow_query_parameters); ParserKeyword s_union_parser("UNION"); ParserKeyword s_all_parser("ALL"); ParserKeyword s_distinct_parser("DISTINCT"); diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 05c7ec946ee..b0f6e66c213 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -84,6 +84,11 @@ private: class ParserUnionList : public IParserBase { public: + ParserUnionList(bool allow_query_parameters_=false) + : allow_query_parameters(allow_query_parameters_) + { + } + template static bool parseUtil(Pos & pos, const ElemFunc & parse_element, const SepFunc & parse_separator) { @@ -108,6 +113,7 @@ public: } auto getUnionModes() const { return union_modes; } + bool allow_query_parameters; protected: const char * getName() const override { return "list of union elements"; } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 08240abe8c6..26dcfd5079f 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -1285,7 +1285,10 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec is_materialized_view = true; } else + { is_ordinary_view = true; + select_p.allow_query_parameters = true; + } if (!s_view.ignore(pos, expected)) return false; diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index cf335270734..61381573421 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -34,6 +34,7 @@ namespace ErrorCodes bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { auto select_query = std::make_shared(); + select_query->allow_query_parameters = allow_query_parameters; node = select_query; ParserKeyword s_select("SELECT"); diff --git a/src/Parsers/ParserSelectQuery.h b/src/Parsers/ParserSelectQuery.h index deac25df57d..708b051e046 100644 --- a/src/Parsers/ParserSelectQuery.h +++ b/src/Parsers/ParserSelectQuery.h @@ -9,6 +9,13 @@ namespace DB class ParserSelectQuery : public IParserBase { +public: + ParserSelectQuery(bool allow_query_parameters_=false) + : allow_query_parameters(allow_query_parameters_) + { + } + bool allow_query_parameters; + protected: const char * getName() const override { return "SELECT query"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/ParserSelectWithUnionQuery.cpp b/src/Parsers/ParserSelectWithUnionQuery.cpp index 532a9e20735..39204ee457d 100644 --- a/src/Parsers/ParserSelectWithUnionQuery.cpp +++ 
b/src/Parsers/ParserSelectWithUnionQuery.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr list_node; - ParserUnionList parser; + ParserUnionList parser(allow_query_parameters); if (!parser.parse(pos, list_node, expected)) return false; diff --git a/src/Parsers/ParserSelectWithUnionQuery.h b/src/Parsers/ParserSelectWithUnionQuery.h index 0bf2946e429..6edf8a8d60e 100644 --- a/src/Parsers/ParserSelectWithUnionQuery.h +++ b/src/Parsers/ParserSelectWithUnionQuery.h @@ -8,6 +8,9 @@ namespace DB class ParserSelectWithUnionQuery : public IParserBase { +public: + bool allow_query_parameters = false; + protected: const char * getName() const override { return "SELECT query, possibly with UNION"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/ParserUnionQueryElement.cpp b/src/Parsers/ParserUnionQueryElement.cpp index efd022e6362..0ddaa323404 100644 --- a/src/Parsers/ParserUnionQueryElement.cpp +++ b/src/Parsers/ParserUnionQueryElement.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserUnionQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (!ParserSubquery().parse(pos, node, expected) && !ParserSelectQuery().parse(pos, node, expected)) + if (!ParserSubquery().parse(pos, node, expected) && !ParserSelectQuery(allow_query_parameters).parse(pos, node, expected)) return false; if (const auto * ast_subquery = node->as()) diff --git a/src/Parsers/ParserUnionQueryElement.h b/src/Parsers/ParserUnionQueryElement.h index 6b63c62c85b..a3fd47c496b 100644 --- a/src/Parsers/ParserUnionQueryElement.h +++ b/src/Parsers/ParserUnionQueryElement.h @@ -9,6 +9,13 @@ namespace DB class ParserUnionQueryElement : public IParserBase { +public: + ParserUnionQueryElement(bool allow_query_parameters_=false) + : allow_query_parameters(allow_query_parameters_) + { + } + bool allow_query_parameters; + protected: const char * getName() const override { return "SELECT query, subquery, possibly with UNION"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index adaf1c4e404..32d0a08777d 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -23,6 +23,8 @@ #include #include +#include + namespace DB { @@ -99,6 +101,7 @@ StorageView::StorageView( SelectQueryDescription description; description.inner_query = query.select->ptr(); + is_parameterized_view = query.isParameterizedView(); storage_metadata.setSelectQuery(description); setInMemoryMetadata(storage_metadata); } @@ -173,6 +176,15 @@ static ASTTableExpression * getFirstTableExpression(ASTSelectQuery & select_quer return select_element->table_expression->as(); } +void StorageView::replaceQueryParameters(ASTPtr & outer_query, const NameToNameMap & parameter_values) +{ + if (is_parameterized_view) + { + ReplaceQueryParameterVisitor visitor(parameter_values); + visitor.visit(outer_query); + } +} + void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name) { ASTTableExpression * table_expression = getFirstTableExpression(outer_query); @@ -185,8 +197,11 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ auto table_function_name = table_expression->table_function->as()->name; if (table_function_name == "view" || table_function_name == "viewIfPermitted") table_expression->database_and_table_name = std::make_shared("__view"); - if 
(table_function_name == "merge") + else if (table_function_name == "merge") table_expression->database_and_table_name = std::make_shared("__merge"); + else + table_expression->database_and_table_name = std::make_shared(table_function_name); + } if (!table_expression->database_and_table_name) throw Exception("Logical error: incorrect table expression", ErrorCodes::LOGICAL_ERROR); @@ -204,6 +219,8 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ for (auto & child : table_expression->children) if (child.get() == view_name.get()) child = view_query; + else if (child.get() && child->as() && child->as()->name == table_expression->table_function->as()->name) + child = view_query; } ASTPtr StorageView::restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name) diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 31c96addd08..297847e83bf 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -19,6 +19,7 @@ public: std::string getName() const override { return "View"; } bool isView() const override { return true; } + bool isParameterizedView() const { return is_parameterized_view; } /// It is passed inside the query and solved at its level. bool supportsSampling() const override { return true; } @@ -34,6 +35,8 @@ public: size_t max_block_size, unsigned num_streams) override; + void replaceQueryParameters(ASTPtr & outer_query, const NameToNameMap & parameter_values); + static void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot) { replaceWithSubquery(select_query, metadata_snapshot->getSelectQuery().inner_query->clone(), view_name); @@ -41,6 +44,9 @@ public: static void replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name); static ASTPtr restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name); + +protected: + bool is_parameterized_view; }; } diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference new file mode 100644 index 00000000000..d9afe5ff69c --- /dev/null +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -0,0 +1,3 @@ +20 +50 +10 diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql new file mode 100644 index 00000000000..0153ed95428 --- /dev/null +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -0,0 +1,32 @@ +DROP TABLE IF EXISTS v1; +DROP TABLE IF EXISTS Catalog; + +CREATE TABLE Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; + +INSERT INTO Catalog VALUES ('Pen', 10, 3); +INSERT INTO Catalog VALUES ('Book', 50, 2); +INSERT INTO Catalog VALUES ('Paper', 20, 1); + +CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price={price:UInt64}; +SELECT Price FROM v1(price=20); + +SELECT Price FROM v123(price=20); -- { serverError UNKNOWN_FUNCTION } + +CREATE VIEW v10 AS SELECT * FROM Catalog WHERE Price=10; +SELECT Price FROM v10(price=10); -- { serverError BAD_ARGUMENTS } + + +CREATE VIEW v2 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64}; +SELECT Price FROM v2(price=50,quantity=2); + +SELECT Price FROM v2(price=50); -- { serverError UNKNOWN_QUERY_PARAMETER} + +CREATE VIEW v3 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity=3; +SELECT Price FROM v3(price=10); + +CREATE VIEW v4 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND 
Quantity={price:UInt64}; -- {serverError BAD_ARGUMENTS} + +DROP TABLE v1; +DROP TABLE v2; +DROP TABLE v3; +DROP TABLE Catalog; From 1e3be976adb3d75fb927d2bb93196f4f2d21ef7d Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Sat, 24 Sep 2022 12:01:18 +0200 Subject: [PATCH 007/262] 40907 Parameterized views as table functions Implementation * Fix for Build fails - updated conversion of Field to String and includes --- src/Parsers/ASTSelectQuery.cpp | 5 ++++- src/Parsers/ASTSelectQuery.h | 3 --- src/Parsers/ASTSelectWithUnionQuery.cpp | 2 ++ src/Parsers/ASTSelectWithUnionQuery.h | 2 -- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index b97c3dbc585..b187dc74f02 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -8,7 +8,10 @@ #include #include #include +#include +#include +#include namespace DB { @@ -513,7 +516,7 @@ NameToNameMap ASTSelectQuery::getQueryParameterValues() const if (auto literal = expression_list->children[1]->as()) { - parameter_values[identifier->name()] = toString(literal->value); + parameter_values[identifier->name()] = convertFieldToString(literal->value); } } } diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index e8eed092472..8ece05808f7 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -2,9 +2,6 @@ #include #include -#include -#include -#include namespace DB { diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index 11ac252aee2..c38e4e2c747 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -3,7 +3,9 @@ #include #include #include +#include +#include #include namespace DB diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index ef8e50c47fd..8d93760426a 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -2,8 +2,6 @@ #include #include -#include -#include namespace DB { From 21de85d67cc9de7942317e1c33f1d58bc551045e Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 26 Sep 2022 10:30:28 +0200 Subject: [PATCH 008/262] 40907 Parameterized views as table functions Implementation * Fix for clang-today build fails - updated to use const reference in ASTSelectQuery.cpp --- src/Parsers/ASTSelectQuery.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index b187dc74f02..3be3f6c5c9a 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -507,13 +507,13 @@ NameToNameMap ASTSelectQuery::getQueryParameterValues() const { auto ast = queue.front(); queue.pop(); - if (auto expression_list = ast->as()) + if (auto * expression_list = ast->as()) { if (expression_list->children.size() == 2) { - if (auto identifier = expression_list->children[0]->as()) + if (auto * identifier = expression_list->children[0]->as()) { - if (auto literal = expression_list->children[1]->as()) + if (auto * literal = expression_list->children[1]->as()) { parameter_values[identifier->name()] = convertFieldToString(literal->value); @@ -521,7 +521,7 @@ NameToNameMap ASTSelectQuery::getQueryParameterValues() const } } } - for (auto child : ast->children) + for (const auto & child : ast->children) queue.push(child); } From 8b3c4ac50471d76bf0a7c3ae67b6bc70b288d63e Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 26 Sep 2022 13:08:54 +0200 Subject: 
[PATCH 009/262] 40907 Parameterized views as table functions Implementation * Fix for clang-today build fails - updated to use const reference in ASTSelectQuery.cpp & ASTSelectWithUnionQuery.cpp --- src/Parsers/ASTSelectQuery.cpp | 2 +- src/Parsers/ASTSelectWithUnionQuery.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 3be3f6c5c9a..7537628b3a5 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -491,7 +491,7 @@ bool ASTSelectQuery::hasQueryParameters() const if (ast->as()) return true; - for (auto child : ast->children) + for (const auto & child : ast->children) queue.push(child); } return false; diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index c38e4e2c747..76fe9582615 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -104,7 +104,7 @@ bool ASTSelectWithUnionQuery::hasQueryParameters() const return true; } - for (auto child : current->children) + for (const auto & child : current->children) queue.push(child); } return false; From bbc33a54b2e8b63870f08d91eba02f178aaeae6f Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Tue, 27 Sep 2022 16:30:59 +0200 Subject: [PATCH 010/262] Addressed review comments - 40907 Parameterized views as table functions Implementation * Fix for clang-today build fails - updated to use const in Context.cpp & const function in ActionsVisitior.cpp * Updated to use QueryParameterVisitor to check if query has query parameters * Updated executeTableFunction to check if table/table exists instead of try-catch approach * Fixed small review comments and style comments. Documentation: * Addressed review comments and added the LIVE view part which was removed by mistake in the previous commits. --- .../sql-reference/statements/create/view.md | 6 +- src/Interpreters/ActionsVisitor.cpp | 27 +---- src/Interpreters/Context.cpp | 107 +++++++----------- src/Interpreters/ExpressionAnalyzer.cpp | 5 +- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Interpreters/QueryNormalizer.cpp | 2 +- src/Interpreters/QueryParameterVisitor.cpp | 16 ++- src/Interpreters/QueryParameterVisitor.h | 3 + src/Parsers/ASTSelectQuery.cpp | 23 +--- src/Parsers/ASTSelectWithUnionQuery.cpp | 19 +--- src/Parsers/ExpressionListParsers.h | 2 +- src/Parsers/ParserSelectQuery.h | 2 +- src/Parsers/ParserUnionQueryElement.h | 3 +- src/Storages/StorageView.cpp | 8 +- src/Storages/StorageView.h | 2 +- 15 files changed, 89 insertions(+), 138 deletions(-) diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index a7b3f4ef762..23df3f72318 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -42,7 +42,7 @@ This is similar to normal view but can be created with parameter instead of lite ``` sql CREATE VIEW view AS SELECT * FROM TABLE WHERE Column1={column1:datatype1} and Column2={column2:datatype2} ... ``` -The above creates a view for table which can be used as table function by substituting value1 & value2 as show below. +The above creates a view for table which can be used as table function by substituting parameters as show below. ``` sql SELECT * FROM view(column1=value1, column2=value2 ...) 
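Since the generic `view(column1=value1, column2=value2 ...)` form above can be hard to picture, here is a concrete sketch using the `Catalog` table from the regression test; the view name `sales_by` is illustrative, and the rewritten query shown in the comment is only an approximation of the subquery replacement performed by `StorageView::replaceWithSubquery`.

``` sql
CREATE VIEW sales_by AS
SELECT * FROM Catalog WHERE Price = {price:UInt64} AND Quantity = {quantity:UInt64};

-- The call is parsed as a table function; the (name = value) pairs are collected
-- into a parameter map and substituted into the stored SELECT, so it behaves like:
--   SELECT * FROM (SELECT * FROM Catalog WHERE Price = 50 AND Quantity = 2)
SELECT * FROM sales_by(price=50, quantity=2);
```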
@@ -86,7 +86,9 @@ To delete a view, use [DROP VIEW](../../../sql-reference/statements/drop.md#drop This is an experimental feature that may change in backwards-incompatible ways in the future releases. Enable usage of live views and `WATCH` query using [allow_experimental_live_view](../../../operations/settings/settings.md#allow-experimental-live-view) setting. Input the command `set allow_experimental_live_view = 1`. ::: - +```sql +CREATE LIVE VIEW [IF NOT EXISTS] [db.]table_name [WITH [TIMEOUT [value_in_sec] [AND]] [REFRESH [value_in_sec]]] AS SELECT ... +``` Live views store result of the corresponding [SELECT](../../../sql-reference/statements/select/index.md) query and are updated any time the result of the query changes. Query result as well as partial result needed to combine with new data are stored in memory providing increased performance for repeated queries. Live views can provide push notifications when query result changes using the [WATCH](../../../sql-reference/statements/watch.md) query. diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 0ebc6857779..148d8e4d30b 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -54,6 +54,7 @@ #include #include #include +#include namespace DB @@ -742,29 +743,11 @@ std::optional ActionsMatcher::getNameAndTypeFromAST(const ASTPt if (const auto * node = index.tryGetNode(child_column_name)) return NameAndTypePair(child_column_name, node->result_type); - if (!data.only_consts) + if (!data.only_consts && analyzeReceiveQueryParams(ast).empty()) { - bool has_query_parameter = false; - - std::queue astQueue; - astQueue.push(ast); - - while (!astQueue.empty()) - { - auto current = astQueue.front(); - astQueue.pop(); - - if (auto * ast_query_parameter = current->as()) - has_query_parameter = true; - - for (auto astChild : current->children) - astQueue.push(astChild); - } - - if (!has_query_parameter) - throw Exception( - "Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), - ErrorCodes::UNKNOWN_IDENTIFIER); + throw Exception( + "Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), + ErrorCodes::UNKNOWN_IDENTIFIER); } return {}; } diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index e3276f6006d..45a73e09909 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1127,82 +1127,55 @@ void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) { - auto hash = table_expression->getTreeHash(); - String key = toString(hash.first) + '_' + toString(hash.second); - - StoragePtr & res = table_function_results[key]; - - if (!res) + if (const auto * function = table_expression->as()) { - try + if (TableFunctionFactory::instance().isTableFunctionName(function->name)) { - TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); - if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) + auto hash = table_expression->getTreeHash(); + String key = toString(hash.first) + '_' + toString(hash.second); + StoragePtr & res = table_function_results[key]; + if (!res) { - const auto & insertion_table = getInsertionTable(); - if (!insertion_table.empty()) + TableFunctionPtr table_function_ptr = 
TableFunctionFactory::instance().get(table_expression, shared_from_this()); + if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) { - const auto & structure_hint - = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; - table_function_ptr->setStructureHint(structure_hint); + const auto & insertion_table = getInsertionTable(); + if (!insertion_table.empty()) + { + const auto & structure_hint + = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; + table_function_ptr->setStructureHint(structure_hint); + } + } + + res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); + + /// Since ITableFunction::parseArguments() may change table_expression, i.e.: + /// + /// remote('127.1', system.one) -> remote('127.1', 'system.one'), + /// + auto new_hash = table_expression->getTreeHash(); + if (hash != new_hash) + { + key = toString(new_hash.first) + '_' + toString(new_hash.second); + table_function_results[key] = res; } } - - res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); - - /// Since ITableFunction::parseArguments() may change table_expression, i.e.: - /// - /// remote('127.1', system.one) -> remote('127.1', 'system.one'), - /// - auto new_hash = table_expression->getTreeHash(); - if (hash != new_hash) - { - key = toString(new_hash.first) + '_' + toString(new_hash.second); - table_function_results[key] = res; - } - return res; - }catch (DB::Exception &table_function_exception) - { - if (table_function_exception.code() == ErrorCodes::UNKNOWN_FUNCTION) - { - if (auto ast_function = table_expression->as()) - { - try - { - res = DatabaseCatalog::instance().getTable({getCurrentDatabase(), ast_function->name}, getQueryContext()); - if (res.get()->isView() && res->as()->isParameterizedView()) - return res; - else - { - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "Not a parameterized view {}", - ast_function->name); - } - } - catch (DB::Exception &view_exception) - { - if (view_exception.code() == ErrorCodes::UNKNOWN_TABLE) - throw Exception( - ErrorCodes::UNKNOWN_FUNCTION, - "Unknown table function {} OR Unknown parameterized view {}", - table_function_exception.message(), - view_exception.message()); - else - throw; - } - } - else - throw; - } - else - throw; } - + else if (DatabaseCatalog::instance().isTableExist({getCurrentDatabase(), function->name}, getQueryContext())) + { + StoragePtr res = DatabaseCatalog::instance().getTable({getCurrentDatabase(), function->name}, getQueryContext()); + if (res.get()->isView() && res->as()->isParameterizedView()) + return res; + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not a parameterized view `{}`", function->name); + } + } + throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "Unknown table function or incorrect parameterized view: `{}`", function->name); } - - return res; + throw Exception(ErrorCodes::LOGICAL_ERROR, "Unable to fetch function from query"); } diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index d56cc47a34b..b49df1b1fe7 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1906,11 +1906,12 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( before_where, ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample); - if 
(!(query.allow_query_parameters && query.hasQueryParameters())) + bool has_query_parameters = query.allow_query_parameters && query.hasQueryParameters(); + if (!has_query_parameters) { auto & column_elem = before_where_sample.getByName(query.where()->getColumnName()); - /// If the filter column is a constant, record it. + /// If the filter column is a constant and not a query parameter, record it. if (column_elem.column) where_constant_filter_description = ConstantFilterDescription(*column_elem.column); } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 88df37fca34..298dfd03bc6 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -502,7 +502,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (view) { view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot); - view->replaceQueryParameters(query_ptr, getSelectQuery().getQueryParameterValues()); + view->replaceQueryParametersIfParametrizedView(query_ptr, getSelectQuery().getQueryParameterValues()); } syntax_analyzer_result = TreeRewriter(context).analyzeSelect( diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index 280904c9aa9..ae07d5f5ad7 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -128,7 +128,7 @@ void QueryNormalizer::visit(ASTQueryParameter & node, const ASTPtr & ast, Data & if (it_alias != data.aliases.end()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Same alias used multiple times {} ", backQuote(node.name)); - data.aliases[node.name] =ast; + data.aliases[node.name] = ast; } diff --git a/src/Interpreters/QueryParameterVisitor.cpp b/src/Interpreters/QueryParameterVisitor.cpp index 0c0f74d402e..491c05ac3d2 100644 --- a/src/Interpreters/QueryParameterVisitor.cpp +++ b/src/Interpreters/QueryParameterVisitor.cpp @@ -17,11 +17,11 @@ public: void visit(const ASTPtr & ast) { - for (const auto & child : ast->children) + if (const auto & query_parameter = ast->as()) + visitQueryParameter(*query_parameter); + else { - if (const auto & query_parameter = child->as()) - visitQueryParameter(*query_parameter); - else + for (const auto & child : ast->children) visit(child); } } @@ -48,4 +48,12 @@ NameSet analyzeReceiveQueryParams(const std::string & query) return query_params; } +NameSet analyzeReceiveQueryParams(const ASTPtr & ast) +{ + NameSet query_params; + QueryParameterVisitor(query_params).visit(ast); + return query_params; +} + + } diff --git a/src/Interpreters/QueryParameterVisitor.h b/src/Interpreters/QueryParameterVisitor.h index 531de2ddafa..6d9d49e1ed2 100644 --- a/src/Interpreters/QueryParameterVisitor.h +++ b/src/Interpreters/QueryParameterVisitor.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB @@ -10,4 +11,6 @@ namespace DB /// Find parameters in a query and collect them into set. 
NameSet analyzeReceiveQueryParams(const std::string & query); +NameSet analyzeReceiveQueryParams(const ASTPtr & ast); + } diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 7537628b3a5..65fe8e30c44 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -8,8 +8,8 @@ #include #include #include -#include #include +#include #include @@ -480,19 +480,9 @@ void ASTSelectQuery::setFinal() // NOLINT method can be made const bool ASTSelectQuery::hasQueryParameters() const { - std::queue queue; - queue.push(this->clone()); - - while (!queue.empty()) + if (!analyzeReceiveQueryParams(this->where()).empty()) { - auto ast = queue.front(); - queue.pop(); - - if (ast->as()) - return true; - - for (const auto & child : ast->children) - queue.push(child); + return true; } return false; } @@ -507,15 +497,14 @@ NameToNameMap ASTSelectQuery::getQueryParameterValues() const { auto ast = queue.front(); queue.pop(); - if (auto * expression_list = ast->as()) + if (const auto * expression_list = ast->as()) { if (expression_list->children.size() == 2) { - if (auto * identifier = expression_list->children[0]->as()) + if (const auto * identifier = expression_list->children[0]->as()) { - if (auto * literal = expression_list->children[1]->as()) + if (const auto * literal = expression_list->children[1]->as()) { - parameter_values[identifier->name()] = convertFieldToString(literal->value); } } diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index 76fe9582615..1cd59a0b571 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -4,8 +4,8 @@ #include #include #include +#include -#include #include namespace DB @@ -90,22 +90,9 @@ bool ASTSelectWithUnionQuery::hasNonDefaultUnionMode() const bool ASTSelectWithUnionQuery::hasQueryParameters() const { - std::queue queue; - queue.push(this->clone()); - - while (!queue.empty()) + if (!analyzeReceiveQueryParams(this->list_of_selects).empty()) { - auto current = queue.front(); - queue.pop(); - - if (auto * select = current->as()) - { - if (select->hasQueryParameters()) - return true; - } - - for (const auto & child : current->children) - queue.push(child); + return true; } return false; } diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index b0f6e66c213..9b22b4b4b98 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -84,7 +84,7 @@ private: class ParserUnionList : public IParserBase { public: - ParserUnionList(bool allow_query_parameters_=false) + explicit ParserUnionList(bool allow_query_parameters_ = false) : allow_query_parameters(allow_query_parameters_) { } diff --git a/src/Parsers/ParserSelectQuery.h b/src/Parsers/ParserSelectQuery.h index 708b051e046..ac79cc0637d 100644 --- a/src/Parsers/ParserSelectQuery.h +++ b/src/Parsers/ParserSelectQuery.h @@ -10,7 +10,7 @@ namespace DB class ParserSelectQuery : public IParserBase { public: - ParserSelectQuery(bool allow_query_parameters_=false) + explicit ParserSelectQuery(bool allow_query_parameters_ = false) : allow_query_parameters(allow_query_parameters_) { } diff --git a/src/Parsers/ParserUnionQueryElement.h b/src/Parsers/ParserUnionQueryElement.h index a3fd47c496b..ca372052306 100644 --- a/src/Parsers/ParserUnionQueryElement.h +++ b/src/Parsers/ParserUnionQueryElement.h @@ -10,10 +10,11 @@ namespace DB class ParserUnionQueryElement : public IParserBase { public: - ParserUnionQueryElement(bool 
allow_query_parameters_=false) + explicit ParserUnionQueryElement(bool allow_query_parameters_ = false) : allow_query_parameters(allow_query_parameters_) { } + bool allow_query_parameters; protected: diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 32d0a08777d..ec7c665e135 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -176,7 +176,7 @@ static ASTTableExpression * getFirstTableExpression(ASTSelectQuery & select_quer return select_element->table_expression->as(); } -void StorageView::replaceQueryParameters(ASTPtr & outer_query, const NameToNameMap & parameter_values) +void StorageView::replaceQueryParametersIfParametrizedView(ASTPtr & outer_query, const NameToNameMap & parameter_values) const { if (is_parameterized_view) { @@ -219,7 +219,11 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ for (auto & child : table_expression->children) if (child.get() == view_name.get()) child = view_query; - else if (child.get() && child->as() && child->as()->name == table_expression->table_function->as()->name) + else if (child.get() + && child->as() + && table_expression->table_function + && table_expression->table_function->as() + && child->as()->name == table_expression->table_function->as()->name) child = view_query; } diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 297847e83bf..1ed64c482e0 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -35,7 +35,7 @@ public: size_t max_block_size, unsigned num_streams) override; - void replaceQueryParameters(ASTPtr & outer_query, const NameToNameMap & parameter_values); + void replaceQueryParametersIfParametrizedView(ASTPtr & outer_query, const NameToNameMap & parameter_values) const; static void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot) { From a12d2fcf44c3ff5eff64527ed731f89b3e9ee308 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Tue, 27 Sep 2022 22:13:20 +0200 Subject: [PATCH 011/262] Updated ASTSelectQuery to fix FastTest fails for 40907 Parameterized views as table functions --- src/Parsers/ASTSelectQuery.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 65fe8e30c44..f3bb094c41e 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -480,7 +480,7 @@ void ASTSelectQuery::setFinal() // NOLINT method can be made const bool ASTSelectQuery::hasQueryParameters() const { - if (!analyzeReceiveQueryParams(this->where()).empty()) + if (!analyzeReceiveQueryParams(this->clone()).empty()) { return true; } From 618f63d6c7ec4d92a742bd974c9a611a2fccce2f Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 28 Sep 2022 10:00:12 +0200 Subject: [PATCH 012/262] Updated executeTableFunctions in Context.cpp to check for table/view & fallback to function to fix test fails - 40907 Parameterized views as table functions --- src/Interpreters/Context.cpp | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 45a73e09909..136d2b1283f 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1129,7 +1129,17 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) { if (const auto * function = table_expression->as()) { - if (TableFunctionFactory::instance().isTableFunctionName(function->name)) + if 
(DatabaseCatalog::instance().isTableExist({getCurrentDatabase(), function->name}, getQueryContext())) + { + StoragePtr res = DatabaseCatalog::instance().getTable({getCurrentDatabase(), function->name}, getQueryContext()); + if (res.get()->isView() && res->as()->isParameterizedView()) + return res; + else + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not a parameterized view `{}`", function->name); + } + } + else { auto hash = table_expression->getTreeHash(); String key = toString(hash.first) + '_' + toString(hash.second); @@ -1163,17 +1173,6 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) } return res; } - else if (DatabaseCatalog::instance().isTableExist({getCurrentDatabase(), function->name}, getQueryContext())) - { - StoragePtr res = DatabaseCatalog::instance().getTable({getCurrentDatabase(), function->name}, getQueryContext()); - if (res.get()->isView() && res->as()->isParameterizedView()) - return res; - else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not a parameterized view `{}`", function->name); - } - } - throw Exception(ErrorCodes::UNKNOWN_FUNCTION, "Unknown table function or incorrect parameterized view: `{}`", function->name); } throw Exception(ErrorCodes::LOGICAL_ERROR, "Unable to fetch function from query"); } From e6672832b9289c8fbe2a8133e5e84142002c159f Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 28 Sep 2022 10:17:04 +0200 Subject: [PATCH 013/262] Removed unused errorcodes in Context.cpp - 40907 Parameterized views as table functions --- src/Interpreters/Context.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 136d2b1283f..957e9a2bce1 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -139,7 +139,6 @@ namespace ErrorCodes extern const int INVALID_SETTING_VALUE; extern const int UNKNOWN_READ_METHOD; extern const int NOT_IMPLEMENTED; - extern const int UNKNOWN_FUNCTION; } From f78f846503f70cd7f81538c3e48baa16aa1a55ce Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 28 Sep 2022 13:23:11 +0200 Subject: [PATCH 014/262] Fixed issues executeTableFunctions in Context.cpp to fall back on TableFunction if not parameterized view & updated test - 40907 Parameterized views as table functions --- src/Interpreters/Context.cpp | 66 ++++++++++--------- .../0_stateless/02428_parameterized_view.sql | 2 +- 2 files changed, 35 insertions(+), 33 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 957e9a2bce1..2c6f27070e3 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1133,45 +1133,47 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) StoragePtr res = DatabaseCatalog::instance().getTable({getCurrentDatabase(), function->name}, getQueryContext()); if (res.get()->isView() && res->as()->isParameterizedView()) return res; - else - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Not a parameterized view `{}`", function->name); - } } - else + auto hash = table_expression->getTreeHash(); + String key = toString(hash.first) + '_' + toString(hash.second); + StoragePtr & res = table_function_results[key]; + if (!res) { - auto hash = table_expression->getTreeHash(); - String key = toString(hash.first) + '_' + toString(hash.second); - StoragePtr & res = table_function_results[key]; - if (!res) + TableFunctionPtr table_function_ptr; + try { - TableFunctionPtr table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); 
- if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) + table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); + } + catch (Exception & e) + { + e.addMessage(" or incorrect parameterized view"); + throw; + } + if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) + { + const auto & insertion_table = getInsertionTable(); + if (!insertion_table.empty()) { - const auto & insertion_table = getInsertionTable(); - if (!insertion_table.empty()) - { - const auto & structure_hint - = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; - table_function_ptr->setStructureHint(structure_hint); - } - } - - res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); - - /// Since ITableFunction::parseArguments() may change table_expression, i.e.: - /// - /// remote('127.1', system.one) -> remote('127.1', 'system.one'), - /// - auto new_hash = table_expression->getTreeHash(); - if (hash != new_hash) - { - key = toString(new_hash.first) + '_' + toString(new_hash.second); - table_function_results[key] = res; + const auto & structure_hint + = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; + table_function_ptr->setStructureHint(structure_hint); } } - return res; + + res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); + + /// Since ITableFunction::parseArguments() may change table_expression, i.e.: + /// + /// remote('127.1', system.one) -> remote('127.1', 'system.one'), + /// + auto new_hash = table_expression->getTreeHash(); + if (hash != new_hash) + { + key = toString(new_hash.first) + '_' + toString(new_hash.second); + table_function_results[key] = res; + } } + return res; } throw Exception(ErrorCodes::LOGICAL_ERROR, "Unable to fetch function from query"); } diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index 0153ed95428..ff451f91f5b 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -13,7 +13,7 @@ SELECT Price FROM v1(price=20); SELECT Price FROM v123(price=20); -- { serverError UNKNOWN_FUNCTION } CREATE VIEW v10 AS SELECT * FROM Catalog WHERE Price=10; -SELECT Price FROM v10(price=10); -- { serverError BAD_ARGUMENTS } +SELECT Price FROM v10(price=10); -- { serverError UNKNOWN_FUNCTION } CREATE VIEW v2 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64}; From d94c3438ad1a0938d2604b9b71b999a9891de273 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 29 Sep 2022 09:17:11 +0200 Subject: [PATCH 015/262] Fixing build issues by including QueryParameterVisitor - 40907 Parameterized views as table functions --- src/Parsers/ASTSelectQuery.h | 1 + src/Parsers/ASTSelectWithUnionQuery.h | 1 + 2 files changed, 2 insertions(+) diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 8ece05808f7..19e3d2b814a 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index 8d93760426a..64cf5287211 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ 
b/src/Parsers/ASTSelectWithUnionQuery.h @@ -2,6 +2,7 @@ #include #include +#include namespace DB { From e5c0c6a1b6cd24f231971a097f46f1496b74beeb Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 3 Oct 2022 14:27:38 +0200 Subject: [PATCH 016/262] Addressed review comments - 40907 Parameterized views as table functions Implementation * Updated handling of has_query_parameters to avoid recalculation. * Fixed style comments * Updated formatImpl of ASTTableExpression to prioritise table_function before sub_query. Testing: * Added test for ATTACH, DETACH, INSERT INTO view and EXPLAIN SYNTAX to tests/queries/0_stateless/02428_parameterized_view.sql --- src/Parsers/ASTSelectQuery.cpp | 13 ++++---- src/Parsers/ASTSelectQuery.h | 5 ++- src/Parsers/ASTSelectWithUnionQuery.cpp | 32 ++++++++++++++++--- src/Parsers/ASTSelectWithUnionQuery.h | 6 ++-- src/Parsers/ASTTablesInSelectQuery.cpp | 10 +++--- src/Parsers/ExpressionListParsers.cpp | 1 + src/Parsers/ParserSelectQuery.h | 1 + src/Parsers/ParserSelectWithUnionQuery.cpp | 1 + src/Storages/StorageView.cpp | 4 +++ .../02428_parameterized_view.reference | 10 ++++++ .../0_stateless/02428_parameterized_view.sql | 9 +++++- 11 files changed, 72 insertions(+), 20 deletions(-) diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index f3bb094c41e..aa4ff96e050 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -478,13 +478,14 @@ void ASTSelectQuery::setFinal() // NOLINT method can be made const tables_element.table_expression->as().final = true; } -bool ASTSelectQuery::hasQueryParameters() const + +void ASTSelectQuery::setHasQueryParameters() { - if (!analyzeReceiveQueryParams(this->clone()).empty()) - { - return true; - } - return false; + if (!this->where()) + return; + + if (!analyzeReceiveQueryParams(this->where()).empty()) + has_query_parameters = true; } NameToNameMap ASTSelectQuery::getQueryParameterValues() const diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 19e3d2b814a..b3f29009df6 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -143,7 +143,10 @@ public: void setFinal(); QueryKind getQueryKind() const override { return QueryKind::Select; } - bool hasQueryParameters() const; + + bool has_query_parameters = false; + bool hasQueryParameters() const { return has_query_parameters; } + void setHasQueryParameters(); NameToNameMap getQueryParameterValues() const; protected: diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index 1cd59a0b571..50e929a6f46 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include @@ -88,13 +87,36 @@ bool ASTSelectWithUnionQuery::hasNonDefaultUnionMode() const || set_of_modes.contains(SelectUnionMode::EXCEPT_DISTINCT); } -bool ASTSelectWithUnionQuery::hasQueryParameters() const + +void ASTSelectWithUnionQuery::setHasQueryParameters() { - if (!analyzeReceiveQueryParams(this->list_of_selects).empty()) + if (!list_of_selects) + return; + + for (const auto & child : list_of_selects->children) { - return true; + if (auto * select_node = child->as()) + { + select_node->setHasQueryParameters(); + if (select_node->hasQueryParameters()) + { + has_query_parameters = true; + break; + } + } + } +} + +void ASTSelectWithUnionQuery::clearAllowQueryParameters() +{ + if (!list_of_selects) + return; + + for (const auto & child : list_of_selects->children) + { + if (auto * 
select_node = child->as()) + select_node->allow_query_parameters = false; } - return false; } } diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index 64cf5287211..6562bdc4c3d 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -2,7 +2,6 @@ #include #include -#include namespace DB { @@ -33,8 +32,11 @@ public: /// Consider any mode other than ALL as non-default. bool hasNonDefaultUnionMode() const; - bool hasQueryParameters() const; + bool has_query_parameters = false; + bool hasQueryParameters() const { return has_query_parameters; } + void setHasQueryParameters(); + void clearAllowQueryParameters(); }; diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index 3b7a3a342e6..3f687f76c86 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -112,16 +112,16 @@ void ASTTableExpression::formatImpl(const FormatSettings & settings, FormatState settings.ostr << " "; database_and_table_name->formatImpl(settings, state, frame); } - else if (table_function) - { - settings.ostr << " "; - table_function->formatImpl(settings, state, frame); - } else if (subquery) { settings.ostr << settings.nl_or_ws << indent_str; subquery->formatImpl(settings, state, frame); } + else if (table_function) + { + settings.ostr << " "; + table_function->formatImpl(settings, state, frame); + } if (final) { diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 59b5b8b98cb..a8f0f0d0d58 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -280,6 +280,7 @@ static bool modifyAST(ASTPtr ast, SubqueryFunctionType type) select_with_union_query->list_of_selects = std::make_shared(); select_with_union_query->list_of_selects->children.push_back(std::move(select_query)); select_with_union_query->children.push_back(select_with_union_query->list_of_selects); + select_with_union_query->setHasQueryParameters(); auto new_subquery = std::make_shared(); new_subquery->children.push_back(select_with_union_query); diff --git a/src/Parsers/ParserSelectQuery.h b/src/Parsers/ParserSelectQuery.h index ac79cc0637d..ea9f71f36e0 100644 --- a/src/Parsers/ParserSelectQuery.h +++ b/src/Parsers/ParserSelectQuery.h @@ -14,6 +14,7 @@ public: : allow_query_parameters(allow_query_parameters_) { } + bool allow_query_parameters; protected: diff --git a/src/Parsers/ParserSelectWithUnionQuery.cpp b/src/Parsers/ParserSelectWithUnionQuery.cpp index 39204ee457d..49f631a2881 100644 --- a/src/Parsers/ParserSelectWithUnionQuery.cpp +++ b/src/Parsers/ParserSelectWithUnionQuery.cpp @@ -36,6 +36,7 @@ bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & select_with_union_query->list_of_selects = list_node; select_with_union_query->children.push_back(select_with_union_query->list_of_selects); select_with_union_query->list_of_modes = parser.getUnionModes(); + select_with_union_query->setHasQueryParameters(); return true; } diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index ec7c665e135..2a82bf327e7 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -100,6 +100,10 @@ StorageView::StorageView( throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); SelectQueryDescription description; + //When storing the select_query clear allow_query_parameters from the select, so that when this view is used in 
select, + //the query parameters are expected to be substituted + query.select->clearAllowQueryParameters(); + description.inner_query = query.select->ptr(); is_parameterized_view = query.isParameterizedView(); storage_metadata.setSelectQuery(description); diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference index d9afe5ff69c..bf21cdb6308 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.reference +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -1,3 +1,13 @@ 20 +SELECT + Name, + Price, + Quantity +FROM +( + SELECT * + FROM default.Catalog + WHERE Price = _CAST(10, \'UInt64\') +) AS v1 50 10 diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index ff451f91f5b..fe7ec419b1a 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -10,11 +10,18 @@ INSERT INTO Catalog VALUES ('Paper', 20, 1); CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price={price:UInt64}; SELECT Price FROM v1(price=20); +DETACH TABLE v1; +ATTACH TABLE v1; + +EXPLAIN SYNTAX SELECT * from v1(price=10); + +INSERT INTO v1 VALUES ('Bag', 50, 2); -- { serverError NOT_IMPLEMENTED} + SELECT Price FROM v123(price=20); -- { serverError UNKNOWN_FUNCTION } CREATE VIEW v10 AS SELECT * FROM Catalog WHERE Price=10; -SELECT Price FROM v10(price=10); -- { serverError UNKNOWN_FUNCTION } +SELECT Price FROM v10(price=10); -- { serverError UNKNOWN_FUNCTION } CREATE VIEW v2 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64}; SELECT Price FROM v2(price=50,quantity=2); From 7033a56ff2bc7b9bc19b40066ffc2bba8a7a67ea Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 3 Oct 2022 16:42:44 +0200 Subject: [PATCH 017/262] Moved QueryParameterVisitor to Parsers & EXPLAIN SYNTAX test fix - 40907 Parameterized views as table functions --- src/Interpreters/ActionsVisitor.cpp | 2 +- src/Interpreters/ExpressionAnalyzer.cpp | 6 ++++++ src/Parsers/ASTSelectQuery.cpp | 5 ++++- src/Parsers/ASTSelectQuery.h | 1 - src/Parsers/ASTSelectWithUnionQuery.cpp | 2 ++ src/Parsers/ASTTablesInSelectQuery.cpp | 10 ++++++++++ .../QueryParameterVisitor.cpp | 2 +- src/{Interpreters => Parsers}/QueryParameterVisitor.h | 0 src/Server/HTTPHandler.cpp | 2 +- 9 files changed, 25 insertions(+), 5 deletions(-) rename src/{Interpreters => Parsers}/QueryParameterVisitor.cpp (96%) rename src/{Interpreters => Parsers}/QueryParameterVisitor.h (100%) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 148d8e4d30b..c694f6007fc 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -54,7 +54,7 @@ #include #include #include -#include +#include namespace DB diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index b49df1b1fe7..4efb08c414f 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1286,6 +1286,8 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, getRootActions(select_query->where(), only_types, step.actions()); + //For creating parameterized view, query parameters are allowed in select + //As select will be stored without substituting query parameters, we don't want to evaluate the where expression if (select_query->allow_query_parameters && select_query->hasQueryParameters()) return 
true; @@ -1906,6 +1908,8 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( before_where, ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample); + //For creating parameterized view, query parameters are allowed in select + //As select will be stored without substituting query parameters, we don't want to evaluate the where expression bool has_query_parameters = query.allow_query_parameters && query.hasQueryParameters(); if (!has_query_parameters) { @@ -2075,6 +2079,8 @@ void ExpressionAnalysisResult::finalize( ssize_t & having_step_num, const ASTSelectQuery & query) { + //For creating parameterized view, query parameters are allowed in select + //As select will be stored without substituting query parameters, we don't want to evaluate the expressions/steps if (query.allow_query_parameters && query.hasQueryParameters()) return; diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index aa4ff96e050..c2ca04eaa13 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include @@ -41,6 +41,9 @@ ASTPtr ASTSelectQuery::clone() const for (const auto & child : children) res->children.push_back(child->clone()); + res->allow_query_parameters = allow_query_parameters; + res->has_query_parameters = has_query_parameters; + return res; } diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index b3f29009df6..dae718aa040 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -2,7 +2,6 @@ #include #include -#include namespace DB { diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index 50e929a6f46..792fffe5f12 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -23,6 +23,8 @@ ASTPtr ASTSelectWithUnionQuery::clone() const res->list_of_modes = list_of_modes; res->set_of_modes = set_of_modes; + res->has_query_parameters = has_query_parameters; + cloneOutputOptions(*res); return res; } diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index 3f687f76c86..85db26efcd2 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace DB @@ -112,6 +113,15 @@ void ASTTableExpression::formatImpl(const FormatSettings & settings, FormatState settings.ostr << " "; database_and_table_name->formatImpl(settings, state, frame); } + //In case of table function view, table_function is preferred over subquery for EXPLAIN SYNTAX + else if (table_function && table_function->as() && table_function->as()->name=="view") + { + settings.ostr << " "; + table_function->formatImpl(settings, state, frame); + + } + //For parameterized view, subquery is preferred over table_function for EXPLAIN SYNTAX + //we cannot remove the table function part, as its needed for query substitution else if (subquery) { settings.ostr << settings.nl_or_ws << indent_str; diff --git a/src/Interpreters/QueryParameterVisitor.cpp b/src/Parsers/QueryParameterVisitor.cpp similarity index 96% rename from src/Interpreters/QueryParameterVisitor.cpp rename to src/Parsers/QueryParameterVisitor.cpp index 491c05ac3d2..14750845034 100644 --- a/src/Interpreters/QueryParameterVisitor.cpp +++ b/src/Parsers/QueryParameterVisitor.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/src/Interpreters/QueryParameterVisitor.h 
b/src/Parsers/QueryParameterVisitor.h similarity index 100% rename from src/Interpreters/QueryParameterVisitor.h rename to src/Parsers/QueryParameterVisitor.h diff --git a/src/Server/HTTPHandler.cpp b/src/Server/HTTPHandler.cpp index 8886a77c9b5..45d4bd824f2 100644 --- a/src/Server/HTTPHandler.cpp +++ b/src/Server/HTTPHandler.cpp @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include From a0b1085f3be248800ccb3a850de3d27161f6949b Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Tue, 4 Oct 2022 17:32:48 +0200 Subject: [PATCH 018/262] Fixed issues with using database name in SELECT and added tests - 40907 Parameterized views as table functions --- src/Interpreters/Context.cpp | 22 ++++++++++++++++--- src/Parsers/ASTFunction.h | 3 +++ src/Parsers/ASTSelectWithUnionQuery.h | 3 ++- src/Parsers/ExpressionElementParsers.cpp | 10 +++++++-- .../02428_parameterized_view.reference | 2 ++ .../0_stateless/02428_parameterized_view.sql | 14 ++++++++++++ 6 files changed, 48 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 2c6f27070e3..9b371b7477d 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -96,6 +96,7 @@ #include #include #include +#include #if USE_ROCKSDB #include @@ -1126,11 +1127,26 @@ void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) { - if (const auto * function = table_expression->as()) + if (auto * function = table_expression->as()) { - if (DatabaseCatalog::instance().isTableExist({getCurrentDatabase(), function->name}, getQueryContext())) + String database_name = getCurrentDatabase(); + String table_name = function->name; + + if (function->has_database_name) { - StoragePtr res = DatabaseCatalog::instance().getTable({getCurrentDatabase(), function->name}, getQueryContext()); + std::vector parts; + splitInto<'.'>(parts, function->name); + + if (parts.size() == 2) + { + database_name = parts[0]; + table_name = parts[1]; + } + } + + if (DatabaseCatalog::instance().isTableExist({database_name, table_name}, getQueryContext())) + { + StoragePtr res = DatabaseCatalog::instance().getTable({database_name, table_name}, getQueryContext()); if (res.get()->isView() && res->as()->isParameterizedView()) return res; } diff --git a/src/Parsers/ASTFunction.h b/src/Parsers/ASTFunction.h index 6d5089f802e..3da7b4f9862 100644 --- a/src/Parsers/ASTFunction.h +++ b/src/Parsers/ASTFunction.h @@ -53,6 +53,9 @@ public: std::string getWindowDescription() const; + //This is used for parameterized view, to identify if name is 'db.view' + bool has_database_name = false; + protected: void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; void appendColumnNameImpl(WriteBuffer & ostr) const override; diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index 6562bdc4c3d..d2a2dff2c7b 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -36,7 +36,8 @@ public: bool hasQueryParameters() const { return has_query_parameters; } void setHasQueryParameters(); - void clearAllowQueryParameters(); + //clang-tidy wants it to be const, but it changes flags of children + void clearAllowQueryParameters();// NOLINT }; diff --git a/src/Parsers/ExpressionElementParsers.cpp b/src/Parsers/ExpressionElementParsers.cpp index 1de9adb834e..8924e64e18e 100644 --- 
a/src/Parsers/ExpressionElementParsers.cpp +++ b/src/Parsers/ExpressionElementParsers.cpp @@ -822,7 +822,7 @@ namespace bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserIdentifier id_parser; + ParserCompoundIdentifier compound_id_parser; bool has_all = false; bool has_distinct = false; @@ -838,7 +838,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return true; } - if (!id_parser.parse(pos, identifier, expected)) + if (!compound_id_parser.parse(pos, identifier, expected)) return false; if (pos->type != TokenType::OpeningRoundBracket) @@ -1034,6 +1034,12 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) function_node->children.push_back(function_node->parameters); } + if (const auto *compound_identifier = identifier->as()) + { + if (!compound_identifier->isShort()) + function_node->has_database_name = true; + } + ParserKeyword filter("FILTER"); ParserKeyword over("OVER"); diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference index bf21cdb6308..8fc3b86ddc1 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.reference +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -1,4 +1,5 @@ 20 +20 SELECT Name, Price, @@ -11,3 +12,4 @@ FROM ) AS v1 50 10 +20 diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index fe7ec419b1a..dffe75c4c4e 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -9,6 +9,7 @@ INSERT INTO Catalog VALUES ('Paper', 20, 1); CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price={price:UInt64}; SELECT Price FROM v1(price=20); +SELECT Price FROM `v1`(price=20); DETACH TABLE v1; ATTACH TABLE v1; @@ -37,3 +38,16 @@ DROP TABLE v1; DROP TABLE v2; DROP TABLE v3; DROP TABLE Catalog; + +CREATE TABLE system.Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; + +INSERT INTO system.Catalog VALUES ('Pen', 10, 3); +INSERT INTO system.Catalog VALUES ('Book', 50, 2); +INSERT INTO system.Catalog VALUES ('Paper', 20, 1); + +CREATE VIEW system.v1 AS SELECT * FROM system.Catalog WHERE Price={price:UInt64}; +SELECT Price FROM system.v1(price=20); +SELECT Price FROM `system.v1`(price=20); -- { serverError UNKNOWN_FUNCTION } + +DROP TABLE system.v1; +DROP TABLE system.Catalog; \ No newline at end of file From 4f0f214e8424f1bb8028843a686477a0cea3f603 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 5 Oct 2022 13:55:30 +0200 Subject: [PATCH 019/262] Fixed test to check for tables and drop after test - 40907 Parameterized views as table functions --- .../0_stateless/02428_parameterized_view.sql | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index dffe75c4c4e..9dfa65a5cbd 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -1,5 +1,9 @@ -DROP TABLE IF EXISTS v1; DROP TABLE IF EXISTS Catalog; +DROP TABLE IF EXISTS v1; +DROP TABLE IF EXISTS v2; +DROP TABLE IF EXISTS v3; +DROP TABLE IF EXISTS system.Catalog; +DROP TABLE IF EXISTS system.v1; CREATE TABLE Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; @@ -34,11 +38,6 @@ SELECT Price FROM v3(price=10); CREATE VIEW v4 AS SELECT 
* FROM Catalog WHERE Price={price:UInt64} AND Quantity={price:UInt64}; -- {serverError BAD_ARGUMENTS} -DROP TABLE v1; -DROP TABLE v2; -DROP TABLE v3; -DROP TABLE Catalog; - CREATE TABLE system.Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; INSERT INTO system.Catalog VALUES ('Pen', 10, 3); @@ -49,5 +48,9 @@ CREATE VIEW system.v1 AS SELECT * FROM system.Catalog WHERE Price={price:UInt64} SELECT Price FROM system.v1(price=20); SELECT Price FROM `system.v1`(price=20); -- { serverError UNKNOWN_FUNCTION } -DROP TABLE system.v1; -DROP TABLE system.Catalog; \ No newline at end of file +DROP TABLE Catalog; +DROP TABLE v1; +DROP TABLE v2; +DROP TABLE v3; +DROP TABLE system.Catalog; +DROP TABLE system.v1; \ No newline at end of file From e256b32fd583f1406a70f4d43bfa9bdc323a5180 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 6 Oct 2022 08:55:15 +0200 Subject: [PATCH 020/262] Fixed clang-tidy non-const function issue - 40907 Parameterized views as table functions --- src/Parsers/ASTSelectWithUnionQuery.cpp | 2 +- src/Parsers/ASTSelectWithUnionQuery.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index 99c5bc5f933..739b50fc3a1 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -115,7 +115,7 @@ void ASTSelectWithUnionQuery::setHasQueryParameters() } } -void ASTSelectWithUnionQuery::clearAllowQueryParameters() +void ASTSelectWithUnionQuery::clearAllowQueryParameters() // NOLINT { if (!list_of_selects) return; diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index d2a2dff2c7b..b1c174fb3a6 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -37,7 +37,7 @@ public: void setHasQueryParameters(); //clang-tidy wants it to be const, but it changes flags of children - void clearAllowQueryParameters();// NOLINT + void clearAllowQueryParameters(); // NOLINT }; From e990c8fdb46398260a762ca68c607451e6b4effc Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 6 Oct 2022 13:22:06 +0200 Subject: [PATCH 021/262] Updated order of deletion of tables/views in test - 40907 Parameterized views as table functions --- tests/queries/0_stateless/02428_parameterized_view.sql | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index 9dfa65a5cbd..01b76ffc54d 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -48,9 +48,10 @@ CREATE VIEW system.v1 AS SELECT * FROM system.Catalog WHERE Price={price:UInt64} SELECT Price FROM system.v1(price=20); SELECT Price FROM `system.v1`(price=20); -- { serverError UNKNOWN_FUNCTION } -DROP TABLE Catalog; + DROP TABLE v1; DROP TABLE v2; DROP TABLE v3; -DROP TABLE system.Catalog; -DROP TABLE system.v1; \ No newline at end of file +DROP TABLE Catalog; +DROP TABLE system.v1; +DROP TABLE system.Catalog; \ No newline at end of file From 8aab336176f9b73f9ff4595c7c56fc8edf802072 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 6 Oct 2022 14:56:54 +0200 Subject: [PATCH 022/262] Updated order of deletion of tables/views in the beginning of test - 40907 Parameterized views as table functions --- tests/queries/0_stateless/02428_parameterized_view.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index 01b76ffc54d..fa689937234 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -1,9 +1,9 @@ -DROP TABLE IF EXISTS Catalog; DROP TABLE IF EXISTS v1; DROP TABLE IF EXISTS v2; DROP TABLE IF EXISTS v3; -DROP TABLE IF EXISTS system.Catalog; +DROP TABLE IF EXISTS Catalog; DROP TABLE IF EXISTS system.v1; +DROP TABLE IF EXISTS system.Catalog; CREATE TABLE Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; From 614fd4cf42ca77dc0329639cc4003e1e2ea2f242 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 17 Oct 2022 18:11:22 +0200 Subject: [PATCH 023/262] Added is_parameterized_view to Context and removed flag from ASTs, updated to use tryGetTable, addressed review comments - 40907 Parameterized views as table functions --- .../sql-reference/statements/create/view.md | 2 +- src/Interpreters/ActionsVisitor.cpp | 2 +- src/Interpreters/Context.cpp | 117 +++++++++--------- src/Interpreters/Context.h | 5 + src/Interpreters/ExpressionAnalyzer.cpp | 11 +- src/Interpreters/InterpreterCreateQuery.cpp | 1 + src/Interpreters/QueryNormalizer.cpp | 14 +-- src/Interpreters/QueryNormalizer.h | 8 +- src/Interpreters/TreeRewriter.cpp | 2 +- src/Parsers/ASTCreateQuery.h | 1 - src/Parsers/ASTSelectQuery.cpp | 1 - src/Parsers/ASTSelectQuery.h | 1 - src/Parsers/ASTSelectWithUnionQuery.cpp | 12 -- src/Parsers/ASTSelectWithUnionQuery.h | 3 - src/Parsers/ASTTablesInSelectQuery.cpp | 7 +- src/Parsers/ExpressionListParsers.cpp | 6 +- src/Parsers/ExpressionListParsers.h | 6 - src/Parsers/ParserCreateQuery.cpp | 3 - src/Parsers/ParserSelectQuery.cpp | 1 - src/Parsers/ParserSelectQuery.h | 8 -- src/Parsers/ParserSelectWithUnionQuery.cpp | 2 +- src/Parsers/ParserSelectWithUnionQuery.h | 3 - src/Parsers/ParserUnionQueryElement.cpp | 2 +- src/Parsers/ParserUnionQueryElement.h | 8 -- src/Storages/StorageView.cpp | 4 - 25 files changed, 90 insertions(+), 140 deletions(-) diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index 70ac5629004..c4cf9da7022 100644 --- a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -42,7 +42,7 @@ This is similar to normal view but can be created with parameter instead of lite ``` sql CREATE VIEW view AS SELECT * FROM TABLE WHERE Column1={column1:datatype1} and Column2={column2:datatype2} ... ``` -The above creates a view for table which can be used as table function by substituting parameters as show below. +The above creates a view for table which can be used as table function by substituting parameters as shown below. ``` sql SELECT * FROM view(column1=value1, column2=value2 ...) 
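As a concrete illustration of the documentation hunk above, here is a minimal end-to-end sketch of the feature, reusing the `Catalog` table and `v1` view defined by the stateless test `02428_parameterized_view.sql` in this series; the names are the test's own and are shown only as an example, not as additional changes introduced by the patch:

``` sql
-- Minimal sketch of the parameterized-view flow added by this patch series.
-- Table and view names follow 02428_parameterized_view.sql; illustrative only.
CREATE TABLE Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory;
INSERT INTO Catalog VALUES ('Pen', 10, 3), ('Book', 50, 2), ('Paper', 20, 1);

-- The SELECT is stored with the {price:UInt64} parameter left unsubstituted.
CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price={price:UInt64};

-- The view is then used as a table function; the value is substituted at query time.
SELECT Price FROM v1(price=20);   -- returns 20, as in the test reference file
```

After substitution the query behaves like the equivalent plain `SELECT Price FROM Catalog WHERE Price = 20`.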
diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index c694f6007fc..5abf0e61c00 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -743,7 +743,7 @@ std::optional ActionsMatcher::getNameAndTypeFromAST(const ASTPt if (const auto * node = index.tryGetNode(child_column_name)) return NameAndTypePair(child_column_name, node->result_type); - if (!data.only_consts && analyzeReceiveQueryParams(ast).empty()) + if (!data.only_consts && data.getContext()->isParameterizedView() && analyzeReceiveQueryParams(ast).empty()) { throw Exception( "Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 679b3c155dc..9d466d45ef2 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1229,71 +1229,68 @@ void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) { - if (auto * function = table_expression->as()) + const ASTFunction * function = assert_cast(table_expression.get()); + String database_name = getCurrentDatabase(); + String table_name = function->name; + + if (function->has_database_name) { - String database_name = getCurrentDatabase(); - String table_name = function->name; + std::vector parts; + splitInto<'.'>(parts, function->name); - if (function->has_database_name) + if (parts.size() == 2) { - std::vector parts; - splitInto<'.'>(parts, function->name); - - if (parts.size() == 2) - { - database_name = parts[0]; - table_name = parts[1]; - } + database_name = parts[0]; + table_name = parts[1]; } - - if (DatabaseCatalog::instance().isTableExist({database_name, table_name}, getQueryContext())) - { - StoragePtr res = DatabaseCatalog::instance().getTable({database_name, table_name}, getQueryContext()); - if (res.get()->isView() && res->as()->isParameterizedView()) - return res; - } - auto hash = table_expression->getTreeHash(); - String key = toString(hash.first) + '_' + toString(hash.second); - StoragePtr & res = table_function_results[key]; - if (!res) - { - TableFunctionPtr table_function_ptr; - try - { - table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); - } - catch (Exception & e) - { - e.addMessage(" or incorrect parameterized view"); - throw; - } - if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) - { - const auto & insertion_table = getInsertionTable(); - if (!insertion_table.empty()) - { - const auto & structure_hint - = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; - table_function_ptr->setStructureHint(structure_hint); - } - } - - res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); - - /// Since ITableFunction::parseArguments() may change table_expression, i.e.: - /// - /// remote('127.1', system.one) -> remote('127.1', 'system.one'), - /// - auto new_hash = table_expression->getTreeHash(); - if (hash != new_hash) - { - key = toString(new_hash.first) + '_' + toString(new_hash.second); - table_function_results[key] = res; - } - } - return res; } - throw Exception(ErrorCodes::LOGICAL_ERROR, "Unable to fetch function from query"); + + StoragePtr table = DatabaseCatalog::instance().tryGetTable({database_name, table_name}, getQueryContext()); + if 
(table) + { + if (table.get()->isView() && table->as()->isParameterizedView()) + return table; + } + auto hash = table_expression->getTreeHash(); + String key = toString(hash.first) + '_' + toString(hash.second); + StoragePtr & res = table_function_results[key]; + if (!res) + { + TableFunctionPtr table_function_ptr; + try + { + table_function_ptr = TableFunctionFactory::instance().get(table_expression, shared_from_this()); + } + catch (Exception & e) + { + e.addMessage(" or incorrect parameterized view"); + throw; + } + if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) + { + const auto & insertion_table = getInsertionTable(); + if (!insertion_table.empty()) + { + const auto & structure_hint + = DatabaseCatalog::instance().getTable(insertion_table, shared_from_this())->getInMemoryMetadataPtr()->columns; + table_function_ptr->setStructureHint(structure_hint); + } + } + + res = table_function_ptr->execute(table_expression, shared_from_this(), table_function_ptr->getName()); + + /// Since ITableFunction::parseArguments() may change table_expression, i.e.: + /// + /// remote('127.1', system.one) -> remote('127.1', 'system.one'), + /// + auto new_hash = table_expression->getTreeHash(); + if (hash != new_hash) + { + key = toString(new_hash.first) + '_' + toString(new_hash.second); + table_function_results[key] = res; + } + } + return res; } diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 635c571b173..c39c6fb8ee3 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -366,6 +366,8 @@ private: /// Temporary data for query execution accounting. TemporaryDataOnDiskScopePtr temp_data_on_disk; + + bool is_parameterized_view = false; public: /// Some counters for current query execution. /// Most of them are workarounds and should be removed in the future. 
@@ -940,6 +942,9 @@ public: bool applyDeletedMask() const { return apply_deleted_mask; } void setApplyDeletedMask(bool apply) { apply_deleted_mask = apply; } + bool isParameterizedView() const { return is_parameterized_view; } + void setIsParameterizedView(bool is_parameterized_view_) { is_parameterized_view = is_parameterized_view_; } + ActionLocksManagerPtr getActionLocksManager() const; enum class ApplicationType diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 4efb08c414f..ebfde738e5a 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1286,9 +1286,9 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, getRootActions(select_query->where(), only_types, step.actions()); - //For creating parameterized view, query parameters are allowed in select - //As select will be stored without substituting query parameters, we don't want to evaluate the where expression - if (select_query->allow_query_parameters && select_query->hasQueryParameters()) + /// For creating parameterized view, query parameters are allowed in select + /// As select will be stored without substituting query parameters, we don't want to evaluate the where expression + if (this->getContext()->isParameterizedView()) return true; auto where_column_name = select_query->where()->getColumnName(); @@ -1910,8 +1910,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( //For creating parameterized view, query parameters are allowed in select //As select will be stored without substituting query parameters, we don't want to evaluate the where expression - bool has_query_parameters = query.allow_query_parameters && query.hasQueryParameters(); - if (!has_query_parameters) + if (!context->isParameterizedView()) { auto & column_elem = before_where_sample.getByName(query.where()->getColumnName()); @@ -2081,7 +2080,7 @@ void ExpressionAnalysisResult::finalize( { //For creating parameterized view, query parameters are allowed in select //As select will be stored without substituting query parameters, we don't want to evaluate the expressions/steps - if (query.allow_query_parameters && query.hasQueryParameters()) + if (chain.getContext()->isParameterizedView()) return; if (prewhere_step_num >= 0) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 41c378babcd..4d810bb682d 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -714,6 +714,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti } else if (create.select) { + getContext()->setIsParameterizedView(create.isParameterizedView()); Block as_select_sample = InterpreterSelectWithUnionQuery::getSampleBlock(create.select->clone(), getContext()); properties.columns = ColumnsDescription(as_select_sample.getNamesAndTypesList()); } diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index ae07d5f5ad7..941896c24de 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -122,13 +122,13 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) } } -void QueryNormalizer::visit(ASTQueryParameter & node, const ASTPtr & ast, Data & data) +void QueryNormalizer::visit(ASTQueryParameter & node, Data & data) { - auto it_alias = data.aliases.find(node.name); - if (it_alias != data.aliases.end()) + auto it_alias = 
data.query_parameters.find(node.name); + if (it_alias != data.query_parameters.end()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Same alias used multiple times {} ", backQuote(node.name)); - data.aliases[node.name] = ast; + data.query_parameters.insert(node.name); } @@ -152,8 +152,6 @@ static bool needVisitChild(const ASTPtr & child) /// special visitChildren() for ASTSelectQuery void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr &, Data & data) { - data.allow_query_parameters = select.allow_query_parameters; - for (auto & child : select.children) { if (needVisitChild(child)) @@ -270,8 +268,8 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) visit(*node_select, ast, data); else if (auto * node_param = ast->as()) { - if (data.allow_query_parameters) - visit(*node_param, ast, data); + if (data.is_parameterized_view) + visit(*node_param, data); else throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); } diff --git a/src/Interpreters/QueryNormalizer.h b/src/Interpreters/QueryNormalizer.h index ffd2c46ca77..5268c8ac157 100644 --- a/src/Interpreters/QueryNormalizer.h +++ b/src/Interpreters/QueryNormalizer.h @@ -43,6 +43,7 @@ public: Aliases & aliases; const NameSet & source_columns_set; ExtractedSettings settings; + NameSet query_parameters; /// tmp data size_t level; @@ -53,15 +54,16 @@ public: /// It's Ok to have "c + 1 AS c" in queries, but not in table definition const bool allow_self_aliases; /// for constructs like "SELECT column + 1 AS column" - bool allow_query_parameters; + bool is_parameterized_view; - Data(Aliases & aliases_, const NameSet & source_columns_set_, bool ignore_alias_, ExtractedSettings && settings_, bool allow_self_aliases_) + Data(Aliases & aliases_, const NameSet & source_columns_set_, bool ignore_alias_, ExtractedSettings && settings_, bool allow_self_aliases_, bool is_parameterized_view_ = false) : aliases(aliases_) , source_columns_set(source_columns_set_) , settings(settings_) , level(0) , ignore_alias(ignore_alias_) , allow_self_aliases(allow_self_aliases_) + , is_parameterized_view(is_parameterized_view_) {} }; @@ -82,7 +84,7 @@ private: static void visit(ASTIdentifier &, ASTPtr &, Data &); static void visit(ASTTablesInSelectQueryElement &, const ASTPtr &, Data &); static void visit(ASTSelectQuery &, const ASTPtr &, Data &); - static void visit(ASTQueryParameter &, const ASTPtr &, Data &); + static void visit(ASTQueryParameter &, Data &); static void visitChildren(IAST * node, Data & data); }; diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 2f5bfd00938..e34ef89d511 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1479,7 +1479,7 @@ void TreeRewriter::normalize( FunctionNameNormalizer().visit(query.get()); /// Common subexpression elimination. Rewrite rules. 
- QueryNormalizer::Data normalizer_data(aliases, source_columns_set, ignore_alias, settings, allow_self_aliases); + QueryNormalizer::Data normalizer_data(aliases, source_columns_set, ignore_alias, settings, allow_self_aliases, context_->isParameterizedView()); QueryNormalizer(normalizer_data).visit(query); optimizeGroupingSets(query); diff --git a/src/Parsers/ASTCreateQuery.h b/src/Parsers/ASTCreateQuery.h index 2a6da778211..567376cffcb 100644 --- a/src/Parsers/ASTCreateQuery.h +++ b/src/Parsers/ASTCreateQuery.h @@ -24,7 +24,6 @@ public: IAST * sample_by = nullptr; IAST * ttl_table = nullptr; ASTSetQuery * settings = nullptr; - bool allow_query_parameters = false; String getID(char) const override { return "Storage definition"; } diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index c2ca04eaa13..5b2644e550b 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -41,7 +41,6 @@ ASTPtr ASTSelectQuery::clone() const for (const auto & child : children) res->children.push_back(child->clone()); - res->allow_query_parameters = allow_query_parameters; res->has_query_parameters = has_query_parameters; return res; diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index dae718aa040..20905d3e1ea 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -87,7 +87,6 @@ public: bool group_by_with_constant_keys = false; bool group_by_with_grouping_sets = false; bool limit_with_ties = false; - bool allow_query_parameters = false; ASTPtr & refSelect() { return getExpression(Expression::SELECT); } ASTPtr & refTables() { return getExpression(Expression::TABLES); } diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index 739b50fc3a1..d139ae42cae 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -115,16 +115,4 @@ void ASTSelectWithUnionQuery::setHasQueryParameters() } } -void ASTSelectWithUnionQuery::clearAllowQueryParameters() // NOLINT -{ - if (!list_of_selects) - return; - - for (const auto & child : list_of_selects->children) - { - if (auto * select_node = child->as()) - select_node->allow_query_parameters = false; - } -} - } diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index b1c174fb3a6..ef804616cbb 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -36,9 +36,6 @@ public: bool hasQueryParameters() const { return has_query_parameters; } void setHasQueryParameters(); - //clang-tidy wants it to be const, but it changes flags of children - void clearAllowQueryParameters(); // NOLINT - }; } diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index 85db26efcd2..9e889c796f7 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -113,15 +113,14 @@ void ASTTableExpression::formatImpl(const FormatSettings & settings, FormatState settings.ostr << " "; database_and_table_name->formatImpl(settings, state, frame); } - //In case of table function view, table_function is preferred over subquery for EXPLAIN SYNTAX + /// In case of table function view, table_function is preferred over subquery for EXPLAIN SYNTAX else if (table_function && table_function->as() && table_function->as()->name=="view") { settings.ostr << " "; table_function->formatImpl(settings, state, frame); - } - //For parameterized view, subquery is preferred over table_function for 
EXPLAIN SYNTAX - //we cannot remove the table function part, as its needed for query substitution + /// For parameterized view, subquery is preferred over table_function for EXPLAIN SYNTAX + /// we cannot remove the table function part, as its needed for query substitution else if (subquery) { settings.ostr << settings.nl_or_ws << indent_str; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 137c7c87cca..5307d4aec5c 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -62,7 +62,7 @@ bool ParserList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) bool ParserUnionList::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - ParserUnionQueryElement elem_parser(allow_query_parameters); + ParserUnionQueryElement elem_parser; ParserKeyword s_union_parser("UNION"); ParserKeyword s_all_parser("ALL"); ParserKeyword s_distinct_parser("DISTINCT"); @@ -1997,9 +1997,9 @@ std::unique_ptr getFunctionLayer(ASTPtr identifier, bool is_table_functio else { bool has_database_name = false; - if (const auto *compound_identifier = identifier->as()) + if (const auto *ast_identifier = identifier->as()) { - if (!compound_identifier->isShort()) + if (ast_identifier->compound()) has_database_name = true; } return std::make_unique(function_name, allow_function_parameters_, has_database_name); diff --git a/src/Parsers/ExpressionListParsers.h b/src/Parsers/ExpressionListParsers.h index 5ab7b5e7857..653654e5a33 100644 --- a/src/Parsers/ExpressionListParsers.h +++ b/src/Parsers/ExpressionListParsers.h @@ -84,11 +84,6 @@ private: class ParserUnionList : public IParserBase { public: - explicit ParserUnionList(bool allow_query_parameters_ = false) - : allow_query_parameters(allow_query_parameters_) - { - } - template static bool parseUtil(Pos & pos, const ElemFunc & parse_element, const SepFunc & parse_separator) { @@ -113,7 +108,6 @@ public: } auto getUnionModes() const { return union_modes; } - bool allow_query_parameters; protected: const char * getName() const override { return "list of union elements"; } diff --git a/src/Parsers/ParserCreateQuery.cpp b/src/Parsers/ParserCreateQuery.cpp index 8d8e4352769..fc90f9ce3ed 100644 --- a/src/Parsers/ParserCreateQuery.cpp +++ b/src/Parsers/ParserCreateQuery.cpp @@ -1285,10 +1285,7 @@ bool ParserCreateViewQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec is_materialized_view = true; } else - { is_ordinary_view = true; - select_p.allow_query_parameters = true; - } if (!s_view.ignore(pos, expected)) return false; diff --git a/src/Parsers/ParserSelectQuery.cpp b/src/Parsers/ParserSelectQuery.cpp index 61381573421..cf335270734 100644 --- a/src/Parsers/ParserSelectQuery.cpp +++ b/src/Parsers/ParserSelectQuery.cpp @@ -34,7 +34,6 @@ namespace ErrorCodes bool ParserSelectQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { auto select_query = std::make_shared(); - select_query->allow_query_parameters = allow_query_parameters; node = select_query; ParserKeyword s_select("SELECT"); diff --git a/src/Parsers/ParserSelectQuery.h b/src/Parsers/ParserSelectQuery.h index ea9f71f36e0..deac25df57d 100644 --- a/src/Parsers/ParserSelectQuery.h +++ b/src/Parsers/ParserSelectQuery.h @@ -9,14 +9,6 @@ namespace DB class ParserSelectQuery : public IParserBase { -public: - explicit ParserSelectQuery(bool allow_query_parameters_ = false) - : allow_query_parameters(allow_query_parameters_) - { - } - - bool allow_query_parameters; - protected: const char * getName() const override 
{ return "SELECT query"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/ParserSelectWithUnionQuery.cpp b/src/Parsers/ParserSelectWithUnionQuery.cpp index 49f631a2881..e046030bc38 100644 --- a/src/Parsers/ParserSelectWithUnionQuery.cpp +++ b/src/Parsers/ParserSelectWithUnionQuery.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { ASTPtr list_node; - ParserUnionList parser(allow_query_parameters); + ParserUnionList parser; if (!parser.parse(pos, list_node, expected)) return false; diff --git a/src/Parsers/ParserSelectWithUnionQuery.h b/src/Parsers/ParserSelectWithUnionQuery.h index 6edf8a8d60e..0bf2946e429 100644 --- a/src/Parsers/ParserSelectWithUnionQuery.h +++ b/src/Parsers/ParserSelectWithUnionQuery.h @@ -8,9 +8,6 @@ namespace DB class ParserSelectWithUnionQuery : public IParserBase { -public: - bool allow_query_parameters = false; - protected: const char * getName() const override { return "SELECT query, possibly with UNION"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Parsers/ParserUnionQueryElement.cpp b/src/Parsers/ParserUnionQueryElement.cpp index 0ddaa323404..efd022e6362 100644 --- a/src/Parsers/ParserUnionQueryElement.cpp +++ b/src/Parsers/ParserUnionQueryElement.cpp @@ -10,7 +10,7 @@ namespace DB bool ParserUnionQueryElement::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) { - if (!ParserSubquery().parse(pos, node, expected) && !ParserSelectQuery(allow_query_parameters).parse(pos, node, expected)) + if (!ParserSubquery().parse(pos, node, expected) && !ParserSelectQuery().parse(pos, node, expected)) return false; if (const auto * ast_subquery = node->as()) diff --git a/src/Parsers/ParserUnionQueryElement.h b/src/Parsers/ParserUnionQueryElement.h index ca372052306..6b63c62c85b 100644 --- a/src/Parsers/ParserUnionQueryElement.h +++ b/src/Parsers/ParserUnionQueryElement.h @@ -9,14 +9,6 @@ namespace DB class ParserUnionQueryElement : public IParserBase { -public: - explicit ParserUnionQueryElement(bool allow_query_parameters_ = false) - : allow_query_parameters(allow_query_parameters_) - { - } - - bool allow_query_parameters; - protected: const char * getName() const override { return "SELECT query, subquery, possibly with UNION"; } bool parseImpl(Pos & pos, ASTPtr & node, Expected & expected) override; diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 2a82bf327e7..ec7c665e135 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -100,10 +100,6 @@ StorageView::StorageView( throw Exception("SELECT query is not specified for " + getName(), ErrorCodes::INCORRECT_QUERY); SelectQueryDescription description; - //When storing the select_query clear allow_query_parameters from the select, so that when this view is used in select, - //the query parameters are expected to be substituted - query.select->clearAllowQueryParameters(); - description.inner_query = query.select->ptr(); is_parameterized_view = query.isParameterizedView(); storage_metadata.setSelectQuery(description); From e4ac3d0e18f57e4a777aa1bce05cb1a4b034ab5f Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 19 Oct 2022 18:30:03 +0200 Subject: [PATCH 024/262] Added FunctionParameterValuesVisitor, renamed bool to is_create_parameterized_view, added checks for parameterized view and support & test to propagate query parameters - 40907 Parameterized views as table functions --- 
src/Interpreters/ActionsVisitor.cpp | 6 +- src/Interpreters/Context.h | 6 +- src/Interpreters/ExpressionAnalyzer.cpp | 6 +- src/Interpreters/InterpreterCreateQuery.cpp | 1 - src/Interpreters/InterpreterExplainQuery.cpp | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 8 +- src/Interpreters/QueryNormalizer.cpp | 4 +- src/Interpreters/QueryNormalizer.h | 6 +- src/Interpreters/TreeRewriter.cpp | 2 +- src/Interpreters/executeQuery.cpp | 5 +- src/Parsers/ASTSelectQuery.cpp | 32 -------- src/Parsers/ASTSelectQuery.h | 1 - .../FunctionParameterValuesVisitor.cpp | 75 +++++++++++++++++++ src/Parsers/FunctionParameterValuesVisitor.h | 14 ++++ src/Storages/SelectQueryInfo.h | 2 + src/Storages/StorageView.cpp | 7 +- src/Storages/StorageView.h | 6 +- .../02428_parameterized_view.reference | 1 + .../0_stateless/02428_parameterized_view.sql | 4 + 19 files changed, 131 insertions(+), 57 deletions(-) create mode 100644 src/Parsers/FunctionParameterValuesVisitor.cpp create mode 100644 src/Parsers/FunctionParameterValuesVisitor.h diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index b7510f41561..2a0bf4016e8 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -763,7 +763,11 @@ std::optional ActionsMatcher::getNameAndTypeFromAST(const ASTPt if (const auto * node = index.tryGetNode(child_column_name)) return NameAndTypePair(child_column_name, node->result_type); - if (!data.only_consts && data.getContext()->isParameterizedView() && analyzeReceiveQueryParams(ast).empty()) + /// For parameterized view, we allow query parameters in create which will be substituted by select queries + /// so these cannot be evaluated. But if its a parameterized view with sub part ast which does not contain query parameters + /// then it can be evaluated + /// Eg : CREATE VIEW v1 AS SELECT * FROM t1 WHERE Column1={c1:UInt64} AND Column2=3; - Column2=3 should get NameAndTypePair + if (!data.only_consts && (data.getContext()->isCreateParameterizedView() && analyzeReceiveQueryParams(ast).empty())) { throw Exception( "Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 9afe2da4384..70e875cb868 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -369,7 +369,7 @@ private: /// Temporary data for query execution accounting. TemporaryDataOnDiskScopePtr temp_data_on_disk; - bool is_parameterized_view = false; + bool is_create_parameterized_view = false; public: /// Some counters for current query execution. /// Most of them are workarounds and should be removed in the future. 
@@ -948,8 +948,8 @@ public: bool applyDeletedMask() const { return apply_deleted_mask; } void setApplyDeletedMask(bool apply) { apply_deleted_mask = apply; } - bool isParameterizedView() const { return is_parameterized_view; } - void setIsParameterizedView(bool is_parameterized_view_) { is_parameterized_view = is_parameterized_view_; } + bool isCreateParameterizedView() const { return is_create_parameterized_view; } + void setIsCreateParameterizedView(bool is_create_parameterized_view_) { is_create_parameterized_view = is_create_parameterized_view_; } ActionLocksManagerPtr getActionLocksManager() const; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index ebfde738e5a..53c2eb0bbe3 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1288,7 +1288,7 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, /// For creating parameterized view, query parameters are allowed in select /// As select will be stored without substituting query parameters, we don't want to evaluate the where expression - if (this->getContext()->isParameterizedView()) + if (this->getContext()->isCreateParameterizedView()) return true; auto where_column_name = select_query->where()->getColumnName(); @@ -1910,7 +1910,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( //For creating parameterized view, query parameters are allowed in select //As select will be stored without substituting query parameters, we don't want to evaluate the where expression - if (!context->isParameterizedView()) + if (!context->isCreateParameterizedView()) { auto & column_elem = before_where_sample.getByName(query.where()->getColumnName()); @@ -2080,7 +2080,7 @@ void ExpressionAnalysisResult::finalize( { //For creating parameterized view, query parameters are allowed in select //As select will be stored without substituting query parameters, we don't want to evaluate the expressions/steps - if (chain.getContext()->isParameterizedView()) + if (chain.getContext()->isCreateParameterizedView()) return; if (prewhere_step_num >= 0) diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index 32f57ebdc9a..e89aa2244fe 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -726,7 +726,6 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti } else if (create.select) { - getContext()->setIsParameterizedView(create.isParameterizedView()); Block as_select_sample = InterpreterSelectWithUnionQuery::getSampleBlock(create.select->clone(), getContext()); properties.columns = ColumnsDescription(as_select_sample.getNamesAndTypesList()); } diff --git a/src/Interpreters/InterpreterExplainQuery.cpp b/src/Interpreters/InterpreterExplainQuery.cpp index 4799970b6a1..b2e61f04b4f 100644 --- a/src/Interpreters/InterpreterExplainQuery.cpp +++ b/src/Interpreters/InterpreterExplainQuery.cpp @@ -68,7 +68,7 @@ namespace if (query_info.view_query) { ASTPtr tmp; - StorageView::replaceWithSubquery(select, query_info.view_query->clone(), tmp); + StorageView::replaceWithSubquery(select, query_info.view_query->clone(), tmp, query_info.is_parameterized_view); } } }; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 9d96eb5c0a3..a5883a44201 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -88,7 +88,7 @@ 
#include #include #include - +#include namespace DB { @@ -501,8 +501,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( ASTPtr view_table; if (view) { - view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot); - view->replaceQueryParametersIfParametrizedView(query_ptr, getSelectQuery().getQueryParameterValues()); + NameToNameMap parameter_values = analyzeReceiveFunctionParamValues(query_ptr); + query_info.is_parameterized_view = view->isParameterizedView(); + view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot, view->isParameterizedView()); + view->replaceQueryParametersIfParametrizedView(query_ptr, parameter_values); } syntax_analyzer_result = TreeRewriter(context).analyzeSelect( diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index 941896c24de..19112a7c4c0 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -124,6 +124,8 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) void QueryNormalizer::visit(ASTQueryParameter & node, Data & data) { + /// This is used only for create parameterized view to check if same parameter name is used twice + /// Eg: CREATE VIEW v1 AS SELECT * FROM t1 WHERE Column1={c1:UInt64} AND Column2={c1:UInt64}; - c1 is used twice auto it_alias = data.query_parameters.find(node.name); if (it_alias != data.query_parameters.end()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Same alias used multiple times {} ", backQuote(node.name)); @@ -268,7 +270,7 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) visit(*node_select, ast, data); else if (auto * node_param = ast->as()) { - if (data.is_parameterized_view) + if (data.is_create_parameterized_view) visit(*node_param, data); else throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); diff --git a/src/Interpreters/QueryNormalizer.h b/src/Interpreters/QueryNormalizer.h index 5268c8ac157..5006d3ad83c 100644 --- a/src/Interpreters/QueryNormalizer.h +++ b/src/Interpreters/QueryNormalizer.h @@ -54,16 +54,16 @@ public: /// It's Ok to have "c + 1 AS c" in queries, but not in table definition const bool allow_self_aliases; /// for constructs like "SELECT column + 1 AS column" - bool is_parameterized_view; + bool is_create_parameterized_view; - Data(Aliases & aliases_, const NameSet & source_columns_set_, bool ignore_alias_, ExtractedSettings && settings_, bool allow_self_aliases_, bool is_parameterized_view_ = false) + Data(Aliases & aliases_, const NameSet & source_columns_set_, bool ignore_alias_, ExtractedSettings && settings_, bool allow_self_aliases_, bool is_create_parameterized_view_ = false) : aliases(aliases_) , source_columns_set(source_columns_set_) , settings(settings_) , level(0) , ignore_alias(ignore_alias_) , allow_self_aliases(allow_self_aliases_) - , is_parameterized_view(is_parameterized_view_) + , is_create_parameterized_view(is_create_parameterized_view_) {} }; diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 7ff065f2228..c58aa8f87a1 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1480,7 +1480,7 @@ void TreeRewriter::normalize( FunctionNameNormalizer().visit(query.get()); /// Common subexpression elimination. Rewrite rules. 
- QueryNormalizer::Data normalizer_data(aliases, source_columns_set, ignore_alias, settings, allow_self_aliases, context_->isParameterizedView()); + QueryNormalizer::Data normalizer_data(aliases, source_columns_set, ignore_alias, settings, allow_self_aliases, context_->isCreateParameterizedView()); QueryNormalizer(normalizer_data).visit(query); optimizeGroupingSets(query); diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index abca563de55..b73a8769301 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -408,8 +408,11 @@ static std::tuple executeQueryImpl( if (const auto * insert_query = ast->as(); insert_query && insert_query->data) query_end = insert_query->data; + if (const auto * create_query = ast->as()) + context->setIsCreateParameterizedView(create_query->isParameterizedView()); + /// Replace ASTQueryParameter with ASTLiteral for prepared statements. - if (context->hasQueryParameters()) + if (!context->isCreateParameterizedView() && context->hasQueryParameters()) { ReplaceQueryParameterVisitor visitor(context->getQueryParameters()); visitor.visit(ast); diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 5b2644e550b..913e582acf1 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -7,8 +7,6 @@ #include #include #include -#include -#include #include #include @@ -490,34 +488,4 @@ void ASTSelectQuery::setHasQueryParameters() has_query_parameters = true; } -NameToNameMap ASTSelectQuery::getQueryParameterValues() const -{ - NameToNameMap parameter_values; - std::queue queue; - queue.push(this->clone()); - - while (!queue.empty()) - { - auto ast = queue.front(); - queue.pop(); - if (const auto * expression_list = ast->as()) - { - if (expression_list->children.size() == 2) - { - if (const auto * identifier = expression_list->children[0]->as()) - { - if (const auto * literal = expression_list->children[1]->as()) - { - parameter_values[identifier->name()] = convertFieldToString(literal->value); - } - } - } - } - for (const auto & child : ast->children) - queue.push(child); - } - - return parameter_values; -} - } diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 20905d3e1ea..5ec56de891e 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -145,7 +145,6 @@ public: bool has_query_parameters = false; bool hasQueryParameters() const { return has_query_parameters; } void setHasQueryParameters(); - NameToNameMap getQueryParameterValues() const; protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/src/Parsers/FunctionParameterValuesVisitor.cpp b/src/Parsers/FunctionParameterValuesVisitor.cpp new file mode 100644 index 00000000000..93192084187 --- /dev/null +++ b/src/Parsers/FunctionParameterValuesVisitor.cpp @@ -0,0 +1,75 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; +} + +class FunctionParameterValuesVisitor +{ +public: + explicit FunctionParameterValuesVisitor(NameToNameMap & parameter_values_) + : parameter_values(parameter_values_) + { + } + + void visit(const ASTPtr & ast) + { + if (const auto * expression = ast->as()) + visitExpressionList(*expression); + for (const auto & child : ast->children) + visit(child); + } + +private: + NameToNameMap & parameter_values; + + void 
visitExpressionList(const ASTExpressionList & expression_list) + { + if (expression_list.children.size() == 2) + { + if (const auto * identifier = expression_list.children[0]->as()) + { + if (const auto * literal = expression_list.children[1]->as()) + { + parameter_values[identifier->name()] = convertFieldToString(literal->value); + } + else if (const auto * function = expression_list.children[1]->as()) + { + if (isFunctionCast(function)) + { + const auto * cast_expression = assert_cast(function->arguments.get()); + if (cast_expression->children.size() != 2) + throw Exception("Function CAST must have exactly two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + if (const auto * cast_literal = cast_expression->children[0]->as()) + { + parameter_values[identifier->name()] = convertFieldToString(cast_literal->value); + } + } + } + } + } + } +}; + +NameToNameMap analyzeReceiveFunctionParamValues(const ASTPtr & ast) +{ + NameToNameMap parameter_values; + FunctionParameterValuesVisitor(parameter_values).visit(ast); + return parameter_values; +} + + +} diff --git a/src/Parsers/FunctionParameterValuesVisitor.h b/src/Parsers/FunctionParameterValuesVisitor.h new file mode 100644 index 00000000000..02053fe4a3d --- /dev/null +++ b/src/Parsers/FunctionParameterValuesVisitor.h @@ -0,0 +1,14 @@ +#pragma once + +#include +#include +#include + + +namespace DB +{ + +/// Find parameters in a query paramater values and collect them into map. +NameToNameMap analyzeReceiveFunctionParamValues(const ASTPtr & ast); + +} diff --git a/src/Storages/SelectQueryInfo.h b/src/Storages/SelectQueryInfo.h index f2835ab4dbf..082cab3d49e 100644 --- a/src/Storages/SelectQueryInfo.h +++ b/src/Storages/SelectQueryInfo.h @@ -220,6 +220,8 @@ struct SelectQueryInfo Block minmax_count_projection_block; MergeTreeDataSelectAnalysisResultPtr merge_tree_select_result_ptr; + bool is_parameterized_view = false; + InputOrderInfoPtr getInputOrderInfo() const { return input_order_info ? input_order_info : (projection ? projection->input_order_info : nullptr); diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index ec7c665e135..44d0cead4b3 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -185,13 +185,14 @@ void StorageView::replaceQueryParametersIfParametrizedView(ASTPtr & outer_query, } } -void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name) +void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name, bool parameterized_view) { ASTTableExpression * table_expression = getFirstTableExpression(outer_query); if (!table_expression->database_and_table_name) { - // If it's a view or merge table function, add a fake db.table name. + /// If it's a view or merge table function, add a fake db.table name. 
+ /// For parameterized view, the function name is the db.view name, so add the function name if (table_expression->table_function) { auto table_function_name = table_expression->table_function->as()->name; @@ -199,7 +200,7 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ table_expression->database_and_table_name = std::make_shared("__view"); else if (table_function_name == "merge") table_expression->database_and_table_name = std::make_shared("__merge"); - else + else if (parameterized_view) table_expression->database_and_table_name = std::make_shared(table_function_name); } diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 1ed64c482e0..1b08801af3b 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -37,12 +37,12 @@ public: void replaceQueryParametersIfParametrizedView(ASTPtr & outer_query, const NameToNameMap & parameter_values) const; - static void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot) + static void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot, const bool parameterized_view) { - replaceWithSubquery(select_query, metadata_snapshot->getSelectQuery().inner_query->clone(), view_name); + replaceWithSubquery(select_query, metadata_snapshot->getSelectQuery().inner_query->clone(), view_name, parameterized_view); } - static void replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name); + static void replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name, const bool parameterized_view); static ASTPtr restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name); protected: diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference index 8fc3b86ddc1..37120d2d1b9 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.reference +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -1,5 +1,6 @@ 20 20 +10 SELECT Name, Price, diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index fa689937234..e0a67e7690b 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -15,6 +15,10 @@ CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price={price:UInt64}; SELECT Price FROM v1(price=20); SELECT Price FROM `v1`(price=20); +set param_price=10; +SELECT Price FROM v1; -- { serverError UNKNOWN_QUERY_PARAMETER} +SELECT Price FROM v1(price={price:UInt64}); + DETACH TABLE v1; ATTACH TABLE v1; From 1dde95b6e7e314d049a3fc33484ba215d368f8bd Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Thu, 20 Oct 2022 10:31:41 +0200 Subject: [PATCH 025/262] Updated hasQueryParameters using mutable optional bool and typo comment fixed - 40907 Parameterized views as table functions --- src/Parsers/ASTSelectQuery.cpp | 15 ++++++++----- src/Parsers/ASTSelectQuery.h | 5 ++--- src/Parsers/ASTSelectWithUnionQuery.cpp | 23 ++++++++++---------- src/Parsers/ASTSelectWithUnionQuery.h | 5 ++--- src/Parsers/ExpressionListParsers.cpp | 1 - src/Parsers/FunctionParameterValuesVisitor.h | 2 +- src/Parsers/ParserSelectWithUnionQuery.cpp | 1 - 7 files changed, 26 insertions(+), 26 deletions(-) diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 913e582acf1..b5198e3df0d 
100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -478,14 +478,17 @@ void ASTSelectQuery::setFinal() // NOLINT method can be made const tables_element.table_expression->as().final = true; } - -void ASTSelectQuery::setHasQueryParameters() +bool ASTSelectQuery::hasQueryParameters() const { - if (!this->where()) - return; + if (!has_query_parameters.has_value()) + { + if (analyzeReceiveQueryParams(std::make_shared(*this)).empty()) + has_query_parameters = false; + else + has_query_parameters = true; + } - if (!analyzeReceiveQueryParams(this->where()).empty()) - has_query_parameters = true; + return has_query_parameters.value(); } } diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 5ec56de891e..5c64534f1c2 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -142,9 +142,8 @@ public: QueryKind getQueryKind() const override { return QueryKind::Select; } - bool has_query_parameters = false; - bool hasQueryParameters() const { return has_query_parameters; } - void setHasQueryParameters(); + mutable std::optional has_query_parameters; + bool hasQueryParameters() const; protected: void formatImpl(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index d139ae42cae..b0030294727 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -95,24 +95,25 @@ bool ASTSelectWithUnionQuery::hasNonDefaultUnionMode() const || set_of_modes.contains(SelectUnionMode::EXCEPT_DISTINCT); } - -void ASTSelectWithUnionQuery::setHasQueryParameters() +bool ASTSelectWithUnionQuery::hasQueryParameters() const { - if (!list_of_selects) - return; - - for (const auto & child : list_of_selects->children) + if (!has_query_parameters.has_value()) { - if (auto * select_node = child->as()) + for (const auto & child : list_of_selects->children) { - select_node->setHasQueryParameters(); - if (select_node->hasQueryParameters()) + if (auto * select_node = child->as()) { - has_query_parameters = true; - break; + if (select_node->hasQueryParameters()) + { + has_query_parameters = true; + return has_query_parameters.value(); + } } } + has_query_parameters = false; } + + return has_query_parameters.value(); } } diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index ef804616cbb..334bb86932f 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -32,9 +32,8 @@ public: /// Consider any mode other than ALL as non-default. 
bool hasNonDefaultUnionMode() const; - bool has_query_parameters = false; - bool hasQueryParameters() const { return has_query_parameters; } - void setHasQueryParameters(); + mutable std::optional has_query_parameters; + bool hasQueryParameters() const; }; diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 5307d4aec5c..1ae2973657d 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -221,7 +221,6 @@ static bool modifyAST(ASTPtr ast, SubqueryFunctionType type) select_with_union_query->list_of_selects = std::make_shared(); select_with_union_query->list_of_selects->children.push_back(std::move(select_query)); select_with_union_query->children.push_back(select_with_union_query->list_of_selects); - select_with_union_query->setHasQueryParameters(); auto new_subquery = std::make_shared(); new_subquery->children.push_back(select_with_union_query); diff --git a/src/Parsers/FunctionParameterValuesVisitor.h b/src/Parsers/FunctionParameterValuesVisitor.h index 02053fe4a3d..7fd594ddc84 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.h +++ b/src/Parsers/FunctionParameterValuesVisitor.h @@ -8,7 +8,7 @@ namespace DB { -/// Find parameters in a query paramater values and collect them into map. +/// Find parameters in a query parameter values and collect them into map. NameToNameMap analyzeReceiveFunctionParamValues(const ASTPtr & ast); } diff --git a/src/Parsers/ParserSelectWithUnionQuery.cpp b/src/Parsers/ParserSelectWithUnionQuery.cpp index e046030bc38..532a9e20735 100644 --- a/src/Parsers/ParserSelectWithUnionQuery.cpp +++ b/src/Parsers/ParserSelectWithUnionQuery.cpp @@ -36,7 +36,6 @@ bool ParserSelectWithUnionQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & select_with_union_query->list_of_selects = list_node; select_with_union_query->children.push_back(select_with_union_query->list_of_selects); select_with_union_query->list_of_modes = parser.getUnionModes(); - select_with_union_query->setHasQueryParameters(); return true; } From d8fe1d2d6354d4b26e4c99c66d950a3f1b30a3f7 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 21 Oct 2022 14:45:28 +0200 Subject: [PATCH 026/262] Moved is_create_parameterized_view out of Context , and added into ActionsMatcher::Data and SelectQueryInfo - 40907 Parameterized views as table functions --- src/Interpreters/ActionsVisitor.cpp | 6 +++-- src/Interpreters/ActionsVisitor.h | 4 ++- src/Interpreters/Context.h | 5 +--- src/Interpreters/ExpressionAnalyzer.cpp | 26 +++++++++---------- src/Interpreters/ExpressionAnalyzer.h | 2 +- src/Interpreters/InterpreterCreateQuery.cpp | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- .../InterpreterSelectWithUnionQuery.cpp | 10 ++++++- .../InterpreterSelectWithUnionQuery.h | 3 ++- src/Interpreters/SelectQueryOptions.h | 8 ++++++ src/Interpreters/TreeRewriter.cpp | 12 +++++---- src/Interpreters/TreeRewriter.h | 5 ++-- src/Interpreters/executeQuery.cpp | 5 ++-- 13 files changed, 56 insertions(+), 34 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 2a0bf4016e8..4e18740f9b4 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -537,7 +537,8 @@ ActionsMatcher::Data::Data( bool only_consts_, bool create_source_for_in_, AggregationKeysInfo aggregation_keys_info_, - bool build_expression_with_window_functions_) + bool build_expression_with_window_functions_, + bool is_create_parameterized_view_) : WithContext(context_) , 
set_size_limit(set_size_limit_) , subquery_depth(subquery_depth_) @@ -551,6 +552,7 @@ ActionsMatcher::Data::Data( , actions_stack(std::move(actions_dag), context_) , aggregation_keys_info(aggregation_keys_info_) , build_expression_with_window_functions(build_expression_with_window_functions_) + , is_create_parameterized_view(is_create_parameterized_view_) , next_unique_suffix(actions_stack.getLastActions().getOutputs().size() + 1) { } @@ -767,7 +769,7 @@ std::optional ActionsMatcher::getNameAndTypeFromAST(const ASTPt /// so these cannot be evaluated. But if its a parameterized view with sub part ast which does not contain query parameters /// then it can be evaluated /// Eg : CREATE VIEW v1 AS SELECT * FROM t1 WHERE Column1={c1:UInt64} AND Column2=3; - Column2=3 should get NameAndTypePair - if (!data.only_consts && (data.getContext()->isCreateParameterizedView() && analyzeReceiveQueryParams(ast).empty())) + if (!data.only_consts && (data.is_create_parameterized_view && analyzeReceiveQueryParams(ast).empty())) { throw Exception( "Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index a27745d2cfa..97b4440ff88 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -134,6 +134,7 @@ public: ScopeStack actions_stack; AggregationKeysInfo aggregation_keys_info; bool build_expression_with_window_functions; + bool is_create_parameterized_view; /* * Remember the last unique column suffix to avoid quadratic behavior @@ -154,7 +155,8 @@ public: bool only_consts_, bool create_source_for_in_, AggregationKeysInfo aggregation_keys_info_, - bool build_expression_with_window_functions_ = false); + bool build_expression_with_window_functions_ = false, + bool is_create_parameterized_view = false); /// Does result of the calculation already exists in the block. bool hasColumn(const String & column_name) const; diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 70e875cb868..601ba26d51a 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -369,7 +369,7 @@ private: /// Temporary data for query execution accounting. TemporaryDataOnDiskScopePtr temp_data_on_disk; - bool is_create_parameterized_view = false; + public: /// Some counters for current query execution. /// Most of them are workarounds and should be removed in the future. 
@@ -948,9 +948,6 @@ public: bool applyDeletedMask() const { return apply_deleted_mask; } void setApplyDeletedMask(bool apply) { apply_deleted_mask = apply; } - bool isCreateParameterizedView() const { return is_create_parameterized_view; } - void setIsCreateParameterizedView(bool is_create_parameterized_view_) { is_create_parameterized_view = is_create_parameterized_view_; } - ActionLocksManagerPtr getActionLocksManager() const; enum class ApplicationType diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 53c2eb0bbe3..b13bcf6199d 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -537,7 +537,7 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) } -void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts) +void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts, bool is_create_parameterized_view) { LogAST log; ActionsVisitor::Data visitor_data( @@ -551,7 +551,9 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_ false /* no_makeset */, only_consts, !isRemoteStorage() /* create_source_for_in */, - getAggregationKeysInfo()); + getAggregationKeysInfo(), + false /* build_expression_with_window_functions */, + is_create_parameterized_view); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -1284,11 +1286,11 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(columns_after_join); - getRootActions(select_query->where(), only_types, step.actions()); + getRootActions(select_query->where(), only_types, step.actions(), false/*only_consts*/, query_options.is_create_parameterized_view); /// For creating parameterized view, query parameters are allowed in select /// As select will be stored without substituting query parameters, we don't want to evaluate the where expression - if (this->getContext()->isCreateParameterizedView()) + if (query_options.is_create_parameterized_view) return true; auto where_column_name = select_query->where()->getColumnName(); @@ -1824,7 +1826,10 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( chain.finalize(); - finalize(chain, prewhere_step_num, where_step_num, having_step_num, query); + /// For creating parameterized view, query parameters are allowed in select + /// As select will be stored without substituting query parameters, we don't want to evaluate the expressions/steps + if (!query_analyzer.query_options.is_create_parameterized_view) + finalize(chain, prewhere_step_num, where_step_num, having_step_num, query); chain.clear(); }; @@ -1908,9 +1913,9 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( before_where, ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample); - //For creating parameterized view, query parameters are allowed in select - //As select will be stored without substituting query parameters, we don't want to evaluate the where expression - if (!context->isCreateParameterizedView()) + /// For creating parameterized view, query parameters are allowed in select + /// As select will be stored without substituting query parameters, we don't want to evaluate the where expression + if (!query_analyzer.query_options.is_create_parameterized_view) { auto & column_elem = 
before_where_sample.getByName(query.where()->getColumnName()); @@ -2078,11 +2083,6 @@ void ExpressionAnalysisResult::finalize( ssize_t & having_step_num, const ASTSelectQuery & query) { - //For creating parameterized view, query parameters are allowed in select - //As select will be stored without substituting query parameters, we don't want to evaluate the expressions/steps - if (chain.getContext()->isCreateParameterizedView()) - return; - if (prewhere_step_num >= 0) { const ExpressionActionsChain::Step & step = *chain.steps.at(prewhere_step_num); diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index ddb41a00f84..286665bb05f 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -175,7 +175,7 @@ protected: ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool is_left) const; - void getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false); + void getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false, bool is_create_parameterized_view = false); /** Similar to getRootActions but do not make sets when analyzing IN functions. It's used in * analyzeAggregation which happens earlier than analyzing PREWHERE and WHERE. If we did, the diff --git a/src/Interpreters/InterpreterCreateQuery.cpp b/src/Interpreters/InterpreterCreateQuery.cpp index e89aa2244fe..a80b5949a79 100644 --- a/src/Interpreters/InterpreterCreateQuery.cpp +++ b/src/Interpreters/InterpreterCreateQuery.cpp @@ -726,7 +726,7 @@ InterpreterCreateQuery::TableProperties InterpreterCreateQuery::getTableProperti } else if (create.select) { - Block as_select_sample = InterpreterSelectWithUnionQuery::getSampleBlock(create.select->clone(), getContext()); + Block as_select_sample = InterpreterSelectWithUnionQuery::getSampleBlock(create.select->clone(), getContext(), false /* is_subquery */, create.isParameterizedView()); properties.columns = ColumnsDescription(as_select_sample.getNamesAndTypesList()); } else if (create.as_table_function) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index a5883a44201..6fd4e53d0f7 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -2014,7 +2014,7 @@ void InterpreterSelectQuery::addPrewhereAliasActions() } auto syntax_result - = TreeRewriter(context).analyze(required_columns_all_expr, required_columns_after_prewhere, storage, storage_snapshot); + = TreeRewriter(context).analyze(required_columns_all_expr, required_columns_after_prewhere, storage, storage_snapshot, options.is_create_parameterized_view); alias_actions = ExpressionAnalyzer(required_columns_all_expr, syntax_result, context).getActionsDAG(true); /// The set of required columns could be added as a result of adding an action to calculate ALIAS. 
diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp index a679b17a5bd..23caacfdfbd 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.cpp +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.cpp @@ -248,12 +248,14 @@ InterpreterSelectWithUnionQuery::buildCurrentChildInterpreter(const ASTPtr & ast InterpreterSelectWithUnionQuery::~InterpreterSelectWithUnionQuery() = default; -Block InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_, ContextPtr context_, bool is_subquery) +Block InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_, ContextPtr context_, bool is_subquery, bool is_create_parameterized_view) { if (!context_->hasQueryContext()) { if (is_subquery) return InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().subquery().analyze()).getSampleBlock(); + else if (is_create_parameterized_view) + return InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().createParameterizedView().analyze()).getSampleBlock(); else return InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().analyze()).getSampleBlock(); } @@ -271,6 +273,12 @@ Block InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_, return cache[key] = InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().subquery().analyze()).getSampleBlock(); } + else if (is_create_parameterized_view) + { + return cache[key] + = InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().createParameterizedView().analyze()) + .getSampleBlock(); + } else { return cache[key] = InterpreterSelectWithUnionQuery(query_ptr_, context_, SelectQueryOptions().analyze()).getSampleBlock(); diff --git a/src/Interpreters/InterpreterSelectWithUnionQuery.h b/src/Interpreters/InterpreterSelectWithUnionQuery.h index ff763ec6490..a2040e0d2fc 100644 --- a/src/Interpreters/InterpreterSelectWithUnionQuery.h +++ b/src/Interpreters/InterpreterSelectWithUnionQuery.h @@ -41,7 +41,8 @@ public: static Block getSampleBlock( const ASTPtr & query_ptr_, ContextPtr context_, - bool is_subquery = false); + bool is_subquery = false, + bool is_create_parameterized_view = false); void ignoreWithTotals() override; diff --git a/src/Interpreters/SelectQueryOptions.h b/src/Interpreters/SelectQueryOptions.h index 6b5a6a7f8eb..e6895ed243b 100644 --- a/src/Interpreters/SelectQueryOptions.h +++ b/src/Interpreters/SelectQueryOptions.h @@ -50,6 +50,7 @@ struct SelectQueryOptions bool with_all_cols = false; /// asterisk include materialized and aliased columns bool settings_limit_offset_done = false; bool is_explain = false; /// The value is true if it's explain statement. + bool is_create_parameterized_view = false; /// These two fields are used to evaluate shardNum() and shardCount() function when /// prefer_localhost_replica == 1 and local instance is selected. 
They are needed because local @@ -77,6 +78,13 @@ struct SelectQueryOptions return out; } + SelectQueryOptions createParameterizedView() const + { + SelectQueryOptions out = *this; + out.is_create_parameterized_view = true; + return out; + } + SelectQueryOptions & analyze(bool dry_run = true) { only_analyze = dry_run; diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index c58aa8f87a1..45df40ac6a0 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -41,6 +41,7 @@ #include #include #include +#include #include #include @@ -1277,7 +1278,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( } } - normalize(query, result.aliases, all_source_columns_set, select_options.ignore_alias, settings, /* allow_self_aliases = */ true, getContext()); + normalize(query, result.aliases, all_source_columns_set, select_options.ignore_alias, settings, /* allow_self_aliases = */ true, getContext(), select_options.is_create_parameterized_view); /// Remove unneeded columns according to 'required_result_columns'. /// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside. @@ -1370,7 +1371,8 @@ TreeRewriterResultPtr TreeRewriter::analyze( const StorageSnapshotPtr & storage_snapshot, bool allow_aggregations, bool allow_self_aliases, - bool execute_scalar_subqueries) const + bool execute_scalar_subqueries, + bool is_create_parameterized_view) const { if (query->as()) throw Exception("Not select analyze for select asts.", ErrorCodes::LOGICAL_ERROR); @@ -1379,7 +1381,7 @@ TreeRewriterResultPtr TreeRewriter::analyze( TreeRewriterResult result(source_columns, storage, storage_snapshot, false); - normalize(query, result.aliases, result.source_columns_set, false, settings, allow_self_aliases, getContext()); + normalize(query, result.aliases, result.source_columns_set, false, settings, allow_self_aliases, getContext(), is_create_parameterized_view); /// Executing scalar subqueries. Column defaults could be a scalar subquery. executeScalarSubqueries(query, getContext(), 0, result.scalars, result.local_scalars, !execute_scalar_subqueries); @@ -1408,7 +1410,7 @@ TreeRewriterResultPtr TreeRewriter::analyze( } void TreeRewriter::normalize( - ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, const Settings & settings, bool allow_self_aliases, ContextPtr context_) + ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, const Settings & settings, bool allow_self_aliases, ContextPtr context_, bool is_create_parameterized_view) { if (!UserDefinedSQLFunctionFactory::instance().empty()) { @@ -1480,7 +1482,7 @@ void TreeRewriter::normalize( FunctionNameNormalizer().visit(query.get()); /// Common subexpression elimination. Rewrite rules. 
- QueryNormalizer::Data normalizer_data(aliases, source_columns_set, ignore_alias, settings, allow_self_aliases, context_->isCreateParameterizedView()); + QueryNormalizer::Data normalizer_data(aliases, source_columns_set, ignore_alias, settings, allow_self_aliases, is_create_parameterized_view); QueryNormalizer(normalizer_data).visit(query); optimizeGroupingSets(query); diff --git a/src/Interpreters/TreeRewriter.h b/src/Interpreters/TreeRewriter.h index 7954547c070..99408ca208b 100644 --- a/src/Interpreters/TreeRewriter.h +++ b/src/Interpreters/TreeRewriter.h @@ -119,7 +119,8 @@ public: const StorageSnapshotPtr & storage_snapshot = {}, bool allow_aggregations = false, bool allow_self_aliases = true, - bool execute_scalar_subqueries = true) const; + bool execute_scalar_subqueries = true, + bool is_create_parameterized_view = false) const; /// Analyze and rewrite select query TreeRewriterResultPtr analyzeSelect( @@ -131,7 +132,7 @@ public: std::shared_ptr table_join = {}) const; private: - static void normalize(ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, const Settings & settings, bool allow_self_aliases, ContextPtr context_); + static void normalize(ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, const Settings & settings, bool allow_self_aliases, ContextPtr context_, bool is_create_parameterized_view = false); }; } diff --git a/src/Interpreters/executeQuery.cpp b/src/Interpreters/executeQuery.cpp index b73a8769301..466661296be 100644 --- a/src/Interpreters/executeQuery.cpp +++ b/src/Interpreters/executeQuery.cpp @@ -408,11 +408,12 @@ static std::tuple executeQueryImpl( if (const auto * insert_query = ast->as(); insert_query && insert_query->data) query_end = insert_query->data; + bool is_create_parameterized_view = false; if (const auto * create_query = ast->as()) - context->setIsCreateParameterizedView(create_query->isParameterizedView()); + is_create_parameterized_view = create_query->isParameterizedView(); /// Replace ASTQueryParameter with ASTLiteral for prepared statements. 
- if (!context->isCreateParameterizedView() && context->hasQueryParameters()) + if (!is_create_parameterized_view && context->hasQueryParameters()) { ReplaceQueryParameterVisitor visitor(context->getQueryParameters()); visitor.visit(ast); From f4483ed19ebfdf28fa3167c337a0edf962f694dd Mon Sep 17 00:00:00 2001 From: Wangyang Guo Date: Tue, 25 Oct 2022 09:39:56 +0800 Subject: [PATCH 027/262] TreeRewriter: use well defined TablesWithColumns --- src/Interpreters/TreeRewriter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index da12dccd8d8..eb713019306 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1203,7 +1203,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( ASTPtr & query, TreeRewriterResult && result, const SelectQueryOptions & select_options, - const std::vector & tables_with_columns, + const TablesWithColumns & tables_with_columns, const Names & required_result_columns, std::shared_ptr table_join) const { From b08961be8a310cdf0fde253343006dd270411c04 Mon Sep 17 00:00:00 2001 From: Wangyang Guo Date: Tue, 25 Oct 2022 14:44:09 +0800 Subject: [PATCH 028/262] LogicalExpressionsOptimizer: optimze for LowCardinality --- .../LogicalExpressionsOptimizer.cpp | 37 +++++++++++++++++-- .../LogicalExpressionsOptimizer.h | 7 +++- src/Interpreters/TreeRewriter.cpp | 2 +- 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/src/Interpreters/LogicalExpressionsOptimizer.cpp b/src/Interpreters/LogicalExpressionsOptimizer.cpp index 9e30cac2e19..bd24e13b129 100644 --- a/src/Interpreters/LogicalExpressionsOptimizer.cpp +++ b/src/Interpreters/LogicalExpressionsOptimizer.cpp @@ -1,13 +1,17 @@ #include +#include +#include #include #include #include #include +#include #include #include +#include #include @@ -32,8 +36,9 @@ bool LogicalExpressionsOptimizer::OrWithExpression::operator<(const OrWithExpres return std::tie(this->or_function, this->expression) < std::tie(rhs.or_function, rhs.expression); } -LogicalExpressionsOptimizer::LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, UInt64 optimize_min_equality_disjunction_chain_length) - : select_query(select_query_), settings(optimize_min_equality_disjunction_chain_length) +LogicalExpressionsOptimizer::LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, + const TablesWithColumns & tables_with_columns_, UInt64 optimize_min_equality_disjunction_chain_length) + : select_query(select_query_), tables_with_columns(tables_with_columns_), settings(optimize_min_equality_disjunction_chain_length) { } @@ -196,13 +201,39 @@ inline ASTs & getFunctionOperands(const ASTFunction * or_function) } +bool LogicalExpressionsOptimizer::isLowCardinalityEqualityChain(const std::vector & functions) const +{ + if (functions.size() > 1) + { + /// Check if identifier is LowCardinality type + auto & first_operands = getFunctionOperands(functions[0]); + const auto * identifier = first_operands[0]->as(); + if (identifier) + { + auto pos = IdentifierSemantic::getMembership(*identifier); + if (!pos) + pos = IdentifierSemantic::chooseTableColumnMatch(*identifier, tables_with_columns, true); + if (pos) + { + if (auto data_type_and_name = tables_with_columns[*pos].columns.tryGetByName(identifier->shortName())) + { + if (typeid_cast(data_type_and_name->type.get())) + return true; + } + } + } + } + return false; +} + bool LogicalExpressionsOptimizer::mayOptimizeDisjunctiveEqualityChain(const DisjunctiveEqualityChain & chain) const { 
const auto & equalities = chain.second; const auto & equality_functions = equalities.functions; /// We eliminate too short chains. - if (equality_functions.size() < settings.optimize_min_equality_disjunction_chain_length) + if (equality_functions.size() < settings.optimize_min_equality_disjunction_chain_length && + !isLowCardinalityEqualityChain(equality_functions)) return false; /// We check that the right-hand sides of all equalities have the same type. diff --git a/src/Interpreters/LogicalExpressionsOptimizer.h b/src/Interpreters/LogicalExpressionsOptimizer.h index 4991d31f8b1..a8a0d186394 100644 --- a/src/Interpreters/LogicalExpressionsOptimizer.h +++ b/src/Interpreters/LogicalExpressionsOptimizer.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -36,7 +37,7 @@ class LogicalExpressionsOptimizer final public: /// Constructor. Accepts the root of the query DAG. - LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, UInt64 optimize_min_equality_disjunction_chain_length); + LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, const TablesWithColumns & tables_with_columns_, UInt64 optimize_min_equality_disjunction_chain_length); /** Replace all rather long homogeneous OR-chains expr = x1 OR ... OR expr = xN * on the expressions `expr` IN (x1, ..., xN). @@ -79,6 +80,9 @@ private: */ bool mayOptimizeDisjunctiveEqualityChain(const DisjunctiveEqualityChain & chain) const; + /// Check if is LowCardinality OR chain + bool isLowCardinalityEqualityChain(const std::vector & functions) const; + /// Insert the IN expression into the OR chain. static void addInExpression(const DisjunctiveEqualityChain & chain); @@ -96,6 +100,7 @@ private: using ColumnToPosition = std::unordered_map; ASTSelectQuery * select_query; + const TablesWithColumns & tables_with_columns; const ExtractedSettings settings; /// Information about the OR-chains inside the query. DisjunctiveEqualityChainsMap disjunctive_equality_chains_map; diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index eb713019306..c61ba9c3286 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1246,7 +1246,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns); /// Optimizes logical expressions. - LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform(); + LogicalExpressionsOptimizer(select_query, tables_with_columns, settings.optimize_min_equality_disjunction_chain_length.value).perform(); NameSet all_source_columns_set = source_columns_set; if (table_join) From ad387ece4375516d4cfd9a65ac7a3a59141f22df Mon Sep 17 00:00:00 2001 From: Wangyang Guo Date: Wed, 2 Nov 2022 14:21:16 +0800 Subject: [PATCH 029/262] LogicalExpressionsOptimizer: add comments --- src/Interpreters/LogicalExpressionsOptimizer.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/LogicalExpressionsOptimizer.cpp b/src/Interpreters/LogicalExpressionsOptimizer.cpp index bd24e13b129..cd00d77c031 100644 --- a/src/Interpreters/LogicalExpressionsOptimizer.cpp +++ b/src/Interpreters/LogicalExpressionsOptimizer.cpp @@ -231,7 +231,9 @@ bool LogicalExpressionsOptimizer::mayOptimizeDisjunctiveEqualityChain(const Disj const auto & equalities = chain.second; const auto & equality_functions = equalities.functions; - /// We eliminate too short chains. 
+ /// For LowCardinality column, the dict is usually smaller and the index is relatively large. + /// In most cases, merging OR-chain as IN is better than converting each LowCardinality into full column individually. + /// For non-LowCardinality, we need to eliminate too short chains. if (equality_functions.size() < settings.optimize_min_equality_disjunction_chain_length && !isLowCardinalityEqualityChain(equality_functions)) return false; From 11bc712c8e0c9b8711b71d389d003a7b00bd6e3c Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Mon, 7 Nov 2022 07:12:55 +0000 Subject: [PATCH 030/262] add retries on ConnectionError --- .../test_case.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py b/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py index 44df1c369cf..ed4bc74e64f 100644 --- a/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py +++ b/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py @@ -35,12 +35,23 @@ def start_cluster(): cluster.shutdown() +def requests_get(url, attempts=10, sleep=0.5): + attempt = 0 + while True: + attempt += 1 + try: + return requests.get(url) + except requests.exceptions.ConnectionError as e: + if attempt >= attempts: + raise + time.sleep(sleep) + + def test_request_to_node_with_interserver_listen_host(start_cluster): - time.sleep(5) # waiting for interserver listener to start - response_interserver = requests.get( + response_interserver = requests_get( f"http://{INTERSERVER_LISTEN_HOST}:{INTERSERVER_HTTP_PORT}" ) - response_client = requests.get( + response_client = requests_get( f"http://{node_without_interserver_listen_host.ip_address}:8123" ) assert response_interserver.status_code == 200 @@ -49,7 +60,7 @@ def test_request_to_node_with_interserver_listen_host(start_cluster): def test_request_to_node_without_interserver_listen_host(start_cluster): - response = requests.get( + response = requests_get( f"http://{node_without_interserver_listen_host.ip_address}:{INTERSERVER_HTTP_PORT}" ) assert response.status_code == 200 From 3e5f1a9178d023d518fb7b1cd79f52ae1520e280 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 7 Nov 2022 15:22:45 +0100 Subject: [PATCH 031/262] Added is_parameterized_view in ASTFunction and fixed review comments - 40907 Parameterized views as table functions --- src/Interpreters/ActionsVisitor.cpp | 6 ++-- src/Interpreters/Context.cpp | 5 +++- src/Interpreters/QueryNormalizer.cpp | 2 +- src/Parsers/ASTFunction.h | 6 +++- src/Parsers/ASTSelectQuery.cpp | 7 +---- src/Parsers/ASTSelectQuery.h | 7 +++-- src/Parsers/ASTSelectWithUnionQuery.h | 7 ++++- src/Parsers/ASTTablesInSelectQuery.cpp | 10 +------ src/Parsers/ExpressionListParsers.cpp | 10 +------ .../FunctionParameterValuesVisitor.cpp | 30 +++++++++---------- .../02428_parameterized_view.reference | 1 + .../0_stateless/02428_parameterized_view.sql | 3 ++ 12 files changed, 46 insertions(+), 48 deletions(-) diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 4e18740f9b4..d3dd89086e5 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -765,9 +765,9 @@ std::optional ActionsMatcher::getNameAndTypeFromAST(const ASTPt if (const auto * node = index.tryGetNode(child_column_name)) return NameAndTypePair(child_column_name, node->result_type); - /// For parameterized view, we allow query parameters in create which will be substituted by 
select queries - /// so these cannot be evaluated. But if its a parameterized view with sub part ast which does not contain query parameters - /// then it can be evaluated + /// For parameterized view, we allow query parameters in CREATE which will be substituted by SELECT queries + /// so these cannot be evaluated at this point. But if it's a parameterized view with sub part ast which does + /// not contain query parameters then it can be evaluated /// Eg : CREATE VIEW v1 AS SELECT * FROM t1 WHERE Column1={c1:UInt64} AND Column2=3; - Column2=3 should get NameAndTypePair if (!data.only_consts && (data.is_create_parameterized_view && analyzeReceiveQueryParams(ast).empty())) { diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 2617eb7ffa3..63e7af896c2 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -1232,7 +1232,7 @@ void Context::addQueryFactoriesInfo(QueryLogFactories factory_type, const String StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) { - const ASTFunction * function = assert_cast(table_expression.get()); + ASTFunction * function = assert_cast(table_expression.get()); String database_name = getCurrentDatabase(); String table_name = function->name; @@ -1252,7 +1252,10 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) if (table) { if (table.get()->isView() && table->as()->isParameterizedView()) + { + function->is_parameterized_view = true; return table; + } } auto hash = table_expression->getTreeHash(); String key = toString(hash.first) + '_' + toString(hash.second); diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index 19112a7c4c0..76c526d7b12 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -128,7 +128,7 @@ void QueryNormalizer::visit(ASTQueryParameter & node, Data & data) /// Eg: CREATE VIEW v1 AS SELECT * FROM t1 WHERE Column1={c1:UInt64} AND Column2={c1:UInt64}; - c1 is used twice auto it_alias = data.query_parameters.find(node.name); if (it_alias != data.query_parameters.end()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Same alias used multiple times {} ", backQuote(node.name)); + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Same alias used multiple times {}", backQuote(node.name)); data.query_parameters.insert(node.name); } diff --git a/src/Parsers/ASTFunction.h b/src/Parsers/ASTFunction.h index 4e8874a8839..b658249b42a 100644 --- a/src/Parsers/ASTFunction.h +++ b/src/Parsers/ASTFunction.h @@ -26,6 +26,10 @@ public: bool is_lambda_function = false; + /// This field is updated in executeTableFunction if its a parameterized_view + /// and used in ASTTablesInSelectQuery::FormatImpl for EXPLAIN SYNTAX of SELECT parameterized view + bool is_parameterized_view = false; + // We have to make these fields ASTPtr because this is what the visitors // expect. Some of them take const ASTPtr & (makes no sense), and some // take ASTPtr & and modify it. 
I don't understand how the latter is @@ -55,7 +59,7 @@ public: std::string getWindowDescription() const; - //This is used for parameterized view, to identify if name is 'db.view' + /// This is used for parameterized view, to identify if name is 'db.view' bool has_database_name = false; protected: diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index b5198e3df0d..1f93333628d 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -9,8 +9,6 @@ #include #include -#include - namespace DB { @@ -482,10 +480,7 @@ bool ASTSelectQuery::hasQueryParameters() const { if (!has_query_parameters.has_value()) { - if (analyzeReceiveQueryParams(std::make_shared(*this)).empty()) - has_query_parameters = false; - else - has_query_parameters = true; + has_query_parameters = !analyzeReceiveQueryParams(std::make_shared(*this)).empty(); } return has_query_parameters.value(); diff --git a/src/Parsers/ASTSelectQuery.h b/src/Parsers/ASTSelectQuery.h index 5c64534f1c2..881cc954ad9 100644 --- a/src/Parsers/ASTSelectQuery.h +++ b/src/Parsers/ASTSelectQuery.h @@ -141,8 +141,6 @@ public: void setFinal(); QueryKind getQueryKind() const override { return QueryKind::Select; } - - mutable std::optional has_query_parameters; bool hasQueryParameters() const; protected: @@ -151,6 +149,11 @@ protected: private: std::unordered_map positions; + /// This variable is optional as we want to set it on the first call to hasQueryParameters + /// and return the same variable on future calls to hasQueryParameters + /// its mutable as we set it in const function + mutable std::optional has_query_parameters; + ASTPtr & getExpression(Expression expr); }; diff --git a/src/Parsers/ASTSelectWithUnionQuery.h b/src/Parsers/ASTSelectWithUnionQuery.h index 334bb86932f..a775e217308 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.h +++ b/src/Parsers/ASTSelectWithUnionQuery.h @@ -32,9 +32,14 @@ public: /// Consider any mode other than ALL as non-default. 
bool hasNonDefaultUnionMode() const; - mutable std::optional has_query_parameters; bool hasQueryParameters() const; +private: + /// This variable is optional as we want to set it on the first call to hasQueryParameters + /// and return the same variable on future calls to hasQueryParameters + /// its mutable as we set it in const function + mutable std::optional has_query_parameters; + }; } diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index 9e889c796f7..ed70f961c4d 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -113,24 +113,16 @@ void ASTTableExpression::formatImpl(const FormatSettings & settings, FormatState settings.ostr << " "; database_and_table_name->formatImpl(settings, state, frame); } - /// In case of table function view, table_function is preferred over subquery for EXPLAIN SYNTAX - else if (table_function && table_function->as() && table_function->as()->name=="view") + else if (table_function && !table_function->as()->is_parameterized_view) { settings.ostr << " "; table_function->formatImpl(settings, state, frame); } - /// For parameterized view, subquery is preferred over table_function for EXPLAIN SYNTAX - /// we cannot remove the table function part, as its needed for query substitution else if (subquery) { settings.ostr << settings.nl_or_ws << indent_str; subquery->formatImpl(settings, state, frame); } - else if (table_function) - { - settings.ostr << " "; - table_function->formatImpl(settings, state, frame); - } if (final) { diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 3ee95600808..48a310542a4 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -2034,15 +2034,7 @@ std::unique_ptr getFunctionLayer(ASTPtr identifier, bool is_table_functio else if (function_name_lowercase == "grouping") return std::make_unique(function_name_lowercase, allow_function_parameters_); else - { - bool has_database_name = false; - if (const auto *ast_identifier = identifier->as()) - { - if (ast_identifier->compound()) - has_database_name = true; - } - return std::make_unique(function_name, allow_function_parameters_, has_database_name); - } + return std::make_unique(function_name, allow_function_parameters_, identifier->as()->compound()); } diff --git a/src/Parsers/FunctionParameterValuesVisitor.cpp b/src/Parsers/FunctionParameterValuesVisitor.cpp index 93192084187..3cb0fe630f8 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.cpp +++ b/src/Parsers/FunctionParameterValuesVisitor.cpp @@ -38,25 +38,25 @@ private: void visitExpressionList(const ASTExpressionList & expression_list) { - if (expression_list.children.size() == 2) + if (expression_list.children.size() != 2) + return; + + if (const auto * identifier = expression_list.children[0]->as()) { - if (const auto * identifier = expression_list.children[0]->as()) + if (const auto * literal = expression_list.children[1]->as()) { - if (const auto * literal = expression_list.children[1]->as()) + parameter_values[identifier->name()] = convertFieldToString(literal->value); + } + else if (const auto * function = expression_list.children[1]->as()) + { + if (isFunctionCast(function)) { - parameter_values[identifier->name()] = convertFieldToString(literal->value); - } - else if (const auto * function = expression_list.children[1]->as()) - { - if (isFunctionCast(function)) + const auto * cast_expression = assert_cast(function->arguments.get()); + if 
(cast_expression->children.size() != 2) + throw Exception("Function CAST must have exactly two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + if (const auto * cast_literal = cast_expression->children[0]->as()) { - const auto * cast_expression = assert_cast(function->arguments.get()); - if (cast_expression->children.size() != 2) - throw Exception("Function CAST must have exactly two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - if (const auto * cast_literal = cast_expression->children[0]->as()) - { - parameter_values[identifier->name()] = convertFieldToString(cast_literal->value); - } + parameter_values[identifier->name()] = convertFieldToString(cast_literal->value); } } } diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference index 37120d2d1b9..c79c29bbb28 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.reference +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -1,6 +1,7 @@ 20 20 10 +50 SELECT Name, Price, diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index e0a67e7690b..6b08ecb5674 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -19,6 +19,9 @@ set param_price=10; SELECT Price FROM v1; -- { serverError UNKNOWN_QUERY_PARAMETER} SELECT Price FROM v1(price={price:UInt64}); +set param_limit=1; +SELECT Price FROM v1(price=50) LIMIT {limit:UInt64}; + DETACH TABLE v1; ATTACH TABLE v1; From a2b83d359244ec1a9dd9b11ebe7c839c60a139ff Mon Sep 17 00:00:00 2001 From: Wangyang Guo Date: Tue, 8 Nov 2022 15:05:25 +0800 Subject: [PATCH 032/262] add query test: 02477_logical_expressions_optimizer_low_cardinality --- ...cal_expressions_optimizer_low_cardinality.reference | 6 ++++++ ...7_logical_expressions_optimizer_low_cardinality.sql | 10 ++++++++++ 2 files changed, 16 insertions(+) create mode 100644 tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference create mode 100644 tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.sql diff --git a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference new file mode 100644 index 00000000000..dcfcac737c3 --- /dev/null +++ b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.reference @@ -0,0 +1,6 @@ +SELECT a +FROM t_logical_expressions_optimizer_low_cardinality +WHERE a IN (\'x\', \'y\') +SELECT a +FROM t_logical_expressions_optimizer_low_cardinality +WHERE (b = 0) OR (b = 1) diff --git a/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.sql b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.sql new file mode 100644 index 00000000000..be355a05675 --- /dev/null +++ b/tests/queries/0_stateless/02477_logical_expressions_optimizer_low_cardinality.sql @@ -0,0 +1,10 @@ +DROP TABLE IF EXISTS t_logical_expressions_optimizer_low_cardinality; +set optimize_min_equality_disjunction_chain_length=3; +CREATE TABLE t_logical_expressions_optimizer_low_cardinality (a LowCardinality(String), b UInt32) ENGINE = Memory; + +-- LowCardinality case, ignore optimize_min_equality_disjunction_chain_length limit, optimzer applied +EXPLAIN SYNTAX SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE a = 'x' OR a 
= 'y'; +-- Non-LowCardinality case, optimizer not applied for short chains +EXPLAIN SYNTAX SELECT a FROM t_logical_expressions_optimizer_low_cardinality WHERE b = 0 OR b = 1; + +DROP TABLE t_logical_expressions_optimizer_low_cardinality; From 887779e8d8bfcf528a5c492cf13fadea6ebe0306 Mon Sep 17 00:00:00 2001 From: Wangyang Guo Date: Tue, 8 Nov 2022 17:19:18 +0800 Subject: [PATCH 033/262] Add perftest: low_cardinality_query --- tests/performance/low_cardinality_query.xml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 tests/performance/low_cardinality_query.xml diff --git a/tests/performance/low_cardinality_query.xml b/tests/performance/low_cardinality_query.xml new file mode 100644 index 00000000000..989c674b443 --- /dev/null +++ b/tests/performance/low_cardinality_query.xml @@ -0,0 +1,13 @@ + + DROP TABLE IF EXISTS test_lc_query + + CREATE TABLE test_lc_query (x UInt64, lc LowCardinality(Nullable(String))) ENGINE = MergeTree order by x + + + INSERT INTO test_lc_query SELECT number, toString(number % 100) FROM numbers(1e7) + + SELECT count() FROM test_lc_query WHERE lc = '12' OR lc = '34' + SELECT count() FROM test_lc_query WHERE lc = '12' OR lc = '34' OR lc = '56' + + DROP TABLE IF EXISTS test_lc_query + From 779b9197e331996ea3c46c328226aa6706c4dfbc Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Tue, 8 Nov 2022 11:03:40 +0100 Subject: [PATCH 034/262] Moved is_create_parameterized_view check inside finalize function and refactor check for isParmaeterizedView when replacing subquery - 40907 Parameterized views as table functions --- src/Interpreters/ExpressionAnalyzer.cpp | 15 ++++++++------- src/Interpreters/ExpressionAnalyzer.h | 3 ++- src/Interpreters/InterpreterSelectQuery.cpp | 7 +++++-- src/Storages/StorageView.cpp | 7 ++----- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index b13bcf6199d..2d2cb5fd939 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1826,10 +1826,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( chain.finalize(); - /// For creating parameterized view, query parameters are allowed in select - /// As select will be stored without substituting query parameters, we don't want to evaluate the expressions/steps - if (!query_analyzer.query_options.is_create_parameterized_view) - finalize(chain, prewhere_step_num, where_step_num, having_step_num, query); + finalize(chain, prewhere_step_num, where_step_num, having_step_num, query, query_analyzer.query_options.is_create_parameterized_view); chain.clear(); }; @@ -1915,7 +1912,8 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( /// For creating parameterized view, query parameters are allowed in select /// As select will be stored without substituting query parameters, we don't want to evaluate the where expression - if (!query_analyzer.query_options.is_create_parameterized_view) + const bool can_evaluate_filter_column = !query_analyzer.query_options.is_create_parameterized_view; + if (can_evaluate_filter_column) { auto & column_elem = before_where_sample.getByName(query.where()->getColumnName()); @@ -2081,7 +2079,8 @@ void ExpressionAnalysisResult::finalize( ssize_t & prewhere_step_num, ssize_t & where_step_num, ssize_t & having_step_num, - const ASTSelectQuery & query) + const ASTSelectQuery & query, + bool is_create_parameterized_view) { if (prewhere_step_num >= 0) { @@ -2101,7 +2100,9 @@ void ExpressionAnalysisResult::finalize( 
prewhere_step_num = -1; } - if (where_step_num >= 0) + /// For creating parameterized view, query parameters are allowed in select + /// As select will be stored without substituting query parameters, we don't want to evaluate the expressions/steps + if (where_step_num >= 0 && !is_create_parameterized_view) { where_column_name = query.where()->getColumnName(); remove_where_filter = chain.steps.at(where_step_num)->required_output.find(where_column_name)->second; diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 286665bb05f..4b1e4dd18f5 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -293,7 +293,8 @@ struct ExpressionAnalysisResult ssize_t & prewhere_step_num, ssize_t & where_step_num, ssize_t & having_step_num, - const ASTSelectQuery & query); + const ASTSelectQuery & query, + bool is_create_parameterized_view); }; /// SelectQuery specific ExpressionAnalyzer part. diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 35daf55423a..73059cf0684 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -502,10 +502,13 @@ InterpreterSelectQuery::InterpreterSelectQuery( ASTPtr view_table; if (view) { - NameToNameMap parameter_values = analyzeReceiveFunctionParamValues(query_ptr); query_info.is_parameterized_view = view->isParameterizedView(); + NameToNameMap parameter_values; + if (view->isParameterizedView()) + parameter_values = analyzeReceiveFunctionParamValues(query_ptr); view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot, view->isParameterizedView()); - view->replaceQueryParametersIfParametrizedView(query_ptr, parameter_values); + if (view->isParameterizedView()) + view->replaceQueryParametersIfParametrizedView(query_ptr, parameter_values); } syntax_analyzer_result = TreeRewriter(context).analyzeSelect( diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index bb3e818d63e..8afbe7d461f 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -178,11 +178,8 @@ static ASTTableExpression * getFirstTableExpression(ASTSelectQuery & select_quer void StorageView::replaceQueryParametersIfParametrizedView(ASTPtr & outer_query, const NameToNameMap & parameter_values) const { - if (is_parameterized_view) - { - ReplaceQueryParameterVisitor visitor(parameter_values); - visitor.visit(outer_query); - } + ReplaceQueryParameterVisitor visitor(parameter_values); + visitor.visit(outer_query); } void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name, bool parameterized_view) From 2ef30d434302ba4679d1983cec930f0c215c3d1b Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 11 Nov 2022 19:39:18 +0100 Subject: [PATCH 035/262] Fixed clang-tidy build issue by making replaceQueryParametersIfParametrizedView static - 40907 Parameterized views as table functions --- src/Storages/StorageView.cpp | 2 +- src/Storages/StorageView.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 8afbe7d461f..8a2787625fb 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -176,7 +176,7 @@ static ASTTableExpression * getFirstTableExpression(ASTSelectQuery & select_quer return select_element->table_expression->as(); } -void StorageView::replaceQueryParametersIfParametrizedView(ASTPtr & outer_query, const 
NameToNameMap & parameter_values) const +void StorageView::replaceQueryParametersIfParametrizedView(ASTPtr & outer_query, const NameToNameMap & parameter_values) { ReplaceQueryParameterVisitor visitor(parameter_values); visitor.visit(outer_query); diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 0db4295d90c..cd88005a207 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -35,7 +35,7 @@ public: size_t max_block_size, size_t num_streams) override; - void replaceQueryParametersIfParametrizedView(ASTPtr & outer_query, const NameToNameMap & parameter_values) const; + static void replaceQueryParametersIfParametrizedView(ASTPtr & outer_query, const NameToNameMap & parameter_values); static void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot, const bool parameterized_view) { From 95abe5d0274694d2627e8af39758cfba6a8655c8 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 16 Nov 2022 12:27:08 +0100 Subject: [PATCH 036/262] Updated to support query parameters in HAVING clause and addressed review comments - 40907 Parameterized views as table functions --- src/Interpreters/Context.cpp | 10 +++++++--- src/Interpreters/Context.h | 2 -- src/Interpreters/ExpressionAnalyzer.cpp | 9 +++++++-- src/Interpreters/InterpreterSelectQuery.cpp | 8 +++++--- src/Parsers/ASTFunction.h | 4 ++-- src/Parsers/ASTTablesInSelectQuery.cpp | 2 +- src/Parsers/ExpressionListParsers.cpp | 8 ++++---- src/Parsers/FunctionParameterValuesVisitor.cpp | 2 +- src/Parsers/FunctionParameterValuesVisitor.h | 2 +- .../0_stateless/02428_parameterized_view.reference | 1 + tests/queries/0_stateless/02428_parameterized_view.sql | 5 +++++ 11 files changed, 34 insertions(+), 19 deletions(-) diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index e0fa4e13abb..c0537a252d8 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -147,6 +147,7 @@ namespace ErrorCodes extern const int INVALID_SETTING_VALUE; extern const int UNKNOWN_READ_METHOD; extern const int NOT_IMPLEMENTED; + extern const int UNKNOWN_FUNCTION; } @@ -1238,7 +1239,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) String database_name = getCurrentDatabase(); String table_name = function->name; - if (function->has_database_name) + if (function->is_compound_name) { std::vector parts; splitInto<'.'>(parts, function->name); @@ -1255,7 +1256,7 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) { if (table.get()->isView() && table->as()->isParameterizedView()) { - function->is_parameterized_view = true; + function->prefer_subquery_to_function_formatting = true; return table; } } @@ -1271,7 +1272,10 @@ StoragePtr Context::executeTableFunction(const ASTPtr & table_expression) } catch (Exception & e) { - e.addMessage(" or incorrect parameterized view"); + if (e.code() == ErrorCodes::UNKNOWN_FUNCTION) + { + e.addMessage(" or incorrect parameterized view"); + } throw; } if (getSettingsRef().use_structure_from_insertion_table_in_table_functions && table_function_ptr->needStructureHint()) diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 1ce2bd914f6..0eaec215588 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -371,8 +371,6 @@ private: /// Temporary data for query execution accounting. TemporaryDataOnDiskScopePtr temp_data_on_disk; - - public: /// Some counters for current query execution. 
/// Most of them are workarounds and should be removed in the future. diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index eb7d799ec0a..d096a0701d6 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -72,6 +72,7 @@ #include #include #include +#include namespace DB { @@ -1485,6 +1486,10 @@ bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns); getRootActionsForHaving(select_query->having(), only_types, step.actions()); + + if (query_options.is_create_parameterized_view && !analyzeReceiveQueryParams(select_query->having()).empty()) + return true; + step.addRequiredOutput(select_query->having()->getColumnName()); return true; @@ -2095,14 +2100,14 @@ void ExpressionAnalysisResult::finalize( /// For creating parameterized view, query parameters are allowed in select /// As select will be stored without substituting query parameters, we don't want to evaluate the expressions/steps - if (where_step_num >= 0 && !is_create_parameterized_view) + if (where_step_num >= 0 && !(is_create_parameterized_view && !analyzeReceiveQueryParams(query.where()).empty())) { where_column_name = query.where()->getColumnName(); remove_where_filter = chain.steps.at(where_step_num)->required_output.find(where_column_name)->second; where_step_num = -1; } - if (having_step_num >= 0) + if (having_step_num >= 0 && !(is_create_parameterized_view && !analyzeReceiveQueryParams(query.having()).empty())) { having_column_name = query.having()->getColumnName(); remove_having_filter = chain.steps.at(having_step_num)->required_output.find(having_column_name)->second; diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 688d82ae8a4..47b7ba7602a 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -506,11 +506,13 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (view) { query_info.is_parameterized_view = view->isParameterizedView(); + /// We need to fetch the parameters set for SELECT parameterized view before the query is replaced. + /// ad after query is replaced, we use these parameters to substitute in the parameterized view query NameToNameMap parameter_values; - if (view->isParameterizedView()) - parameter_values = analyzeReceiveFunctionParamValues(query_ptr); + if (query_info.is_parameterized_view ) + parameter_values = analyzeFunctionParamValues(query_ptr); view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot, view->isParameterizedView()); - if (view->isParameterizedView()) + if (query_info.is_parameterized_view ) view->replaceQueryParametersIfParametrizedView(query_ptr, parameter_values); } diff --git a/src/Parsers/ASTFunction.h b/src/Parsers/ASTFunction.h index b658249b42a..ca182a4fb11 100644 --- a/src/Parsers/ASTFunction.h +++ b/src/Parsers/ASTFunction.h @@ -28,7 +28,7 @@ public: /// This field is updated in executeTableFunction if its a parameterized_view /// and used in ASTTablesInSelectQuery::FormatImpl for EXPLAIN SYNTAX of SELECT parameterized view - bool is_parameterized_view = false; + bool prefer_subquery_to_function_formatting = false; // We have to make these fields ASTPtr because this is what the visitors // expect. 
Some of them take const ASTPtr & (makes no sense), and some @@ -60,7 +60,7 @@ public: std::string getWindowDescription() const; /// This is used for parameterized view, to identify if name is 'db.view' - bool has_database_name = false; + bool is_compound_name = false; protected: void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; diff --git a/src/Parsers/ASTTablesInSelectQuery.cpp b/src/Parsers/ASTTablesInSelectQuery.cpp index ed70f961c4d..75c0ef26c07 100644 --- a/src/Parsers/ASTTablesInSelectQuery.cpp +++ b/src/Parsers/ASTTablesInSelectQuery.cpp @@ -113,7 +113,7 @@ void ASTTableExpression::formatImpl(const FormatSettings & settings, FormatState settings.ostr << " "; database_and_table_name->formatImpl(settings, state, frame); } - else if (table_function && !table_function->as()->is_parameterized_view) + else if (table_function && !(table_function->as()->prefer_subquery_to_function_formatting && subquery)) { settings.ostr << " "; table_function->formatImpl(settings, state, frame); diff --git a/src/Parsers/ExpressionListParsers.cpp b/src/Parsers/ExpressionListParsers.cpp index 7b2ece9c0c9..ee212a1993b 100644 --- a/src/Parsers/ExpressionListParsers.cpp +++ b/src/Parsers/ExpressionListParsers.cpp @@ -830,8 +830,8 @@ public: class FunctionLayer : public Layer { public: - explicit FunctionLayer(String function_name_, bool allow_function_parameters_ = true, bool has_database_name_ = false) - : function_name(function_name_), allow_function_parameters(allow_function_parameters_), has_database_name(has_database_name_){} + explicit FunctionLayer(String function_name_, bool allow_function_parameters_ = true, bool is_compound_name_ = false) + : function_name(function_name_), allow_function_parameters(allow_function_parameters_), is_compound_name(is_compound_name_){} bool parse(IParser::Pos & pos, Expected & expected, Action & action) override { @@ -972,7 +972,7 @@ public: function_name += "Distinct"; auto function_node = makeASTFunction(function_name, std::move(elements)); - function_node->has_database_name = has_database_name; + function_node->is_compound_name = is_compound_name; if (parameters) { @@ -1027,7 +1027,7 @@ private: ASTPtr parameters; bool allow_function_parameters; - bool has_database_name; + bool is_compound_name; }; /// Layer for priority brackets and tuple function diff --git a/src/Parsers/FunctionParameterValuesVisitor.cpp b/src/Parsers/FunctionParameterValuesVisitor.cpp index 3cb0fe630f8..cb187b2a56a 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.cpp +++ b/src/Parsers/FunctionParameterValuesVisitor.cpp @@ -64,7 +64,7 @@ private: } }; -NameToNameMap analyzeReceiveFunctionParamValues(const ASTPtr & ast) +NameToNameMap analyzeFunctionParamValues(const ASTPtr & ast) { NameToNameMap parameter_values; FunctionParameterValuesVisitor(parameter_values).visit(ast); diff --git a/src/Parsers/FunctionParameterValuesVisitor.h b/src/Parsers/FunctionParameterValuesVisitor.h index 7fd594ddc84..f87257fc979 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.h +++ b/src/Parsers/FunctionParameterValuesVisitor.h @@ -9,6 +9,6 @@ namespace DB { /// Find parameters in a query parameter values and collect them into map. 
-NameToNameMap analyzeReceiveFunctionParamValues(const ASTPtr & ast); +NameToNameMap analyzeFunctionParamValues(const ASTPtr & ast); } diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference index c79c29bbb28..9ea5e464b8e 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.reference +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -15,3 +15,4 @@ FROM 50 10 20 +30 diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index 6b08ecb5674..1ac35f42d4c 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -55,6 +55,11 @@ CREATE VIEW system.v1 AS SELECT * FROM system.Catalog WHERE Price={price:UInt64} SELECT Price FROM system.v1(price=20); SELECT Price FROM `system.v1`(price=20); -- { serverError UNKNOWN_FUNCTION } +INSERT INTO Catalog VALUES ('Book2', 30, 8); +INSERT INTO Catalog VALUES ('Book3', 30, 8); + +CREATE VIEW v5 AS SELECT Price FROM Catalog WHERE {price:UInt64} HAVING Quantity in (SELECT {quantity:UInt64}) LIMIT {limit:UInt64}; +SELECT Price FROM v5(price=30, quantity=8,limit=1); DROP TABLE v1; DROP TABLE v2; From 6d35bc06c54d855367943ed37b52af0f2f42fb27 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 16 Nov 2022 13:01:02 +0100 Subject: [PATCH 037/262] Fixed style check fail in InterpreterSelectQuery.cpp - 40907 Parameterized views as table functions --- src/Interpreters/InterpreterSelectQuery.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 47b7ba7602a..060d07a9763 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -509,10 +509,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// We need to fetch the parameters set for SELECT parameterized view before the query is replaced. 
/// ad after query is replaced, we use these parameters to substitute in the parameterized view query NameToNameMap parameter_values; - if (query_info.is_parameterized_view ) + if (query_info.is_parameterized_view) parameter_values = analyzeFunctionParamValues(query_ptr); view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot, view->isParameterizedView()); - if (query_info.is_parameterized_view ) + if (query_info.is_parameterized_view) view->replaceQueryParametersIfParametrizedView(query_ptr, parameter_values); } From 93eaf553f40e0c87cbc205565efc1f7496de14c4 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 16 Nov 2022 15:33:42 +0100 Subject: [PATCH 038/262] Fixed parameterized_view test - 40907 Parameterized views as table functions --- tests/queries/0_stateless/02428_parameterized_view.sql | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index 1ac35f42d4c..934ddd18d49 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -15,12 +15,12 @@ CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price={price:UInt64}; SELECT Price FROM v1(price=20); SELECT Price FROM `v1`(price=20); -set param_price=10; +set param_p=10; SELECT Price FROM v1; -- { serverError UNKNOWN_QUERY_PARAMETER} -SELECT Price FROM v1(price={price:UInt64}); +SELECT Price FROM v1(price={p:UInt64}); -set param_limit=1; -SELECT Price FROM v1(price=50) LIMIT {limit:UInt64}; +set param_l=1; +SELECT Price FROM v1(price=50) LIMIT ({l:UInt64}); DETACH TABLE v1; ATTACH TABLE v1; From 40adaeef92eaa9519f0d125f2cc1d070488d8238 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 18 Nov 2022 16:15:37 +0000 Subject: [PATCH 039/262] Fix aggregate functions optimization --- ...egateFunctionsArithmericOperationsPass.cpp | 12 +-- ...optimize_aggregation_arithmetics.reference | 81 +++++++++++++++++++ ...02481_optimize_aggregation_arithmetics.sql | 26 ++++++ 3 files changed, 114 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/02481_optimize_aggregation_arithmetics.reference create mode 100644 tests/queries/0_stateless/02481_optimize_aggregation_arithmetics.sql diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index dcf386b2988..7420321d3ee 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -116,8 +116,9 @@ public: if (!function_name_if_constant_is_negative.empty() && left_argument_constant_value_literal < zeroField(left_argument_constant_value_literal)) { - resolveAggregateFunctionNode(*aggregate_function_node, function_name_if_constant_is_negative); + lower_function_name = function_name_if_constant_is_negative; } + resolveAggregateFunctionNode(*aggregate_function_node, inner_function_arguments_nodes[1], lower_function_name); auto inner_function = aggregate_function_arguments_nodes[0]; auto inner_function_right_argument = std::move(inner_function_arguments_nodes[1]); @@ -132,8 +133,9 @@ public: if (!function_name_if_constant_is_negative.empty() && right_argument_constant_value_literal < zeroField(right_argument_constant_value_literal)) { - resolveAggregateFunctionNode(*aggregate_function_node, function_name_if_constant_is_negative); + lower_function_name = 
function_name_if_constant_is_negative; } + resolveAggregateFunctionNode(*aggregate_function_node, inner_function_arguments_nodes[0], function_name_if_constant_is_negative); auto inner_function = aggregate_function_arguments_nodes[0]; auto inner_function_left_argument = std::move(inner_function_arguments_nodes[0]); @@ -144,16 +146,16 @@ public: } private: - static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const String & aggregate_function_name) + static inline void resolveAggregateFunctionNode(FunctionNode & function_node, QueryTreeNodePtr & argument, const String & aggregate_function_name) { - auto function_result_type = function_node.getResultType(); auto function_aggregate_function = function_node.getAggregateFunction(); AggregateFunctionProperties properties; auto aggregate_function = AggregateFunctionFactory::instance().get(aggregate_function_name, - function_aggregate_function->getArgumentTypes(), + { argument->getResultType() }, function_aggregate_function->getParameters(), properties); + auto function_result_type = aggregate_function->getReturnType(); function_node.resolveAsAggregateFunction(std::move(aggregate_function), std::move(function_result_type)); } diff --git a/tests/queries/0_stateless/02481_optimize_aggregation_arithmetics.reference b/tests/queries/0_stateless/02481_optimize_aggregation_arithmetics.reference new file mode 100644 index 00000000000..1cc6fc5d4b1 --- /dev/null +++ b/tests/queries/0_stateless/02481_optimize_aggregation_arithmetics.reference @@ -0,0 +1,81 @@ +-- { echoOn } +EXPLAIN actions=1 + ( + SELECT round(avg(log(2) * number), 6) AS k + FROM numbers(10000000) + GROUP BY number % 3, number % 2 + ) +SETTINGS allow_experimental_analyzer=1; +Expression ((Project names + Projection)) +Actions: INPUT : 0 -> avg(number_0) Float64 : 0 + COLUMN Const(Float64) -> 0.6931471805599453_Float64 Float64 : 1 + COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 2 + FUNCTION multiply(0.6931471805599453_Float64 :: 1, avg(number_0) :: 0) -> multiply(0.6931471805599453_Float64, avg(number_0)) Float64 : 3 + FUNCTION round(multiply(0.6931471805599453_Float64, avg(number_0)) :: 3, 6_UInt8 :: 2) -> round(multiply(0.6931471805599453_Float64, avg(number_0)), 6_UInt8) Float64 : 0 + ALIAS round(multiply(0.6931471805599453_Float64, avg(number_0)), 6_UInt8) :: 0 -> k Float64 : 2 +Positions: 2 + Aggregating + Keys: modulo(number_0, 3_UInt8), modulo(number_0, 2_UInt8) + Aggregates: + avg(number_0) + Function: avg(UInt64) → Float64 + Arguments: number_0 + Expression ((Before GROUP BY + Change column names to column identifiers)) + Actions: INPUT : 0 -> number UInt64 : 0 + COLUMN Const(UInt8) -> 3_UInt8 UInt8 : 1 + COLUMN Const(UInt8) -> 2_UInt8 UInt8 : 2 + ALIAS number :: 0 -> number_0 UInt64 : 3 + FUNCTION modulo(number_0 : 3, 3_UInt8 :: 1) -> modulo(number_0, 3_UInt8) UInt8 : 0 + FUNCTION modulo(number_0 : 3, 2_UInt8 :: 2) -> modulo(number_0, 2_UInt8) UInt8 : 1 + Positions: 0 1 3 + ReadFromStorage (SystemNumbers) +EXPLAIN actions=1 + ( + SELECT round(log(2) * avg(number), 6) AS k + FROM numbers(10000000) + GROUP BY number % 3, number % 2 + ) +SETTINGS allow_experimental_analyzer=1; +Expression ((Project names + Projection)) +Actions: INPUT : 0 -> avg(number_0) Float64 : 0 + COLUMN Const(Float64) -> 0.6931471805599453_Float64 Float64 : 1 + COLUMN Const(UInt8) -> 6_UInt8 UInt8 : 2 + FUNCTION multiply(0.6931471805599453_Float64 :: 1, avg(number_0) :: 0) -> multiply(0.6931471805599453_Float64, avg(number_0)) Float64 : 3 + FUNCTION round(multiply(0.6931471805599453_Float64, 
avg(number_0)) :: 3, 6_UInt8 :: 2) -> round(multiply(0.6931471805599453_Float64, avg(number_0)), 6_UInt8) Float64 : 0 + ALIAS round(multiply(0.6931471805599453_Float64, avg(number_0)), 6_UInt8) :: 0 -> k Float64 : 2 +Positions: 2 + Aggregating + Keys: modulo(number_0, 3_UInt8), modulo(number_0, 2_UInt8) + Aggregates: + avg(number_0) + Function: avg(UInt64) → Float64 + Arguments: number_0 + Expression ((Before GROUP BY + Change column names to column identifiers)) + Actions: INPUT : 0 -> number UInt64 : 0 + COLUMN Const(UInt8) -> 3_UInt8 UInt8 : 1 + COLUMN Const(UInt8) -> 2_UInt8 UInt8 : 2 + ALIAS number :: 0 -> number_0 UInt64 : 3 + FUNCTION modulo(number_0 : 3, 3_UInt8 :: 1) -> modulo(number_0, 3_UInt8) UInt8 : 0 + FUNCTION modulo(number_0 : 3, 2_UInt8 :: 2) -> modulo(number_0, 2_UInt8) UInt8 : 1 + Positions: 0 1 3 + ReadFromStorage (SystemNumbers) +SELECT round(avg(log(2) * number), 6) AS k +FROM numbers(10000000) +GROUP BY number % 3, number % 2 +SETTINGS allow_experimental_analyzer=1; +3465734.516505 +3465735.209653 +3465735.9028 +3465736.595947 +3465735.209653 +3465735.9028 +SELECT round(log(2) * avg(number), 6) AS k +FROM numbers(10000000) +GROUP BY number % 3, number % 2 +SETTINGS allow_experimental_analyzer=0; +3465734.516505 +3465735.209653 +3465735.9028 +3465736.595947 +3465735.209653 +3465735.9028 diff --git a/tests/queries/0_stateless/02481_optimize_aggregation_arithmetics.sql b/tests/queries/0_stateless/02481_optimize_aggregation_arithmetics.sql new file mode 100644 index 00000000000..5fec5a79813 --- /dev/null +++ b/tests/queries/0_stateless/02481_optimize_aggregation_arithmetics.sql @@ -0,0 +1,26 @@ +-- { echoOn } +EXPLAIN actions=1 + ( + SELECT round(avg(log(2) * number), 6) AS k + FROM numbers(10000000) + GROUP BY number % 3, number % 2 + ) +SETTINGS allow_experimental_analyzer=1; + +EXPLAIN actions=1 + ( + SELECT round(log(2) * avg(number), 6) AS k + FROM numbers(10000000) + GROUP BY number % 3, number % 2 + ) +SETTINGS allow_experimental_analyzer=1; + +SELECT round(avg(log(2) * number), 6) AS k +FROM numbers(10000000) +GROUP BY number % 3, number % 2 +SETTINGS allow_experimental_analyzer=1; + +SELECT round(log(2) * avg(number), 6) AS k +FROM numbers(10000000) +GROUP BY number % 3, number % 2 +SETTINGS allow_experimental_analyzer=0; From 0545e241512791b8d4dc218cf4ff6dc8883acf52 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 28 Nov 2022 19:05:01 +0100 Subject: [PATCH 040/262] Updated to support query parameters in different parts of query (SELECT, ORDERBY, GROUP BY), updated the approach in WHERE & HAVING, added tests for the same - 40907 Parameterized views as table functions --- src/Interpreters/ActionsVisitor.cpp | 24 +++--- src/Interpreters/ActionsVisitor.h | 4 +- src/Interpreters/ExpressionAnalyzer.cpp | 44 ++++------- src/Interpreters/ExpressionAnalyzer.h | 5 +- src/Interpreters/InterpreterSelectQuery.cpp | 19 ++++- src/Interpreters/QueryNormalizer.cpp | 15 +--- src/Interpreters/QueryNormalizer.h | 1 - .../TranslateQualifiedNamesVisitor.cpp | 15 +++- .../TranslateQualifiedNamesVisitor.h | 4 +- src/Interpreters/TreeRewriter.cpp | 30 +++++++- src/Interpreters/TreeRewriter.h | 5 +- src/Parsers/ASTFunction.cpp | 2 + src/Storages/StorageSnapshot.cpp | 23 ++++-- src/Storages/StorageSnapshot.h | 2 +- src/Storages/StorageView.cpp | 4 +- src/Storages/StorageView.h | 8 +- .../02428_parameterized_view.reference | 6 +- .../0_stateless/02428_parameterized_view.sql | 74 +++++++++++-------- 18 files changed, 170 insertions(+), 115 deletions(-) diff --git 
a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index d3dd89086e5..7e4fa5d168a 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -537,8 +538,7 @@ ActionsMatcher::Data::Data( bool only_consts_, bool create_source_for_in_, AggregationKeysInfo aggregation_keys_info_, - bool build_expression_with_window_functions_, - bool is_create_parameterized_view_) + bool build_expression_with_window_functions_) : WithContext(context_) , set_size_limit(set_size_limit_) , subquery_depth(subquery_depth_) @@ -552,7 +552,6 @@ ActionsMatcher::Data::Data( , actions_stack(std::move(actions_dag), context_) , aggregation_keys_info(aggregation_keys_info_) , build_expression_with_window_functions(build_expression_with_window_functions_) - , is_create_parameterized_view(is_create_parameterized_view_) , next_unique_suffix(actions_stack.getLastActions().getOutputs().size() + 1) { } @@ -765,16 +764,11 @@ std::optional ActionsMatcher::getNameAndTypeFromAST(const ASTPt if (const auto * node = index.tryGetNode(child_column_name)) return NameAndTypePair(child_column_name, node->result_type); - /// For parameterized view, we allow query parameters in CREATE which will be substituted by SELECT queries - /// so these cannot be evaluated at this point. But if it's a parameterized view with sub part ast which does - /// not contain query parameters then it can be evaluated - /// Eg : CREATE VIEW v1 AS SELECT * FROM t1 WHERE Column1={c1:UInt64} AND Column2=3; - Column2=3 should get NameAndTypePair - if (!data.only_consts && (data.is_create_parameterized_view && analyzeReceiveQueryParams(ast).empty())) - { + if (!data.only_consts) throw Exception( "Unknown identifier: " + child_column_name + "; there are columns: " + data.actions_stack.dumpNames(), ErrorCodes::UNKNOWN_IDENTIFIER); - } + return {}; } @@ -1130,6 +1124,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & const auto * function = child->as(); const auto * identifier = child->as(); + const auto * query_parameter = child->as(); if (function && function->name == "lambda") { /// If the argument is a lambda expression, just remember its approximate type. @@ -1210,6 +1205,15 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & argument_types.push_back(column.type); argument_names.push_back(column.name); } + else if (query_parameter) + { + const auto data_type = DataTypeFactory::instance().get(query_parameter->type); + ColumnWithTypeAndName column(data_type,query_parameter->getColumnName()); + data.addColumn(column); + + argument_types.push_back(data_type); + argument_names.push_back(query_parameter->name); + } else { /// If the argument is not a lambda expression, call it recursively and find out its type. 
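Note on the ActionsVisitor hunk above: the new `ASTQueryParameter` branch lets the analyzer treat a query parameter that appears as a function argument as a column of its declared type, so the CREATE of a parameterized view is no longer limited to a bare `WHERE col = {p:Type}` comparison. A minimal SQL sketch of the kind of definition this commit is aiming at (view, column and parameter names here are illustrative, not taken from the patch):

```sql
CREATE TABLE Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory;

-- {base:UInt64} appears as an argument of the function plus(), which is exactly the
-- case the ASTQueryParameter branch above handles while building actions at CREATE time:
-- only the declared type is known, not the value.
CREATE VIEW pv_offset AS
    SELECT Name, Price
    FROM Catalog
    WHERE Price >= {base:UInt64} + 10;

SELECT Price FROM pv_offset(base=10);
```
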
diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index d02cc3e9f6d..fea013fd075 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -134,7 +134,6 @@ public: ScopeStack actions_stack; AggregationKeysInfo aggregation_keys_info; bool build_expression_with_window_functions; - bool is_create_parameterized_view; /* * Remember the last unique column suffix to avoid quadratic behavior @@ -155,8 +154,7 @@ public: bool only_consts_, bool create_source_for_in_, AggregationKeysInfo aggregation_keys_info_, - bool build_expression_with_window_functions_ = false, - bool is_create_parameterized_view = false); + bool build_expression_with_window_functions_ = false); /// Does result of the calculation already exists in the block. bool hasColumn(const String & column_name) const; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index d096a0701d6..808637d3171 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -538,7 +538,7 @@ void SelectQueryExpressionAnalyzer::makeSetsForIndex(const ASTPtr & node) } -void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts, bool is_create_parameterized_view) +void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts) { LogAST log; ActionsVisitor::Data visitor_data( @@ -553,8 +553,7 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_ only_consts, !isRemoteStorage() /* create_source_for_in */, getAggregationKeysInfo(), - false /* build_expression_with_window_functions */, - is_create_parameterized_view); + false /* build_expression_with_window_functions */); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -1287,12 +1286,7 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(columns_after_join); - getRootActions(select_query->where(), only_types, step.actions(), false/*only_consts*/, query_options.is_create_parameterized_view); - - /// For creating parameterized view, query parameters are allowed in select - /// As select will be stored without substituting query parameters, we don't want to evaluate the where expression - if (query_options.is_create_parameterized_view) - return true; + getRootActions(select_query->where(), only_types, step.actions(), false/*only_consts*/); auto where_column_name = select_query->where()->getColumnName(); step.addRequiredOutput(where_column_name); @@ -1487,9 +1481,6 @@ bool SelectQueryExpressionAnalyzer::appendHaving(ExpressionActionsChain & chain, getRootActionsForHaving(select_query->having(), only_types, step.actions()); - if (query_options.is_create_parameterized_view && !analyzeReceiveQueryParams(select_query->having()).empty()) - return true; - step.addRequiredOutput(select_query->having()->getColumnName()); return true; @@ -1501,7 +1492,7 @@ void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns); - getRootActions(select_query->select(), only_types, step.actions()); + getRootActions(select_query->select(), only_types, step.actions(), false /*only_consts*/); for (const auto & child : select_query->select()->children) appendSelectSkipWindowExpressions(step, child); @@ 
-1831,7 +1822,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( chain.finalize(); - finalize(chain, prewhere_step_num, where_step_num, having_step_num, query, query_analyzer.query_options.is_create_parameterized_view); + finalize(chain, prewhere_step_num, where_step_num, having_step_num, query); chain.clear(); }; @@ -1915,17 +1906,11 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( before_where, ExpressionActionsSettings::fromSettings(context->getSettingsRef())).execute(before_where_sample); - /// For creating parameterized view, query parameters are allowed in select - /// As select will be stored without substituting query parameters, we don't want to evaluate the where expression - const bool can_evaluate_filter_column = !query_analyzer.query_options.is_create_parameterized_view; - if (can_evaluate_filter_column) - { - auto & column_elem - = before_where_sample.getByName(query.where()->getColumnName()); - /// If the filter column is a constant and not a query parameter, record it. - if (column_elem.column) - where_constant_filter_description = ConstantFilterDescription(*column_elem.column); - } + auto & column_elem + = before_where_sample.getByName(query.where()->getColumnName()); + /// If the filter column is a constant and not a query parameter, record it. + if (column_elem.column) + where_constant_filter_description = ConstantFilterDescription(*column_elem.column); } } chain.addStep(); @@ -2077,8 +2062,7 @@ void ExpressionAnalysisResult::finalize( ssize_t & prewhere_step_num, ssize_t & where_step_num, ssize_t & having_step_num, - const ASTSelectQuery & query, - bool is_create_parameterized_view) + const ASTSelectQuery & query) { if (prewhere_step_num >= 0) { @@ -2098,16 +2082,14 @@ void ExpressionAnalysisResult::finalize( prewhere_step_num = -1; } - /// For creating parameterized view, query parameters are allowed in select - /// As select will be stored without substituting query parameters, we don't want to evaluate the expressions/steps - if (where_step_num >= 0 && !(is_create_parameterized_view && !analyzeReceiveQueryParams(query.where()).empty())) + if (where_step_num >= 0) { where_column_name = query.where()->getColumnName(); remove_where_filter = chain.steps.at(where_step_num)->required_output.find(where_column_name)->second; where_step_num = -1; } - if (having_step_num >= 0 && !(is_create_parameterized_view && !analyzeReceiveQueryParams(query.having()).empty())) + if (having_step_num >= 0) { having_column_name = query.having()->getColumnName(); remove_having_filter = chain.steps.at(having_step_num)->required_output.find(having_column_name)->second; diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index 4b1e4dd18f5..ddb41a00f84 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -175,7 +175,7 @@ protected: ArrayJoinActionPtr addMultipleArrayJoinAction(ActionsDAGPtr & actions, bool is_left) const; - void getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false, bool is_create_parameterized_view = false); + void getRootActions(const ASTPtr & ast, bool no_makeset_for_subqueries, ActionsDAGPtr & actions, bool only_consts = false); /** Similar to getRootActions but do not make sets when analyzing IN functions. It's used in * analyzeAggregation which happens earlier than analyzing PREWHERE and WHERE. 
If we did, the @@ -293,8 +293,7 @@ struct ExpressionAnalysisResult ssize_t & prewhere_step_num, ssize_t & where_step_num, ssize_t & having_step_num, - const ASTSelectQuery & query, - bool is_create_parameterized_view); + const ASTSelectQuery & query); }; /// SelectQuery specific ExpressionAnalyzer part. diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 060d07a9763..56a7e3d6996 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -503,23 +503,34 @@ InterpreterSelectQuery::InterpreterSelectQuery( { /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it. ASTPtr view_table; + NameToNameMap parameter_values; if (view) { query_info.is_parameterized_view = view->isParameterizedView(); /// We need to fetch the parameters set for SELECT parameterized view before the query is replaced. /// ad after query is replaced, we use these parameters to substitute in the parameterized view query - NameToNameMap parameter_values; if (query_info.is_parameterized_view) + { parameter_values = analyzeFunctionParamValues(query_ptr); + view->setParameterValues(parameter_values); + } view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot, view->isParameterizedView()); if (query_info.is_parameterized_view) - view->replaceQueryParametersIfParametrizedView(query_ptr, parameter_values); + { + view->replaceQueryParametersIfParametrizedView(query_ptr); + } + } syntax_analyzer_result = TreeRewriter(context).analyzeSelect( query_ptr, TreeRewriterResult(source_header.getNamesAndTypesList(), storage, storage_snapshot), - options, joined_tables.tablesWithColumns(), required_result_column_names, table_join); + options, + joined_tables.tablesWithColumns(), + required_result_column_names, + table_join, + query_info.is_parameterized_view, + parameter_values); query_info.syntax_analyzer_result = syntax_analyzer_result; context->setDistributed(syntax_analyzer_result->is_remote_storage); @@ -646,7 +657,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( query_info.filter_asts.push_back(query_info.additional_filter_ast); } - source_header = storage_snapshot->getSampleBlockForColumns(required_columns); + source_header = storage_snapshot->getSampleBlockForColumns(required_columns, parameter_values); } /// Calculate structure of the result. 
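Note on the InterpreterSelectQuery hunk above: the name-to-value map is collected from the table-function-style call via `analyzeFunctionParamValues` and handed to the `StorageView` before `replaceWithSubquery`, and only then are the values substituted into the outer query; the same map is later passed to `getSampleBlockForColumns`. In SQL terms, following the shape of the EXPLAIN SYNTAX reference in the test, the rewrite is roughly (exact formatting may differ):

```sql
-- What the user writes: the parameterized view is called like a table function.
SELECT Price FROM pv1(price=10);

-- Roughly what the query looks like after the view is replaced with a subquery and
-- the collected parameter value is substituted as a typed constant.
SELECT Price FROM
(
    SELECT * FROM Catalog WHERE Price = _CAST(10, 'UInt64')
) AS pv1;
```
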
diff --git a/src/Interpreters/QueryNormalizer.cpp b/src/Interpreters/QueryNormalizer.cpp index edb90187941..921d004af94 100644 --- a/src/Interpreters/QueryNormalizer.cpp +++ b/src/Interpreters/QueryNormalizer.cpp @@ -130,17 +130,6 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) } } -void QueryNormalizer::visit(ASTQueryParameter & node, Data & data) -{ - /// This is used only for create parameterized view to check if same parameter name is used twice - /// Eg: CREATE VIEW v1 AS SELECT * FROM t1 WHERE Column1={c1:UInt64} AND Column2={c1:UInt64}; - c1 is used twice - auto it_alias = data.query_parameters.find(node.name); - if (it_alias != data.query_parameters.end()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Same alias used multiple times {}", backQuote(node.name)); - - data.query_parameters.insert(node.name); -} - void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr &, Data & data) { @@ -278,9 +267,7 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) visit(*node_select, ast, data); else if (auto * node_param = ast->as()) { - if (data.is_create_parameterized_view) - visit(*node_param, data); - else + if (!data.is_create_parameterized_view) throw Exception("Query parameter " + backQuote(node_param->name) + " was not set", ErrorCodes::UNKNOWN_QUERY_PARAMETER); } else if (auto * node_function = ast->as()) diff --git a/src/Interpreters/QueryNormalizer.h b/src/Interpreters/QueryNormalizer.h index 5006d3ad83c..90c70dd71e6 100644 --- a/src/Interpreters/QueryNormalizer.h +++ b/src/Interpreters/QueryNormalizer.h @@ -84,7 +84,6 @@ private: static void visit(ASTIdentifier &, ASTPtr &, Data &); static void visit(ASTTablesInSelectQueryElement &, const ASTPtr &, Data &); static void visit(ASTSelectQuery &, const ASTPtr &, Data &); - static void visit(ASTQueryParameter &, Data &); static void visitChildren(IAST * node, Data & data); }; diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index bc862ed7b38..20e5b034619 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -249,7 +249,20 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt for (const auto & column : *cols) { if (first_table || !data.join_using_columns.contains(column.name)) - addIdentifier(columns, table.table, column.name); + { + std::string column_name = column.name; + std::string::size_type pos = 0u; + for (auto parameter : data.parameter_values) + { + if ((pos = column_name.find(parameter.first)) != std::string::npos) + { + String parameter_name("_CAST(" + parameter.second + ", '" + column.type->getName() + "')"); + column_name.replace(pos,parameter.first.size(),parameter_name); + break; + } + } + addIdentifier(columns, table.table, column_name); + } } } first_table = false; diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.h b/src/Interpreters/TranslateQualifiedNamesVisitor.h index 73e45fc7ea0..777c6241d19 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -28,11 +28,13 @@ public: const TablesWithColumns & tables; std::unordered_set join_using_columns; bool has_columns; + NameToNameMap parameter_values; - Data(const NameSet & source_columns_, const TablesWithColumns & tables_, bool has_columns_ = true) + Data(const NameSet & source_columns_, const TablesWithColumns & tables_, bool has_columns_ = true, NameToNameMap 
parameter_values_ = {}) : source_columns(source_columns_) , tables(tables_) , has_columns(has_columns_) + , parameter_values(parameter_values_) {} bool hasColumn(const String & name) const { return source_columns.count(name); } diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index fa101a84c58..80b25f7352b 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -330,10 +330,10 @@ using ExistsExpressionVisitor = InDepthNodeVisitor & tables_with_columns, const Names & required_result_columns, - std::shared_ptr table_join) const + std::shared_ptr table_join, + bool is_parameterized_view, + const NameToNameMap parameter_values) const { auto * select_query = query->as(); if (!select_query) @@ -1244,7 +1246,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( result.analyzed_join->setColumnsFromJoinedTable(std::move(columns_from_joined_table), source_columns_set, right_table.table.getQualifiedNamePrefix()); } - translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns); + translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns, parameter_values); /// Optimizes logical expressions. LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform(); @@ -1311,7 +1313,27 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( result.aggregates = getAggregates(query, *select_query); result.window_function_asts = getWindowFunctions(query, *select_query); result.expressions_with_window_function = getExpressionsWithWindowFunctions(query); + + if (is_parameterized_view) + { + for (auto & column : result.source_columns) + { + std::string column_name = column.name; + std::string::size_type pos = 0u; + for (auto & parameter : parameter_values) + { + if ((pos = column_name.find(parameter.first)) != std::string::npos) + { + String parameter_name("_CAST(" + parameter.second + ", '" + column.type->getName() + "')"); + column.name.replace(pos,parameter.first.size(),parameter_name); + break; + } + } + } + } + result.collectUsedColumns(query, true, settings.query_plan_optimize_primary_key); + result.required_source_columns_before_expanding_alias_columns = result.required_source_columns.getNames(); /// rewrite filters for select query, must go after getArrayJoinedColumns diff --git a/src/Interpreters/TreeRewriter.h b/src/Interpreters/TreeRewriter.h index 99408ca208b..b60afc6c7fc 100644 --- a/src/Interpreters/TreeRewriter.h +++ b/src/Interpreters/TreeRewriter.h @@ -90,6 +90,7 @@ struct TreeRewriterResult void collectSourceColumns(bool add_special); void collectUsedColumns(const ASTPtr & query, bool is_select, bool visit_index_hint); Names requiredSourceColumns() const { return required_source_columns.getNames(); } + Names sourceColumns() const { return source_columns.getNames(); } const Names & requiredSourceColumnsForAccessCheck() const { return required_source_columns_before_expanding_alias_columns; } NameSet getArrayJoinSourceNameSet() const; const Scalars & getScalars() const { return scalars; } @@ -129,7 +130,9 @@ public: const SelectQueryOptions & select_options = {}, const std::vector & tables_with_columns = {}, const Names & required_result_columns = {}, - std::shared_ptr table_join = {}) const; + std::shared_ptr table_join = {}, + bool is_parameterized_view = false, + const NameToNameMap parameter_values = {}) const; private: static void normalize(ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, 
const Settings & settings, bool allow_self_aliases, ContextPtr context_, bool is_create_parameterized_view = false); diff --git a/src/Parsers/ASTFunction.cpp b/src/Parsers/ASTFunction.cpp index 63dc9f6b3ac..53c40089924 100644 --- a/src/Parsers/ASTFunction.cpp +++ b/src/Parsers/ASTFunction.cpp @@ -127,6 +127,8 @@ ASTPtr ASTFunction::clone() const res->children.push_back(res->window_definition); } + res->prefer_subquery_to_function_formatting = prefer_subquery_to_function_formatting; + return res; } diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 48851f0974d..8dfb7b288e3 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -112,21 +112,34 @@ NameAndTypePair StorageSnapshot::getColumn(const GetColumnsOptions & options, co return *column; } -Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names) const +Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names,const NameToNameMap & parameter_values) const { Block res; + const auto & columns = getMetadataForQuery()->getColumns(); for (const auto & name : column_names) { - auto column = columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, name); - auto object_column = object_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, name); + std::string column_name = name; + std::string substituted_column_name = name; + std::string::size_type pos = 0u; + for (auto parameter : parameter_values) + { + if ((pos = substituted_column_name.find("_CAST(" + parameter.second)) != std::string::npos) + { + substituted_column_name = substituted_column_name.substr(0,pos) + parameter.first + ")"; + break; + } + } + + auto column = columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, substituted_column_name); + auto object_column = object_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, substituted_column_name); if (column && !object_column) { - res.insert({column->type->createColumn(), column->type, column->name}); + res.insert({column->type->createColumn(), column->type, column_name}); } else if (object_column) { - res.insert({object_column->type->createColumn(), object_column->type, object_column->name}); + res.insert({object_column->type->createColumn(), object_column->type, column_name}); } else if (auto it = virtual_columns.find(name); it != virtual_columns.end()) { diff --git a/src/Storages/StorageSnapshot.h b/src/Storages/StorageSnapshot.h index badf0d3a1e8..723b30e49e6 100644 --- a/src/Storages/StorageSnapshot.h +++ b/src/Storages/StorageSnapshot.h @@ -66,7 +66,7 @@ struct StorageSnapshot NameAndTypePair getColumn(const GetColumnsOptions & options, const String & column_name) const; /// Block with ordinary + materialized + aliases + virtuals + subcolumns. - Block getSampleBlockForColumns(const Names & column_names) const; + Block getSampleBlockForColumns(const Names & column_names, const NameToNameMap & parameter_values = {}) const; ColumnsDescription getDescriptionForColumns(const Names & column_names) const; diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 8a2787625fb..d9e79607ce4 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -140,7 +140,7 @@ void StorageView::read( query_plan.addStep(std::move(materializing)); /// And also convert to expected structure. 
- const auto & expected_header = storage_snapshot->getSampleBlockForColumns(column_names); + const auto & expected_header = storage_snapshot->getSampleBlockForColumns(column_names,parameter_values); const auto & header = query_plan.getCurrentDataStream().header; const auto * select_with_union = current_inner_query->as(); @@ -176,7 +176,7 @@ static ASTTableExpression * getFirstTableExpression(ASTSelectQuery & select_quer return select_element->table_expression->as(); } -void StorageView::replaceQueryParametersIfParametrizedView(ASTPtr & outer_query, const NameToNameMap & parameter_values) +void StorageView::replaceQueryParametersIfParametrizedView(ASTPtr & outer_query) { ReplaceQueryParameterVisitor visitor(parameter_values); visitor.visit(outer_query); diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index cd88005a207..e913e98901f 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -35,7 +35,7 @@ public: size_t max_block_size, size_t num_streams) override; - static void replaceQueryParametersIfParametrizedView(ASTPtr & outer_query, const NameToNameMap & parameter_values); + void replaceQueryParametersIfParametrizedView(ASTPtr & outer_query); static void replaceWithSubquery(ASTSelectQuery & select_query, ASTPtr & view_name, const StorageMetadataPtr & metadata_snapshot, const bool parameterized_view) { @@ -45,8 +45,14 @@ public: static void replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name, const bool parameterized_view); static ASTPtr restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name); + void setParameterValues (NameToNameMap parameter_values_) + { + parameter_values = parameter_values_; + } + protected: bool is_parameterized_view; + NameToNameMap parameter_values; }; } diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference index 9ea5e464b8e..9ec1cb0efd2 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.reference +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -11,8 +11,12 @@ FROM SELECT * FROM default.Catalog WHERE Price = _CAST(10, \'UInt64\') -) AS v1 +) AS pv1 50 10 20 30 +20 +30 +40 +60 diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index 934ddd18d49..9d55dba970a 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -1,8 +1,12 @@ -DROP TABLE IF EXISTS v1; -DROP TABLE IF EXISTS v2; -DROP TABLE IF EXISTS v3; +DROP VIEW IF EXISTS pv1; +DROP VIEW IF EXISTS pv2; +DROP VIEW IF EXISTS pv3; +DROP VIEW IF EXISTS pv4; +DROP VIEW IF EXISTS pv5; +DROP VIEW IF EXISTS pv6; +DROP VIEW IF EXISTS v1; DROP TABLE IF EXISTS Catalog; -DROP TABLE IF EXISTS system.v1; +DROP TABLE IF EXISTS system.pv1; DROP TABLE IF EXISTS system.Catalog; CREATE TABLE Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; @@ -11,39 +15,39 @@ INSERT INTO Catalog VALUES ('Pen', 10, 3); INSERT INTO Catalog VALUES ('Book', 50, 2); INSERT INTO Catalog VALUES ('Paper', 20, 1); -CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price={price:UInt64}; -SELECT Price FROM v1(price=20); -SELECT Price FROM `v1`(price=20); +CREATE VIEW pv1 AS SELECT * FROM Catalog WHERE Price={price:UInt64}; +SELECT Price FROM pv1(price=20); +SELECT Price FROM `pv1`(price=20); set param_p=10; -SELECT Price FROM v1; -- { serverError UNKNOWN_QUERY_PARAMETER} -SELECT Price FROM 
v1(price={p:UInt64}); +SELECT Price FROM pv1; -- { serverError UNKNOWN_QUERY_PARAMETER} +SELECT Price FROM pv1(price={p:UInt64}); set param_l=1; -SELECT Price FROM v1(price=50) LIMIT ({l:UInt64}); +SELECT Price FROM pv1(price=50) LIMIT ({l:UInt64}); -DETACH TABLE v1; -ATTACH TABLE v1; +DETACH TABLE pv1; +ATTACH TABLE pv1; -EXPLAIN SYNTAX SELECT * from v1(price=10); +EXPLAIN SYNTAX SELECT * from pv1(price=10); -INSERT INTO v1 VALUES ('Bag', 50, 2); -- { serverError NOT_IMPLEMENTED} +INSERT INTO pv1 VALUES ('Bag', 50, 2); -- { serverError NOT_IMPLEMENTED} -SELECT Price FROM v123(price=20); -- { serverError UNKNOWN_FUNCTION } +SELECT Price FROM pv123(price=20); -- { serverError UNKNOWN_FUNCTION } -CREATE VIEW v10 AS SELECT * FROM Catalog WHERE Price=10; +CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price=10; -SELECT Price FROM v10(price=10); -- { serverError UNKNOWN_FUNCTION } +SELECT Price FROM v1(price=10); -- { serverError UNKNOWN_FUNCTION } -CREATE VIEW v2 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64}; -SELECT Price FROM v2(price=50,quantity=2); +CREATE VIEW pv2 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64}; +SELECT Price FROM pv2(price=50,quantity=2); -SELECT Price FROM v2(price=50); -- { serverError UNKNOWN_QUERY_PARAMETER} +SELECT Price FROM pv2(price=50); -- { serverError UNKNOWN_QUERY_PARAMETER} -CREATE VIEW v3 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity=3; -SELECT Price FROM v3(price=10); +CREATE VIEW pv3 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity=3; +SELECT Price FROM pv3(price=10); -CREATE VIEW v4 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={price:UInt64}; -- {serverError BAD_ARGUMENTS} +CREATE VIEW pv4 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={price:UInt64}; -- {serverError DUPLICATE_COLUMN} CREATE TABLE system.Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; @@ -51,19 +55,25 @@ INSERT INTO system.Catalog VALUES ('Pen', 10, 3); INSERT INTO system.Catalog VALUES ('Book', 50, 2); INSERT INTO system.Catalog VALUES ('Paper', 20, 1); -CREATE VIEW system.v1 AS SELECT * FROM system.Catalog WHERE Price={price:UInt64}; -SELECT Price FROM system.v1(price=20); -SELECT Price FROM `system.v1`(price=20); -- { serverError UNKNOWN_FUNCTION } +CREATE VIEW system.pv1 AS SELECT * FROM system.Catalog WHERE Price={price:UInt64}; +SELECT Price FROM system.pv1(price=20); +SELECT Price FROM `system.pv1`(price=20); -- { serverError UNKNOWN_FUNCTION } INSERT INTO Catalog VALUES ('Book2', 30, 8); INSERT INTO Catalog VALUES ('Book3', 30, 8); -CREATE VIEW v5 AS SELECT Price FROM Catalog WHERE {price:UInt64} HAVING Quantity in (SELECT {quantity:UInt64}) LIMIT {limit:UInt64}; -SELECT Price FROM v5(price=30, quantity=8,limit=1); +CREATE VIEW pv5 AS SELECT Price FROM Catalog WHERE {price:UInt64} HAVING Quantity in (SELECT {quantity:UInt64}) LIMIT {limit:UInt64}; +SELECT Price FROM pv5(price=30, quantity=8,limit=1); -DROP TABLE v1; -DROP TABLE v2; -DROP TABLE v3; +CREATE VIEW pv6 AS SELECT Price+{price:UInt64} FROM Catalog GROUP BY Price+{price:UInt64} ORDER BY Price+{price:UInt64}; +SELECT * FROM pv6(price=10); + +DROP VIEW pv1; +DROP VIEW pv2; +DROP VIEW pv3; +DROP VIEW pv5; +DROP VIEW pv6; +DROP VIEW v1; DROP TABLE Catalog; -DROP TABLE system.v1; +DROP TABLE system.pv1; DROP TABLE system.Catalog; \ No newline at end of file From bc7a76a48602c0b6b9aa99e2d50543b6ca0fa2f3 Mon Sep 17 00:00:00 2001 From: pufit Date: Sun, 4 Dec 2022 
17:27:28 -0500 Subject: [PATCH 041/262] Added mmap for StorageFile --- src/Storages/StorageFile.cpp | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 6e032a47943..6baf079275a 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -14,6 +14,8 @@ #include #include +#include +#include #include #include #include @@ -194,10 +196,18 @@ std::unique_ptr createReadBuffer( if (0 != fstat(table_fd, &file_stat)) throwFromErrno("Cannot stat table file descriptor, inside " + storage_name, ErrorCodes::CANNOT_STAT); - if (S_ISREG(file_stat.st_mode)) - nested_buffer = std::make_unique(table_fd); - else - nested_buffer = std::make_unique(table_fd); + try + { + nested_buffer = std::make_unique(table_fd, 0); + } + catch (const ErrnoException &) + { + /// Fallback if mmap is not supported. + if (S_ISREG(file_stat.st_mode)) + nested_buffer = std::make_unique(table_fd); + else + nested_buffer = std::make_unique(table_fd); + } method = chooseCompressionMethod("", compression_method); } @@ -207,10 +217,18 @@ std::unique_ptr createReadBuffer( if (0 != stat(current_path.c_str(), &file_stat)) throwFromErrno("Cannot stat file " + current_path, ErrorCodes::CANNOT_STAT); - if (S_ISREG(file_stat.st_mode)) - nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); - else - nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); + try + { + nested_buffer = std::make_unique(current_path, 0); + } + catch (const ErrnoException &) + { + /// Fallback if mmap is not supported. + if (S_ISREG(file_stat.st_mode)) + nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); + else + nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); + } method = chooseCompressionMethod(current_path, compression_method); } From 084e465d84cb2ba4c0773044e2a14bf4e68aca04 Mon Sep 17 00:00:00 2001 From: pufit Date: Sun, 4 Dec 2022 23:39:23 -0500 Subject: [PATCH 042/262] Use mmap only on regular files. --- src/Storages/StorageFile.cpp | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 6baf079275a..9ef1039c666 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -196,18 +196,10 @@ std::unique_ptr createReadBuffer( if (0 != fstat(table_fd, &file_stat)) throwFromErrno("Cannot stat table file descriptor, inside " + storage_name, ErrorCodes::CANNOT_STAT); - try - { + if (S_ISREG(file_stat.st_mode)) nested_buffer = std::make_unique(table_fd, 0); - } - catch (const ErrnoException &) - { - /// Fallback if mmap is not supported. - if (S_ISREG(file_stat.st_mode)) - nested_buffer = std::make_unique(table_fd); - else - nested_buffer = std::make_unique(table_fd); - } + else + nested_buffer = std::make_unique(table_fd); method = chooseCompressionMethod("", compression_method); } @@ -217,18 +209,10 @@ std::unique_ptr createReadBuffer( if (0 != stat(current_path.c_str(), &file_stat)) throwFromErrno("Cannot stat file " + current_path, ErrorCodes::CANNOT_STAT); - try - { + if (S_ISREG(file_stat.st_mode)) nested_buffer = std::make_unique(current_path, 0); - } - catch (const ErrnoException &) - { - /// Fallback if mmap is not supported. 
- if (S_ISREG(file_stat.st_mode)) - nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); - else - nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); - } + else + nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); method = chooseCompressionMethod(current_path, compression_method); } From e93c0776b6ffd2770cde82b654ab3039e84580f4 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Tue, 6 Dec 2022 11:14:12 +0000 Subject: [PATCH 043/262] Set keytab in environment variable --- programs/server/Server.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 965717d74b9..36b1d86ea53 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -735,6 +735,13 @@ try } #endif + String principal_keytab = config().getString("kerberos.keytab", ""); + if (!principal_keytab.empty() && std::filesystem::exists(principal_keytab)) + { + setenv("KRB5_CLIENT_KTNAME", principal_keytab.c_str(), true /* overwrite */); // NOLINT + setenv("KRB5_KTNAME", principal_keytab.c_str(), true /* overwrite */); // NOLINT + } + registerFunctions(); registerAggregateFunctions(); registerTableFunctions(); From 540f890291005dba4500765906b5098326c30014 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 7 Dec 2022 11:04:15 +0000 Subject: [PATCH 044/262] Make system.replicas parallel --- src/Core/Settings.h | 2 ++ src/Storages/System/StorageSystemReplicas.cpp | 32 +++++++++++++++---- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 91647a5f165..4be91ce311f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -155,6 +155,8 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) \ M(Bool, allow_experimental_parallel_reading_from_replicas, false, "If true, ClickHouse will send a SELECT query to all replicas of a table. It will work for any kind on MergeTree table.", 0) \ \ + M(UInt64, system_replicas_fetch_threads, 16, "The maximum number of threads to fetch data for system.replicas table.", 0) \ + \ M(Bool, skip_unavailable_shards, false, "If true, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. 
Shard is marked as unavailable when none of the replicas can be reached.", 0) \ \ M(UInt64, parallel_distributed_insert_select, 0, "Process distributed INSERT SELECT query in the same cluster on local tables on every shard; if set to 1 - SELECT is executed on each shard; if set to 2 - SELECT and INSERT are executed on each shard", 0) \ diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 0f7877a6e41..a6afb6eff0d 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -151,14 +151,32 @@ Pipe StorageSystemReplicas::read( MutableColumns res_columns = storage_snapshot->metadata->getSampleBlock().cloneEmptyColumns(); - for (size_t i = 0, size = col_database->size(); i < size; ++i) - { - StorageReplicatedMergeTree::Status status; - dynamic_cast( - *replicated_tables - [(*col_database)[i].safeGet()] - [(*col_table)[i].safeGet()]).getStatus(status, with_zk_fields); + auto settings = context->getSettingsRef(); + size_t thread_pool_size = settings.system_replicas_fetch_threads; + if (settings.max_threads != 0) + thread_pool_size = std::min(thread_pool_size, static_cast(settings.max_threads)); + + ThreadPool thread_pool(thread_pool_size); + + size_t tables_size = col_database->size(); + std::vector statuses(tables_size); + + for (size_t i = 0; i < tables_size; ++i) + { + thread_pool.scheduleOrThrowOnError([i, &statuses, &replicated_tables, &col_database, &col_table, &with_zk_fields] + { + dynamic_cast( + *replicated_tables + [(*col_database)[i].safeGet()] + [(*col_table)[i].safeGet()]).getStatus(statuses[i], with_zk_fields); + }); + } + + thread_pool.wait(); + + for (const auto & status: statuses) + { size_t col_num = 3; res_columns[col_num++]->insert(status.is_leader); res_columns[col_num++]->insert(status.can_become_leader); From 9b46baa17dc66f6df2f7f0279071f1607b13c510 Mon Sep 17 00:00:00 2001 From: pufit Date: Wed, 7 Dec 2022 22:31:32 -0500 Subject: [PATCH 045/262] Rewrite `StorageFile` buffer creation with `createReadBufferFromFileBase`. Add file descriptor support for `createReadBufferFromFileBase`. Fix file_size overflow in `createReadBufferFromFileBase`. Fix `MMapReadBufferFromFileWithCache` file_size definition. 
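
The hunks that follow derive the read buffer's size estimate from the file itself (stat() on a path, fstat() on an already-open descriptor) instead of trusting an optional file_size value. A minimal standalone sketch of that probing idea, in plain POSIX with illustrative names rather than the real ClickHouse helpers:

```cpp
// Illustrative sketch only: derive an estimated read size either from a path
// or from an already-open file descriptor, the way the patch uses stat()/fstat().
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <cstdio>
#include <optional>
#include <stdexcept>
#include <string>

// Returns the size reported by the kernel, or std::nullopt for non-regular
// files (pipes, character devices), where st_size is not meaningful.
std::optional<size_t> probeSize(const std::string & path, int fd = -1)
{
    struct stat st{};
    int rc = (fd >= 0) ? ::fstat(fd, &st) : ::stat(path.c_str(), &st);
    if (rc != 0)
        throw std::runtime_error("cannot stat " + (fd >= 0 ? std::string("fd") : path));
    if (!S_ISREG(st.st_mode))
        return std::nullopt;
    return static_cast<size_t>(st.st_size);
}

int main()
{
    int fd = ::open("/etc/hostname", O_RDONLY); // any readable regular file works here
    if (fd < 0)
        return 1;
    if (auto size = probeSize("", fd))
        std::printf("regular file, %zu bytes\n", *size);
    else
        std::printf("not a regular file, size unknown\n");
    ::close(fd);
    return 0;
}
```

Non-regular files report no usable st_size, which is why the change keeps a non-mmap path for them rather than trusting the probed value.
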
--- programs/local/LocalServer.cpp | 6 ++ src/Disks/IO/createReadBufferFromFileBase.cpp | 82 ++++++++++++++++--- src/Disks/IO/createReadBufferFromFileBase.h | 18 ++++ src/IO/MMapReadBufferFromFileWithCache.cpp | 1 + src/Storages/StorageFile.cpp | 31 ++----- ...97_storage_file_reader_selection.reference | 6 ++ .../02497_storage_file_reader_selection.sh | 29 +++++++ 7 files changed, 136 insertions(+), 37 deletions(-) create mode 100644 tests/queries/0_stateless/02497_storage_file_reader_selection.reference create mode 100755 tests/queries/0_stateless/02497_storage_file_reader_selection.sh diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 33d11091660..6f75c404174 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -540,6 +540,12 @@ void LocalServer::processConfig() global_context->makeGlobalContext(); global_context->setApplicationType(Context::ApplicationType::LOCAL); + if (!global_context->getSettingsRef().isChanged("local_filesystem_read_method")) + global_context->setSetting("local_filesystem_read_method", Field{"mmap"}); + + if (!global_context->getSettingsRef().isChanged("min_bytes_to_use_mmap_io")) + global_context->setSetting("min_bytes_to_use_mmap_io", Field{1}); + tryInitPath(); Poco::Logger * log = &logger(); diff --git a/src/Disks/IO/createReadBufferFromFileBase.cpp b/src/Disks/IO/createReadBufferFromFileBase.cpp index b274786f162..73506e19d11 100644 --- a/src/Disks/IO/createReadBufferFromFileBase.cpp +++ b/src/Disks/IO/createReadBufferFromFileBase.cpp @@ -23,22 +23,37 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int CANNOT_STAT; } -std::unique_ptr createReadBufferFromFileBase( +std::unique_ptr createReadBufferFromFileOrFileDescriptorBase( const std::string & filename, const ReadSettings & settings, std::optional read_hint, std::optional file_size, int flags, char * existing_memory, - size_t alignment) + size_t alignment, + bool read_from_fd, + int fd) { if (file_size.has_value() && !*file_size) return std::make_unique(); - size_t estimated_size = 0; + struct stat file_stat{}; + if (read_from_fd) + { + if (0 != fstat(fd, &file_stat)) + throwFromErrno("Cannot stat file descriptor", ErrorCodes::CANNOT_STAT); + } + else + { + if (0 != stat(filename.c_str(), &file_stat)) + throwFromErrno("Cannot stat file " + filename, ErrorCodes::CANNOT_STAT); + } + + size_t estimated_size = file_stat.st_size; if (read_hint.has_value()) estimated_size = *read_hint; else if (file_size.has_value()) @@ -48,17 +63,18 @@ std::unique_ptr createReadBufferFromFileBase( && settings.local_fs_method == LocalFSReadMethod::mmap && settings.mmap_threshold && settings.mmap_cache - && estimated_size >= settings.mmap_threshold) + && estimated_size >= settings.mmap_threshold + && S_ISREG(file_stat.st_mode)) { try { - auto res = std::make_unique(*settings.mmap_cache, filename, 0, file_size.value_or(-1)); + auto res = std::make_unique(*settings.mmap_cache, filename, 0, estimated_size); ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMap); return res; } catch (const ErrnoException &) { - /// Fallback if mmap is not supported (example: pipe). + /// Fallback if mmap is not supported. ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMapFailed); } } @@ -67,13 +83,21 @@ std::unique_ptr createReadBufferFromFileBase( { std::unique_ptr res; - if (settings.local_fs_method == LocalFSReadMethod::read) + /// Pread works only with regular files, so we explicitly fallback to read in other cases. 
+ if (settings.local_fs_method == LocalFSReadMethod::read || !S_ISREG(file_stat.st_mode)) { - res = std::make_unique(filename, buffer_size, actual_flags, existing_memory, alignment, file_size); + if (read_from_fd) + res = std::make_unique(fd, buffer_size, existing_memory, alignment, file_size); + else + res = std::make_unique(filename, buffer_size, actual_flags, existing_memory, alignment, file_size); } else if (settings.local_fs_method == LocalFSReadMethod::pread || settings.local_fs_method == LocalFSReadMethod::mmap) { - res = std::make_unique(filename, buffer_size, actual_flags, existing_memory, alignment, file_size); + if (read_from_fd) + res = std::make_unique(fd, buffer_size, existing_memory, alignment, file_size); + else + res = std::make_unique( + filename, buffer_size, actual_flags, existing_memory, alignment, file_size); } else if (settings.local_fs_method == LocalFSReadMethod::pread_fake_async) { @@ -82,8 +106,13 @@ std::unique_ptr createReadBufferFromFileBase( throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); auto & reader = context->getThreadPoolReader(Context::FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER); - res = std::make_unique( - reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size); + + if (read_from_fd) + res = std::make_unique( + reader, settings.priority, fd, buffer_size, existing_memory, alignment, file_size); + else + res = std::make_unique( + reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size); } else if (settings.local_fs_method == LocalFSReadMethod::pread_threadpool) { @@ -92,8 +121,13 @@ std::unique_ptr createReadBufferFromFileBase( throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); auto & reader = context->getThreadPoolReader(Context::FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER); - res = std::make_unique( - reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size); + + if (read_from_fd) + res = std::make_unique( + reader, settings.priority, fd, buffer_size, existing_memory, alignment, file_size); + else + res = std::make_unique( + reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size); } else throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown read method"); @@ -169,4 +203,26 @@ std::unique_ptr createReadBufferFromFileBase( return create(buffer_size, flags); } +std::unique_ptr createReadBufferFromFileBase( + const std::string & filename, + const ReadSettings & settings, + std::optional read_hint, + std::optional file_size, + int flags_, + char * existing_memory, + size_t alignment) +{ + return createReadBufferFromFileOrFileDescriptorBase(filename, settings, read_hint, file_size, flags_, existing_memory, alignment); +} + +std::unique_ptr createReadBufferFromFileDescriptorBase( + int fd, + const ReadSettings & settings, + std::optional read_hint, + std::optional file_size, + char * existing_memory , + size_t alignment) +{ + return createReadBufferFromFileOrFileDescriptorBase({}, settings, read_hint, file_size, -1, existing_memory, alignment, true, fd); +} } diff --git a/src/Disks/IO/createReadBufferFromFileBase.h b/src/Disks/IO/createReadBufferFromFileBase.h index c2e2040587b..542ea423462 100644 --- a/src/Disks/IO/createReadBufferFromFileBase.h +++ b/src/Disks/IO/createReadBufferFromFileBase.h @@ -14,6 +14,17 @@ namespace DB * @param read_hint - the number of bytes to read hint * @param file_size - size of 
file */ +std::unique_ptr createReadBufferFromFileOrFileDescriptorBase( + const std::string & filename, + const ReadSettings & settings, + std::optional read_hint = {}, + std::optional file_size = {}, + int flags_ = -1, + char * existing_memory = nullptr, + size_t alignment = 0, + bool read_from_fd = false, + int fd = 0); + std::unique_ptr createReadBufferFromFileBase( const std::string & filename, const ReadSettings & settings, @@ -23,4 +34,11 @@ std::unique_ptr createReadBufferFromFileBase( char * existing_memory = nullptr, size_t alignment = 0); +std::unique_ptr createReadBufferFromFileDescriptorBase( + int fd, + const ReadSettings & settings, + std::optional read_hint = {}, + std::optional file_size = {}, + char * existing_memory = nullptr, + size_t alignment = 0); } diff --git a/src/IO/MMapReadBufferFromFileWithCache.cpp b/src/IO/MMapReadBufferFromFileWithCache.cpp index 503a58b65b9..d13cf5db2f7 100644 --- a/src/IO/MMapReadBufferFromFileWithCache.cpp +++ b/src/IO/MMapReadBufferFromFileWithCache.cpp @@ -18,6 +18,7 @@ void MMapReadBufferFromFileWithCache::init() size_t page_size = static_cast(::getPageSize()); ReadBuffer::padded = (length % page_size) > 0 && (length % page_size) <= (page_size - (PADDING_FOR_SIMD - 1)); + ReadBufferFromFileBase::file_size = length; } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 9ef1039c666..8cc9f646406 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -39,6 +39,8 @@ #include #include +#include + #include #include @@ -180,7 +182,6 @@ void checkCreationIsAllowed( std::unique_ptr createReadBuffer( const String & current_path, bool use_table_fd, - const String & storage_name, int table_fd, const String & compression_method, ContextPtr context) @@ -188,32 +189,14 @@ std::unique_ptr createReadBuffer( std::unique_ptr nested_buffer; CompressionMethod method; - struct stat file_stat{}; - if (use_table_fd) { - /// Check if file descriptor allows random reads (and reading it twice). - if (0 != fstat(table_fd, &file_stat)) - throwFromErrno("Cannot stat table file descriptor, inside " + storage_name, ErrorCodes::CANNOT_STAT); - - if (S_ISREG(file_stat.st_mode)) - nested_buffer = std::make_unique(table_fd, 0); - else - nested_buffer = std::make_unique(table_fd); - + nested_buffer = createReadBufferFromFileDescriptorBase(table_fd, context->getReadSettings()); method = chooseCompressionMethod("", compression_method); } else { - /// Check if file descriptor allows random reads (and reading it twice). - if (0 != stat(current_path.c_str(), &file_stat)) - throwFromErrno("Cannot stat file " + current_path, ErrorCodes::CANNOT_STAT); - - if (S_ISREG(file_stat.st_mode)) - nested_buffer = std::make_unique(current_path, 0); - else - nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); - + nested_buffer = createReadBufferFromFileBase(current_path, context->getReadSettings()); method = chooseCompressionMethod(current_path, compression_method); } @@ -284,7 +267,7 @@ ColumnsDescription StorageFile::getTableStructureFromFileDescriptor(ContextPtr c { /// We will use PeekableReadBuffer to create a checkpoint, so we need a place /// where we can store the original read buffer. 
- read_buffer_from_fd = createReadBuffer("", true, getName(), table_fd, compression_method, context); + read_buffer_from_fd = createReadBuffer("", true, table_fd, compression_method, context); auto read_buf = std::make_unique(*read_buffer_from_fd); read_buf->setCheckpoint(); return read_buf; @@ -333,7 +316,7 @@ ColumnsDescription StorageFile::getTableStructureFromFile( if (it == paths.end()) return nullptr; - return createReadBuffer(*it++, false, "File", -1, compression_method, context); + return createReadBuffer(*it++, false, -1, compression_method, context); }; ColumnsDescription columns; @@ -550,7 +533,7 @@ public: } if (!read_buf) - read_buf = createReadBuffer(current_path, storage->use_table_fd, storage->getName(), storage->table_fd, storage->compression_method, context); + read_buf = createReadBuffer(current_path, storage->use_table_fd, storage->table_fd, storage->compression_method, context); auto format = context->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size, storage->format_settings); diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.reference b/tests/queries/0_stateless/02497_storage_file_reader_selection.reference new file mode 100644 index 00000000000..0977ae4d888 --- /dev/null +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.reference @@ -0,0 +1,6 @@ +key\nfoo\nbar +1 +0 +key\nfoo\nbar +0 +1 diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh new file mode 100755 index 00000000000..9e07050536e --- /dev/null +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh @@ -0,0 +1,29 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') + +DATA_FILE=$USER_FILES_PATH/test_02497_storage_file_reader.data +echo 'key\nfoo\nbar' > $DATA_FILE + +QUERY_ID=$RANDOM +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', 'TSV', 's String')" \ + --query_id $QUERY_ID \ + --local_filesystem_read_method=mmap \ + --min_bytes_to_use_mmap_io=1 \ + +$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" +$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferMMap']) FROM system.query_log WHERE query_id='$QUERY_ID'" +$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferOrdinary']) FROM system.query_log WHERE query_id='$QUERY_ID'" + +QUERY_ID=$RANDOM +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', 'TSV', 's String')" \ + --query_id $QUERY_ID \ + --local_filesystem_read_method=pread + +$CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" +$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferMMap']) FROM system.query_log WHERE query_id='$QUERY_ID'" +$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferOrdinary']) FROM system.query_log WHERE query_id='$QUERY_ID'" From 9cc4868b37c318d8f61005114135817cf242e2c2 Mon Sep 17 00:00:00 2001 From: pufit Date: Wed, 7 Dec 2022 23:09:19 -0500 Subject: [PATCH 046/262] Try to fix codestyle error. 
--- .../queries/0_stateless/02497_storage_file_reader_selection.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh index 9e07050536e..b7c60aac03c 100755 --- a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh @@ -7,7 +7,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) USER_FILES_PATH=$(clickhouse-client --query "select _path,_file from file('nonexist.txt', 'CSV', 'val1 char')" 2>&1 | grep Exception | awk '{gsub("/nonexist.txt","",$9); print $9}') DATA_FILE=$USER_FILES_PATH/test_02497_storage_file_reader.data -echo 'key\nfoo\nbar' > $DATA_FILE +echo -e 'key\nfoo\nbar' > $DATA_FILE QUERY_ID=$RANDOM $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', 'TSV', 's String')" \ From 76401ad0b9b029a934a3a8d861ef3a59061cfd97 Mon Sep 17 00:00:00 2001 From: pufit Date: Wed, 7 Dec 2022 23:17:10 -0500 Subject: [PATCH 047/262] Test and codestyle fix. --- src/Storages/StorageFile.cpp | 1 - .../02497_storage_file_reader_selection.reference | 8 ++++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 8cc9f646406..9fd40600eba 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -69,7 +69,6 @@ namespace ErrorCodes extern const int FILE_DOESNT_EXIST; extern const int TIMEOUT_EXCEEDED; extern const int INCOMPATIBLE_COLUMNS; - extern const int CANNOT_STAT; extern const int LOGICAL_ERROR; extern const int CANNOT_APPEND_TO_FILE; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.reference b/tests/queries/0_stateless/02497_storage_file_reader_selection.reference index 0977ae4d888..8da37e4219c 100644 --- a/tests/queries/0_stateless/02497_storage_file_reader_selection.reference +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.reference @@ -1,6 +1,10 @@ -key\nfoo\nbar +key +foo +bar 1 0 -key\nfoo\nbar +key +foo +bar 0 1 From 2d87cc1a6c9d549393cb0ec38c38a89688b31d2f Mon Sep 17 00:00:00 2001 From: pufit Date: Thu, 8 Dec 2022 18:02:29 -0500 Subject: [PATCH 048/262] Add `storage_file_read_method` setting. 
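
The commit below wires a plain string setting into the storage layer and resolves it to a read-method enum (the patch itself does this with magic_enum::enum_cast and raises UNKNOWN_READ_METHOD for unrecognised values). A dependency-free sketch of that resolution step, with made-up names:

```cpp
// Illustrative sketch only: resolve a user-facing setting string into an enum,
// rejecting unknown values instead of silently falling back to a default.
#include <iostream>
#include <optional>
#include <string>
#include <unordered_map>

enum class ReadMethod { Read, PRead, MMap, PReadThreadpool };

std::optional<ReadMethod> parseReadMethod(const std::string & name)
{
    static const std::unordered_map<std::string, ReadMethod> known = {
        {"read", ReadMethod::Read},
        {"pread", ReadMethod::PRead},
        {"mmap", ReadMethod::MMap},
        {"pread_threadpool", ReadMethod::PReadThreadpool},
    };
    auto it = known.find(name);
    if (it == known.end())
        return std::nullopt;
    return it->second;
}

int main()
{
    for (const std::string & candidate : {"mmap", "pread", "mmmap"})
    {
        if (auto method = parseReadMethod(candidate))
            std::cout << candidate << " -> enum value " << static_cast<int>(*method) << '\n';
        else
            std::cout << candidate << " -> unknown read method, would throw\n";
    }
    return 0;
}
```
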
--- programs/local/LocalServer.cpp | 6 ------ src/Core/Settings.h | 1 + src/Disks/IO/createReadBufferFromFileBase.cpp | 10 ++++++++-- src/Storages/StorageFile.cpp | 15 ++++++++++++-- ...2103_tsv_csv_custom_null_representation.sh | 20 +++++++++---------- .../0_stateless/02130_parse_quoted_null.sh | 18 ++++++++--------- .../02497_storage_file_reader_selection.sh | 6 ++---- 7 files changed, 43 insertions(+), 33 deletions(-) diff --git a/programs/local/LocalServer.cpp b/programs/local/LocalServer.cpp index 6f75c404174..33d11091660 100644 --- a/programs/local/LocalServer.cpp +++ b/programs/local/LocalServer.cpp @@ -540,12 +540,6 @@ void LocalServer::processConfig() global_context->makeGlobalContext(); global_context->setApplicationType(Context::ApplicationType::LOCAL); - if (!global_context->getSettingsRef().isChanged("local_filesystem_read_method")) - global_context->setSetting("local_filesystem_read_method", Field{"mmap"}); - - if (!global_context->getSettingsRef().isChanged("min_bytes_to_use_mmap_io")) - global_context->setSetting("min_bytes_to_use_mmap_io", Field{1}); - tryInitPath(); Poco::Logger * log = &logger(); diff --git a/src/Core/Settings.h b/src/Core/Settings.h index df5e9685173..c8c6eb61a73 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -591,6 +591,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. 
Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ + M(String, storage_file_read_method, "mmap", "Method of reading data from storage file, one of: read, pread, mmap, pread_threadpool.", 0)\ M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, pread_threadpool.", 0) \ M(String, remote_filesystem_read_method, "threadpool", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \ M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \ diff --git a/src/Disks/IO/createReadBufferFromFileBase.cpp b/src/Disks/IO/createReadBufferFromFileBase.cpp index 73506e19d11..911c677300f 100644 --- a/src/Disks/IO/createReadBufferFromFileBase.cpp +++ b/src/Disks/IO/createReadBufferFromFileBase.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -62,13 +63,18 @@ std::unique_ptr createReadBufferFromFileOrFileDescriptor if (!existing_memory && settings.local_fs_method == LocalFSReadMethod::mmap && settings.mmap_threshold - && settings.mmap_cache && estimated_size >= settings.mmap_threshold && S_ISREG(file_stat.st_mode)) { try { - auto res = std::make_unique(*settings.mmap_cache, filename, 0, estimated_size); + std::unique_ptr res; + + if (settings.mmap_cache) + res = std::make_unique(*settings.mmap_cache, filename, 0, estimated_size); + else + res = std::make_unique(filename, 0, estimated_size); + ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMap); return res; } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 9fd40600eba..f4fc78d5e98 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -72,6 +72,7 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int CANNOT_APPEND_TO_FILE; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; + extern const int UNKNOWN_READ_METHOD; } namespace @@ -188,14 +189,24 @@ std::unique_ptr createReadBuffer( std::unique_ptr nested_buffer; CompressionMethod method; + auto read_method = context->getSettingsRef().storage_file_read_method.value; + auto read_settings = context->getReadSettings(); + read_settings.mmap_threshold = 1; + read_settings.mmap_cache = nullptr; /// Turn off mmap cache for Storage File + + if (auto opt_method = magic_enum::enum_cast(read_method)) + read_settings.local_fs_method = *opt_method; + else + throwFromErrno("Unknown read method " + read_method, ErrorCodes::UNKNOWN_READ_METHOD); + if (use_table_fd) { - nested_buffer = createReadBufferFromFileDescriptorBase(table_fd, context->getReadSettings()); + nested_buffer = createReadBufferFromFileDescriptorBase(table_fd, read_settings); method = chooseCompressionMethod("", compression_method); } else { - nested_buffer = createReadBufferFromFileBase(current_path, context->getReadSettings()); + nested_buffer = createReadBufferFromFileBase(current_path, read_settings); method = chooseCompressionMethod(current_path, compression_method); } diff --git a/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh b/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh index 4162e046ca4..b28c56f9266 100755 --- a/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh +++ 
b/tests/queries/0_stateless/02103_tsv_csv_custom_null_representation.sh @@ -97,37 +97,37 @@ echo 'Corner cases' echo 'TSV' echo -e "Some text\tCustomNull" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0" echo -e "Some text\tCustomNull Some text" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_tsv_null_representation='CustomNull', input_format_parallel_parsing=0" echo -e "Some text\t123NNN" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(Int32)') settings max_read_buffer_size=14, format_tsv_null_representation='123NN', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(Int32)') settings storage_file_read_method='pread', max_read_buffer_size=14, format_tsv_null_representation='123NN', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' echo -e "Some text\tNU\tLL" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings max_read_buffer_size=13, format_tsv_null_representation='NU\tL', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=13, format_tsv_null_representation='NU\tL', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' echo 'CSV' echo -e "Some text,CustomNull" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0" echo -e "Some text,CustomNull Some text" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=15, format_csv_null_representation='CustomNull', input_format_parallel_parsing=0" echo -e "Some text,123NNN" > $DATA_FILE -$CLICKHOUSE_CLIENT -q 
"SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(Int32)') settings max_read_buffer_size=14, format_csv_null_representation='123NN', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(Int32)') settings storage_file_read_method='pread', max_read_buffer_size=14, format_csv_null_representation='123NN', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' echo -e "Some text,NU,LL" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings max_read_buffer_size=13, format_csv_null_representation='NU,L', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'CSV', 's String, n Nullable(String)') settings storage_file_read_method='pread', max_read_buffer_size=13, format_csv_null_representation='NU,L', input_format_parallel_parsing=0" 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' echo 'Large custom NULL' $CLICKHOUSE_CLIENT -q "select '0000000000Custom NULL representation0000000000' FROM numbers(10)" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation0000000000'" -$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation000000000'" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS storage_file_read_method='pread', max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation0000000000'" +$CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02103_null.data', 'TSV', 's Nullable(String)') SETTINGS storage_file_read_method='pread', max_read_buffer_size=5, input_format_parallel_parsing=0, format_tsv_null_representation='0000000000Custom NULL representation000000000'" rm $DATA_FILE diff --git a/tests/queries/0_stateless/02130_parse_quoted_null.sh b/tests/queries/0_stateless/02130_parse_quoted_null.sh index 9cb6cb73e6c..2da62f9a4ff 100755 --- a/tests/queries/0_stateless/02130_parse_quoted_null.sh +++ b/tests/queries/0_stateless/02130_parse_quoted_null.sh @@ -24,31 +24,31 @@ echo -e "42.42\t3" > $DATA_FILE $CLICKHOUSE_CLIENT -q "$SELECT_QUERY" echo -e "null\t4" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=1 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=1 --storage_file_read_method=pread echo -e "null\t5" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=2 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=2 --storage_file_read_method=pread echo -e "null\t6" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=3 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=3 --storage_file_read_method=pread echo -e "null\t7" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 --storage_file_read_method=pread echo -e "nan\t8" > 
$DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=1 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=1 --storage_file_read_method=pread echo -e "nan\t9" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=2 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=2 --storage_file_read_method=pread echo -e "nan\t10" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=3 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=3 --storage_file_read_method=pread echo -e "nan\t11" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 --storage_file_read_method=pread echo -e "42\tnan" > $DATA_FILE -$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' +$CLICKHOUSE_CLIENT -q "$SELECT_QUERY" --max_read_buffer_size=4 --storage_file_read_method=pread 2>&1 | grep -F -q "CANNOT_READ_ALL_DATA" && echo 'OK' || echo 'FAIL' $CLICKHOUSE_CLIENT -q "select * from test_02130 order by y" $CLICKHOUSE_CLIENT -q "drop table test_02130" diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh index b7c60aac03c..4d9336bc1a0 100755 --- a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh @@ -11,9 +11,7 @@ echo -e 'key\nfoo\nbar' > $DATA_FILE QUERY_ID=$RANDOM $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', 'TSV', 's String')" \ - --query_id $QUERY_ID \ - --local_filesystem_read_method=mmap \ - --min_bytes_to_use_mmap_io=1 \ + --query_id $QUERY_ID $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" $CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferMMap']) FROM system.query_log WHERE query_id='$QUERY_ID'" @@ -22,7 +20,7 @@ $CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferOrdinary']) FR QUERY_ID=$RANDOM $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', 'TSV', 's String')" \ --query_id $QUERY_ID \ - --local_filesystem_read_method=pread + --storage_file_read_method=pread $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" $CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferMMap']) FROM system.query_log WHERE query_id='$QUERY_ID'" From 165e39085bbbe7e3d04e304f11520275ceacd0c2 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 9 Dec 2022 12:25:55 +0000 Subject: [PATCH 049/262] Set keytab by call of krb5_gss_register_acceptor_identity() --- programs/server/Server.cpp | 7 ------- src/Access/ExternalAuthenticators.cpp | 7 ++++++- src/Access/GSSAcceptor.h | 1 + 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 36b1d86ea53..965717d74b9 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -735,13 +735,6 @@ try } #endif - String principal_keytab = config().getString("kerberos.keytab", ""); - if (!principal_keytab.empty() && std::filesystem::exists(principal_keytab)) - { - setenv("KRB5_CLIENT_KTNAME", principal_keytab.c_str(), true /* overwrite */); // NOLINT - setenv("KRB5_KTNAME", principal_keytab.c_str(), true /* overwrite */); // NOLINT - } - registerFunctions(); registerAggregateFunctions(); registerTableFunctions(); diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index 
e1c598f26f5..e709c40de46 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -9,7 +9,7 @@ #include #include - +#include namespace DB { @@ -223,6 +223,11 @@ void parseKerberosParams(GSSAcceptorContext::Params & params, const Poco::Util:: params.realm = config.getString("kerberos.realm", ""); params.principal = config.getString("kerberos.principal", ""); + + String keytab = config.getString("kerberos.keytab", ""); + if (!keytab.empty() && std::filesystem::exists(keytab)) + if (krb5_gss_register_acceptor_identity(keytab.c_str())) + throw Exception("Invalid keytab file is specified", ErrorCodes::BAD_ARGUMENTS); } } diff --git a/src/Access/GSSAcceptor.h b/src/Access/GSSAcceptor.h index d2c55b1290c..c2930201a93 100644 --- a/src/Access/GSSAcceptor.h +++ b/src/Access/GSSAcceptor.h @@ -9,6 +9,7 @@ #if USE_KRB5 # include # include +# include # define MAYBE_NORETURN #else # define MAYBE_NORETURN [[noreturn]] From 9c70b13702d0f3f8e9e615fbf8e30d326efa4f39 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 9 Dec 2022 12:33:05 +0000 Subject: [PATCH 050/262] Add #if USE_KRB5 --- src/Access/ExternalAuthenticators.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index e709c40de46..6dcbd7e10b1 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -224,10 +224,12 @@ void parseKerberosParams(GSSAcceptorContext::Params & params, const Poco::Util:: params.realm = config.getString("kerberos.realm", ""); params.principal = config.getString("kerberos.principal", ""); +#if USE_KRB5 String keytab = config.getString("kerberos.keytab", ""); if (!keytab.empty() && std::filesystem::exists(keytab)) if (krb5_gss_register_acceptor_identity(keytab.c_str())) throw Exception("Invalid keytab file is specified", ErrorCodes::BAD_ARGUMENTS); +#endif } } From e38a93c45a947e596a00d4bc03a5bde913c39676 Mon Sep 17 00:00:00 2001 From: pufit Date: Sat, 10 Dec 2022 22:26:07 -0500 Subject: [PATCH 051/262] Fix UB, fix test. --- src/Core/Settings.h | 2 +- src/IO/ReadBufferFromFileBase.cpp | 15 +++++++++++++++ src/IO/ReadBufferFromFileBase.h | 3 +++ src/IO/ReadBufferFromFileDescriptor.cpp | 15 --------------- src/IO/ReadBufferFromFileDescriptor.h | 2 -- src/Storages/StorageFile.cpp | 2 +- .../02497_storage_file_reader_selection.reference | 2 -- .../02497_storage_file_reader_selection.sh | 2 -- 8 files changed, 20 insertions(+), 23 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c8c6eb61a73..a9f4280a0d1 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -591,7 +591,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. 
Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ - M(String, storage_file_read_method, "mmap", "Method of reading data from storage file, one of: read, pread, mmap, pread_threadpool.", 0)\ + M(String, storage_file_read_method, "mmap", "Method of reading data from storage file, one of: read, pread, mmap, pread_threadpool.", 0) \ M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, pread_threadpool.", 0) \ M(String, remote_filesystem_read_method, "threadpool", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \ M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \ diff --git a/src/IO/ReadBufferFromFileBase.cpp b/src/IO/ReadBufferFromFileBase.cpp index 1152804b770..7ea16d679bc 100644 --- a/src/IO/ReadBufferFromFileBase.cpp +++ b/src/IO/ReadBufferFromFileBase.cpp @@ -1,4 +1,6 @@ +#include #include +#include namespace DB { @@ -31,4 +33,17 @@ size_t ReadBufferFromFileBase::getFileSize() throw Exception(ErrorCodes::UNKNOWN_FILE_SIZE, "Cannot find out file size for read buffer"); } +void ReadBufferFromFileBase::setProgressCallback(ContextPtr context) +{ + auto file_progress_callback = context->getFileProgressCallback(); + + if (!file_progress_callback) + return; + + setProfileCallback([file_progress_callback](const ProfileInfo & progress) + { + file_progress_callback(FileProgress(progress.bytes_read, 0)); + }); +} + } diff --git a/src/IO/ReadBufferFromFileBase.h b/src/IO/ReadBufferFromFileBase.h index d28be034eb5..cc4a131b10b 100644 --- a/src/IO/ReadBufferFromFileBase.h +++ b/src/IO/ReadBufferFromFileBase.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include @@ -51,6 +52,8 @@ public: size_t getFileSize() override; + void setProgressCallback(ContextPtr context); + protected: std::optional file_size; ProfileCallback profile_callback; diff --git a/src/IO/ReadBufferFromFileDescriptor.cpp b/src/IO/ReadBufferFromFileDescriptor.cpp index cb4b6ca5f3e..b0e3a1ac7cd 100644 --- a/src/IO/ReadBufferFromFileDescriptor.cpp +++ b/src/IO/ReadBufferFromFileDescriptor.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -254,18 +253,4 @@ size_t ReadBufferFromFileDescriptor::getFileSize() return getSizeFromFileDescriptor(fd, getFileName()); } - -void ReadBufferFromFileDescriptor::setProgressCallback(ContextPtr context) -{ - auto file_progress_callback = context->getFileProgressCallback(); - - if (!file_progress_callback) - return; - - setProfileCallback([file_progress_callback](const ProfileInfo & progress) - { - file_progress_callback(FileProgress(progress.bytes_read, 0)); - }); -} - } diff --git a/src/IO/ReadBufferFromFileDescriptor.h b/src/IO/ReadBufferFromFileDescriptor.h index 6edda460bac..71ea1a1c358 100644 --- a/src/IO/ReadBufferFromFileDescriptor.h +++ b/src/IO/ReadBufferFromFileDescriptor.h @@ -62,8 +62,6 @@ public: size_t getFileSize() override; - void setProgressCallback(ContextPtr context); - private: /// Assuming file descriptor supports 'select', check that we have data to read or wait until timeout. 
bool poll(size_t timeout_microseconds) const; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index f4fc78d5e98..6154cdb73ca 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -214,7 +214,7 @@ std::unique_ptr createReadBuffer( if (context->getApplicationType() == Context::ApplicationType::LOCAL || context->getApplicationType() == Context::ApplicationType::CLIENT) { - auto & in = static_cast(*nested_buffer); + auto & in = static_cast(*nested_buffer); in.setProgressCallback(context); } diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.reference b/tests/queries/0_stateless/02497_storage_file_reader_selection.reference index 8da37e4219c..39e2f2f6f5e 100644 --- a/tests/queries/0_stateless/02497_storage_file_reader_selection.reference +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.reference @@ -2,9 +2,7 @@ key foo bar 1 -0 key foo bar 0 -1 diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh index 4d9336bc1a0..0e27146103e 100755 --- a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh @@ -15,7 +15,6 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" $CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferMMap']) FROM system.query_log WHERE query_id='$QUERY_ID'" -$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferOrdinary']) FROM system.query_log WHERE query_id='$QUERY_ID'" QUERY_ID=$RANDOM $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', 'TSV', 's String')" \ @@ -24,4 +23,3 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" $CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferMMap']) FROM system.query_log WHERE query_id='$QUERY_ID'" -$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferOrdinary']) FROM system.query_log WHERE query_id='$QUERY_ID'" From 1d6e77a29a1e36e755f15d5c720fd1690f7d63a2 Mon Sep 17 00:00:00 2001 From: pufit Date: Sun, 11 Dec 2022 16:15:41 -0500 Subject: [PATCH 052/262] Move reader selection logic back to `StorageFile`. 
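
The selection logic being moved back into StorageFile boils down to: use mmap for regular files, and fall back to an ordinary read() path for pipes and other non-regular inputs. A self-contained POSIX sketch of that pattern, with hypothetical names in place of the real read-buffer classes:

```cpp
// Illustrative sketch only: prefer mmap for regular files and fall back to
// buffered read() for anything else, mirroring the selection kept in the storage layer.
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>
#include <cstdio>
#include <vector>

// Reads the whole input into memory; the strategy depends on st_mode.
std::vector<char> readAll(int fd)
{
    struct stat st{};
    if (::fstat(fd, &st) != 0)
        return {};

    if (S_ISREG(st.st_mode) && st.st_size > 0)
    {
        // Regular file: map it and copy out (a real reader would keep the mapping alive).
        void * addr = ::mmap(nullptr, static_cast<size_t>(st.st_size), PROT_READ, MAP_PRIVATE, fd, 0);
        if (addr != MAP_FAILED)
        {
            std::vector<char> data(static_cast<const char *>(addr),
                                   static_cast<const char *>(addr) + st.st_size);
            ::munmap(addr, static_cast<size_t>(st.st_size));
            return data;
        }
        // If mmap is refused, fall through to the read() path below.
    }

    // Non-regular file (or mmap failure): plain buffered read().
    std::vector<char> data;
    char buf[4096];
    ssize_t n;
    while ((n = ::read(fd, buf, sizeof(buf))) > 0)
        data.insert(data.end(), buf, buf + n);
    return data;
}

int main()
{
    int fd = ::open("/etc/hostname", O_RDONLY); // any readable file or pipe works here
    if (fd < 0)
        return 1;
    auto data = readAll(fd);
    std::printf("read %zu bytes\n", data.size());
    ::close(fd);
    return 0;
}
```
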
--- src/Disks/IO/createReadBufferFromFileBase.cpp | 91 ++++--------------- src/Disks/IO/createReadBufferFromFileBase.h | 18 ---- src/Storages/StorageFile.cpp | 84 ++++++++++++++--- 3 files changed, 86 insertions(+), 107 deletions(-) diff --git a/src/Disks/IO/createReadBufferFromFileBase.cpp b/src/Disks/IO/createReadBufferFromFileBase.cpp index 911c677300f..fca05787959 100644 --- a/src/Disks/IO/createReadBufferFromFileBase.cpp +++ b/src/Disks/IO/createReadBufferFromFileBase.cpp @@ -2,7 +2,6 @@ #include #include #include -#include #include #include #include @@ -24,37 +23,22 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; - extern const int CANNOT_STAT; } -std::unique_ptr createReadBufferFromFileOrFileDescriptorBase( +std::unique_ptr createReadBufferFromFileBase( const std::string & filename, const ReadSettings & settings, std::optional read_hint, std::optional file_size, int flags, char * existing_memory, - size_t alignment, - bool read_from_fd, - int fd) + size_t alignment) { if (file_size.has_value() && !*file_size) return std::make_unique(); - struct stat file_stat{}; - if (read_from_fd) - { - if (0 != fstat(fd, &file_stat)) - throwFromErrno("Cannot stat file descriptor", ErrorCodes::CANNOT_STAT); - } - else - { - if (0 != stat(filename.c_str(), &file_stat)) - throwFromErrno("Cannot stat file " + filename, ErrorCodes::CANNOT_STAT); - } - - size_t estimated_size = file_stat.st_size; + size_t estimated_size = 0; if (read_hint.has_value()) estimated_size = *read_hint; else if (file_size.has_value()) @@ -63,24 +47,23 @@ std::unique_ptr createReadBufferFromFileOrFileDescriptor if (!existing_memory && settings.local_fs_method == LocalFSReadMethod::mmap && settings.mmap_threshold - && estimated_size >= settings.mmap_threshold - && S_ISREG(file_stat.st_mode)) + && settings.mmap_cache + && estimated_size >= settings.mmap_threshold) { try { - std::unique_ptr res; - - if (settings.mmap_cache) - res = std::make_unique(*settings.mmap_cache, filename, 0, estimated_size); + std::unique_ptr res; + if (file_size) + res = std::make_unique(*settings.mmap_cache, filename, 0, *file_size); else - res = std::make_unique(filename, 0, estimated_size); + res = std::make_unique(*settings.mmap_cache, filename, 0, *file_size); ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMap); return res; } catch (const ErrnoException &) { - /// Fallback if mmap is not supported. + /// Fallback if mmap is not supported (example: pipe). ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMapFailed); } } @@ -89,21 +72,13 @@ std::unique_ptr createReadBufferFromFileOrFileDescriptor { std::unique_ptr res; - /// Pread works only with regular files, so we explicitly fallback to read in other cases. 
- if (settings.local_fs_method == LocalFSReadMethod::read || !S_ISREG(file_stat.st_mode)) + if (settings.local_fs_method == LocalFSReadMethod::read) { - if (read_from_fd) - res = std::make_unique(fd, buffer_size, existing_memory, alignment, file_size); - else - res = std::make_unique(filename, buffer_size, actual_flags, existing_memory, alignment, file_size); + res = std::make_unique(filename, buffer_size, actual_flags, existing_memory, alignment, file_size); } else if (settings.local_fs_method == LocalFSReadMethod::pread || settings.local_fs_method == LocalFSReadMethod::mmap) { - if (read_from_fd) - res = std::make_unique(fd, buffer_size, existing_memory, alignment, file_size); - else - res = std::make_unique( - filename, buffer_size, actual_flags, existing_memory, alignment, file_size); + res = std::make_unique(filename, buffer_size, actual_flags, existing_memory, alignment, file_size); } else if (settings.local_fs_method == LocalFSReadMethod::pread_fake_async) { @@ -112,13 +87,8 @@ std::unique_ptr createReadBufferFromFileOrFileDescriptor throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); auto & reader = context->getThreadPoolReader(Context::FilesystemReaderType::SYNCHRONOUS_LOCAL_FS_READER); - - if (read_from_fd) - res = std::make_unique( - reader, settings.priority, fd, buffer_size, existing_memory, alignment, file_size); - else - res = std::make_unique( - reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size); + res = std::make_unique( + reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size); } else if (settings.local_fs_method == LocalFSReadMethod::pread_threadpool) { @@ -127,13 +97,8 @@ std::unique_ptr createReadBufferFromFileOrFileDescriptor throw Exception(ErrorCodes::LOGICAL_ERROR, "Global context not initialized"); auto & reader = context->getThreadPoolReader(Context::FilesystemReaderType::ASYNCHRONOUS_LOCAL_FS_READER); - - if (read_from_fd) - res = std::make_unique( - reader, settings.priority, fd, buffer_size, existing_memory, alignment, file_size); - else - res = std::make_unique( - reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size); + res = std::make_unique( + reader, settings.priority, filename, buffer_size, actual_flags, existing_memory, alignment, file_size); } else throw Exception(ErrorCodes::LOGICAL_ERROR, "Unknown read method"); @@ -209,26 +174,4 @@ std::unique_ptr createReadBufferFromFileOrFileDescriptor return create(buffer_size, flags); } -std::unique_ptr createReadBufferFromFileBase( - const std::string & filename, - const ReadSettings & settings, - std::optional read_hint, - std::optional file_size, - int flags_, - char * existing_memory, - size_t alignment) -{ - return createReadBufferFromFileOrFileDescriptorBase(filename, settings, read_hint, file_size, flags_, existing_memory, alignment); -} - -std::unique_ptr createReadBufferFromFileDescriptorBase( - int fd, - const ReadSettings & settings, - std::optional read_hint, - std::optional file_size, - char * existing_memory , - size_t alignment) -{ - return createReadBufferFromFileOrFileDescriptorBase({}, settings, read_hint, file_size, -1, existing_memory, alignment, true, fd); -} } diff --git a/src/Disks/IO/createReadBufferFromFileBase.h b/src/Disks/IO/createReadBufferFromFileBase.h index 542ea423462..c2e2040587b 100644 --- a/src/Disks/IO/createReadBufferFromFileBase.h +++ b/src/Disks/IO/createReadBufferFromFileBase.h @@ -14,17 +14,6 @@ 
namespace DB * @param read_hint - the number of bytes to read hint * @param file_size - size of file */ -std::unique_ptr createReadBufferFromFileOrFileDescriptorBase( - const std::string & filename, - const ReadSettings & settings, - std::optional read_hint = {}, - std::optional file_size = {}, - int flags_ = -1, - char * existing_memory = nullptr, - size_t alignment = 0, - bool read_from_fd = false, - int fd = 0); - std::unique_ptr createReadBufferFromFileBase( const std::string & filename, const ReadSettings & settings, @@ -34,11 +23,4 @@ std::unique_ptr createReadBufferFromFileBase( char * existing_memory = nullptr, size_t alignment = 0); -std::unique_ptr createReadBufferFromFileDescriptorBase( - int fd, - const ReadSettings & settings, - std::optional read_hint = {}, - std::optional file_size = {}, - char * existing_memory = nullptr, - size_t alignment = 0); } diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 6154cdb73ca..1c61370a392 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -38,8 +38,7 @@ #include #include #include - -#include +#include #include #include @@ -51,6 +50,13 @@ #include +namespace ProfileEvents +{ + extern const Event CreatedReadBufferOrdinary; + extern const Event CreatedReadBufferMMap; + extern const Event CreatedReadBufferMMapFailed; +} + namespace fs = std::filesystem; namespace DB @@ -69,6 +75,7 @@ namespace ErrorCodes extern const int FILE_DOESNT_EXIST; extern const int TIMEOUT_EXCEEDED; extern const int INCOMPATIBLE_COLUMNS; + extern const int CANNOT_STAT; extern const int LOGICAL_ERROR; extern const int CANNOT_APPEND_TO_FILE; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; @@ -182,6 +189,7 @@ void checkCreationIsAllowed( std::unique_ptr createReadBuffer( const String & current_path, bool use_table_fd, + const String & storage_name, int table_fd, const String & compression_method, ContextPtr context) @@ -189,27 +197,73 @@ std::unique_ptr createReadBuffer( std::unique_ptr nested_buffer; CompressionMethod method; - auto read_method = context->getSettingsRef().storage_file_read_method.value; - auto read_settings = context->getReadSettings(); - read_settings.mmap_threshold = 1; - read_settings.mmap_cache = nullptr; /// Turn off mmap cache for Storage File - - if (auto opt_method = magic_enum::enum_cast(read_method)) - read_settings.local_fs_method = *opt_method; + auto read_method_string = context->getSettingsRef().storage_file_read_method.value; + LocalFSReadMethod read_method; + if (auto opt_method = magic_enum::enum_cast(read_method_string)) + read_method = *opt_method; else - throwFromErrno("Unknown read method " + read_method, ErrorCodes::UNKNOWN_READ_METHOD); + throwFromErrno("Unknown read method " + read_method_string, ErrorCodes::UNKNOWN_READ_METHOD); + + struct stat file_stat{}; if (use_table_fd) { - nested_buffer = createReadBufferFromFileDescriptorBase(table_fd, read_settings); + /// Check if file descriptor allows random reads (and reading it twice). + if (0 != fstat(table_fd, &file_stat)) + throwFromErrno("Cannot stat table file descriptor, inside " + storage_name, ErrorCodes::CANNOT_STAT); + method = chooseCompressionMethod("", compression_method); } else { - nested_buffer = createReadBufferFromFileBase(current_path, read_settings); + /// Check if file descriptor allows random reads (and reading it twice). 
+ if (0 != stat(current_path.c_str(), &file_stat)) + throwFromErrno("Cannot stat file " + current_path, ErrorCodes::CANNOT_STAT); + method = chooseCompressionMethod(current_path, compression_method); } + + bool mmap_failed = false; + if (S_ISREG(file_stat.st_mode) && read_method == LocalFSReadMethod::mmap) + { + try + { + if (use_table_fd) + nested_buffer = std::make_unique(table_fd, 0); + else + nested_buffer = std::make_unique(current_path, 0); + + ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMap); + } + catch (const ErrnoException &) + { + /// Fallback if mmap is not supported. + ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMapFailed); + mmap_failed = true; + } + } + + if (S_ISREG(file_stat.st_mode) && (read_method == LocalFSReadMethod::pread || mmap_failed)) + { + if (use_table_fd) + nested_buffer = std::make_unique(table_fd); + else + nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); + + ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); + } + else + { + if (use_table_fd) + nested_buffer = std::make_unique(table_fd); + else + nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); + + ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); + } + + /// For clickhouse-local and clickhouse-client add progress callback to display progress bar. if (context->getApplicationType() == Context::ApplicationType::LOCAL || context->getApplicationType() == Context::ApplicationType::CLIENT) @@ -277,7 +331,7 @@ ColumnsDescription StorageFile::getTableStructureFromFileDescriptor(ContextPtr c { /// We will use PeekableReadBuffer to create a checkpoint, so we need a place /// where we can store the original read buffer. - read_buffer_from_fd = createReadBuffer("", true, table_fd, compression_method, context); + read_buffer_from_fd = createReadBuffer("", true, getName(), table_fd, compression_method, context); auto read_buf = std::make_unique(*read_buffer_from_fd); read_buf->setCheckpoint(); return read_buf; @@ -326,7 +380,7 @@ ColumnsDescription StorageFile::getTableStructureFromFile( if (it == paths.end()) return nullptr; - return createReadBuffer(*it++, false, -1, compression_method, context); + return createReadBuffer(*it++, false, "File", -1, compression_method, context); }; ColumnsDescription columns; @@ -543,7 +597,7 @@ public: } if (!read_buf) - read_buf = createReadBuffer(current_path, storage->use_table_fd, storage->table_fd, storage->compression_method, context); + read_buf = createReadBuffer(current_path, storage->use_table_fd, storage->getName(), storage->table_fd, storage->compression_method, context); auto format = context->getInputFormat(storage->format_name, *read_buf, block_for_format, max_block_size, storage->format_settings); From 6979dc9f2f9fa031108d8758bbcb85f1d5661013 Mon Sep 17 00:00:00 2001 From: pufit Date: Sun, 11 Dec 2022 17:36:30 -0500 Subject: [PATCH 053/262] dummy fix, additional test --- src/Storages/StorageFile.cpp | 2 +- .../0_stateless/02497_storage_file_reader_selection.reference | 2 ++ .../queries/0_stateless/02497_storage_file_reader_selection.sh | 2 ++ 3 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 1c61370a392..c425cf4e686 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -253,7 +253,7 @@ std::unique_ptr createReadBuffer( ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); } - else + else if 
(mmap_failed) { if (use_table_fd) nested_buffer = std::make_unique(table_fd); diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.reference b/tests/queries/0_stateless/02497_storage_file_reader_selection.reference index 39e2f2f6f5e..8da37e4219c 100644 --- a/tests/queries/0_stateless/02497_storage_file_reader_selection.reference +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.reference @@ -2,7 +2,9 @@ key foo bar 1 +0 key foo bar 0 +1 diff --git a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh index 0e27146103e..4d9336bc1a0 100755 --- a/tests/queries/0_stateless/02497_storage_file_reader_selection.sh +++ b/tests/queries/0_stateless/02497_storage_file_reader_selection.sh @@ -15,6 +15,7 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" $CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferMMap']) FROM system.query_log WHERE query_id='$QUERY_ID'" +$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferOrdinary']) FROM system.query_log WHERE query_id='$QUERY_ID'" QUERY_ID=$RANDOM $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', 'TSV', 's String')" \ @@ -23,3 +24,4 @@ $CLICKHOUSE_CLIENT -q "SELECT * FROM file('test_02497_storage_file_reader.data', $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" $CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferMMap']) FROM system.query_log WHERE query_id='$QUERY_ID'" +$CLICKHOUSE_CLIENT -q "SELECT sum(ProfileEvents['CreatedReadBufferOrdinary']) FROM system.query_log WHERE query_id='$QUERY_ID'" From 5c52f26823c0cadcec874ba66458ce592a22bb90 Mon Sep 17 00:00:00 2001 From: pufit Date: Mon, 12 Dec 2022 00:39:08 -0500 Subject: [PATCH 054/262] ya fix. --- src/Storages/StorageFile.cpp | 105 +++++++++++++++++++---------------- 1 file changed, 57 insertions(+), 48 deletions(-) diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index c425cf4e686..1cee3e32e87 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -186,6 +186,62 @@ void checkCreationIsAllowed( } } +std::unique_ptr selectReadBuffer( + const String & current_path, + bool use_table_fd, + int table_fd, + const struct stat & file_stat, + ContextPtr context) +{ + auto read_method_string = context->getSettingsRef().storage_file_read_method.value; + LocalFSReadMethod read_method; + if (auto opt_method = magic_enum::enum_cast(read_method_string)) + read_method = *opt_method; + else + throwFromErrno("Unknown read method " + read_method_string, ErrorCodes::UNKNOWN_READ_METHOD); + + if (S_ISREG(file_stat.st_mode) && read_method == LocalFSReadMethod::mmap) + { + try + { + std::unique_ptr res; + if (use_table_fd) + res = std::make_unique(table_fd, 0); + else + res = std::make_unique(current_path, 0); + + ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMap); + return res; + } + catch (const ErrnoException &) + { + /// Fallback if mmap is not supported. 
+ ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMapFailed); + } + } + + std::unique_ptr res; + if (S_ISREG(file_stat.st_mode) && (read_method == LocalFSReadMethod::pread || read_method == LocalFSReadMethod::mmap)) + { + if (use_table_fd) + res = std::make_unique(table_fd); + else + res = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); + + ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); + } + else + { + if (use_table_fd) + res = std::make_unique(table_fd); + else + res = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); + + ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); + } + return res; +} + std::unique_ptr createReadBuffer( const String & current_path, bool use_table_fd, @@ -194,16 +250,8 @@ std::unique_ptr createReadBuffer( const String & compression_method, ContextPtr context) { - std::unique_ptr nested_buffer; CompressionMethod method; - auto read_method_string = context->getSettingsRef().storage_file_read_method.value; - LocalFSReadMethod read_method; - if (auto opt_method = magic_enum::enum_cast(read_method_string)) - read_method = *opt_method; - else - throwFromErrno("Unknown read method " + read_method_string, ErrorCodes::UNKNOWN_READ_METHOD); - struct stat file_stat{}; if (use_table_fd) @@ -223,46 +271,7 @@ std::unique_ptr createReadBuffer( method = chooseCompressionMethod(current_path, compression_method); } - - bool mmap_failed = false; - if (S_ISREG(file_stat.st_mode) && read_method == LocalFSReadMethod::mmap) - { - try - { - if (use_table_fd) - nested_buffer = std::make_unique(table_fd, 0); - else - nested_buffer = std::make_unique(current_path, 0); - - ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMap); - } - catch (const ErrnoException &) - { - /// Fallback if mmap is not supported. - ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMapFailed); - mmap_failed = true; - } - } - - if (S_ISREG(file_stat.st_mode) && (read_method == LocalFSReadMethod::pread || mmap_failed)) - { - if (use_table_fd) - nested_buffer = std::make_unique(table_fd); - else - nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); - - ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); - } - else if (mmap_failed) - { - if (use_table_fd) - nested_buffer = std::make_unique(table_fd); - else - nested_buffer = std::make_unique(current_path, context->getSettingsRef().max_read_buffer_size); - - ProfileEvents::increment(ProfileEvents::CreatedReadBufferOrdinary); - } - + std::unique_ptr nested_buffer = selectReadBuffer(current_path, use_table_fd, table_fd, file_stat, context); /// For clickhouse-local and clickhouse-client add progress callback to display progress bar. 
if (context->getApplicationType() == Context::ApplicationType::LOCAL From fef57b4e3b615c8ec80ab4c116a2afd9c8b70f91 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 14 Dec 2022 02:12:08 +0000 Subject: [PATCH 055/262] Better thread pool --- src/Core/Settings.h | 2 -- src/Storages/System/StorageSystemReplicas.cpp | 27 +++++++++++++++---- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4be91ce311f..91647a5f165 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -155,8 +155,6 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) \ M(Bool, allow_experimental_parallel_reading_from_replicas, false, "If true, ClickHouse will send a SELECT query to all replicas of a table. It will work for any kind on MergeTree table.", 0) \ \ - M(UInt64, system_replicas_fetch_threads, 16, "The maximum number of threads to fetch data for system.replicas table.", 0) \ - \ M(Bool, skip_unavailable_shards, false, "If true, ClickHouse silently skips unavailable shards and nodes unresolvable through DNS. Shard is marked as unavailable when none of the replicas can be reached.", 0) \ \ M(UInt64, parallel_distributed_insert_select, 0, "Process distributed INSERT SELECT query in the same cluster on local tables on every shard; if set to 1 - SELECT is executed on each shard; if set to 2 - SELECT and INSERT are executed on each shard", 0) \ diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index a6afb6eff0d..d36de9afe12 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -10,6 +10,7 @@ #include #include #include +#include namespace DB @@ -151,30 +152,46 @@ Pipe StorageSystemReplicas::read( MutableColumns res_columns = storage_snapshot->metadata->getSampleBlock().cloneEmptyColumns(); - auto settings = context->getSettingsRef(); - size_t thread_pool_size = settings.system_replicas_fetch_threads; + size_t tables_size = col_database->size(); + size_t thread_pool_size = std::min(tables_size, static_cast(getNumberOfPhysicalCPUCores())); + auto settings = context->getSettingsRef(); if (settings.max_threads != 0) thread_pool_size = std::min(thread_pool_size, static_cast(settings.max_threads)); ThreadPool thread_pool(thread_pool_size); + std::atomic error_flag = false; + Exception exception; - size_t tables_size = col_database->size(); std::vector statuses(tables_size); for (size_t i = 0; i < tables_size; ++i) { - thread_pool.scheduleOrThrowOnError([i, &statuses, &replicated_tables, &col_database, &col_table, &with_zk_fields] + thread_pool.scheduleOrThrowOnError([&, i=i] { - dynamic_cast( + try + { + dynamic_cast( *replicated_tables [(*col_database)[i].safeGet()] [(*col_table)[i].safeGet()]).getStatus(statuses[i], with_zk_fields); + } + catch (...) 
+ { + tryLogCurrentException("system.replicas", "Failed to fetch system.replicas data"); + + /// We capture one of the exceptions to be thrown later + if (!error_flag.exchange(true)) + exception = Exception(getCurrentExceptionCode(), getCurrentExceptionMessage(false)); + } }); } thread_pool.wait(); + if (error_flag) + throw exception; + for (const auto & status: statuses) { size_t col_num = 3; From b2f3b236d8e36fd3c2b2e050df69300c47fa67c4 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 14 Dec 2022 12:23:08 +0000 Subject: [PATCH 056/262] Slightly better thread pool --- src/Storages/System/StorageSystemReplicas.cpp | 31 +++++++------------ 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index d36de9afe12..51e60c779b8 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -153,45 +153,36 @@ Pipe StorageSystemReplicas::read( MutableColumns res_columns = storage_snapshot->metadata->getSampleBlock().cloneEmptyColumns(); size_t tables_size = col_database->size(); - size_t thread_pool_size = std::min(tables_size, static_cast(getNumberOfPhysicalCPUCores())); + std::vector statuses(tables_size); + size_t thread_pool_size = std::min(tables_size, static_cast(getNumberOfPhysicalCPUCores())); auto settings = context->getSettingsRef(); if (settings.max_threads != 0) thread_pool_size = std::min(thread_pool_size, static_cast(settings.max_threads)); ThreadPool thread_pool(thread_pool_size); - std::atomic error_flag = false; - Exception exception; - - std::vector statuses(tables_size); for (size_t i = 0; i < tables_size; ++i) { - thread_pool.scheduleOrThrowOnError([&, i=i] + try { - try + thread_pool.scheduleOrThrowOnError([&, i=i] { dynamic_cast( *replicated_tables [(*col_database)[i].safeGet()] [(*col_table)[i].safeGet()]).getStatus(statuses[i], with_zk_fields); - } - catch (...) - { - tryLogCurrentException("system.replicas", "Failed to fetch system.replicas data"); - - /// We capture one of the exceptions to be thrown later - if (!error_flag.exchange(true)) - exception = Exception(getCurrentExceptionCode(), getCurrentExceptionMessage(false)); - } - }); + }); + } + catch (...) + { + thread_pool.wait(); + throw; + } } thread_pool.wait(); - if (error_flag) - throw exception; - for (const auto & status: statuses) { size_t col_num = 3; From 2aff560218bd64ae7753c5bb8d776016740282a9 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 14 Dec 2022 12:50:12 +0000 Subject: [PATCH 057/262] Even better thread pool --- src/Storages/System/StorageSystemReplicas.cpp | 20 ++++++------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/src/Storages/System/StorageSystemReplicas.cpp b/src/Storages/System/StorageSystemReplicas.cpp index 51e60c779b8..7d0970620bc 100644 --- a/src/Storages/System/StorageSystemReplicas.cpp +++ b/src/Storages/System/StorageSystemReplicas.cpp @@ -164,21 +164,13 @@ Pipe StorageSystemReplicas::read( for (size_t i = 0; i < tables_size; ++i) { - try + thread_pool.scheduleOrThrowOnError([&, i=i] { - thread_pool.scheduleOrThrowOnError([&, i=i] - { - dynamic_cast( - *replicated_tables - [(*col_database)[i].safeGet()] - [(*col_table)[i].safeGet()]).getStatus(statuses[i], with_zk_fields); - }); - } - catch (...) 
- { - thread_pool.wait(); - throw; - } + dynamic_cast( + *replicated_tables + [(*col_database)[i].safeGet()] + [(*col_table)[i].safeGet()]).getStatus(statuses[i], with_zk_fields); + }); } thread_pool.wait(); From e2ced517dd8baf81f44cc5900d0443ac3aaf8790 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Wed, 14 Dec 2022 13:39:23 +0000 Subject: [PATCH 058/262] Add integration test test_kerberos_auth --- .../compose/docker_compose_kerberos_kdc.yml | 11 ++ tests/integration/helpers/cluster.py | 65 ++++++++- .../test_kerberos_auth/__init__.py | 0 .../clickhouse_path/EMPTY_DIR | 0 .../test_kerberos_auth/configs/kerberos.xml | 6 + .../test_kerberos_auth/configs/users.xml | 19 +++ .../kerberos_image_config.sh | 126 ++++++++++++++++++ .../test_kerberos_auth/secrets/krb.conf | 22 +++ tests/integration/test_kerberos_auth/test.py | 63 +++++++++ 9 files changed, 310 insertions(+), 2 deletions(-) create mode 100644 docker/test/integration/runner/compose/docker_compose_kerberos_kdc.yml create mode 100644 tests/integration/test_kerberos_auth/__init__.py create mode 100644 tests/integration/test_kerberos_auth/clickhouse_path/EMPTY_DIR create mode 100644 tests/integration/test_kerberos_auth/configs/kerberos.xml create mode 100644 tests/integration/test_kerberos_auth/configs/users.xml create mode 100644 tests/integration/test_kerberos_auth/kerberos_image_config.sh create mode 100644 tests/integration/test_kerberos_auth/secrets/krb.conf create mode 100644 tests/integration/test_kerberos_auth/test.py diff --git a/docker/test/integration/runner/compose/docker_compose_kerberos_kdc.yml b/docker/test/integration/runner/compose/docker_compose_kerberos_kdc.yml new file mode 100644 index 00000000000..e06b1c71bb7 --- /dev/null +++ b/docker/test/integration/runner/compose/docker_compose_kerberos_kdc.yml @@ -0,0 +1,11 @@ +version: '2.3' + +services: + kerberoskdc: + image: clickhouse/kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest} + hostname: kerberoskdc + volumes: + - ${KERBEROS_AUTH_DIR}/secrets:/tmp/keytab + - ${KERBEROS_AUTH_DIR}/../../kerberos_image_config.sh:/config.sh + - /dev/urandom:/dev/random + ports: [88, 749] diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 95d405266ae..7557e8412d2 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -388,6 +388,7 @@ class ClickHouseCluster: self.with_postgres_cluster = False self.with_kafka = False self.with_kerberized_kafka = False + self.with_kerberos_kdc = False self.with_rabbitmq = False self.with_nats = False self.with_odbc_drivers = False @@ -455,6 +456,12 @@ class ClickHouseCluster: self.kerberized_kafka_host ) + # available when with_kerberos_kdc == True + self.kerberos_kdc_host = "kerberoskdc" + self.keberos_docker_id = self.get_instance_docker_id( + self.kerberos_kdc_host + ) + # available when with_mongo == True self.mongo_host = "mongo1" self.mongo_port = get_free_port() @@ -1059,6 +1066,31 @@ class ClickHouseCluster: ] return self.base_kerberized_kafka_cmd + + def setup_kerberos_cmd( + self, instance, env_variables, docker_compose_yml_dir + ): + self.with_kerberos_kdc = True + env_variables["KERBEROS_AUTH_DIR"] = instance.path + "/" + env_variables["KERBEROS_KDC_HOST"] = self.kerberos_kdc_host + self.base_cmd.extend( + [ + "--file", + p.join(docker_compose_yml_dir, "docker_compose_kerberos_kdc.yml"), + ] + ) + self.base_kerberos_cmd = [ + "docker-compose", + "--env-file", + instance.env_file, + "--project-name", + self.project_name, + "--file", + 
p.join(docker_compose_yml_dir, "docker_compose_kerberos_kdc.yml"), + ] + return self.base_kerberos_cmd + + def setup_redis_cmd(self, instance, env_variables, docker_compose_yml_dir): self.with_redis = True env_variables["REDIS_HOST"] = self.redis_host @@ -1329,6 +1361,7 @@ class ClickHouseCluster: with_mysql_cluster=False, with_kafka=False, with_kerberized_kafka=False, + with_kerberos_kdc=False, with_rabbitmq=False, with_nats=False, clickhouse_path_dir=None, @@ -1420,6 +1453,7 @@ class ClickHouseCluster: with_mysql_cluster=with_mysql_cluster, with_kafka=with_kafka, with_kerberized_kafka=with_kerberized_kafka, + with_kerberos_kdc=with_kerberos_kdc, with_rabbitmq=with_rabbitmq, with_nats=with_nats, with_nginx=with_nginx, @@ -1554,6 +1588,13 @@ class ClickHouseCluster: ) ) + if with_kerberos_kdc and not self.with_kerberos_kdc: + cmds.append( + self.setup_kerberos_cmd( + instance, env_variables, docker_compose_yml_dir + ) + ) + if with_rabbitmq and not self.with_rabbitmq: cmds.append( self.setup_rabbitmq_cmd(instance, env_variables, docker_compose_yml_dir) @@ -2124,6 +2165,11 @@ class ClickHouseCluster: logging.debug("Waiting for Kafka to start up") time.sleep(1) + def wait_kerberos_kdc_is_available(self, kafka_docker_id): + logging.debug("Waiting for Kerberos KDC to start up") + # temp code: sleep 50 seconds + time.sleep(50) + def wait_hdfs_to_start(self, timeout=300, check_marker=False): start = time.time() while time.time() - start < timeout: @@ -2473,6 +2519,16 @@ class ClickHouseCluster: self.kerberized_kafka_docker_id, self.kerberized_kafka_port, 100 ) + if self.with_kerberos_kdc and self.base_kerberos_cmd: + logging.debug("Setup Kerberos KDC") + run_and_check( + self.base_kerberos_cmd + + common_opts + + ["--renew-anon-volumes"] + ) + self.up_called = True + self.wait_kerberos_kdc_is_available(self.keberos_docker_id) + if self.with_rabbitmq and self.base_rabbitmq_cmd: logging.debug("Setup RabbitMQ") os.makedirs(self.rabbitmq_logs_dir) @@ -2872,6 +2928,7 @@ class ClickHouseInstance: with_mysql_cluster, with_kafka, with_kerberized_kafka, + with_kerberos_kdc, with_rabbitmq, with_nats, with_nginx, @@ -2955,6 +3012,7 @@ class ClickHouseInstance: self.with_postgres_cluster = with_postgres_cluster self.with_kafka = with_kafka self.with_kerberized_kafka = with_kerberized_kafka + self.with_kerberos_kdc = with_kerberos_kdc self.with_rabbitmq = with_rabbitmq self.with_nats = with_nats self.with_nginx = with_nginx @@ -2988,7 +3046,7 @@ class ClickHouseInstance: else: self.odbc_ini_path = "" - if with_kerberized_kafka or with_kerberized_hdfs: + if with_kerberized_kafka or with_kerberized_hdfs or with_kerberos_kdc: self.keytab_path = ( "- " + os.path.dirname(self.docker_compose_path) @@ -3906,7 +3964,7 @@ class ClickHouseInstance: if self.with_zookeeper: shutil.copy(self.zookeeper_config_path, conf_d_dir) - if self.with_kerberized_kafka or self.with_kerberized_hdfs: + if self.with_kerberized_kafka or self.with_kerberized_hdfs or self.with_kerberos_kdc: shutil.copytree( self.kerberos_secrets_dir, p.abspath(p.join(self.path, "secrets")) ) @@ -3978,6 +4036,9 @@ class ClickHouseInstance: if self.with_kerberized_kafka: depends_on.append("kerberized_kafka1") + if self.with_kerberos_kdc: + depends_on.append("kerberoskdc") + if self.with_kerberized_hdfs: depends_on.append("kerberizedhdfs1") diff --git a/tests/integration/test_kerberos_auth/__init__.py b/tests/integration/test_kerberos_auth/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git 
a/tests/integration/test_kerberos_auth/clickhouse_path/EMPTY_DIR b/tests/integration/test_kerberos_auth/clickhouse_path/EMPTY_DIR new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/integration/test_kerberos_auth/configs/kerberos.xml b/tests/integration/test_kerberos_auth/configs/kerberos.xml new file mode 100644 index 00000000000..5b6be45e78e --- /dev/null +++ b/tests/integration/test_kerberos_auth/configs/kerberos.xml @@ -0,0 +1,6 @@ + + + TEST.CLICKHOUSE.TECH + /tmp/keytab/clickhouse.keytab + + diff --git a/tests/integration/test_kerberos_auth/configs/users.xml b/tests/integration/test_kerberos_auth/configs/users.xml new file mode 100644 index 00000000000..33d658e6335 --- /dev/null +++ b/tests/integration/test_kerberos_auth/configs/users.xml @@ -0,0 +1,19 @@ + + + + + + + + + TEST.CLICKHOUSE.TECH + + 1 + + ::/0 + + default + default + + + diff --git a/tests/integration/test_kerberos_auth/kerberos_image_config.sh b/tests/integration/test_kerberos_auth/kerberos_image_config.sh new file mode 100644 index 00000000000..c99ce1def9f --- /dev/null +++ b/tests/integration/test_kerberos_auth/kerberos_image_config.sh @@ -0,0 +1,126 @@ +#!/bin/bash + + +set -x # trace + +: "${REALM:=TEST.CLICKHOUSE.TECH}" +: "${DOMAIN_REALM:=test.clickhouse.com}" +: "${KERB_MASTER_KEY:=masterkey}" +: "${KERB_ADMIN_USER:=admin}" +: "${KERB_ADMIN_PASS:=admin}" + +create_config() { + : "${KDC_ADDRESS:=$(hostname -f)}" + + cat>/etc/krb5.conf</var/kerberos/krb5kdc/kdc.conf< /var/kerberos/krb5kdc/kadm5.acl +} + +create_keytabs() { + rm /tmp/keytab/*.keytab + + kadmin.local -q "addprinc -randkey kuser@${REALM}" + kadmin.local -q "ktadd -norandkey -k /tmp/keytab/kuser.keytab kuser@${REALM}" + + kadmin.local -q "addprinc -randkey HTTP/instance@${REALM}" + kadmin.local -q "ktadd -norandkey -k /tmp/keytab/clickhouse.keytab HTTP/instance@${REALM}" + + chmod g+r /tmp/keytab/kuser.keytab + chmod g+r /tmp/keytab/clickhouse.keytab +} + +main() { + + if [ ! -f /kerberos_initialized ]; then + create_config + create_db + create_admin_user + start_kdc + + touch /kerberos_initialized + fi + + if [ ! 
-f /var/kerberos/krb5kdc/principal ]; then + while true; do sleep 1000; done + else + start_kdc + create_keytabs + tail -F /var/log/kerberos/krb5kdc.log + fi + +} + +[[ "$0" == "${BASH_SOURCE[0]}" ]] && main "$@" diff --git a/tests/integration/test_kerberos_auth/secrets/krb.conf b/tests/integration/test_kerberos_auth/secrets/krb.conf new file mode 100644 index 00000000000..87520f65b2d --- /dev/null +++ b/tests/integration/test_kerberos_auth/secrets/krb.conf @@ -0,0 +1,22 @@ +[logging] + default = FILE:/var/log/kerberos/krb5libs.log + kdc = FILE:/var/log/kerberos/krb5kdc.log + admin_server = FILE:/var/log/kerberos/kadmind.log + +[libdefaults] + default_realm = TEST.CLICKHOUSE.TECH + dns_lookup_realm = false + dns_lookup_kdc = false + ticket_lifetime = 15s + renew_lifetime = 15s + forwardable = true + +[realms] + TEST.CLICKHOUSE.TECH = { + kdc = kerberoskdc + admin_server = kerberoskdc + } + +[domain_realm] + .TEST.CLICKHOUSE.TECH = TEST.CLICKHOUSE.TECH + TEST.CLICKHOUSE.TECH = TEST.CLICKHOUSE.TECH diff --git a/tests/integration/test_kerberos_auth/test.py b/tests/integration/test_kerberos_auth/test.py new file mode 100644 index 00000000000..f4c30a69c9f --- /dev/null +++ b/tests/integration/test_kerberos_auth/test.py @@ -0,0 +1,63 @@ +import os.path as p +import random +import threading +import time +import pytest +import logging + +from helpers.cluster import ClickHouseCluster +from helpers.test_tools import TSV +from helpers.client import QueryRuntimeException + +import json +import subprocess + +import socket + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "instance", + main_configs=["configs/kerberos.xml"], + user_configs=["configs/users.xml"], + with_kerberos_kdc=True, + clickhouse_path_dir="clickhouse_path", +) + + +# Fixtures + + +@pytest.fixture(scope="module") +def kerberos_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +@pytest.fixture(autouse=True) +def kerberos_setup_teardown(): + yield # run test + + +# Tests + + +def test_kerberos_auth_with_keytab(kerberos_cluster): + logging.debug("kerberos test") + instance.exec_in_container( + ["bash", "-c", "kinit -V -k -t /tmp/keytab/kuser.keytab kuser"] + ) + assert ( + instance.exec_in_container( + ["bash", "-c", "echo 'select currentUser()' | curl -vvv --negotiate -u : http://{}:8123/ --data-binary @-".format(instance.hostname)] + ) + == "kuser\n" + ) + + +if __name__ == "__main__": + cluster.start() + input("Cluster created, press any key to destroy...") + cluster.shutdown() From 5491aa9627de96c89147229031546edf9c2d7ce9 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 15 Dec 2022 11:50:23 +0000 Subject: [PATCH 059/262] Implement wait_kerberos_kdc_is_available --- tests/integration/helpers/cluster.py | 36 +++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 7557e8412d2..ba5d02095f3 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -206,6 +206,22 @@ def check_kafka_is_available(kafka_id, kafka_port): p.communicate() return p.returncode == 0 +def check_kerberos_kdc_is_available(kerberos_kdc_id): + p = subprocess.Popen( + ( + "docker", + "exec", + "-i", + kerberos_kdc_id, + "/etc/rc.d/init.d/krb5kdc", + "status", + ), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + p.communicate() + return p.returncode == 0 + def check_rabbitmq_is_available(rabbitmq_id): p = subprocess.Popen( @@ -458,7 +474,7 
@@ class ClickHouseCluster: # available when with_kerberos_kdc == True self.kerberos_kdc_host = "kerberoskdc" - self.keberos_docker_id = self.get_instance_docker_id( + self.keberos_kdc_docker_id = self.get_instance_docker_id( self.kerberos_kdc_host ) @@ -2165,10 +2181,18 @@ class ClickHouseCluster: logging.debug("Waiting for Kafka to start up") time.sleep(1) - def wait_kerberos_kdc_is_available(self, kafka_docker_id): - logging.debug("Waiting for Kerberos KDC to start up") - # temp code: sleep 50 seconds - time.sleep(50) + def wait_kerberos_kdc_is_available(self, kerberos_kdc_docker_id, max_retries=50): + retries = 0 + while True: + if check_kerberos_kdc_is_available(kerberos_kdc_docker_id): + break + else: + retries += 1 + if retries > max_retries: + raise Exception("Kerberos KDC is not available") + logging.debug("Waiting for Kerberos KDC to start up") + time.sleep(1) + def wait_hdfs_to_start(self, timeout=300, check_marker=False): start = time.time() @@ -2527,7 +2551,7 @@ class ClickHouseCluster: + ["--renew-anon-volumes"] ) self.up_called = True - self.wait_kerberos_kdc_is_available(self.keberos_docker_id) + self.wait_kerberos_kdc_is_available(self.keberos_kdc_docker_id) if self.with_rabbitmq and self.base_rabbitmq_cmd: logging.debug("Setup RabbitMQ") From b7df68476246dfd83ca27aa591b673fe2e59b36c Mon Sep 17 00:00:00 2001 From: pufit Date: Thu, 15 Dec 2022 18:08:19 -0500 Subject: [PATCH 060/262] Enum settings, fix else branch. --- src/Core/Settings.h | 2 +- src/Core/SettingsEnums.cpp | 5 +++++ src/Core/SettingsEnums.h | 3 +++ src/Disks/IO/createReadBufferFromFileBase.cpp | 2 +- src/Storages/StorageFile.cpp | 7 +------ 5 files changed, 11 insertions(+), 8 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index a9f4280a0d1..f0cb1be73eb 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -591,7 +591,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. 
Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ - M(String, storage_file_read_method, "mmap", "Method of reading data from storage file, one of: read, pread, mmap, pread_threadpool.", 0) \ + M(StorageFileReadMethod, storage_file_read_method, "mmap", "Method of reading data from storage file, one of: read, pread, mmap", 0) \ M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, pread_threadpool.", 0) \ M(String, remote_filesystem_read_method, "threadpool", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \ M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 632587106a1..26fcff1d410 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -162,4 +162,9 @@ IMPLEMENT_SETTING_ENUM(MsgPackUUIDRepresentation , ErrorCodes::BAD_ARGUMENTS, IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS, {{"clickhouse", Dialect::clickhouse}, {"kusto", Dialect::kusto}}) + +IMPLEMENT_SETTING_ENUM(StorageFileReadMethod, ErrorCodes::BAD_ARGUMENTS, + {{"mmap", LocalFSReadMethod::mmap}, + {"pread", LocalFSReadMethod::pread}, + {"read", LocalFSReadMethod::read}}) } diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 97c4275c4d2..236bc7e9b10 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -4,6 +4,7 @@ #include #include #include +#include namespace DB @@ -191,4 +192,6 @@ enum class Dialect }; DECLARE_SETTING_ENUM(Dialect) + +DECLARE_SETTING_ENUM_WITH_RENAME(StorageFileReadMethod, LocalFSReadMethod) } diff --git a/src/Disks/IO/createReadBufferFromFileBase.cpp b/src/Disks/IO/createReadBufferFromFileBase.cpp index fca05787959..96947ddb242 100644 --- a/src/Disks/IO/createReadBufferFromFileBase.cpp +++ b/src/Disks/IO/createReadBufferFromFileBase.cpp @@ -56,7 +56,7 @@ std::unique_ptr createReadBufferFromFileBase( if (file_size) res = std::make_unique(*settings.mmap_cache, filename, 0, *file_size); else - res = std::make_unique(*settings.mmap_cache, filename, 0, *file_size); + res = std::make_unique(*settings.mmap_cache, filename, 0); ProfileEvents::increment(ProfileEvents::CreatedReadBufferMMap); return res; diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 1cee3e32e87..e4eb912bffe 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -193,12 +193,7 @@ std::unique_ptr selectReadBuffer( const struct stat & file_stat, ContextPtr context) { - auto read_method_string = context->getSettingsRef().storage_file_read_method.value; - LocalFSReadMethod read_method; - if (auto opt_method = magic_enum::enum_cast(read_method_string)) - read_method = *opt_method; - else - throwFromErrno("Unknown read method " + read_method_string, ErrorCodes::UNKNOWN_READ_METHOD); + auto read_method = context->getSettingsRef().storage_file_read_method; if (S_ISREG(file_stat.st_mode) && read_method == LocalFSReadMethod::mmap) { From 775c700c2428519fb1ce7f151d5b83229494ebfe Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Fri, 16 Dec 2022 08:20:01 +0000 Subject: [PATCH 061/262] Add second test for the case when there is no keytab --- .../configs/kerberos_with_keytab.xml | 6 +++++ ...rberos.xml => 
kerberos_without_keytab.xml} | 1 - .../kerberos_image_config.sh | 10 ++++--- tests/integration/test_kerberos_auth/test.py | 27 ++++++++++++++----- 4 files changed, 33 insertions(+), 11 deletions(-) create mode 100644 tests/integration/test_kerberos_auth/configs/kerberos_with_keytab.xml rename tests/integration/test_kerberos_auth/configs/{kerberos.xml => kerberos_without_keytab.xml} (64%) diff --git a/tests/integration/test_kerberos_auth/configs/kerberos_with_keytab.xml b/tests/integration/test_kerberos_auth/configs/kerberos_with_keytab.xml new file mode 100644 index 00000000000..7f4e17438a6 --- /dev/null +++ b/tests/integration/test_kerberos_auth/configs/kerberos_with_keytab.xml @@ -0,0 +1,6 @@ + + + TEST.CLICKHOUSE.TECH + /tmp/keytab/clickhouse1.keytab + + diff --git a/tests/integration/test_kerberos_auth/configs/kerberos.xml b/tests/integration/test_kerberos_auth/configs/kerberos_without_keytab.xml similarity index 64% rename from tests/integration/test_kerberos_auth/configs/kerberos.xml rename to tests/integration/test_kerberos_auth/configs/kerberos_without_keytab.xml index 5b6be45e78e..f01ceea1eb1 100644 --- a/tests/integration/test_kerberos_auth/configs/kerberos.xml +++ b/tests/integration/test_kerberos_auth/configs/kerberos_without_keytab.xml @@ -1,6 +1,5 @@ TEST.CLICKHOUSE.TECH - /tmp/keytab/clickhouse.keytab diff --git a/tests/integration/test_kerberos_auth/kerberos_image_config.sh b/tests/integration/test_kerberos_auth/kerberos_image_config.sh index c99ce1def9f..90bbc49f2bf 100644 --- a/tests/integration/test_kerberos_auth/kerberos_image_config.sh +++ b/tests/integration/test_kerberos_auth/kerberos_image_config.sh @@ -95,11 +95,15 @@ create_keytabs() { kadmin.local -q "addprinc -randkey kuser@${REALM}" kadmin.local -q "ktadd -norandkey -k /tmp/keytab/kuser.keytab kuser@${REALM}" - kadmin.local -q "addprinc -randkey HTTP/instance@${REALM}" - kadmin.local -q "ktadd -norandkey -k /tmp/keytab/clickhouse.keytab HTTP/instance@${REALM}" + kadmin.local -q "addprinc -randkey HTTP/instance1@${REALM}" + kadmin.local -q "ktadd -norandkey -k /tmp/keytab/clickhouse1.keytab HTTP/instance1@${REALM}" + + kadmin.local -q "addprinc -randkey HTTP/instance2@${REALM}" + kadmin.local -q "ktadd -norandkey -k /tmp/keytab/clickhouse2.keytab HTTP/instance2@${REALM}" chmod g+r /tmp/keytab/kuser.keytab - chmod g+r /tmp/keytab/clickhouse.keytab + chmod g+r /tmp/keytab/clickhouse1.keytab + chmod g+r /tmp/keytab/clickhouse2.keytab } main() { diff --git a/tests/integration/test_kerberos_auth/test.py b/tests/integration/test_kerberos_auth/test.py index f4c30a69c9f..cdc4bdf78e5 100644 --- a/tests/integration/test_kerberos_auth/test.py +++ b/tests/integration/test_kerberos_auth/test.py @@ -15,9 +15,16 @@ import subprocess import socket cluster = ClickHouseCluster(__file__) -instance = cluster.add_instance( - "instance", - main_configs=["configs/kerberos.xml"], +instance1 = cluster.add_instance( + "instance1", + main_configs=["configs/kerberos_with_keytab.xml"], + user_configs=["configs/users.xml"], + with_kerberos_kdc=True, + clickhouse_path_dir="clickhouse_path", +) +instance2 = cluster.add_instance( + "instance2", + main_configs=["configs/kerberos_without_keytab.xml"], user_configs=["configs/users.xml"], with_kerberos_kdc=True, clickhouse_path_dir="clickhouse_path", @@ -43,9 +50,7 @@ def kerberos_setup_teardown(): # Tests - -def test_kerberos_auth_with_keytab(kerberos_cluster): - logging.debug("kerberos test") +def make_auth(instance, user): instance.exec_in_container( ["bash", "-c", "kinit -V -k -t 
/tmp/keytab/kuser.keytab kuser"] ) @@ -53,10 +58,18 @@ def test_kerberos_auth_with_keytab(kerberos_cluster): instance.exec_in_container( ["bash", "-c", "echo 'select currentUser()' | curl -vvv --negotiate -u : http://{}:8123/ --data-binary @-".format(instance.hostname)] ) - == "kuser\n" + == user + "\n" ) + +def test_kerberos_auth_with_keytab(kerberos_cluster): + make_auth(instance1, "kuser") + +def test_kerberos_auth_without_keytab(kerberos_cluster): + make_auth(instance2, "default") + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") From dd8df3347b46c84ebd5a4b03e66c58e4dba7eeff Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 16 Dec 2022 11:11:19 +0100 Subject: [PATCH 062/262] Addressed style review comments and removed bool variables assignment from Clone of ASTs - 40907 Parameterized views as table functions --- src/Interpreters/ExpressionAnalyzer.cpp | 2 +- src/Interpreters/InterpreterSelectQuery.cpp | 2 +- src/Interpreters/TranslateQualifiedNamesVisitor.cpp | 4 ++-- src/Interpreters/TranslateQualifiedNamesVisitor.h | 2 +- src/Interpreters/TreeRewriter.cpp | 2 +- src/Interpreters/TreeRewriter.h | 1 - src/Parsers/ASTFunction.cpp | 2 -- src/Parsers/ASTSelectQuery.cpp | 2 -- src/Parsers/ASTSelectWithUnionQuery.cpp | 2 -- 9 files changed, 6 insertions(+), 13 deletions(-) diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 9a1bf92a28d..f94d7c6682d 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -1952,7 +1952,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult( auto & column_elem = before_where_sample.getByName(query.where()->getColumnName()); - /// If the filter column is a constant and not a query parameter, record it. + /// If the filter column is a constant, record it. if (column_elem.column) where_constant_filter_description = ConstantFilterDescription(*column_elem.column); } diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 00b66d4402a..104bfd3d037 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -510,7 +510,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( { query_info.is_parameterized_view = view->isParameterizedView(); /// We need to fetch the parameters set for SELECT parameterized view before the query is replaced. 
- /// ad after query is replaced, we use these parameters to substitute in the parameterized view query + /// and after query is replaced, we use these parameters to substitute in the parameterized view query if (query_info.is_parameterized_view) { parameter_values = analyzeFunctionParamValues(query_ptr); diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 20e5b034619..1596cb90a14 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -252,12 +252,12 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt { std::string column_name = column.name; std::string::size_type pos = 0u; - for (auto parameter : data.parameter_values) + for (const auto & parameter : data.parameter_values) { if ((pos = column_name.find(parameter.first)) != std::string::npos) { String parameter_name("_CAST(" + parameter.second + ", '" + column.type->getName() + "')"); - column_name.replace(pos,parameter.first.size(),parameter_name); + column_name.replace(pos, parameter.first.size(), parameter_name); break; } } diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.h b/src/Interpreters/TranslateQualifiedNamesVisitor.h index 777c6241d19..7fc95a19d1b 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -30,7 +30,7 @@ public: bool has_columns; NameToNameMap parameter_values; - Data(const NameSet & source_columns_, const TablesWithColumns & tables_, bool has_columns_ = true, NameToNameMap parameter_values_ = {}) + Data(const NameSet & source_columns_, const TablesWithColumns & tables_, bool has_columns_ = true, const NameToNameMap & parameter_values_ = {}) : source_columns(source_columns_) , tables(tables_) , has_columns(has_columns_) diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 611e53ddaf2..13593f9711b 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -362,7 +362,7 @@ using ReplacePositionalArgumentsVisitor = InDepthNodeVisitorchildren.push_back(res->window_definition); } - res->prefer_subquery_to_function_formatting = prefer_subquery_to_function_formatting; - return res; } diff --git a/src/Parsers/ASTSelectQuery.cpp b/src/Parsers/ASTSelectQuery.cpp index 5bbd6161052..d2e55efadb1 100644 --- a/src/Parsers/ASTSelectQuery.cpp +++ b/src/Parsers/ASTSelectQuery.cpp @@ -37,8 +37,6 @@ ASTPtr ASTSelectQuery::clone() const for (const auto & child : children) res->children.push_back(child->clone()); - res->has_query_parameters = has_query_parameters; - return res; } diff --git a/src/Parsers/ASTSelectWithUnionQuery.cpp b/src/Parsers/ASTSelectWithUnionQuery.cpp index b0030294727..9550752b1f3 100644 --- a/src/Parsers/ASTSelectWithUnionQuery.cpp +++ b/src/Parsers/ASTSelectWithUnionQuery.cpp @@ -23,8 +23,6 @@ ASTPtr ASTSelectWithUnionQuery::clone() const res->list_of_modes = list_of_modes; res->set_of_modes = set_of_modes; - res->has_query_parameters = has_query_parameters; - cloneOutputOptions(*res); return res; } From 22c2956a067fe7013f6c499d7fde943298be2b79 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 16 Dec 2022 12:19:11 +0100 Subject: [PATCH 063/262] Updated comment on fetching parameter values from query and fixed style comment- 40907 Parameterized views as table functions --- src/Interpreters/InterpreterSelectQuery.cpp | 3 +++ src/Interpreters/TreeRewriter.cpp | 2 +- 2 files changed, 4 
insertions(+), 1 deletion(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 104bfd3d037..9714596dce8 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -510,6 +510,9 @@ InterpreterSelectQuery::InterpreterSelectQuery( { query_info.is_parameterized_view = view->isParameterizedView(); /// We need to fetch the parameters set for SELECT parameterized view before the query is replaced. + /// replaceWithSubquery replaces the function child and adds the subquery in its place. + /// the parameters are children of function child, if function is replaced the parameters are also gone from tree + /// So we need to get the parameters before they are removed from the tree /// and after query is replaced, we use these parameters to substitute in the parameterized view query if (query_info.is_parameterized_view) { diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 13593f9711b..ab3189f39d2 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1409,7 +1409,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( if ((pos = column_name.find(parameter.first)) != std::string::npos) { String parameter_name("_CAST(" + parameter.second + ", '" + column.type->getName() + "')"); - column.name.replace(pos,parameter.first.size(),parameter_name); + column.name.replace(pos, parameter.first.size(), parameter_name); break; } } From 2d942af7b4a0fd644b727c27adb9bc90514c02b6 Mon Sep 17 00:00:00 2001 From: pufit Date: Fri, 16 Dec 2022 11:55:50 -0500 Subject: [PATCH 064/262] Fix codestyle, fix test. --- src/Core/Settings.h | 2 +- src/Storages/StorageFile.cpp | 1 - tests/queries/0_stateless/02473_infile_progress.py | 6 +++++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index e28d01d886e..4c14fea742b 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -592,7 +592,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. 
Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ - M(StorageFileReadMethod, storage_file_read_method, "mmap", "Method of reading data from storage file, one of: read, pread, mmap", 0) \ + M(StorageFileReadMethod, storage_file_read_method, "mmap", "Method of reading data from storage file, one of: read, pread, mmap.", 0) \ M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, pread_threadpool.", 0) \ M(String, remote_filesystem_read_method, "threadpool", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \ M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \ diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index e4eb912bffe..0140a6110f9 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -79,7 +79,6 @@ namespace ErrorCodes extern const int LOGICAL_ERROR; extern const int CANNOT_APPEND_TO_FILE; extern const int CANNOT_EXTRACT_TABLE_STRUCTURE; - extern const int UNKNOWN_READ_METHOD; } namespace diff --git a/tests/queries/0_stateless/02473_infile_progress.py b/tests/queries/0_stateless/02473_infile_progress.py index 6c1c32822d3..053175031d2 100755 --- a/tests/queries/0_stateless/02473_infile_progress.py +++ b/tests/queries/0_stateless/02473_infile_progress.py @@ -14,7 +14,11 @@ log = None # uncomment the line below for debugging # log=sys.stdout -with client(name="client>", log=log) as client1: +with client( + name="client>", + log=log, + command=os.environ.get("CLICKHOUSE_BINARY", "clickhouse") + " client --storage_file_read_method=pread" +) as client1: filename = os.environ["CLICKHOUSE_TMP"] + "/infile_progress.tsv" client1.expect(prompt) From 34e669b8b5505d9858f093c210438072b7486ad9 Mon Sep 17 00:00:00 2001 From: pufit Date: Fri, 16 Dec 2022 12:03:55 -0500 Subject: [PATCH 065/262] Fix test codestyle. 
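
For context: the previous commit pins the reader in this test by passing --storage_file_read_method=pread to the client. The same setting should also be controllable per session, using the values named in its description (read, pread, mmap); a hypothetical session along these lines, reusing the data file from the 02497 test above:

``` sql
-- Illustrative only: force the ordinary pread reader instead of mmap
-- for reads done through the file() table function / File engine.
SET storage_file_read_method = 'pread';
SELECT count() FROM file('test_02497_storage_file_reader.data', 'TSV', 's String');
```
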
--- tests/queries/0_stateless/02473_infile_progress.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/queries/0_stateless/02473_infile_progress.py b/tests/queries/0_stateless/02473_infile_progress.py index 053175031d2..28ad2c8413a 100755 --- a/tests/queries/0_stateless/02473_infile_progress.py +++ b/tests/queries/0_stateless/02473_infile_progress.py @@ -15,9 +15,10 @@ log = None # log=sys.stdout with client( - name="client>", - log=log, - command=os.environ.get("CLICKHOUSE_BINARY", "clickhouse") + " client --storage_file_read_method=pread" + name="client>", + log=log, + command=os.environ.get("CLICKHOUSE_BINARY", "clickhouse") + + " client --storage_file_read_method=pread" ) as client1: filename = os.environ["CLICKHOUSE_TMP"] + "/infile_progress.tsv" From 64c0c6ae994e1ecc40614500263a773acc8fdf02 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Mon, 19 Dec 2022 11:28:49 +0000 Subject: [PATCH 066/262] Refactor test_kerberos_auth test --- .../compose/docker_compose_kerberos_kdc.yml | 4 +- tests/integration/helpers/cluster.py | 57 +++++++++---------- tests/integration/test_kerberos_auth/test.py | 48 ++++++++++------ 3 files changed, 61 insertions(+), 48 deletions(-) diff --git a/docker/test/integration/runner/compose/docker_compose_kerberos_kdc.yml b/docker/test/integration/runner/compose/docker_compose_kerberos_kdc.yml index e06b1c71bb7..3ce9a6df1fb 100644 --- a/docker/test/integration/runner/compose/docker_compose_kerberos_kdc.yml +++ b/docker/test/integration/runner/compose/docker_compose_kerberos_kdc.yml @@ -5,7 +5,7 @@ services: image: clickhouse/kerberos-kdc:${DOCKER_KERBEROS_KDC_TAG:-latest} hostname: kerberoskdc volumes: - - ${KERBEROS_AUTH_DIR}/secrets:/tmp/keytab - - ${KERBEROS_AUTH_DIR}/../../kerberos_image_config.sh:/config.sh + - ${KERBEROS_KDC_DIR}/secrets:/tmp/keytab + - ${KERBEROS_KDC_DIR}/../kerberos_image_config.sh:/config.sh - /dev/urandom:/dev/random ports: [88, 749] diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index ba5d02095f3..5c93f64e57a 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -206,6 +206,7 @@ def check_kafka_is_available(kafka_id, kafka_port): p.communicate() return p.returncode == 0 + def check_kerberos_kdc_is_available(kerberos_kdc_id): p = subprocess.Popen( ( @@ -387,6 +388,7 @@ class ClickHouseCluster: self.base_mysql_cmd = [] self.base_kafka_cmd = [] self.base_kerberized_kafka_cmd = [] + self.base_kerberos_kdc_cmd = [] self.base_rabbitmq_cmd = [] self.base_nats_cmd = [] self.base_cassandra_cmd = [] @@ -474,9 +476,7 @@ class ClickHouseCluster: # available when with_kerberos_kdc == True self.kerberos_kdc_host = "kerberoskdc" - self.keberos_kdc_docker_id = self.get_instance_docker_id( - self.kerberos_kdc_host - ) + self.keberos_kdc_docker_id = self.get_instance_docker_id(self.kerberos_kdc_host) # available when with_mongo == True self.mongo_host = "mongo1" @@ -1082,12 +1082,9 @@ class ClickHouseCluster: ] return self.base_kerberized_kafka_cmd - - def setup_kerberos_cmd( - self, instance, env_variables, docker_compose_yml_dir - ): + def setup_kerberos_cmd(self, instance, env_variables, docker_compose_yml_dir): self.with_kerberos_kdc = True - env_variables["KERBEROS_AUTH_DIR"] = instance.path + "/" + env_variables["KERBEROS_KDC_DIR"] = self.instances_dir + "/" env_variables["KERBEROS_KDC_HOST"] = self.kerberos_kdc_host self.base_cmd.extend( [ @@ -1095,7 +1092,7 @@ class ClickHouseCluster: p.join(docker_compose_yml_dir, 
"docker_compose_kerberos_kdc.yml"), ] ) - self.base_kerberos_cmd = [ + self.base_kerberos_kdc_cmd = [ "docker-compose", "--env-file", instance.env_file, @@ -1104,8 +1101,7 @@ class ClickHouseCluster: "--file", p.join(docker_compose_yml_dir, "docker_compose_kerberos_kdc.yml"), ] - return self.base_kerberos_cmd - + return self.base_kerberos_kdc_cmd def setup_redis_cmd(self, instance, env_variables, docker_compose_yml_dir): self.with_redis = True @@ -1606,9 +1602,7 @@ class ClickHouseCluster: if with_kerberos_kdc and not self.with_kerberos_kdc: cmds.append( - self.setup_kerberos_cmd( - instance, env_variables, docker_compose_yml_dir - ) + self.setup_kerberos_cmd(instance, env_variables, docker_compose_yml_dir) ) if with_rabbitmq and not self.with_rabbitmq: @@ -2193,7 +2187,6 @@ class ClickHouseCluster: logging.debug("Waiting for Kerberos KDC to start up") time.sleep(1) - def wait_hdfs_to_start(self, timeout=300, check_marker=False): start = time.time() while time.time() - start < timeout: @@ -2543,12 +2536,10 @@ class ClickHouseCluster: self.kerberized_kafka_docker_id, self.kerberized_kafka_port, 100 ) - if self.with_kerberos_kdc and self.base_kerberos_cmd: + if self.with_kerberos_kdc and self.base_kerberos_kdc_cmd: logging.debug("Setup Kerberos KDC") run_and_check( - self.base_kerberos_cmd - + common_opts - + ["--renew-anon-volumes"] + self.base_kerberos_kdc_cmd + common_opts + ["--renew-anon-volumes"] ) self.up_called = True self.wait_kerberos_kdc_is_available(self.keberos_kdc_docker_id) @@ -3071,15 +3062,13 @@ class ClickHouseInstance: self.odbc_ini_path = "" if with_kerberized_kafka or with_kerberized_hdfs or with_kerberos_kdc: - self.keytab_path = ( - "- " - + os.path.dirname(self.docker_compose_path) - + "/secrets:/tmp/keytab" - ) + if with_kerberos_kdc: + base_secrets_dir = self.cluster.instances_dir + else: + base_secrets_dir = os.path.dirname(self.docker_compose_path) + self.keytab_path = "- " + base_secrets_dir + "/secrets:/tmp/keytab" self.krb5_conf = ( - "- " - + os.path.dirname(self.docker_compose_path) - + "/secrets/krb.conf:/etc/krb5.conf:ro" + "- " + base_secrets_dir + "/secrets/krb.conf:/etc/krb5.conf:ro" ) else: self.keytab_path = "" @@ -3988,9 +3977,19 @@ class ClickHouseInstance: if self.with_zookeeper: shutil.copy(self.zookeeper_config_path, conf_d_dir) - if self.with_kerberized_kafka or self.with_kerberized_hdfs or self.with_kerberos_kdc: + if ( + self.with_kerberized_kafka + or self.with_kerberized_hdfs + or self.with_kerberos_kdc + ): + if self.with_kerberos_kdc: + base_secrets_dir = self.cluster.instances_dir + else: + base_secrets_dir = self.path shutil.copytree( - self.kerberos_secrets_dir, p.abspath(p.join(self.path, "secrets")) + self.kerberos_secrets_dir, + p.abspath(p.join(base_secrets_dir, "secrets")), + dirs_exist_ok=True, ) if self.with_coredns: diff --git a/tests/integration/test_kerberos_auth/test.py b/tests/integration/test_kerberos_auth/test.py index cdc4bdf78e5..3ab9bc0ef12 100644 --- a/tests/integration/test_kerberos_auth/test.py +++ b/tests/integration/test_kerberos_auth/test.py @@ -43,31 +43,45 @@ def kerberos_cluster(): cluster.shutdown() -@pytest.fixture(autouse=True) -def kerberos_setup_teardown(): - yield # run test - - # Tests -def make_auth(instance, user): - instance.exec_in_container( - ["bash", "-c", "kinit -V -k -t /tmp/keytab/kuser.keytab kuser"] - ) - assert ( - instance.exec_in_container( - ["bash", "-c", "echo 'select currentUser()' | curl -vvv --negotiate -u : http://{}:8123/ --data-binary @-".format(instance.hostname)] - ) - == user + 
"\n" - ) +def exec_kinit(instance): + instance.exec_in_container( + ["bash", "-c", "kinit -k -t /tmp/keytab/kuser.keytab kuser"] + ) def test_kerberos_auth_with_keytab(kerberos_cluster): - make_auth(instance1, "kuser") + exec_kinit(instance1) + assert ( + instance1.exec_in_container( + [ + "bash", + "-c", + "echo 'select currentUser()' | curl -vvv --negotiate -u : http://{}:8123/ --data-binary @-".format( + instance1.hostname + ), + ] + ) + == "kuser\n" + ) + def test_kerberos_auth_without_keytab(kerberos_cluster): - make_auth(instance2, "default") + exec_kinit(instance2) + assert ( + "DB::Exception: : Authentication failed: password is incorrect or there is no user with such name." + in instance2.exec_in_container( + [ + "bash", + "-c", + "echo 'select currentUser()' | curl -vvv --negotiate -u : http://{}:8123/ --data-binary @-".format( + instance2.hostname + ), + ] + ) + ) if __name__ == "__main__": From c26c257c57b50cc9f48e9a0b318adbb24990d920 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Mon, 19 Dec 2022 11:39:30 +0000 Subject: [PATCH 067/262] Refactor test.py --- tests/integration/test_kerberos_auth/test.py | 36 +++++++------------- 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/tests/integration/test_kerberos_auth/test.py b/tests/integration/test_kerberos_auth/test.py index 3ab9bc0ef12..64df0278c3e 100644 --- a/tests/integration/test_kerberos_auth/test.py +++ b/tests/integration/test_kerberos_auth/test.py @@ -46,41 +46,29 @@ def kerberos_cluster(): # Tests -def exec_kinit(instance): +def make_auth(instance): instance.exec_in_container( ["bash", "-c", "kinit -k -t /tmp/keytab/kuser.keytab kuser"] ) + return instance.exec_in_container( + [ + "bash", + "-c", + "echo 'select currentUser()' | curl -vvv --negotiate -u : http://{}:8123/ --data-binary @-".format( + instance.hostname + ), + ] + ) def test_kerberos_auth_with_keytab(kerberos_cluster): - exec_kinit(instance1) - assert ( - instance1.exec_in_container( - [ - "bash", - "-c", - "echo 'select currentUser()' | curl -vvv --negotiate -u : http://{}:8123/ --data-binary @-".format( - instance1.hostname - ), - ] - ) - == "kuser\n" - ) + assert make_auth(instance1) == "kuser\n" def test_kerberos_auth_without_keytab(kerberos_cluster): - exec_kinit(instance2) assert ( "DB::Exception: : Authentication failed: password is incorrect or there is no user with such name." 
- in instance2.exec_in_container( - [ - "bash", - "-c", - "echo 'select currentUser()' | curl -vvv --negotiate -u : http://{}:8123/ --data-binary @-".format( - instance2.hostname - ), - ] - ) + in make_auth(instance2) ) From 8a692432de0e6c1712818f8b073fbe0297e281e8 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Mon, 19 Dec 2022 11:48:10 +0000 Subject: [PATCH 068/262] Remove unneeded headers from test.py --- .../test_kerberos_auth/clickhouse_path/EMPTY_DIR | 0 tests/integration/test_kerberos_auth/test.py | 15 --------------- 2 files changed, 15 deletions(-) delete mode 100644 tests/integration/test_kerberos_auth/clickhouse_path/EMPTY_DIR diff --git a/tests/integration/test_kerberos_auth/clickhouse_path/EMPTY_DIR b/tests/integration/test_kerberos_auth/clickhouse_path/EMPTY_DIR deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/tests/integration/test_kerberos_auth/test.py b/tests/integration/test_kerberos_auth/test.py index 64df0278c3e..df6233e0cbb 100644 --- a/tests/integration/test_kerberos_auth/test.py +++ b/tests/integration/test_kerberos_auth/test.py @@ -1,18 +1,5 @@ -import os.path as p -import random -import threading -import time import pytest -import logging - from helpers.cluster import ClickHouseCluster -from helpers.test_tools import TSV -from helpers.client import QueryRuntimeException - -import json -import subprocess - -import socket cluster = ClickHouseCluster(__file__) instance1 = cluster.add_instance( @@ -20,14 +7,12 @@ instance1 = cluster.add_instance( main_configs=["configs/kerberos_with_keytab.xml"], user_configs=["configs/users.xml"], with_kerberos_kdc=True, - clickhouse_path_dir="clickhouse_path", ) instance2 = cluster.add_instance( "instance2", main_configs=["configs/kerberos_without_keytab.xml"], user_configs=["configs/users.xml"], with_kerberos_kdc=True, - clickhouse_path_dir="clickhouse_path", ) From 5001cf9fa21121f6a90201b780b9676ca1fe5f95 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 19 Dec 2022 15:05:38 +0100 Subject: [PATCH 069/262] Updated to get column data type based not function , not the query parameter type , added test for the same - 40907 Parameterized views as table functions --- src/Interpreters/InterpreterSelectQuery.cpp | 5 +++- .../TranslateQualifiedNamesVisitor.cpp | 10 +++++-- .../TranslateQualifiedNamesVisitor.h | 4 ++- src/Interpreters/TreeRewriter.cpp | 20 ++++++++----- src/Interpreters/TreeRewriter.h | 3 +- src/Parsers/QueryParameterVisitor.cpp | 28 ++++++++++++++----- src/Parsers/QueryParameterVisitor.h | 2 ++ src/Storages/StorageSnapshot.cpp | 4 +-- src/Storages/StorageView.cpp | 2 ++ src/Storages/StorageView.h | 6 ++++ .../02428_parameterized_view.reference | 5 ++++ .../0_stateless/02428_parameterized_view.sql | 5 ++++ 12 files changed, 72 insertions(+), 22 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 9714596dce8..8381fee22b1 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -506,6 +506,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( /// Allow push down and other optimizations for VIEW: replace with subquery and rewrite it. 
ASTPtr view_table; NameToNameMap parameter_values; + NameToNameMap parameter_types; if (view) { query_info.is_parameterized_view = view->isParameterizedView(); @@ -518,6 +519,7 @@ InterpreterSelectQuery::InterpreterSelectQuery( { parameter_values = analyzeFunctionParamValues(query_ptr); view->setParameterValues(parameter_values); + parameter_types = view->getParameterValues(); } view->replaceWithSubquery(getSelectQuery(), view_table, metadata_snapshot, view->isParameterizedView()); if (query_info.is_parameterized_view) @@ -535,7 +537,8 @@ InterpreterSelectQuery::InterpreterSelectQuery( required_result_column_names, table_join, query_info.is_parameterized_view, - parameter_values); + parameter_values, + parameter_types); query_info.syntax_analyzer_result = syntax_analyzer_result; context->setDistributed(syntax_analyzer_result->is_remote_storage); diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 1596cb90a14..ff97eccab58 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -256,9 +256,13 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt { if ((pos = column_name.find(parameter.first)) != std::string::npos) { - String parameter_name("_CAST(" + parameter.second + ", '" + column.type->getName() + "')"); - column_name.replace(pos, parameter.first.size(), parameter_name); - break; + auto parameter_datatype_iterator = data.parameter_types.find(parameter.first); + if (parameter_datatype_iterator != data.parameter_types.end()) + { + String parameter_name("_CAST(" + parameter.second + ", '" + parameter_datatype_iterator->second + "')"); + column_name.replace(pos, parameter.first.size(), parameter_name); + break; + } } } addIdentifier(columns, table.table, column_name); diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.h b/src/Interpreters/TranslateQualifiedNamesVisitor.h index 7fc95a19d1b..6c804ad6c90 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -29,12 +29,14 @@ public: std::unordered_set join_using_columns; bool has_columns; NameToNameMap parameter_values; + NameToNameMap parameter_types; - Data(const NameSet & source_columns_, const TablesWithColumns & tables_, bool has_columns_ = true, const NameToNameMap & parameter_values_ = {}) + Data(const NameSet & source_columns_, const TablesWithColumns & tables_, bool has_columns_ = true, const NameToNameMap & parameter_values_ = {}, const NameToNameMap & parameter_types_ = {}) : source_columns(source_columns_) , tables(tables_) , has_columns(has_columns_) , parameter_values(parameter_values_) + , parameter_types(parameter_types_) {} bool hasColumn(const String & name) const { return source_columns.count(name); } diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index ab3189f39d2..3c7367b869e 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -362,10 +362,11 @@ using ReplacePositionalArgumentsVisitor = InDepthNodeVisitor table_join, bool is_parameterized_view, - const NameToNameMap parameter_values) const + const NameToNameMap parameter_values, + const NameToNameMap parameter_types) const { auto * select_query = query->as(); if (!select_query) @@ -1343,7 +1345,7 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( result.analyzed_join->setColumnsFromJoinedTable(std::move(columns_from_joined_table), source_columns_set, 
right_table.table.getQualifiedNamePrefix()); } - translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns, parameter_values); + translateQualifiedNames(query, *select_query, source_columns_set, tables_with_columns, parameter_values, parameter_types); /// Optimizes logical expressions. LogicalExpressionsOptimizer(select_query, settings.optimize_min_equality_disjunction_chain_length.value).perform(); @@ -1408,9 +1410,13 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( { if ((pos = column_name.find(parameter.first)) != std::string::npos) { - String parameter_name("_CAST(" + parameter.second + ", '" + column.type->getName() + "')"); - column.name.replace(pos, parameter.first.size(), parameter_name); - break; + auto parameter_datatype_iterator = parameter_types.find(parameter.first); + if (parameter_datatype_iterator != parameter_types.end()) + { + String parameter_name("_CAST(" + parameter.second + ", '" + parameter_datatype_iterator->second + "')"); + column.name.replace(pos, parameter.first.size(), parameter_name); + break; + } } } } diff --git a/src/Interpreters/TreeRewriter.h b/src/Interpreters/TreeRewriter.h index 1f400588c6e..b94043b8983 100644 --- a/src/Interpreters/TreeRewriter.h +++ b/src/Interpreters/TreeRewriter.h @@ -131,7 +131,8 @@ public: const Names & required_result_columns = {}, std::shared_ptr table_join = {}, bool is_parameterized_view = false, - const NameToNameMap parameter_values = {}) const; + const NameToNameMap parameter_values = {}, + const NameToNameMap parameter_types = {}) const; private: static void normalize(ASTPtr & query, Aliases & aliases, const NameSet & source_columns_set, bool ignore_alias, const Settings & settings, bool allow_self_aliases, ContextPtr context_, bool is_create_parameterized_view = false); diff --git a/src/Parsers/QueryParameterVisitor.cpp b/src/Parsers/QueryParameterVisitor.cpp index 14750845034..1282c12cce6 100644 --- a/src/Parsers/QueryParameterVisitor.cpp +++ b/src/Parsers/QueryParameterVisitor.cpp @@ -10,8 +10,8 @@ namespace DB class QueryParameterVisitor { public: - explicit QueryParameterVisitor(NameSet & parameters_name) - : query_parameters(parameters_name) + explicit QueryParameterVisitor(NameToNameMap & parameters) + : query_parameters(parameters) { } @@ -27,30 +27,44 @@ public: } private: - NameSet & query_parameters; + NameToNameMap & query_parameters; void visitQueryParameter(const ASTQueryParameter & query_parameter) { - query_parameters.insert(query_parameter.name); + query_parameters[query_parameter.name]= query_parameter.type; } }; NameSet analyzeReceiveQueryParams(const std::string & query) { - NameSet query_params; + NameToNameMap query_params; const char * query_begin = query.data(); const char * query_end = query.data() + query.size(); ParserQuery parser(query_end); ASTPtr extract_query_ast = parseQuery(parser, query_begin, query_end, "analyzeReceiveQueryParams", 0, 0); QueryParameterVisitor(query_params).visit(extract_query_ast); - return query_params; + + NameSet query_param_names; + for (const auto & query_param : query_params) + query_param_names.insert(query_param.first); + return query_param_names; } NameSet analyzeReceiveQueryParams(const ASTPtr & ast) { - NameSet query_params; + NameToNameMap query_params; + QueryParameterVisitor(query_params).visit(ast); + NameSet query_param_names; + for (const auto & query_param : query_params) + query_param_names.insert(query_param.first); + return query_param_names; +} + +NameToNameMap analyzeReceiveQueryParamsWithType(const ASTPtr & ast) 
+{ + NameToNameMap query_params; QueryParameterVisitor(query_params).visit(ast); return query_params; } diff --git a/src/Parsers/QueryParameterVisitor.h b/src/Parsers/QueryParameterVisitor.h index 6d9d49e1ed2..40b2fa6978f 100644 --- a/src/Parsers/QueryParameterVisitor.h +++ b/src/Parsers/QueryParameterVisitor.h @@ -13,4 +13,6 @@ NameSet analyzeReceiveQueryParams(const std::string & query); NameSet analyzeReceiveQueryParams(const ASTPtr & ast); +NameToNameMap analyzeReceiveQueryParamsWithType(const ASTPtr & ast); + } diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 8dfb7b288e3..b88e07d93b8 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -119,10 +119,10 @@ Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names,const const auto & columns = getMetadataForQuery()->getColumns(); for (const auto & name : column_names) { - std::string column_name = name; + const std::string & column_name = name; std::string substituted_column_name = name; std::string::size_type pos = 0u; - for (auto parameter : parameter_values) + for (const auto & parameter : parameter_values) { if ((pos = substituted_column_name.find("_CAST(" + parameter.second)) != std::string::npos) { diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index d9e79607ce4..f9ab6b0bc40 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -24,6 +24,7 @@ #include #include +#include namespace DB { @@ -102,6 +103,7 @@ StorageView::StorageView( description.inner_query = query.select->ptr(); is_parameterized_view = query.isParameterizedView(); + parameter_types = analyzeReceiveQueryParamsWithType(description.inner_query); storage_metadata.setSelectQuery(description); setInMemoryMetadata(storage_metadata); } diff --git a/src/Storages/StorageView.h b/src/Storages/StorageView.h index e913e98901f..756106a95d1 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -50,9 +50,15 @@ public: parameter_values = parameter_values_; } + NameToNameMap getParameterValues() const + { + return parameter_types; + } + protected: bool is_parameterized_view; NameToNameMap parameter_values; + NameToNameMap parameter_types; }; } diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference index 9ec1cb0efd2..38355617601 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.reference +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -20,3 +20,8 @@ FROM 30 40 60 +1 +2 +3 +3 +5 diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index 9d55dba970a..d2118cd1279 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -4,6 +4,7 @@ DROP VIEW IF EXISTS pv3; DROP VIEW IF EXISTS pv4; DROP VIEW IF EXISTS pv5; DROP VIEW IF EXISTS pv6; +DROP VIEW IF EXISTS pv7; DROP VIEW IF EXISTS v1; DROP TABLE IF EXISTS Catalog; DROP TABLE IF EXISTS system.pv1; @@ -68,11 +69,15 @@ SELECT Price FROM pv5(price=30, quantity=8,limit=1); CREATE VIEW pv6 AS SELECT Price+{price:UInt64} FROM Catalog GROUP BY Price+{price:UInt64} ORDER BY Price+{price:UInt64}; SELECT * FROM pv6(price=10); +CREATE VIEW pv7 AS SELECT Price/{price:UInt64} FROM Catalog ORDER BY Price; +SELECT * FROM pv7(price=10); + DROP VIEW pv1; DROP VIEW pv2; DROP VIEW pv3; DROP VIEW pv5; DROP VIEW pv6; +DROP VIEW pv7; DROP VIEW v1; 
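To make the `pv7` case above concrete: with this commit the placeholder is substituted using the type declared in the parameter itself (`{price:UInt64}`) rather than the type of the underlying column, so the call is expanded roughly as sketched below. This is an illustrative rewrite, not output produced by the patch:

``` sql
-- SELECT * FROM pv7(price=10) is rewritten approximately into:
SELECT Price / _CAST(10, 'UInt64')
FROM Catalog
ORDER BY Price ASC
```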
DROP TABLE Catalog; DROP TABLE system.pv1; From 616efdd3364dd45b00d7ea807748f4b44cc1604e Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Mon, 19 Dec 2022 15:28:15 +0000 Subject: [PATCH 070/262] Move keytab init into GSSAcceptorContext::initHandles() --- src/Access/ExternalAuthenticators.cpp | 9 +-------- src/Access/GSSAcceptor.cpp | 10 ++++++++++ src/Access/GSSAcceptor.h | 1 + 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index 6dcbd7e10b1..8709b3af2d5 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -9,7 +9,6 @@ #include #include -#include namespace DB { @@ -223,13 +222,7 @@ void parseKerberosParams(GSSAcceptorContext::Params & params, const Poco::Util:: params.realm = config.getString("kerberos.realm", ""); params.principal = config.getString("kerberos.principal", ""); - -#if USE_KRB5 - String keytab = config.getString("kerberos.keytab", ""); - if (!keytab.empty() && std::filesystem::exists(keytab)) - if (krb5_gss_register_acceptor_identity(keytab.c_str())) - throw Exception("Invalid keytab file is specified", ErrorCodes::BAD_ARGUMENTS); -#endif + params.keytab = config.getString("kerberos.keytab", ""); } } diff --git a/src/Access/GSSAcceptor.cpp b/src/Access/GSSAcceptor.cpp index 02fa3f8e1d3..b107293ce39 100644 --- a/src/Access/GSSAcceptor.cpp +++ b/src/Access/GSSAcceptor.cpp @@ -6,6 +6,7 @@ #include #include +#include namespace DB @@ -261,6 +262,15 @@ void GSSAcceptorContext::initHandles() resetHandles(); + if (!params.keytab.empty()) + { + if (!std::filesystem::exists(params.keytab)) + throw Exception("Keytab file not found", ErrorCodes::BAD_ARGUMENTS); + + if (krb5_gss_register_acceptor_identity(params.keytab.c_str())) + throw Exception("Invalid keytab file is specified", ErrorCodes::BAD_ARGUMENTS); + } + if (!params.principal.empty()) { if (!params.realm.empty()) diff --git a/src/Access/GSSAcceptor.h b/src/Access/GSSAcceptor.h index c2930201a93..ba448ae474e 100644 --- a/src/Access/GSSAcceptor.h +++ b/src/Access/GSSAcceptor.h @@ -29,6 +29,7 @@ public: String mechanism = "1.2.840.113554.1.2.2"; // OID: krb5 String principal; String realm; + String keytab; }; explicit GSSAcceptorContext(const Params & params_); From cdc3912743bea2022aa9e1b6d482d1685af38c6f Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 20 Dec 2022 22:44:27 +0100 Subject: [PATCH 071/262] fix incorrect usages of getPartName() --- .../BackupCoordinationReplicatedTables.cpp | 4 +-- src/Storages/MergeTree/ActiveDataPartSet.cpp | 4 +-- src/Storages/MergeTree/DropPartsRanges.cpp | 2 +- .../MergeTree/FutureMergedMutatedPart.cpp | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.cpp | 2 +- src/Storages/MergeTree/MergeTreeData.cpp | 26 +++++++++---------- .../MergeTree/MergeTreeDataWriter.cpp | 2 +- .../MergeTree/MergeTreeDeduplicationLog.cpp | 4 +-- src/Storages/MergeTree/MergeTreePartInfo.cpp | 20 +++++++++++++- src/Storages/MergeTree/MergeTreePartInfo.h | 4 ++- src/Storages/MergeTree/MergeTreeSink.cpp | 2 +- .../PartMovesBetweenShardsOrchestrator.cpp | 2 +- .../MergeTree/ReplicatedMergeTreeQueue.cpp | 2 +- src/Storages/StorageReplicatedMergeTree.cpp | 15 ++++++----- 14 files changed, 57 insertions(+), 34 deletions(-) diff --git a/src/Backups/BackupCoordinationReplicatedTables.cpp b/src/Backups/BackupCoordinationReplicatedTables.cpp index 910719b5365..27977445641 100644 --- a/src/Backups/BackupCoordinationReplicatedTables.cpp +++ 
b/src/Backups/BackupCoordinationReplicatedTables.cpp @@ -78,9 +78,9 @@ public: throw Exception( ErrorCodes::CANNOT_BACKUP_TABLE, "Intersected parts detected: {} on replica {} and {} on replica {}", - part.info.getPartName(), + part.info.getPartNameForLogs(), *part.replica_name, - new_part_info.getPartName(), + new_part_info.getPartNameForLogs(), *replica_name); } ++last_it; diff --git a/src/Storages/MergeTree/ActiveDataPartSet.cpp b/src/Storages/MergeTree/ActiveDataPartSet.cpp index 13976f6ec45..b438f18c1b8 100644 --- a/src/Storages/MergeTree/ActiveDataPartSet.cpp +++ b/src/Storages/MergeTree/ActiveDataPartSet.cpp @@ -47,7 +47,7 @@ bool ActiveDataPartSet::addImpl(const MergeTreePartInfo & part_info, const Strin if (!part_info.contains(it->first)) { if (!part_info.isDisjoint(it->first)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects previous part {}. It is a bug or a result of manual intervention in the ZooKeeper data.", name, it->first.getPartName()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects previous part {}. It is a bug or a result of manual intervention in the ZooKeeper data.", name, it->first.getPartNameForLogs()); ++it; break; } @@ -70,7 +70,7 @@ bool ActiveDataPartSet::addImpl(const MergeTreePartInfo & part_info, const Strin } if (it != part_info_to_name.end() && !part_info.isDisjoint(it->first)) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects part {}. It is a bug or a result of manual intervention in the ZooKeeper data.", name, it->first.getPartName()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Part {} intersects part {}. It is a bug or a result of manual intervention in the ZooKeeper data.", name, it->first.getPartNameForLogs()); part_info_to_name.emplace(part_info, name); return true; diff --git a/src/Storages/MergeTree/DropPartsRanges.cpp b/src/Storages/MergeTree/DropPartsRanges.cpp index d467a7cac3d..bc4f20a3471 100644 --- a/src/Storages/MergeTree/DropPartsRanges.cpp +++ b/src/Storages/MergeTree/DropPartsRanges.cpp @@ -19,7 +19,7 @@ bool DropPartsRanges::isAffectedByDropRange(const std::string & new_part_name, s { if (!drop_range.isDisjoint(entry_info)) { - postpone_reason = fmt::format("Has DROP RANGE affecting entry {} producing part {}. Will postpone it's execution.", drop_range.getPartName(), new_part_name); + postpone_reason = fmt::format("Has DROP RANGE affecting entry {} producing part {}. 
Will postpone it's execution.", drop_range.getPartNameForLogs(), new_part_name); return true; } } diff --git a/src/Storages/MergeTree/FutureMergedMutatedPart.cpp b/src/Storages/MergeTree/FutureMergedMutatedPart.cpp index 019b24f6916..ffd444b7135 100644 --- a/src/Storages/MergeTree/FutureMergedMutatedPart.cpp +++ b/src/Storages/MergeTree/FutureMergedMutatedPart.cpp @@ -81,7 +81,7 @@ void FutureMergedMutatedPart::assign(MergeTreeData::DataPartsVector parts_, Merg name = part_info.getPartNameV0(min_date, max_date); } else - name = part_info.getPartName(); + name = part_info.getPartNameV1(); } void FutureMergedMutatedPart::updatePath(const MergeTreeData & storage, const IReservation * reservation) diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 02a7a2ae641..3ed5dc4a710 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -375,7 +375,7 @@ String IMergeTreeDataPart::getNewName(const MergeTreePartInfo & new_part_info) c return new_part_info.getPartNameV0(min_date, max_date); } else - return new_part_info.getPartName(); + return new_part_info.getPartNameV1(); } std::optional IMergeTreeDataPart::getColumnPosition(const String & column_name) const diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index b92fada821c..214f0fc2e77 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -2034,7 +2034,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t MergeTreePartInfo range_info = part->info; range_info.level = static_cast(range_info.max_block - range_info.min_block); range_info.mutation = 0; - independent_ranges_set.addImpl(range_info, range_info.getPartName()); + independent_ranges_set.addImpl(range_info, range_info.getPartNameV1()); } auto independent_ranges_infos = independent_ranges_set.getPartInfos(); @@ -2050,7 +2050,7 @@ void MergeTreeData::clearPartsFromFilesystemImpl(const DataPartsVector & parts_t parts_in_range.push_back(part); sum_of_ranges += parts_in_range.size(); - LOG_TRACE(log, "Scheduling removal of {} parts in blocks range {}", parts_in_range.size(), range.getPartName()); + LOG_TRACE(log, "Scheduling removal of {} parts in blocks range {}", parts_in_range.size(), range.getPartNameForLogs()); pool.scheduleOrThrowOnError([&part_names_mutex, part_names_succeed, thread_group = CurrentThread::getGroup(), batch = std::move(parts_in_range)] { @@ -3335,7 +3335,7 @@ MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromW DataPartsVector parts_to_remove; if (drop_range.min_block > drop_range.max_block) - throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid drop range: {}", drop_range.getPartName()); + throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid drop range: {}", drop_range.getPartNameForLogs()); auto partition_range = getVisibleDataPartsVectorInPartition(txn, drop_range.partition_id, &lock); @@ -3367,7 +3367,7 @@ MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromW bool is_covered_by_min_max_block = part->info.min_block <= drop_range.min_block && part->info.max_block >= drop_range.max_block && part->info.getMutationVersion() >= drop_range.getMutationVersion(); if (is_covered_by_min_max_block) { - LOG_INFO(log, "Skipping drop range for part {} because covering part {} already exists", drop_range.getPartName(), part->name); + LOG_INFO(log, "Skipping drop range for part {} because 
covering part {} already exists", drop_range.getPartNameForLogs(), part->name); return {}; } } @@ -3378,7 +3378,7 @@ MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromW { /// Intersect left border throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected merged part {} intersecting drop range {}", - part->name, drop_range.getPartName()); + part->name, drop_range.getPartNameForLogs()); } continue; @@ -3392,7 +3392,7 @@ MergeTreeData::PartsToRemoveFromZooKeeper MergeTreeData::removePartsInRangeFromW { /// Intersect right border throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected merged part {} intersecting drop range {}", - part->name, drop_range.getPartName()); + part->name, drop_range.getPartNameForLogs()); } parts_to_remove.emplace_back(part); @@ -4150,8 +4150,8 @@ void MergeTreeData::movePartitionToDisk(const ASTPtr & partition, const String & { auto part_info = MergeTreePartInfo::fromPartName(partition_id, format_version); parts.push_back(getActiveContainingPart(part_info)); - if (!parts.back() || parts.back()->name != part_info.getPartName()) - throw Exception("Part " + partition_id + " is not exists or not active", ErrorCodes::NO_SUCH_DATA_PART); + if (!parts.back() || parts.back()->name != part_info.getPartNameAndCheckFormat(format_version)) + throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Part {} is not exists or not active", partition_id); } else parts = getVisibleDataPartsVectorInPartition(local_context, partition_id); @@ -4192,18 +4192,18 @@ void MergeTreeData::movePartitionToVolume(const ASTPtr & partition, const String { auto part_info = MergeTreePartInfo::fromPartName(partition_id, format_version); parts.emplace_back(getActiveContainingPart(part_info)); - if (!parts.back() || parts.back()->name != part_info.getPartName()) - throw Exception("Part " + partition_id + " is not exists or not active", ErrorCodes::NO_SUCH_DATA_PART); + if (!parts.back() || parts.back()->name != part_info.getPartNameAndCheckFormat(format_version)) + throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Part {} is not exists or not active", partition_id); } else parts = getVisibleDataPartsVectorInPartition(local_context, partition_id); auto volume = getStoragePolicy()->getVolumeByName(name); if (!volume) - throw Exception("Volume " + name + " does not exists on policy " + getStoragePolicy()->getName(), ErrorCodes::UNKNOWN_DISK); + throw Exception(ErrorCodes::UNKNOWN_DISK, "Volume {} does not exists on policy {}", name, getStoragePolicy()->getName()); if (parts.empty()) - throw Exception("Nothing to move (check that the partition exists).", ErrorCodes::NO_SUCH_DATA_PART); + throw Exception(ErrorCodes::NO_SUCH_DATA_PART, "Nothing to move (check that the partition exists)."); std::erase_if(parts, [&](auto part_ptr) { @@ -4570,7 +4570,7 @@ void MergeTreeData::restorePartsFromBackup(RestorerFromBackup & restorer, const void MergeTreeData::restorePartFromBackup(std::shared_ptr restored_parts_holder, const MergeTreePartInfo & part_info, const String & part_path_in_backup) const { - String part_name = part_info.getPartName(); + String part_name = part_info.getPartNameAndCheckFormat(format_version); auto backup = restored_parts_holder->getBackup(); UInt64 total_size_of_part = 0; diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index c50c01ea356..7d664e69703 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -362,7 +362,7 @@ MergeTreeDataWriter::TemporaryPart 
MergeTreeDataWriter::writeTempPart( part_name = new_part_info.getPartNameV0(min_date, max_date); } else - part_name = new_part_info.getPartName(); + part_name = new_part_info.getPartNameV1(); String part_dir = TMP_PREFIX + part_name; temp_part.temporary_directory_lock = data.getTemporaryPartDirectoryHolder(part_dir); diff --git a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp index d0f4d8b3604..b843ce6a078 100644 --- a/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp +++ b/src/Storages/MergeTree/MergeTreeDeduplicationLog.cpp @@ -232,7 +232,7 @@ std::pair MergeTreeDeduplicationLog::addPart(const std: /// Create new record MergeTreeDeduplicationLogRecord record; record.operation = MergeTreeDeduplicationOp::ADD; - record.part_name = part_info.getPartName(); + record.part_name = part_info.getPartNameAndCheckFormat(format_version); record.block_id = block_id; /// Write it to disk writeRecord(record, *current_writer); @@ -269,7 +269,7 @@ void MergeTreeDeduplicationLog::dropPart(const MergeTreePartInfo & drop_part_inf /// Create drop record MergeTreeDeduplicationLogRecord record; record.operation = MergeTreeDeduplicationOp::DROP; - record.part_name = part_info.getPartName(); + record.part_name = part_info.getPartNameAndCheckFormat(format_version); record.block_id = itr->key; /// Write it to disk writeRecord(record, *current_writer); diff --git a/src/Storages/MergeTree/MergeTreePartInfo.cpp b/src/Storages/MergeTree/MergeTreePartInfo.cpp index 8c518e4d17f..038bf26ca91 100644 --- a/src/Storages/MergeTree/MergeTreePartInfo.cpp +++ b/src/Storages/MergeTree/MergeTreePartInfo.cpp @@ -167,7 +167,25 @@ bool MergeTreePartInfo::contains(const String & outer_part_name, const String & } -String MergeTreePartInfo::getPartName() const +String MergeTreePartInfo::getPartNameAndCheckFormat(MergeTreeDataFormatVersion format_version) const +{ + if (format_version == MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) + getPartNameV1(); + + /// We cannot just call getPartNameV0 because it requires extra arguments, but at least we can warn about it. + chassert(false); /// Catch it in CI. Feel free to remove this line. + throw Exception(ErrorCodes::BAD_DATA_PART_NAME, "Trying to get part name in new format for old format version." 
+ "Either some new feature is incompatible with deprecated *MergeTree definition syntax or it's a bug."); +} + + +String MergeTreePartInfo::getPartNameForLogs() const +{ + /// We don't care about format version here + return getPartNameV1(); +} + +String MergeTreePartInfo::getPartNameV1() const { WriteBufferFromOwnString wb; diff --git a/src/Storages/MergeTree/MergeTreePartInfo.h b/src/Storages/MergeTree/MergeTreePartInfo.h index 60c7e4e8822..cad851fb882 100644 --- a/src/Storages/MergeTree/MergeTreePartInfo.h +++ b/src/Storages/MergeTree/MergeTreePartInfo.h @@ -103,7 +103,9 @@ struct MergeTreePartInfo return level == MergeTreePartInfo::MAX_LEVEL || level == another_max_level; } - String getPartName() const; + String getPartNameAndCheckFormat(MergeTreeDataFormatVersion format_version) const; + String getPartNameForLogs() const; + String getPartNameV1() const; String getPartNameV0(DayNum left_date, DayNum right_date) const; UInt64 getBlocksCount() const { diff --git a/src/Storages/MergeTree/MergeTreeSink.cpp b/src/Storages/MergeTree/MergeTreeSink.cpp index 0de71e94ea8..99f6b1855e4 100644 --- a/src/Storages/MergeTree/MergeTreeSink.cpp +++ b/src/Storages/MergeTree/MergeTreeSink.cpp @@ -156,7 +156,7 @@ void MergeTreeSink::finishDelayedChunk() if (!res.second) { ProfileEvents::increment(ProfileEvents::DuplicatedInsertedBlocks); - LOG_INFO(storage.log, "Block with ID {} already exists as part {}; ignoring it", block_id, res.first.getPartName()); + LOG_INFO(storage.log, "Block with ID {} already exists as part {}; ignoring it", block_id, res.first.getPartNameForLogs()); continue; } } diff --git a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp index d5f35ea1b3c..560d9f17a07 100644 --- a/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp +++ b/src/Storages/MergeTree/PartMovesBetweenShardsOrchestrator.cpp @@ -473,7 +473,7 @@ PartMovesBetweenShardsOrchestrator::Entry PartMovesBetweenShardsOrchestrator::st log_entry.log_entry_id = attach_log_entry_barrier_path; log_entry.part_checksum = part->checksums.getTotalChecksumHex(); log_entry.create_time = std::time(nullptr); - log_entry.new_part_name = part_info.getPartName(); + log_entry.new_part_name = part_info.getPartNameAndCheckFormat(storage.format_version); ops.emplace_back(zkutil::makeCreateRequest(attach_log_entry_barrier_path, log_entry.toString(), -1)); ops.emplace_back(zkutil::makeSetRequest(entry.to_shard + "/log", "", -1)); diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp index 9ff022d5d57..d6d74228d68 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeQueue.cpp @@ -1504,7 +1504,7 @@ bool ReplicatedMergeTreeQueue::shouldExecuteLogEntry( entry.znode_name, entry.typeToString(), entry.new_part_name, - info.getPartName()); + info.getPartNameForLogs()); LOG_TRACE(log, fmt::runtime(out_postpone_reason)); return false; } diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index 8c422b52a3c..79a8253a9de 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -1484,8 +1484,11 @@ String StorageReplicatedMergeTree::getChecksumsForZooKeeper(const MergeTreeDataP MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::attachPartHelperFoundValidPart(const LogEntry& entry) const { + if (format_version != 
MERGE_TREE_DATA_MIN_FORMAT_VERSION_WITH_CUSTOM_PARTITIONING) + return {}; + const MergeTreePartInfo actual_part_info = MergeTreePartInfo::fromPartName(entry.new_part_name, format_version); - const String part_new_name = actual_part_info.getPartName(); + const String part_new_name = actual_part_info.getPartNameV1(); for (const DiskPtr & disk : getStoragePolicy()->getDisks()) { @@ -1496,7 +1499,7 @@ MergeTreeData::MutableDataPartPtr StorageReplicatedMergeTree::attachPartHelperFo if (!part_info || part_info->partition_id != actual_part_info.partition_id) continue; - const String part_old_name = part_info->getPartName(); + const String part_old_name = part_info->getPartNameV1(); const VolumePtr volume = std::make_shared("volume_" + part_old_name, disk); @@ -3849,7 +3852,7 @@ void StorageReplicatedMergeTree::cleanLastPartNode(const String & partition_id) bool StorageReplicatedMergeTree::partIsInsertingWithParallelQuorum(const MergeTreePartInfo & part_info) const { auto zookeeper = getZooKeeper(); - return zookeeper->exists(fs::path(zookeeper_path) / "quorum" / "parallel" / part_info.getPartName()); + return zookeeper->exists(fs::path(zookeeper_path) / "quorum" / "parallel" / part_info.getPartNameAndCheckFormat(format_version)); } @@ -3871,7 +3874,7 @@ bool StorageReplicatedMergeTree::partIsLastQuorumPart(const MergeTreePartInfo & if (partition_it == parts_with_quorum.added_parts.end()) return false; - return partition_it->second == part_info.getPartName(); + return partition_it->second == part_info.getPartNameAndCheckFormat(format_version); } @@ -5048,7 +5051,7 @@ String getPartNamePossiblyFake(MergeTreeDataFormatVersion format_version, const return part_info.getPartNameV0(left_date, right_date); } - return part_info.getPartName(); + return part_info.getPartNameV1(); } bool StorageReplicatedMergeTree::getFakePartCoveringAllPartsInPartition(const String & partition_id, MergeTreePartInfo & part_info, @@ -7540,7 +7543,7 @@ void StorageReplicatedMergeTree::enqueuePartForCheck(const String & part_name, t if (queue.hasDropRange(MergeTreePartInfo::fromPartName(part_name, format_version), &covering_drop_range)) { LOG_WARNING(log, "Do not enqueue part {} for check because it's covered by DROP_RANGE {} and going to be removed", - part_name, covering_drop_range.getPartName()); + part_name, covering_drop_range.getPartNameForLogs()); return; } part_check_thread.enqueuePart(part_name, delay_to_check_seconds); From b9d0d25eb21d451aa9c4d37d113163c4bd43a8b1 Mon Sep 17 00:00:00 2001 From: Nikolay Degterinsky Date: Wed, 21 Dec 2022 02:11:25 +0000 Subject: [PATCH 072/262] Better StorageReplicatedMergeTree::getStatus() --- src/Storages/StorageReplicatedMergeTree.cpp | 28 +++++++++++++++------ 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index b6e7864ac80..151f1002ea6 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -5665,7 +5665,24 @@ void StorageReplicatedMergeTree::getStatus(Status & res, bool with_zk_fields) { try { - auto log_entries = zookeeper->getChildren(fs::path(zookeeper_path) / "log"); + std::vector paths; + paths.push_back(fs::path(zookeeper_path) / "log"); + paths.push_back(fs::path(zookeeper_path) / "replicas"); + + auto children_result = zookeeper->getChildren(paths); + const auto & log_entries = children_result[0].names; + const auto & all_replicas = children_result[1].names; + + paths.clear(); + paths.push_back(fs::path(replica_path) 
/ "log_pointer"); + for (const String & replica : all_replicas) + paths.push_back(fs::path(zookeeper_path) / "replicas" / replica / "is_active"); + + auto get_result = zookeeper->tryGet(paths); + const auto & log_pointer_str = get_result[0].data; + + if (get_result[0].error == Coordination::Error::ZNONODE) + throw zkutil::KeeperException(get_result[0].error); if (!log_entries.empty()) { @@ -5673,17 +5690,14 @@ void StorageReplicatedMergeTree::getStatus(Status & res, bool with_zk_fields) res.log_max_index = parse(last_log_entry.substr(strlen("log-"))); } - String log_pointer_str = zookeeper->get(fs::path(replica_path) / "log_pointer"); res.log_pointer = log_pointer_str.empty() ? 0 : parse(log_pointer_str); - - auto all_replicas = zookeeper->getChildren(fs::path(zookeeper_path) / "replicas"); res.total_replicas = all_replicas.size(); - for (const String & replica : all_replicas) + for (size_t i = 0, size = all_replicas.size(); i < size; ++i) { - bool is_replica_active = zookeeper->exists(fs::path(zookeeper_path) / "replicas" / replica / "is_active"); + bool is_replica_active = get_result[i + 1].error != Coordination::Error::ZNONODE; res.active_replicas += static_cast(is_replica_active); - res.replica_is_active.emplace(replica, is_replica_active); + res.replica_is_active.emplace(all_replicas[i], is_replica_active); } } catch (const Coordination::Exception &) From 2d9f3c242bb391aa1e062acaa900a2ff29bc9f0a Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Thu, 22 Dec 2022 08:07:16 +0000 Subject: [PATCH 073/262] Add test_bad_path_to_keytab test --- src/Access/GSSAcceptor.cpp | 2 +- .../configs/kerberos_bad_path_to_keytab.xml | 6 ++++++ .../test_kerberos_auth/kerberos_image_config.sh | 4 ++++ tests/integration/test_kerberos_auth/test.py | 15 +++++++++++++++ 4 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_kerberos_auth/configs/kerberos_bad_path_to_keytab.xml diff --git a/src/Access/GSSAcceptor.cpp b/src/Access/GSSAcceptor.cpp index b107293ce39..998e5219bbb 100644 --- a/src/Access/GSSAcceptor.cpp +++ b/src/Access/GSSAcceptor.cpp @@ -268,7 +268,7 @@ void GSSAcceptorContext::initHandles() throw Exception("Keytab file not found", ErrorCodes::BAD_ARGUMENTS); if (krb5_gss_register_acceptor_identity(params.keytab.c_str())) - throw Exception("Invalid keytab file is specified", ErrorCodes::BAD_ARGUMENTS); + throw Exception("Failed to register keytab file", ErrorCodes::BAD_ARGUMENTS); } if (!params.principal.empty()) diff --git a/tests/integration/test_kerberos_auth/configs/kerberos_bad_path_to_keytab.xml b/tests/integration/test_kerberos_auth/configs/kerberos_bad_path_to_keytab.xml new file mode 100644 index 00000000000..5b6be45e78e --- /dev/null +++ b/tests/integration/test_kerberos_auth/configs/kerberos_bad_path_to_keytab.xml @@ -0,0 +1,6 @@ + + + TEST.CLICKHOUSE.TECH + /tmp/keytab/clickhouse.keytab + + diff --git a/tests/integration/test_kerberos_auth/kerberos_image_config.sh b/tests/integration/test_kerberos_auth/kerberos_image_config.sh index 90bbc49f2bf..9ee5f3490fe 100644 --- a/tests/integration/test_kerberos_auth/kerberos_image_config.sh +++ b/tests/integration/test_kerberos_auth/kerberos_image_config.sh @@ -101,9 +101,13 @@ create_keytabs() { kadmin.local -q "addprinc -randkey HTTP/instance2@${REALM}" kadmin.local -q "ktadd -norandkey -k /tmp/keytab/clickhouse2.keytab HTTP/instance2@${REALM}" + kadmin.local -q "addprinc -randkey HTTP/instance3@${REALM}" + kadmin.local -q "ktadd -norandkey -k /tmp/keytab/clickhouse3.keytab HTTP/instance3@${REALM}" + chmod 
g+r /tmp/keytab/kuser.keytab chmod g+r /tmp/keytab/clickhouse1.keytab chmod g+r /tmp/keytab/clickhouse2.keytab + chmod g+r /tmp/keytab/clickhouse3.keytab } main() { diff --git a/tests/integration/test_kerberos_auth/test.py b/tests/integration/test_kerberos_auth/test.py index df6233e0cbb..89735d1c8fd 100644 --- a/tests/integration/test_kerberos_auth/test.py +++ b/tests/integration/test_kerberos_auth/test.py @@ -14,6 +14,12 @@ instance2 = cluster.add_instance( user_configs=["configs/users.xml"], with_kerberos_kdc=True, ) +instance3 = cluster.add_instance( + "instance3", + main_configs=["configs/kerberos_bad_path_to_keytab.xml"], + user_configs=["configs/users.xml"], + with_kerberos_kdc=True, +) # Fixtures @@ -57,6 +63,15 @@ def test_kerberos_auth_without_keytab(kerberos_cluster): ) +def test_bad_path_to_keytab(kerberos_cluster): + + assert ( + "DB::Exception: : Authentication failed: password is incorrect or there is no user with such name." + in make_auth(instance3) + ) + assert instance3.contains_in_log("Keytab file not found") + + if __name__ == "__main__": cluster.start() input("Cluster created, press any key to destroy...") From a6f860f24e416b4d452ffeea9e67d8ddf7b60158 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Fri, 23 Dec 2022 17:57:17 +0100 Subject: [PATCH 074/262] Fixed review comments and updated FunctionParameterValuesVisitor to use visitFunction - 40907 Parameterized views as table functions --- src/Interpreters/InterpreterSelectQuery.cpp | 5 +++-- .../FunctionParameterValuesVisitor.cpp | 19 ++++++++++++------- src/Storages/StorageSnapshot.cpp | 13 +++++++------ 3 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/Interpreters/InterpreterSelectQuery.cpp b/src/Interpreters/InterpreterSelectQuery.cpp index 8381fee22b1..d16eb7e2bac 100644 --- a/src/Interpreters/InterpreterSelectQuery.cpp +++ b/src/Interpreters/InterpreterSelectQuery.cpp @@ -510,9 +510,10 @@ InterpreterSelectQuery::InterpreterSelectQuery( if (view) { query_info.is_parameterized_view = view->isParameterizedView(); - /// We need to fetch the parameters set for SELECT parameterized view before the query is replaced. + /// We need to fetch the parameters set for SELECT ... FROM parameterized_view() before the query is replaced. /// replaceWithSubquery replaces the function child and adds the subquery in its place. 
- /// the parameters are children of function child, if function is replaced the parameters are also gone from tree + /// the parameters are children of function child, if function (which corresponds to parametrised view and has + /// parameters in its arguments: `parametrised_view()`) is replaced the parameters are also gone from tree /// So we need to get the parameters before they are removed from the tree /// and after query is replaced, we use these parameters to substitute in the parameterized view query if (query_info.is_parameterized_view) diff --git a/src/Parsers/FunctionParameterValuesVisitor.cpp b/src/Parsers/FunctionParameterValuesVisitor.cpp index cb187b2a56a..31ba7ac4f86 100644 --- a/src/Parsers/FunctionParameterValuesVisitor.cpp +++ b/src/Parsers/FunctionParameterValuesVisitor.cpp @@ -27,8 +27,8 @@ public: void visit(const ASTPtr & ast) { - if (const auto * expression = ast->as()) - visitExpressionList(*expression); + if (const auto * function = ast->as()) + visitFunction(*function); for (const auto & child : ast->children) visit(child); } @@ -36,18 +36,23 @@ public: private: NameToNameMap & parameter_values; - void visitExpressionList(const ASTExpressionList & expression_list) + void visitFunction(const ASTFunction & parameter_function) { - if (expression_list.children.size() != 2) + if (parameter_function.name != "equals" && parameter_function.children.size() != 1) return; - if (const auto * identifier = expression_list.children[0]->as()) + const auto * expression_list = parameter_function.children[0]->as(); + + if (expression_list && expression_list->children.size() != 2) + return; + + if (const auto * identifier = expression_list->children[0]->as()) { - if (const auto * literal = expression_list.children[1]->as()) + if (const auto * literal = expression_list->children[1]->as()) { parameter_values[identifier->name()] = convertFieldToString(literal->value); } - else if (const auto * function = expression_list.children[1]->as()) + else if (const auto * function = expression_list->children[1]->as()) { if (isFunctionCast(function)) { diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index b88e07d93b8..00f5160ae11 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -117,10 +117,11 @@ Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names,const Block res; const auto & columns = getMetadataForQuery()->getColumns(); - for (const auto & name : column_names) + for (const auto & column_name : column_names) { - const std::string & column_name = name; - std::string substituted_column_name = name; + /// substituted_column_name is used for parameterized view (which are created using query parameters + /// and SELECT is used with substitution of these query parameters ) + std::string substituted_column_name = column_name; std::string::size_type pos = 0u; for (const auto & parameter : parameter_values) { @@ -141,17 +142,17 @@ Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names,const { res.insert({object_column->type->createColumn(), object_column->type, column_name}); } - else if (auto it = virtual_columns.find(name); it != virtual_columns.end()) + else if (auto it = virtual_columns.find(column_name); it != virtual_columns.end()) { /// Virtual columns must be appended after ordinary, because user can /// override them. 
const auto & type = it->second; - res.insert({type->createColumn(), type, name}); + res.insert({type->createColumn(), type, column_name}); } else { throw Exception(ErrorCodes::NOT_FOUND_COLUMN_IN_BLOCK, - "Column {} not found in table {}", backQuote(name), storage.getStorageID().getNameForLogs()); + "Column {} not found in table {}", backQuote(substituted_column_name), storage.getStorageID().getNameForLogs()); } } return res; From a70e3c20fa81276318cc550ce787e3d55a2fae79 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Mon, 26 Dec 2022 11:56:45 +0000 Subject: [PATCH 075/262] Make calls of test from Ubuntu client --- .../kerberos_image_config.sh | 4 ++++ .../test_kerberos_auth/secrets/krb.conf | 4 ++-- tests/integration/test_kerberos_auth/test.py | 21 +++++++++++++------ 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_kerberos_auth/kerberos_image_config.sh b/tests/integration/test_kerberos_auth/kerberos_image_config.sh index 9ee5f3490fe..18f57ef2585 100644 --- a/tests/integration/test_kerberos_auth/kerberos_image_config.sh +++ b/tests/integration/test_kerberos_auth/kerberos_image_config.sh @@ -104,10 +104,14 @@ create_keytabs() { kadmin.local -q "addprinc -randkey HTTP/instance3@${REALM}" kadmin.local -q "ktadd -norandkey -k /tmp/keytab/clickhouse3.keytab HTTP/instance3@${REALM}" + kadmin.local -q "addprinc -randkey HTTP/client@${REALM}" + kadmin.local -q "ktadd -norandkey -k /tmp/keytab/client.keytab HTTP/client@${REALM}" + chmod g+r /tmp/keytab/kuser.keytab chmod g+r /tmp/keytab/clickhouse1.keytab chmod g+r /tmp/keytab/clickhouse2.keytab chmod g+r /tmp/keytab/clickhouse3.keytab + chmod g+r /tmp/keytab/client.keytab } main() { diff --git a/tests/integration/test_kerberos_auth/secrets/krb.conf b/tests/integration/test_kerberos_auth/secrets/krb.conf index 87520f65b2d..88431d68554 100644 --- a/tests/integration/test_kerberos_auth/secrets/krb.conf +++ b/tests/integration/test_kerberos_auth/secrets/krb.conf @@ -18,5 +18,5 @@ } [domain_realm] - .TEST.CLICKHOUSE.TECH = TEST.CLICKHOUSE.TECH - TEST.CLICKHOUSE.TECH = TEST.CLICKHOUSE.TECH + .test.clickhouse.com = TEST.CLICKHOUSE.TECH + test.clickhouse.com = TEST.CLICKHOUSE.TECH diff --git a/tests/integration/test_kerberos_auth/test.py b/tests/integration/test_kerberos_auth/test.py index 89735d1c8fd..b024f4b59ef 100644 --- a/tests/integration/test_kerberos_auth/test.py +++ b/tests/integration/test_kerberos_auth/test.py @@ -20,6 +20,12 @@ instance3 = cluster.add_instance( user_configs=["configs/users.xml"], with_kerberos_kdc=True, ) +client = cluster.add_instance( + "client", + main_configs=["configs/kerberos_without_keytab.xml"], + user_configs=["configs/users.xml"], + with_kerberos_kdc=True, +) # Fixtures @@ -38,16 +44,20 @@ def kerberos_cluster(): def make_auth(instance): - instance.exec_in_container( + instance_ip = cluster.get_instance_ip(instance.name) + + client.exec_in_container( + (["bash", "-c", f"echo '{instance_ip} {instance.hostname}' >> /etc/hosts"]) + ) + + client.exec_in_container( ["bash", "-c", "kinit -k -t /tmp/keytab/kuser.keytab kuser"] ) - return instance.exec_in_container( + return client.exec_in_container( [ "bash", "-c", - "echo 'select currentUser()' | curl -vvv --negotiate -u : http://{}:8123/ --data-binary @-".format( - instance.hostname - ), + f"echo 'select currentUser()' | curl -vvv --negotiate -u : http://{instance.hostname}:8123/ --data-binary @-", ] ) @@ -64,7 +74,6 @@ def test_kerberos_auth_without_keytab(kerberos_cluster): def test_bad_path_to_keytab(kerberos_cluster): - assert ( 
"DB::Exception: : Authentication failed: password is incorrect or there is no user with such name." in make_auth(instance3) From 3cdc9b3f81df1749cd96347aba4df2dd7d33e779 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Mon, 26 Dec 2022 11:59:37 +0000 Subject: [PATCH 076/262] Remove -vvv from kinit call in tests --- tests/integration/test_kerberos_auth/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_kerberos_auth/test.py b/tests/integration/test_kerberos_auth/test.py index b024f4b59ef..37625dcef85 100644 --- a/tests/integration/test_kerberos_auth/test.py +++ b/tests/integration/test_kerberos_auth/test.py @@ -57,7 +57,7 @@ def make_auth(instance): [ "bash", "-c", - f"echo 'select currentUser()' | curl -vvv --negotiate -u : http://{instance.hostname}:8123/ --data-binary @-", + f"echo 'select currentUser()' | curl --negotiate -u : http://{instance.hostname}:8123/ --data-binary @-", ] ) From 5a7257069b0b50b1c914b6c0155371b0b239a450 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Mon, 26 Dec 2022 12:22:29 +0000 Subject: [PATCH 077/262] Updated docs --- docs/en/operations/external-authenticators/kerberos.md | 4 +++- docs/ru/operations/external-authenticators/kerberos.md | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/en/operations/external-authenticators/kerberos.md b/docs/en/operations/external-authenticators/kerberos.md index c1360e880ad..95944e96194 100644 --- a/docs/en/operations/external-authenticators/kerberos.md +++ b/docs/en/operations/external-authenticators/kerberos.md @@ -22,10 +22,12 @@ To enable Kerberos, one should include `kerberos` section in `config.xml`. This - `principal` - canonical service principal name that will be acquired and used when accepting security contexts. - This parameter is optional, if omitted, the default principal will be used. - - `realm` - a realm, that will be used to restrict authentication to only those requests whose initiator's realm matches it. - This parameter is optional, if omitted, no additional filtering by realm will be applied. +- `keytab` - path to service keytab file. + - This parameter is optional, if omitted, path to service keytab file must be set in `KRB5_KTNAME` environment variable. + Example (goes into `config.xml`): ```xml diff --git a/docs/ru/operations/external-authenticators/kerberos.md b/docs/ru/operations/external-authenticators/kerberos.md index 865ea639c89..4641f15cb56 100644 --- a/docs/ru/operations/external-authenticators/kerberos.md +++ b/docs/ru/operations/external-authenticators/kerberos.md @@ -22,6 +22,9 @@ ClickHouse предоставляет возможность аутентифи - `realm` — обеспечивает фильтрацию по реалм (realm). Пользователям, чей реалм не совпадает с указанным, будет отказано в аутентификации. - Это опциональный параметр, при его отсутствии фильтр по реалм применяться не будет. +- `keytab` — задаёт путь к файлу keytab. + - Это опциональный параметр, при его отсутствии путь к файлу keytab должен быть задан в переменной окружения `KRB5_KTNAME`. 
+ Примеры, как должен выглядеть файл `config.xml`: ```xml From 465cc36526156b19e6bb497ebccece6f3145eda8 Mon Sep 17 00:00:00 2001 From: Roman Vasin Date: Mon, 26 Dec 2022 13:17:22 +0000 Subject: [PATCH 078/262] Update Exception messages in test.py --- tests/integration/test_kerberos_auth/test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_kerberos_auth/test.py b/tests/integration/test_kerberos_auth/test.py index 37625dcef85..3a183ad86a0 100644 --- a/tests/integration/test_kerberos_auth/test.py +++ b/tests/integration/test_kerberos_auth/test.py @@ -68,14 +68,14 @@ def test_kerberos_auth_with_keytab(kerberos_cluster): def test_kerberos_auth_without_keytab(kerberos_cluster): assert ( - "DB::Exception: : Authentication failed: password is incorrect or there is no user with such name." + "DB::Exception: : Authentication failed: password is incorrect, or there is no user with such name." in make_auth(instance2) ) def test_bad_path_to_keytab(kerberos_cluster): assert ( - "DB::Exception: : Authentication failed: password is incorrect or there is no user with such name." + "DB::Exception: : Authentication failed: password is incorrect, or there is no user with such name." in make_auth(instance3) ) assert instance3.contains_in_log("Keytab file not found") From f6ed1eaada8ef53b82d7bf42042a647cabd6b475 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Tue, 27 Dec 2022 16:41:26 +0100 Subject: [PATCH 079/262] Fix check black --- tests/queries/0_stateless/02473_infile_progress.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/02473_infile_progress.py b/tests/queries/0_stateless/02473_infile_progress.py index 28ad2c8413a..842acf2b697 100755 --- a/tests/queries/0_stateless/02473_infile_progress.py +++ b/tests/queries/0_stateless/02473_infile_progress.py @@ -18,7 +18,7 @@ with client( name="client>", log=log, command=os.environ.get("CLICKHOUSE_BINARY", "clickhouse") - + " client --storage_file_read_method=pread" + + " client --storage_file_read_method=pread", ) as client1: filename = os.environ["CLICKHOUSE_TMP"] + "/infile_progress.tsv" From 1ce69371fb5da17528ed4655e76841b9004caea4 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 28 Dec 2022 21:46:08 +0000 Subject: [PATCH 080/262] Infer UInt64 in case of Int64 overflow --- src/DataTypes/transformTypesRecursively.cpp | 4 +- src/DataTypes/transformTypesRecursively.h | 2 +- src/Formats/JSONUtils.cpp | 4 + src/Formats/SchemaInferenceUtils.cpp | 146 +++++++++++++++--- ...uint64_in_case_of_int64_overflow.reference | 12 ++ ..._infer_uint64_in_case_of_int64_overflow.sh | 18 +++ 6 files changed, 160 insertions(+), 26 deletions(-) create mode 100644 tests/queries/0_stateless/02517_infer_uint64_in_case_of_int64_overflow.reference create mode 100755 tests/queries/0_stateless/02517_infer_uint64_in_case_of_int64_overflow.sh diff --git a/src/DataTypes/transformTypesRecursively.cpp b/src/DataTypes/transformTypesRecursively.cpp index da3af0beee7..05f82a08abe 100644 --- a/src/DataTypes/transformTypesRecursively.cpp +++ b/src/DataTypes/transformTypesRecursively.cpp @@ -8,7 +8,7 @@ namespace DB { -void transformTypesRecursively(DataTypes & types, std::function transform_simple_types, std::function transform_complex_types) +void transformTypesRecursively(DataTypes & types, std::function transform_simple_types, std::function transform_complex_types) { TypeIndexesSet type_indexes; for (const auto & type : types) @@ -156,7 +156,7 @@ void 
transformTypesRecursively(DataTypes & types, std::function callback) { DataTypes types = {type}; - transformTypesRecursively(types, [callback](auto & data_types, const TypeIndexesSet &){ callback(data_types[0]); }, {}); + transformTypesRecursively(types, [callback](auto & data_types, TypeIndexesSet &){ callback(data_types[0]); }, {}); } } diff --git a/src/DataTypes/transformTypesRecursively.h b/src/DataTypes/transformTypesRecursively.h index 2cf8664f920..f9c776b4205 100644 --- a/src/DataTypes/transformTypesRecursively.h +++ b/src/DataTypes/transformTypesRecursively.h @@ -12,7 +12,7 @@ namespace DB /// If not all types are the same complex type (Array/Map/Tuple), this function won't be called to nested types. /// Function transform_simple_types will be applied to resulting simple types after all recursive calls. /// Function transform_complex_types will be applied to complex types (Array/Map/Tuple) after recursive call to their nested types. -void transformTypesRecursively(DataTypes & types, std::function transform_simple_types, std::function transform_complex_types); +void transformTypesRecursively(DataTypes & types, std::function transform_simple_types, std::function transform_complex_types); void callOnNestedSimpleTypes(DataTypePtr & type, std::function callback); diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index 16f275ed6b8..384619dba1d 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -131,6 +131,7 @@ namespace JSONUtils { skipWhitespaceIfAny(in); assertChar('{', in); + skipWhitespaceIfAny(in); bool first = true; NamesAndTypesList names_and_types; String field; @@ -144,6 +145,7 @@ namespace JSONUtils auto name = readFieldName(in); auto type = tryInferDataTypeForSingleJSONField(in, settings, inference_info); names_and_types.emplace_back(name, type); + skipWhitespaceIfAny(in); } if (in.eof()) @@ -157,6 +159,7 @@ namespace JSONUtils { skipWhitespaceIfAny(in); assertChar('[', in); + skipWhitespaceIfAny(in); bool first = true; DataTypes types; String field; @@ -168,6 +171,7 @@ namespace JSONUtils first = false; auto type = tryInferDataTypeForSingleJSONField(in, settings, inference_info); types.push_back(std::move(type)); + skipWhitespaceIfAny(in); } if (in.eof()) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 9d40ac98964..3d00f67884d 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -44,9 +44,16 @@ namespace return true; } + void updateTypeIndexes(DataTypes & data_types, TypeIndexesSet & type_indexes) + { + type_indexes.clear(); + for (const auto & type : data_types) + type_indexes.insert(type->getTypeId()); + } + /// If we have both Nothing and non Nothing types, convert all Nothing types to the first non Nothing. /// For example if we have types [Nothing, String, Nothing] we change it to [String, String, String] - void transformNothingSimpleTypes(DataTypes & data_types, const TypeIndexesSet & type_indexes) + void transformNothingSimpleTypes(DataTypes & data_types, TypeIndexesSet & type_indexes) { /// Check if we have both Nothing and non Nothing types. if (!type_indexes.contains(TypeIndex::Nothing) || type_indexes.size() <= 1) @@ -67,12 +74,32 @@ namespace if (isNothing(type)) type = not_nothing_type; } + + type_indexes.erase(TypeIndex::Nothing); + } + + /// If we have both Int64 and UInt64, convert all Int64 to UInt64, + /// because UInt64 is inferred only in case of Int64 overflow. 
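For illustration of the merge rule described in the comment above, here is a hedged sketch of the user-visible behaviour. The expected results are copied from the new 02517 test; the `format` table function is an assumption of this sketch (the test itself pipes the data through clickhouse-local).

``` sql
-- 18446744073709551615 does not fit into Int64, so UInt64 is inferred instead of
-- falling back to Float64 or String.
DESC format(JSONEachRow, '{"number" : 18446744073709551615}');
-- expected: number  Nullable(UInt64)

-- If other values of the same column were inferred as Int64, transformIntegers()
-- promotes them all to UInt64, so the merged type stays an integer type.
DESC format(JSONEachRow, '{"number" : [18446744073709551615, 10, 11]}');
-- expected: number  Array(Nullable(UInt64))
```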
+ void transformIntegers(DataTypes & data_types, TypeIndexesSet & type_indexes) + { + if (!type_indexes.contains(TypeIndex::Int64) || !type_indexes.contains(TypeIndex::UInt64)) + return; + + for (auto & type : data_types) + { + if (WhichDataType(type).isInt64()) + type = std::make_shared(); + } + + type_indexes.erase(TypeIndex::Int64); } /// If we have both Int64 and Float64 types, convert all Int64 to Float64. - void transformIntegersAndFloatsToFloats(DataTypes & data_types, const TypeIndexesSet & type_indexes) + void transformIntegersAndFloatsToFloats(DataTypes & data_types, TypeIndexesSet & type_indexes) { - if (!type_indexes.contains(TypeIndex::Int64) || !type_indexes.contains(TypeIndex::Float64)) + bool have_floats = type_indexes.contains(TypeIndex::Float64); + bool have_integers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64); + if (!have_integers || !have_floats) return; for (auto & type : data_types) @@ -80,11 +107,14 @@ namespace if (isInteger(type)) type = std::make_shared(); } + + type_indexes.erase(TypeIndex::Int64); + type_indexes.erase(TypeIndex::UInt64); } /// If we have only Date and DateTime types, convert Date to DateTime, /// otherwise, convert all Date and DateTime to String. - void transformDatesAndDateTimes(DataTypes & data_types, const TypeIndexesSet & type_indexes) + void transformDatesAndDateTimes(DataTypes & data_types, TypeIndexesSet & type_indexes) { bool have_dates = type_indexes.contains(TypeIndex::Date); bool have_datetimes = type_indexes.contains(TypeIndex::DateTime64); @@ -98,6 +128,8 @@ namespace type = std::make_shared(); } + type_indexes.erase(TypeIndex::Date); + type_indexes.erase(TypeIndex::DateTime); return; } @@ -108,16 +140,18 @@ namespace if (isDate(type)) type = std::make_shared(9); } + + type_indexes.erase(TypeIndex::Date); } } - /// If we have numbers (Int64/Float64) and String types and numbers were parsed from String, + /// If we have numbers (Int64/UInt64/Float64) and String types and numbers were parsed from String, /// convert all numbers to String. void transformJSONNumbersBackToString( - DataTypes & data_types, const FormatSettings & settings, const TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info) + DataTypes & data_types, const FormatSettings & settings, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info) { bool have_strings = type_indexes.contains(TypeIndex::String); - bool have_numbers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::Float64); + bool have_numbers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64) || type_indexes.contains(TypeIndex::Float64); if (!have_strings || !have_numbers) return; @@ -128,36 +162,43 @@ namespace || json_info->numbers_parsed_from_json_strings.contains(type.get()))) type = std::make_shared(); } + + updateTypeIndexes(data_types, type_indexes); } - /// If we have both Bool and number (Int64/Float64) types, - /// convert all Bool to Int64/Float64. - void transformBoolsAndNumbersToNumbers(DataTypes & data_types, const TypeIndexesSet & type_indexes) + /// If we have both Bool and number (Int64/UInt64/Float64) types, + /// convert all Bool to Int64/UInt64/Float64. 
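A minimal follow-up sketch, under the same assumptions as above, for the unsigned branch added to this transform: when a Bool is mixed with an integer that only fits into UInt64, the Bool is promoted to UInt64 rather than Int64 (the expected type is taken from the 02517 test).

``` sql
DESC format(JSONEachRow, '{"number" : [18446744073709551615, true, 11]}');
-- expected: number  Array(Nullable(UInt64))
```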
+ void transformBoolsAndNumbersToNumbers(DataTypes & data_types, TypeIndexesSet & type_indexes) { bool have_floats = type_indexes.contains(TypeIndex::Float64); - bool have_integers = type_indexes.contains(TypeIndex::Int64); + bool have_signed_integers = type_indexes.contains(TypeIndex::Int64); + bool have_unsigned_integers = type_indexes.contains(TypeIndex::UInt64); bool have_bools = type_indexes.contains(TypeIndex::UInt8); /// Check if we have both Bool and Integer/Float. - if (!have_bools || (!have_integers && !have_floats)) + if (!have_bools || (!have_signed_integers && !have_unsigned_integers && !have_floats)) return; for (auto & type : data_types) { if (isBool(type)) { - if (have_integers) + if (have_signed_integers) type = std::make_shared(); + else if (have_unsigned_integers) + type = std::make_shared(); else type = std::make_shared(); } } + + type_indexes.erase(TypeIndex::UInt8); } /// If we have type Nothing/Nullable(Nothing) and some other non Nothing types, /// convert all Nothing/Nullable(Nothing) types to the first non Nothing. /// For example, when we have [Nothing, Array(Int64)] it will convert it to [Array(Int64), Array(Int64)] /// (it can happen when transforming complex nested types like [Array(Nothing), Array(Array(Int64))]) - void transformNothingComplexTypes(DataTypes & data_types) + void transformNothingComplexTypes(DataTypes & data_types, TypeIndexesSet & type_indexes) { bool have_nothing = false; DataTypePtr not_nothing_type = nullptr; @@ -177,10 +218,12 @@ namespace if (isNothing(removeNullable(type))) type = not_nothing_type; } + + updateTypeIndexes(data_types, type_indexes); } /// If we have both Nullable and non Nullable types, make all types Nullable - void transformNullableTypes(DataTypes & data_types, const TypeIndexesSet & type_indexes) + void transformNullableTypes(DataTypes & data_types, TypeIndexesSet & type_indexes) { if (!type_indexes.contains(TypeIndex::Nullable)) return; @@ -190,6 +233,8 @@ namespace if (type->canBeInsideNullable()) type = makeNullable(type); } + + updateTypeIndexes(data_types, type_indexes); } /// If we have Tuple with the same nested types like Tuple(Int64, Int64), @@ -197,11 +242,12 @@ namespace /// For example when we had type Tuple(Int64, Nullable(Nothing)) and we /// transformed it to Tuple(Nullable(Int64), Nullable(Int64)) we will /// also transform it to Array(Nullable(Int64)) - void transformTuplesWithEqualNestedTypesToArrays(DataTypes & data_types, const TypeIndexesSet & type_indexes) + void transformTuplesWithEqualNestedTypesToArrays(DataTypes & data_types, TypeIndexesSet & type_indexes) { if (!type_indexes.contains(TypeIndex::Tuple)) return; + bool remove_tuple_index = true; for (auto & type : data_types) { if (isTuple(type)) @@ -209,8 +255,13 @@ namespace const auto * tuple_type = assert_cast(type.get()); if (checkIfTypesAreEqual(tuple_type->getElements())) type = std::make_shared(tuple_type->getElements().back()); + else + remove_tuple_index = false; } } + + if (remove_tuple_index) + type_indexes.erase(TypeIndex::Tuple); } template @@ -221,7 +272,7 @@ namespace /// For example, if we have [Tuple(Nullable(Nothing), String), Array(Date), Tuple(Date, String)] /// it will convert them all to Array(String) void transformJSONTuplesAndArraysToArrays( - DataTypes & data_types, const FormatSettings & settings, const TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info) + DataTypes & data_types, const FormatSettings & settings, TypeIndexesSet & type_indexes, JSONInferenceInfo * json_info) { if 
(!type_indexes.contains(TypeIndex::Tuple)) return; @@ -266,12 +317,14 @@ namespace if (isArray(type) || isTuple(type)) type = std::make_shared(nested_types.back()); } + + type_indexes.erase(TypeIndex::Tuple); } } /// If we have Map and Object(JSON) types, convert all Map types to Object(JSON). /// If we have Map types with different value types, convert all Map types to Object(JSON) - void transformMapsAndObjectsToObjects(DataTypes & data_types, const TypeIndexesSet & type_indexes) + void transformMapsAndObjectsToObjects(DataTypes & data_types, TypeIndexesSet & type_indexes) { if (!type_indexes.contains(TypeIndex::Map)) return; @@ -298,9 +351,11 @@ namespace if (isMap(type)) type = std::make_shared("json", true); } + + type_indexes.erase(TypeIndex::Map); } - void transformMapsObjectsAndStringsToStrings(DataTypes & data_types, const TypeIndexesSet & type_indexes) + void transformMapsObjectsAndStringsToStrings(DataTypes & data_types, TypeIndexesSet & type_indexes) { bool have_maps = type_indexes.contains(TypeIndex::Map); bool have_objects = type_indexes.contains(TypeIndex::Object); @@ -315,19 +370,26 @@ namespace if (isMap(type) || isObject(type)) type = std::make_shared(); } + + type_indexes.erase(TypeIndex::Map); + type_indexes.erase(TypeIndex::Object); } template void transformInferredTypesIfNeededImpl(DataTypes & types, const FormatSettings & settings, JSONInferenceInfo * json_info) { - auto transform_simple_types = [&](DataTypes & data_types, const TypeIndexesSet & type_indexes) + auto transform_simple_types = [&](DataTypes & data_types, TypeIndexesSet & type_indexes) { /// Remove all Nothing type if possible. transformNothingSimpleTypes(data_types, type_indexes); - /// Transform integers to floats if needed. if (settings.try_infer_integers) + { + /// Transform Int64 to UInt64 if needed. + transformIntegers(data_types, type_indexes); + /// Transform integers to floats if needed. transformIntegersAndFloatsToFloats(data_types, type_indexes); + } /// Transform Date to DateTime or both to String if needed. if (settings.try_infer_dates || settings.try_infer_datetimes) @@ -347,14 +409,14 @@ namespace transformBoolsAndNumbersToNumbers(data_types, type_indexes); }; - auto transform_complex_types = [&](DataTypes & data_types, const TypeIndexesSet & type_indexes) + auto transform_complex_types = [&](DataTypes & data_types, TypeIndexesSet & type_indexes) { /// Make types Nullable if needed. transformNullableTypes(data_types, type_indexes); /// If we have type Nothing, it means that we had empty Array/Map while inference. /// If there is at least one non Nothing type, change all Nothing types to it. - transformNothingComplexTypes(data_types); + transformNothingComplexTypes(data_types, type_indexes); if constexpr (!is_json) return; @@ -571,10 +633,28 @@ namespace char * int_end = buf.position(); /// We cam safely get back to the start of the number, because we read from a string and we didn't reach eof. buf.position() = number_start; + + bool read_uint = false; + char * uint_end = nullptr; + /// In case of Int64 overflow we can try to infer UInt64. + if (!read_int) + { + UInt64 tmp_uint; + read_uint = tryReadIntText(tmp_uint, buf); + /// If we reached eof, it cannot be float (it requires no less data than integer) + if (buf.eof()) + return read_uint ? 
std::make_shared() : nullptr; + + uint_end = buf.position(); + buf.position() = number_start; + } + if (tryReadFloatText(tmp_float, buf)) { if (read_int && buf.position() == int_end) return std::make_shared(); + if (read_uint && buf.position() == uint_end) + return std::make_shared(); return std::make_shared(); } @@ -590,6 +670,19 @@ namespace bool read_int = tryReadIntText(tmp_int, peekable_buf); auto * int_end = peekable_buf.position(); peekable_buf.rollbackToCheckpoint(true); + + bool read_uint = false; + char * uint_end = nullptr; + /// In case of Int64 overflow we can try to infer UInt64. + if (!read_int) + { + PeekableReadBufferCheckpoint new_checkpoint(peekable_buf); + UInt64 tmp_uint; + read_uint = tryReadIntText(tmp_uint, peekable_buf); + uint_end = peekable_buf.position(); + peekable_buf.rollbackToCheckpoint(true); + } + if (tryReadFloatText(tmp_float, peekable_buf)) { /// Float parsing reads no fewer bytes than integer parsing, @@ -597,6 +690,8 @@ namespace /// If it's the same, then it's integer. if (read_int && peekable_buf.position() == int_end) return std::make_shared(); + if (read_uint && peekable_buf.position() == uint_end) + return std::make_shared(); return std::make_shared(); } } @@ -874,6 +969,11 @@ DataTypePtr tryInferNumberFromString(std::string_view field, const FormatSetting Int64 tmp_int; if (tryReadIntText(tmp_int, buf) && buf.eof()) return std::make_shared(); + + /// In case of Int64 overflow, try to infer UInt64 + UInt64 tmp_uint; + if (tryReadIntText(tmp_uint, buf) && buf.eof()) + return std::make_shared(); } /// We cam safely get back to the start of buffer, because we read from a string and we didn't reach eof. diff --git a/tests/queries/0_stateless/02517_infer_uint64_in_case_of_int64_overflow.reference b/tests/queries/0_stateless/02517_infer_uint64_in_case_of_int64_overflow.reference new file mode 100644 index 00000000000..96a50d75eee --- /dev/null +++ b/tests/queries/0_stateless/02517_infer_uint64_in_case_of_int64_overflow.reference @@ -0,0 +1,12 @@ +c1 Nullable(UInt64) +c1 Array(Nullable(UInt64)) +c1 Nullable(UInt64) +c1 Nullable(UInt64) +c1 Array(Nullable(UInt64)) +c1 Nullable(UInt64) +number Nullable(UInt64) +number Array(Nullable(UInt64)) +number Array(Nullable(UInt64)) +number Nullable(UInt64) +number Nullable(UInt64) +number Nullable(UInt64) diff --git a/tests/queries/0_stateless/02517_infer_uint64_in_case_of_int64_overflow.sh b/tests/queries/0_stateless/02517_infer_uint64_in_case_of_int64_overflow.sh new file mode 100755 index 00000000000..4019d2b7a78 --- /dev/null +++ b/tests/queries/0_stateless/02517_infer_uint64_in_case_of_int64_overflow.sh @@ -0,0 +1,18 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +echo -ne "18446744073709551615" | $CLICKHOUSE_LOCAL --table=test --input-format=CSV -q "desc test"; +echo -ne '"[18446744073709551615, 10, 11]"' | $CLICKHOUSE_LOCAL --table=test --input-format=CSV -q "desc test"; +echo -ne "18446744073709551615\n10\n11" | $CLICKHOUSE_LOCAL --table=test --input-format=CSV -q "desc test"; +echo -ne "18446744073709551615" | $CLICKHOUSE_LOCAL --table=test --input-format=TSV -q "desc test"; +echo -ne "[18446744073709551615, 10, 11]" | $CLICKHOUSE_LOCAL --table=test --input-format=TSV -q "desc test"; +echo -ne "18446744073709551615\n10\n11" | $CLICKHOUSE_LOCAL --table=test --input-format=TSV -q "desc test"; +echo -ne '{"number" : 18446744073709551615}' | $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test"; +echo -ne '{"number" : [18446744073709551615, 10, 11]}'| $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test"; +echo -ne '{"number" : [18446744073709551615, true, 11]}'| $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test"; +echo -ne '{"number" : 18446744073709551615}, {"number" : 10}, {"number" : 11}' | $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test"; +echo -ne '{"number" : 18446744073709551615}, {"number" : false}, {"number" : 11}' | $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test"; +echo -ne '{"number" : "18446744073709551615"}' | $CLICKHOUSE_LOCAL --table=test --input-format=JSONEachRow -q "desc test"; From 18214c85654223b3d3913717718a4f06ff071a87 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 2 Jan 2023 15:05:43 +0100 Subject: [PATCH 081/262] Updated test to use custom database - 40907 Parameterized views as table functions --- .../0_stateless/02428_parameterized_view.sql | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index d2118cd1279..b2d4f99a5f1 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -50,15 +50,17 @@ SELECT Price FROM pv3(price=10); CREATE VIEW pv4 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={price:UInt64}; -- {serverError DUPLICATE_COLUMN} -CREATE TABLE system.Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; +CREATE DATABASE test_02428; -INSERT INTO system.Catalog VALUES ('Pen', 10, 3); -INSERT INTO system.Catalog VALUES ('Book', 50, 2); -INSERT INTO system.Catalog VALUES ('Paper', 20, 1); +CREATE TABLE test_02428.Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; -CREATE VIEW system.pv1 AS SELECT * FROM system.Catalog WHERE Price={price:UInt64}; -SELECT Price FROM system.pv1(price=20); -SELECT Price FROM `system.pv1`(price=20); -- { serverError UNKNOWN_FUNCTION } +INSERT INTO test_02428.Catalog VALUES ('Pen', 10, 3); +INSERT INTO test_02428.Catalog VALUES ('Book', 50, 2); +INSERT INTO test_02428.Catalog VALUES ('Paper', 20, 1); + +CREATE VIEW test_02428.pv1 AS SELECT * FROM test_02428.Catalog WHERE Price={price:UInt64}; +SELECT Price FROM test_02428.pv1(price=20); +SELECT Price FROM `test_02428.pv1`(price=20); -- { serverError UNKNOWN_FUNCTION } INSERT INTO Catalog VALUES ('Book2', 30, 8); INSERT INTO Catalog VALUES ('Book3', 30, 8); @@ -80,5 +82,6 @@ DROP VIEW pv6; DROP VIEW pv7; DROP VIEW v1; DROP TABLE Catalog; -DROP TABLE system.pv1; -DROP TABLE system.Catalog; \ No newline at end of file +DROP 
TABLE test_02428.pv1; +DROP TABLE test_02428.Catalog; +DROP DATABASE test_02428; \ No newline at end of file From 73fecae5ffed84ceca78f902e8f6967447b3863a Mon Sep 17 00:00:00 2001 From: avogar Date: Mon, 2 Jan 2023 15:31:07 +0000 Subject: [PATCH 082/262] Fix comments --- src/Formats/JSONUtils.cpp | 4 ++-- src/Formats/SchemaInferenceUtils.cpp | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Formats/JSONUtils.cpp b/src/Formats/JSONUtils.cpp index 384619dba1d..574759b0c07 100644 --- a/src/Formats/JSONUtils.cpp +++ b/src/Formats/JSONUtils.cpp @@ -138,7 +138,7 @@ namespace JSONUtils while (!in.eof() && *in.position() != '}') { if (!first) - skipComma(in); + assertChar(',', in); else first = false; @@ -166,7 +166,7 @@ namespace JSONUtils while (!in.eof() && *in.position() != ']') { if (!first) - skipComma(in); + assertChar(',', in); else first = false; auto type = tryInferDataTypeForSingleJSONField(in, settings, inference_info); diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 4bcbae1e9ea..77ef2e8f27a 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -104,7 +104,8 @@ namespace for (auto & type : data_types) { - if (isInteger(type)) + WhichDataType which(type); + if (which.isFloat64() || which.isInt64() || which.isUInt64()) type = std::make_shared(); } @@ -631,7 +632,7 @@ namespace return read_int ? std::make_shared() : nullptr; char * int_end = buf.position(); - /// We cam safely get back to the start of the number, because we read from a string and we didn't reach eof. + /// We can safely get back to the start of the number, because we read from a string and we didn't reach eof. buf.position() = number_start; bool read_uint = false; From f6deea1365037b3e091a07941746538805b493a1 Mon Sep 17 00:00:00 2001 From: Kruglov Pavel <48961922+Avogar@users.noreply.github.com> Date: Mon, 2 Jan 2023 16:51:11 +0100 Subject: [PATCH 083/262] Try fix build --- src/Core/Settings.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 4c14fea742b..86d86669db8 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -592,7 +592,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 'min_insert_block_size_rows'). It is a safety threshold.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. 
Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ - M(StorageFileReadMethod, storage_file_read_method, "mmap", "Method of reading data from storage file, one of: read, pread, mmap.", 0) \ + M(StorageFileReadMethod, storage_file_read_method, LocalFSReadMethod::mmap, "Method of reading data from storage file, one of: read, pread, mmap.", 0) \ M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, pread_threadpool.", 0) \ M(String, remote_filesystem_read_method, "threadpool", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \ M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \ From 6ae0ffe8d2da413c4ab7e71de3690c14bbe87d17 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 4 Nov 2022 17:12:14 +0000 Subject: [PATCH 084/262] Implement optimize_redundant_functions_in_order_by --- ...ptimizeRedundantFunctionsInOrderByPass.cpp | 121 ++++++++++++++++++ .../OptimizeRedundantFunctionsInOrderByPass.h | 23 ++++ src/Analyzer/QueryTreePassManager.cpp | 5 +- 3 files changed, 148 insertions(+), 1 deletion(-) create mode 100644 src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp create mode 100644 src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp new file mode 100644 index 00000000000..1359bd2616d --- /dev/null +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp @@ -0,0 +1,121 @@ +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace +{ + +class OptimizeRedundantFunctionsInOrderByVisitor : public InDepthQueryTreeVisitor +{ + + struct RedundancyVerdict + { + bool redundant = true; + bool done = false; + }; + + static constexpr RedundancyVerdict makeNonRedundant() noexcept { return { .redundant = false, .done = true }; } + + std::unordered_set existing_keys; + + RedundancyVerdict isRedundantExpression(FunctionNode * function) + { + if (function->getArguments().getNodes().empty()) + return makeNonRedundant(); + + if (function->getFunction()->isDeterministicInScopeOfQuery()) + return makeNonRedundant(); + + // TODO: handle constants here + for (auto & arg : function->getArguments().getNodes()) + { + switch (arg->getNodeType()) + { + case QueryTreeNodeType::FUNCTION: + { + auto subresult = isRedundantExpression(arg->as()); + if (subresult.done) + return subresult; + break; + } + case QueryTreeNodeType::COLUMN: + { + auto * column = arg->as(); + if (!existing_keys.contains(column->getColumnName())) + return makeNonRedundant(); + break; + } + default: + return makeNonRedundant(); + } + } + + return {}; + } + +public: + bool needChildVisit(QueryTreeNodePtr & node, QueryTreeNodePtr & /*parent*/) + { + if (node->as()) + return false; + return true; + } + + void visitImpl(QueryTreeNodePtr & node) + { + auto * query = node->as(); + if (!query) + return; + + if (!query->hasOrderBy()) + return; + + auto & order_by = query->getOrderBy(); + for (auto & elem : order_by.getNodes()) + { + auto * order_by_elem = elem->as(); + if (order_by_elem->withFill()) + return; + } + + QueryTreeNodes new_order_by; + 
new_order_by.reserve(order_by.getNodes().size()); + + for (auto & elem : order_by.getNodes()) + { + auto * order_by_elem = elem->as(); + if (auto * expr = order_by_elem->getExpression()->as()) + { + if (isRedundantExpression(expr).redundant) + continue; + } + else + { + auto * column = elem->as(); + existing_keys.insert(column->getColumnName()); + } + + new_order_by.push_back(elem); + } + existing_keys.clear(); + + if (new_order_by.size() < order_by.getNodes().size()) + order_by.getNodes() = std::move(new_order_by); + } +}; + +} + +void OptimizeRedundantFunctionsInOrderByPass::run(QueryTreeNodePtr query_tree_node, ContextPtr /*context*/) +{ + OptimizeRedundantFunctionsInOrderByVisitor().visit(query_tree_node); +} + +} diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h new file mode 100644 index 00000000000..609a6360d27 --- /dev/null +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.h @@ -0,0 +1,23 @@ +#pragma once + +#include + +namespace DB +{ + +/** If ORDER BY has argument x followed by f(x) transforms it to ORDER BY x. + * Optimize ORDER BY x, y, f(x), g(x, y), f(h(x)), t(f(x), g(x)) into ORDER BY x, y + * in case if f(), g(), h(), t() are deterministic (in scope of query). + * Don't optimize ORDER BY f(x), g(x), x even if f(x) is bijection for x or g(x). + */ +class OptimizeRedundantFunctionsInOrderByPass final : public IQueryTreePass +{ +public: + String getName() override { return "OptimizeRedundantFunctionsInOrderBy"; } + + String getDescription() override { return "If ORDER BY has argument x followed by f(x) transforms it to ORDER BY x."; } + + void run(QueryTreeNodePtr query_tree_node, ContextPtr context) override; +}; + +} diff --git a/src/Analyzer/QueryTreePassManager.cpp b/src/Analyzer/QueryTreePassManager.cpp index 06a1fec4698..dd14fc269f9 100644 --- a/src/Analyzer/QueryTreePassManager.cpp +++ b/src/Analyzer/QueryTreePassManager.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -91,7 +92,6 @@ public: * TODO: Support setting optimize_move_functions_out_of_any. * TODO: Support setting optimize_aggregators_of_group_by_keys. * TODO: Support setting optimize_duplicate_order_by_and_distinct. - * TODO: Support setting optimize_redundant_functions_in_order_by. * TODO: Support setting optimize_monotonous_functions_in_order_by. * TODO: Support settings.optimize_or_like_chain. * TODO: Add optimizations based on function semantics. Example: SELECT * FROM test_table WHERE id != id. (id is not nullable column). 
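To make the effect of the new pass concrete, a short sketch follows; the queries mirror the cases exercised by the existing 01323 test, and the described simplification follows the pass comment above (deterministic functions of keys that already appear earlier in ORDER BY are removed, while a leading function key is kept).

``` sql
SET optimize_redundant_functions_in_order_by = 1;

-- exp(x) is deterministic and x already precedes it, so the sort is reduced to ORDER BY x.
SELECT groupArray(x) FROM (SELECT number AS x FROM numbers(3) ORDER BY x, exp(x));

-- The function key comes first here, so nothing is removed: ORDER BY exp(x), x stays as written.
SELECT groupArray(x) FROM (SELECT number AS x FROM numbers(3) ORDER BY exp(x), x);
```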
@@ -203,6 +203,9 @@ void addQueryTreePasses(QueryTreePassManager & manager) if (settings.optimize_if_chain_to_multiif) manager.addPass(std::make_unique()); + if (settings.optimize_redundant_functions_in_order_by) + manager.addPass(std::make_unique()); + manager.addPass(std::make_unique()); manager.addPass(std::make_unique()); From 6e8191367c7d21a7f439d6be416173829a5bae29 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Fri, 4 Nov 2022 19:04:16 +0000 Subject: [PATCH 085/262] Fixup --- .../Passes/OptimizeRedundantFunctionsInOrderByPass.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp index 1359bd2616d..0337b5239d7 100644 --- a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp @@ -96,9 +96,8 @@ public: if (isRedundantExpression(expr).redundant) continue; } - else + else if (auto * column = elem->as()) { - auto * column = elem->as(); existing_keys.insert(column->getColumnName()); } From 0ecf6164ac77992fe83b434397076bd3858ad1df Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 7 Nov 2022 15:00:18 +0000 Subject: [PATCH 086/262] Add tests --- ...ptimizeRedundantFunctionsInOrderByPass.cpp | 8 +- ..._redundant_functions_in_order_by.reference | 191 ++++++++++++++++++ .../01323_redundant_functions_in_order_by.sql | 12 ++ 3 files changed, 207 insertions(+), 4 deletions(-) diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp index 0337b5239d7..140129a9fb2 100644 --- a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp @@ -30,7 +30,7 @@ class OptimizeRedundantFunctionsInOrderByVisitor : public InDepthQueryTreeVisito if (function->getArguments().getNodes().empty()) return makeNonRedundant(); - if (function->getFunction()->isDeterministicInScopeOfQuery()) + if (!function->getFunction()->isDeterministicInScopeOfQuery()) return makeNonRedundant(); // TODO: handle constants here @@ -90,13 +90,13 @@ public: for (auto & elem : order_by.getNodes()) { - auto * order_by_elem = elem->as(); - if (auto * expr = order_by_elem->getExpression()->as()) + auto & order_by_expr = elem->as()->getExpression(); + if (auto * expr = order_by_expr->as()) { if (isRedundantExpression(expr).redundant) continue; } - else if (auto * column = elem->as()) + else if (auto * column = order_by_expr->as()) { existing_keys.insert(column->getColumnName()); } diff --git a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference index b32ad433730..88703af7def 100644 --- a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference +++ b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference @@ -1,6 +1,15 @@ [0,1,2] [0,1,2] [0,1,2] +[0,1,2] +[0,1,2] +[0,1,2] +0 0 0 0 +0 1 1 1 +2 2 2 2 +3 3 3 3 +4 0 0 +5 0 0 0 0 0 0 0 1 1 1 2 2 2 2 @@ -15,6 +24,14 @@ 1 1 2 2 3 3 +0 0 +1 1 +2 2 +3 3 +0 0 +1 1 +2 2 +3 3 SELECT groupArray(x) FROM ( @@ -22,6 +39,32 @@ FROM FROM numbers(3) ORDER BY x ASC ) +QUERY id: 0 + PROJECTION COLUMNS + groupArray(x) Array(UInt64) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: groupArray, function_type: aggregate, result_type: Array(UInt64) + ARGUMENTS + LIST id: 3, nodes: 1 
+ COLUMN id: 4, column_name: x, result_type: UInt64, source_id: 5 + JOIN TREE + QUERY id: 5, is_subquery: 1 + PROJECTION COLUMNS + x UInt64 + PROJECTION + LIST id: 6, nodes: 1 + COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 + JOIN TREE + TABLE_FUNCTION id: 8, table_function_name: numbers + ARGUMENTS + LIST id: 9, nodes: 1 + CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8 + ORDER BY + LIST id: 11, nodes: 1 + SORT id: 12, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 SELECT groupArray(x) FROM ( @@ -29,6 +72,32 @@ FROM FROM numbers(3) ORDER BY x ASC ) +QUERY id: 0 + PROJECTION COLUMNS + groupArray(x) Array(UInt64) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: groupArray, function_type: aggregate, result_type: Array(UInt64) + ARGUMENTS + LIST id: 3, nodes: 1 + COLUMN id: 4, column_name: x, result_type: UInt64, source_id: 5 + JOIN TREE + QUERY id: 5, is_subquery: 1 + PROJECTION COLUMNS + x UInt64 + PROJECTION + LIST id: 6, nodes: 1 + COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 + JOIN TREE + TABLE_FUNCTION id: 8, table_function_name: numbers + ARGUMENTS + LIST id: 9, nodes: 1 + CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8 + ORDER BY + LIST id: 11, nodes: 1 + SORT id: 12, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 SELECT groupArray(x) FROM ( @@ -38,6 +107,38 @@ FROM exp(x) ASC, x ASC ) +QUERY id: 0 + PROJECTION COLUMNS + groupArray(x) Array(UInt64) + PROJECTION + LIST id: 1, nodes: 1 + FUNCTION id: 2, function_name: groupArray, function_type: aggregate, result_type: Array(UInt64) + ARGUMENTS + LIST id: 3, nodes: 1 + COLUMN id: 4, column_name: x, result_type: UInt64, source_id: 5 + JOIN TREE + QUERY id: 5, is_subquery: 1 + PROJECTION COLUMNS + x UInt64 + PROJECTION + LIST id: 6, nodes: 1 + COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 + JOIN TREE + TABLE_FUNCTION id: 8, table_function_name: numbers + ARGUMENTS + LIST id: 9, nodes: 1 + CONSTANT id: 10, constant_value: UInt64_3, constant_value_type: UInt8 + ORDER BY + LIST id: 11, nodes: 2 + SORT id: 12, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 13, function_name: exp, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 14, nodes: 1 + COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 + SORT id: 15, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 7, column_name: number, result_type: UInt64, source_id: 8 SELECT key, a, @@ -52,6 +153,53 @@ ALL FULL OUTER JOIN test AS t USING (key) ORDER BY key ASC, t.key ASC +QUERY id: 0 + PROJECTION COLUMNS + key UInt64 + a UInt8 + b String + c Float64 + PROJECTION + LIST id: 1, nodes: 4 + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: a, result_type: UInt8, source_id: 5 + COLUMN id: 6, column_name: b, result_type: String, source_id: 5 + COLUMN id: 7, column_name: c, result_type: Float64, source_id: 5 + JOIN TREE + JOIN id: 8, strictness: ALL, kind: FULL + LEFT TABLE EXPRESSION + QUERY id: 3, alias: s, is_subquery: 1 + PROJECTION COLUMNS + key UInt64 + PROJECTION + LIST id: 9, nodes: 1 + FUNCTION id: 10, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + COLUMN id: 12, column_name: number, result_type: UInt64, source_id: 13 + CONSTANT id: 14, 
constant_value: UInt64_2, constant_value_type: UInt8 + JOIN TREE + TABLE_FUNCTION id: 13, table_function_name: numbers + ARGUMENTS + LIST id: 15, nodes: 1 + CONSTANT id: 16, constant_value: UInt64_4, constant_value_type: UInt8 + RIGHT TABLE EXPRESSION + TABLE id: 5, alias: t, table_name: default.test + JOIN EXPRESSION + LIST id: 17, nodes: 1 + COLUMN id: 18, column_name: key, result_type: UInt64, source_id: 8 + EXPRESSION + LIST id: 19, nodes: 2 + COLUMN id: 20, column_name: key, result_type: UInt64, source_id: 3 + COLUMN id: 21, column_name: key, result_type: UInt64, source_id: 5 + ORDER BY + LIST id: 22, nodes: 2 + SORT id: 23, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 24, column_name: key, result_type: UInt64, source_id: 3 + SORT id: 25, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 26, column_name: key, result_type: UInt64, source_id: 5 SELECT key, a @@ -59,6 +207,24 @@ FROM test ORDER BY key ASC, a ASC +QUERY id: 0 + PROJECTION COLUMNS + key UInt64 + a UInt8 + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: a, result_type: UInt8, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.test + ORDER BY + LIST id: 5, nodes: 2 + SORT id: 6, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + SORT id: 7, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 4, column_name: a, result_type: UInt8, source_id: 3 SELECT key, a @@ -66,6 +232,31 @@ FROM test ORDER BY key ASC, exp(key + a) ASC +QUERY id: 0 + PROJECTION COLUMNS + key UInt64 + a UInt8 + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: a, result_type: UInt8, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.test + ORDER BY + LIST id: 5, nodes: 2 + SORT id: 6, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + SORT id: 7, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 8, function_name: exp, function_type: ordinary, result_type: Float64 + ARGUMENTS + LIST id: 9, nodes: 1 + FUNCTION id: 10, function_name: plus, function_type: ordinary, result_type: UInt64 + ARGUMENTS + LIST id: 11, nodes: 2 + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: a, result_type: UInt8, source_id: 3 [0,1,2] [0,1,2] [0,1,2] diff --git a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql index c810567f73a..9e87b5e1da4 100644 --- a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql +++ b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql @@ -6,17 +6,29 @@ INSERT INTO test SELECT number, number, toString(number), number from numbers(4) set optimize_redundant_functions_in_order_by = 1; SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(x)); +SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(x)) SETTINGS allow_experimental_analyzer=1; SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(exp(x))); +SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(exp(x))) SETTINGS allow_experimental_analyzer=1; SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY exp(x), x); +SELECT groupArray(x) from 
(SELECT number as x FROM numbers(3) ORDER BY exp(x), x) SETTINGS allow_experimental_analyzer=1; SELECT * FROM (SELECT number + 2 AS key FROM numbers(4)) s FULL JOIN test t USING(key) ORDER BY s.key, t.key; +SELECT * FROM (SELECT number + 2 AS key FROM numbers(4)) s FULL JOIN test t USING(key) ORDER BY s.key, t.key SETTINGS allow_experimental_analyzer=1; SELECT key, a FROM test ORDER BY key, a, exp(key + a); +SELECT key, a FROM test ORDER BY key, a, exp(key + a) SETTINGS allow_experimental_analyzer=1; SELECT key, a FROM test ORDER BY key, exp(key + a); +SELECT key, a FROM test ORDER BY key, exp(key + a) SETTINGS allow_experimental_analyzer=1; EXPLAIN SYNTAX SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(x)); +EXPLAIN QUERY TREE run_passes=1 SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(x)); EXPLAIN SYNTAX SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(exp(x))); +EXPLAIN QUERY TREE run_passes=1 SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(exp(x))); EXPLAIN SYNTAX SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY exp(x), x); +EXPLAIN QUERY TREE run_passes=1 SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY exp(x), x); EXPLAIN SYNTAX SELECT * FROM (SELECT number + 2 AS key FROM numbers(4)) s FULL JOIN test t USING(key) ORDER BY s.key, t.key; +EXPLAIN QUERY TREE run_passes=1 SELECT * FROM (SELECT number + 2 AS key FROM numbers(4)) s FULL JOIN test t USING(key) ORDER BY s.key, t.key; EXPLAIN SYNTAX SELECT key, a FROM test ORDER BY key, a, exp(key + a); +EXPLAIN QUERY TREE run_passes=1 SELECT key, a FROM test ORDER BY key, a, exp(key + a); EXPLAIN SYNTAX SELECT key, a FROM test ORDER BY key, exp(key + a); +EXPLAIN QUERY TREE run_passes=1 SELECT key, a FROM test ORDER BY key, exp(key + a); set optimize_redundant_functions_in_order_by = 0; From 0a42d698aca0ac9b7e9ae97a2074d164a4af1c22 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 2 Jan 2023 18:49:29 +0000 Subject: [PATCH 087/262] Fixes after review --- ...ptimizeRedundantFunctionsInOrderByPass.cpp | 17 ++++----- ..._redundant_functions_in_order_by.reference | 36 +++++++++++++++---- .../01323_redundant_functions_in_order_by.sql | 1 + 3 files changed, 39 insertions(+), 15 deletions(-) diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp index 140129a9fb2..aaa777de13c 100644 --- a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -23,14 +24,14 @@ class OptimizeRedundantFunctionsInOrderByVisitor : public InDepthQueryTreeVisito static constexpr RedundancyVerdict makeNonRedundant() noexcept { return { .redundant = false, .done = true }; } - std::unordered_set existing_keys; + std::unordered_set existing_keys; RedundancyVerdict isRedundantExpression(FunctionNode * function) { if (function->getArguments().getNodes().empty()) return makeNonRedundant(); - - if (!function->getFunction()->isDeterministicInScopeOfQuery()) + const auto & function_base = function->getFunction(); + if (!function_base || !function_base->isDeterministicInScopeOfQuery()) return makeNonRedundant(); // TODO: handle constants here @@ -85,8 +86,8 @@ public: return; } - QueryTreeNodes new_order_by; - new_order_by.reserve(order_by.getNodes().size()); + QueryTreeNodes new_order_by_nodes; + 
new_order_by_nodes.reserve(order_by.getNodes().size()); for (auto & elem : order_by.getNodes()) { @@ -101,12 +102,12 @@ public: existing_keys.insert(column->getColumnName()); } - new_order_by.push_back(elem); + new_order_by_nodes.push_back(elem); } existing_keys.clear(); - if (new_order_by.size() < order_by.getNodes().size()) - order_by.getNodes() = std::move(new_order_by); + if (new_order_by_nodes.size() < order_by.getNodes().size()) + order_by.getNodes() = std::move(new_order_by_nodes); } }; diff --git a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference index 88703af7def..ae160ed35d6 100644 --- a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference +++ b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference @@ -190,16 +190,16 @@ QUERY id: 0 COLUMN id: 18, column_name: key, result_type: UInt64, source_id: 8 EXPRESSION LIST id: 19, nodes: 2 - COLUMN id: 20, column_name: key, result_type: UInt64, source_id: 3 - COLUMN id: 21, column_name: key, result_type: UInt64, source_id: 5 + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + COLUMN id: 20, column_name: key, result_type: UInt64, source_id: 5 ORDER BY - LIST id: 22, nodes: 2 - SORT id: 23, sort_direction: ASCENDING, with_fill: 0 + LIST id: 21, nodes: 2 + SORT id: 22, sort_direction: ASCENDING, with_fill: 0 EXPRESSION - COLUMN id: 24, column_name: key, result_type: UInt64, source_id: 3 - SORT id: 25, sort_direction: ASCENDING, with_fill: 0 + COLUMN id: 23, column_name: key, result_type: UInt64, source_id: 3 + SORT id: 24, sort_direction: ASCENDING, with_fill: 0 EXPRESSION - COLUMN id: 26, column_name: key, result_type: UInt64, source_id: 5 + COLUMN id: 25, column_name: key, result_type: UInt64, source_id: 5 SELECT key, a @@ -257,6 +257,28 @@ QUERY id: 0 LIST id: 11, nodes: 2 COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 COLUMN id: 4, column_name: a, result_type: UInt8, source_id: 3 +QUERY id: 0 + PROJECTION COLUMNS + key UInt64 + PROJECTION + LIST id: 1, nodes: 1 + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + JOIN TREE + TABLE id: 3, table_name: default.test + GROUP BY + LIST id: 4, nodes: 1 + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 + ORDER BY + LIST id: 5, nodes: 2 + SORT id: 6, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + FUNCTION id: 7, function_name: avg, function_type: aggregate, result_type: Float64 + ARGUMENTS + LIST id: 8, nodes: 1 + COLUMN id: 9, column_name: a, result_type: UInt8, source_id: 3 + SORT id: 10, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 [0,1,2] [0,1,2] [0,1,2] diff --git a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql index 9e87b5e1da4..3573773b76c 100644 --- a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql +++ b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql @@ -29,6 +29,7 @@ EXPLAIN SYNTAX SELECT key, a FROM test ORDER BY key, a, exp(key + a); EXPLAIN QUERY TREE run_passes=1 SELECT key, a FROM test ORDER BY key, a, exp(key + a); EXPLAIN SYNTAX SELECT key, a FROM test ORDER BY key, exp(key + a); EXPLAIN QUERY TREE run_passes=1 SELECT key, a FROM test ORDER BY key, exp(key + a); +EXPLAIN QUERY TREE run_passes=1 SELECT key FROM test GROUP BY key ORDER BY avg(a), key; set 
optimize_redundant_functions_in_order_by = 0; From 09d9ac6c8ef253d12c34e393970f20a8205eef88 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Mon, 2 Jan 2023 19:09:45 +0000 Subject: [PATCH 088/262] Simplify code --- ...ptimizeRedundantFunctionsInOrderByPass.cpp | 28 ++++++------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp index aaa777de13c..29724de0f20 100644 --- a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp @@ -15,24 +15,15 @@ namespace class OptimizeRedundantFunctionsInOrderByVisitor : public InDepthQueryTreeVisitor { - - struct RedundancyVerdict - { - bool redundant = true; - bool done = false; - }; - - static constexpr RedundancyVerdict makeNonRedundant() noexcept { return { .redundant = false, .done = true }; } - std::unordered_set existing_keys; - RedundancyVerdict isRedundantExpression(FunctionNode * function) + bool isRedundantExpression(FunctionNode * function) { if (function->getArguments().getNodes().empty()) - return makeNonRedundant(); + return false; const auto & function_base = function->getFunction(); if (!function_base || !function_base->isDeterministicInScopeOfQuery()) - return makeNonRedundant(); + return false; // TODO: handle constants here for (auto & arg : function->getArguments().getNodes()) @@ -41,24 +32,23 @@ class OptimizeRedundantFunctionsInOrderByVisitor : public InDepthQueryTreeVisito { case QueryTreeNodeType::FUNCTION: { - auto subresult = isRedundantExpression(arg->as()); - if (subresult.done) - return subresult; + if (!isRedundantExpression(arg->as())) + return false; break; } case QueryTreeNodeType::COLUMN: { auto * column = arg->as(); if (!existing_keys.contains(column->getColumnName())) - return makeNonRedundant(); + return false; break; } default: - return makeNonRedundant(); + return false; } } - return {}; + return true; } public: @@ -94,7 +84,7 @@ public: auto & order_by_expr = elem->as()->getExpression(); if (auto * expr = order_by_expr->as()) { - if (isRedundantExpression(expr).redundant) + if (isRedundantExpression(expr)) continue; } else if (auto * column = order_by_expr->as()) From 1f89db78a5876c718a6f865814d2f6bb58319d66 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Tue, 3 Jan 2023 11:00:39 +0100 Subject: [PATCH 089/262] Added is_create_parameterized_view flag in ActionsVisitor, added functions for column manipulation for parameterized view in StorageView, updated tests to fix flaky test issues and addressed small review comments- 40907 Parameterized views as table functions --- .../sql-reference/statements/create/view.md | 2 +- src/Interpreters/ActionsVisitor.cpp | 6 +- src/Interpreters/ActionsVisitor.h | 4 +- src/Interpreters/ExpressionAnalyzer.cpp | 19 ++- src/Interpreters/ExpressionAnalyzer.h | 7 +- .../TranslateQualifiedNamesVisitor.cpp | 22 ++-- src/Interpreters/TreeRewriter.cpp | 22 +--- src/Parsers/FunctionParameterValuesVisitor.h | 1 - src/Storages/StorageSnapshot.cpp | 17 +-- src/Storages/StorageView.cpp | 37 ++++++ src/Storages/StorageView.h | 2 + .../02428_parameterized_view.reference | 4 +- .../0_stateless/02428_parameterized_view.sql | 123 +++++++++--------- 13 files changed, 149 insertions(+), 117 deletions(-) diff --git a/docs/en/sql-reference/statements/create/view.md b/docs/en/sql-reference/statements/create/view.md index b69d09dd266..91f542be285 100644 --- 
a/docs/en/sql-reference/statements/create/view.md +++ b/docs/en/sql-reference/statements/create/view.md @@ -37,7 +37,7 @@ SELECT a, b, c FROM (SELECT ...) ``` ## Parameterized View -This is similar to normal view but can be created with parameter instead of literals and can be used as table functions by substituting the values of the parametes. +Parametrized views are similar to normal views, but can be created with parameters which are not resolved immediately. These views can be used with table functions, which specify the name of the view as function name and the parameter values as its arguments. ``` sql CREATE VIEW view AS SELECT * FROM TABLE WHERE Column1={column1:datatype1} and Column2={column2:datatype2} ... diff --git a/src/Interpreters/ActionsVisitor.cpp b/src/Interpreters/ActionsVisitor.cpp index 7e4fa5d168a..e1af752b100 100644 --- a/src/Interpreters/ActionsVisitor.cpp +++ b/src/Interpreters/ActionsVisitor.cpp @@ -538,7 +538,8 @@ ActionsMatcher::Data::Data( bool only_consts_, bool create_source_for_in_, AggregationKeysInfo aggregation_keys_info_, - bool build_expression_with_window_functions_) + bool build_expression_with_window_functions_, + bool is_create_parameterized_view_) : WithContext(context_) , set_size_limit(set_size_limit_) , subquery_depth(subquery_depth_) @@ -552,6 +553,7 @@ ActionsMatcher::Data::Data( , actions_stack(std::move(actions_dag), context_) , aggregation_keys_info(aggregation_keys_info_) , build_expression_with_window_functions(build_expression_with_window_functions_) + , is_create_parameterized_view(is_create_parameterized_view_) , next_unique_suffix(actions_stack.getLastActions().getOutputs().size() + 1) { } @@ -1205,7 +1207,7 @@ void ActionsMatcher::visit(const ASTFunction & node, const ASTPtr & ast, Data & argument_types.push_back(column.type); argument_names.push_back(column.name); } - else if (query_parameter) + else if (data.is_create_parameterized_view && query_parameter) { const auto data_type = DataTypeFactory::instance().get(query_parameter->type); ColumnWithTypeAndName column(data_type,query_parameter->getColumnName()); diff --git a/src/Interpreters/ActionsVisitor.h b/src/Interpreters/ActionsVisitor.h index fea013fd075..0269371b46e 100644 --- a/src/Interpreters/ActionsVisitor.h +++ b/src/Interpreters/ActionsVisitor.h @@ -134,6 +134,7 @@ public: ScopeStack actions_stack; AggregationKeysInfo aggregation_keys_info; bool build_expression_with_window_functions; + bool is_create_parameterized_view; /* * Remember the last unique column suffix to avoid quadratic behavior @@ -154,7 +155,8 @@ public: bool only_consts_, bool create_source_for_in_, AggregationKeysInfo aggregation_keys_info_, - bool build_expression_with_window_functions_ = false); + bool build_expression_with_window_functions_ = false, + bool is_create_parameterized_view_ = false); /// Does result of the calculation already exists in the block. 
bool hasColumn(const String & column_name) const; diff --git a/src/Interpreters/ExpressionAnalyzer.cpp b/src/Interpreters/ExpressionAnalyzer.cpp index 1220eca4e45..98c2a49dbaa 100644 --- a/src/Interpreters/ExpressionAnalyzer.cpp +++ b/src/Interpreters/ExpressionAnalyzer.cpp @@ -159,11 +159,13 @@ ExpressionAnalyzer::ExpressionAnalyzer( size_t subquery_depth_, bool do_global, bool is_explain, - PreparedSetsPtr prepared_sets_) + PreparedSetsPtr prepared_sets_, + bool is_create_parameterized_view_) : WithContext(context_) , query(query_), settings(getContext()->getSettings()) , subquery_depth(subquery_depth_) , syntax(syntax_analyzer_result_) + , is_create_parameterized_view(is_create_parameterized_view_) { /// Cache prepared sets because we might run analysis multiple times if (prepared_sets_) @@ -556,7 +558,8 @@ void ExpressionAnalyzer::getRootActions(const ASTPtr & ast, bool no_makeset_for_ only_consts, !isRemoteStorage() /* create_source_for_in */, getAggregationKeysInfo(), - false /* build_expression_with_window_functions */); + false /* build_expression_with_window_functions */, + is_create_parameterized_view); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -575,7 +578,9 @@ void ExpressionAnalyzer::getRootActionsNoMakeSet(const ASTPtr & ast, ActionsDAGP true /* no_makeset */, only_consts, !isRemoteStorage() /* create_source_for_in */, - getAggregationKeysInfo()); + getAggregationKeysInfo(), + false /* build_expression_with_window_functions */, + is_create_parameterized_view); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -596,7 +601,9 @@ void ExpressionAnalyzer::getRootActionsForHaving( false /* no_makeset */, only_consts, true /* create_source_for_in */, - getAggregationKeysInfo()); + getAggregationKeysInfo(), + false /* build_expression_with_window_functions */, + is_create_parameterized_view); ActionsVisitor(visitor_data, log.stream()).visit(ast); actions = visitor_data.getActions(); } @@ -1319,7 +1326,7 @@ bool SelectQueryExpressionAnalyzer::appendWhere(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(columns_after_join); - getRootActions(select_query->where(), only_types, step.actions(), false/*only_consts*/); + getRootActions(select_query->where(), only_types, step.actions()); auto where_column_name = select_query->where()->getColumnName(); step.addRequiredOutput(where_column_name); @@ -1525,7 +1532,7 @@ void SelectQueryExpressionAnalyzer::appendSelect(ExpressionActionsChain & chain, ExpressionActionsChain::Step & step = chain.lastStep(aggregated_columns); - getRootActions(select_query->select(), only_types, step.actions(), false /*only_consts*/); + getRootActions(select_query->select(), only_types, step.actions()); for (const auto & child : select_query->select()->children) appendSelectSkipWindowExpressions(step, child); diff --git a/src/Interpreters/ExpressionAnalyzer.h b/src/Interpreters/ExpressionAnalyzer.h index ddb41a00f84..ba188fb0198 100644 --- a/src/Interpreters/ExpressionAnalyzer.h +++ b/src/Interpreters/ExpressionAnalyzer.h @@ -158,13 +158,15 @@ protected: size_t subquery_depth_, bool do_global_, bool is_explain_, - PreparedSetsPtr prepared_sets_); + PreparedSetsPtr prepared_sets_, + bool is_create_parameterized_view_ = false); ASTPtr query; const ExtractedSettings settings; size_t subquery_depth; TreeRewriterResultPtr syntax; + bool is_create_parameterized_view; const ConstStoragePtr & storage() const { return syntax->storage; } /// 
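Since this flag only matters while a parameterized view is being created, a short sketch of the flow it enables is given below. The names come from the 02428 test in this series, and the internal column naming follows the substitution helpers added to StorageView further down.

``` sql
-- CREATE runs with is_create_parameterized_view set, so {price:UInt64} is kept
-- as an unresolved query parameter while the SELECT is analyzed.
CREATE VIEW pv1 AS SELECT * FROM Catalog WHERE Price = {price:UInt64};

-- At SELECT time the view acts as a table function and the value is substituted;
-- internally it appears in column names as _CAST(20, 'UInt64'), which
-- replaceQueryParameterWithValue / replaceValueWithQueryParameter translate back and forth.
SELECT Price FROM pv1(price=20);
```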
The main table in FROM clause, if exists. const TableJoin & analyzedJoin() const { return *syntax->analyzed_join; } @@ -318,7 +320,8 @@ public: options_.subquery_depth, do_global_, options_.is_explain, - prepared_sets_) + prepared_sets_, + options_.is_create_parameterized_view) , metadata_snapshot(metadata_snapshot_) , required_result_columns(required_result_columns_) , query_options(options_) diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index ff97eccab58..e4ffa1ef3f1 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -20,6 +20,7 @@ #include #include #include +#include namespace DB @@ -251,20 +252,13 @@ void TranslateQualifiedNamesMatcher::visit(ASTExpressionList & node, const ASTPt if (first_table || !data.join_using_columns.contains(column.name)) { std::string column_name = column.name; - std::string::size_type pos = 0u; - for (const auto & parameter : data.parameter_values) - { - if ((pos = column_name.find(parameter.first)) != std::string::npos) - { - auto parameter_datatype_iterator = data.parameter_types.find(parameter.first); - if (parameter_datatype_iterator != data.parameter_types.end()) - { - String parameter_name("_CAST(" + parameter.second + ", '" + parameter_datatype_iterator->second + "')"); - column_name.replace(pos, parameter.first.size(), parameter_name); - break; - } - } - } + + /// replaceQueryParameterWithValue is used for parameterized view (which are created using query parameters + /// and SELECT is used with substitution of these query parameters ) + if (!data.parameter_values.empty()) + column_name + = StorageView::replaceQueryParameterWithValue(column_name, data.parameter_values, data.parameter_types); + addIdentifier(columns, table.table, column_name); } } diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 22df8c1cbe7..bd3472d5dc1 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -53,6 +53,7 @@ #include #include #include +#include #include @@ -1395,26 +1396,13 @@ TreeRewriterResultPtr TreeRewriter::analyzeSelect( result.window_function_asts = getWindowFunctions(query, *select_query); result.expressions_with_window_function = getExpressionsWithWindowFunctions(query); + /// replaceQueryParameterWithValue is used for parameterized view (which are created using query parameters + /// and SELECT is used with substitution of these query parameters ) + /// the replaced column names will be used in the next steps if (is_parameterized_view) { for (auto & column : result.source_columns) - { - std::string column_name = column.name; - std::string::size_type pos = 0u; - for (auto & parameter : parameter_values) - { - if ((pos = column_name.find(parameter.first)) != std::string::npos) - { - auto parameter_datatype_iterator = parameter_types.find(parameter.first); - if (parameter_datatype_iterator != parameter_types.end()) - { - String parameter_name("_CAST(" + parameter.second + ", '" + parameter_datatype_iterator->second + "')"); - column.name.replace(pos, parameter.first.size(), parameter_name); - break; - } - } - } - } + column.name = StorageView::replaceQueryParameterWithValue(column.name, parameter_values, parameter_types); } result.collectUsedColumns(query, true, settings.query_plan_optimize_primary_key); diff --git a/src/Parsers/FunctionParameterValuesVisitor.h b/src/Parsers/FunctionParameterValuesVisitor.h index f87257fc979..e6ce0e42d06 100644 
--- a/src/Parsers/FunctionParameterValuesVisitor.h +++ b/src/Parsers/FunctionParameterValuesVisitor.h @@ -1,6 +1,5 @@ #pragma once -#include #include #include diff --git a/src/Storages/StorageSnapshot.cpp b/src/Storages/StorageSnapshot.cpp index 00f5160ae11..31770c9a32b 100644 --- a/src/Storages/StorageSnapshot.cpp +++ b/src/Storages/StorageSnapshot.cpp @@ -3,6 +3,7 @@ #include #include #include +#include #include namespace DB @@ -112,25 +113,19 @@ NameAndTypePair StorageSnapshot::getColumn(const GetColumnsOptions & options, co return *column; } -Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names,const NameToNameMap & parameter_values) const +Block StorageSnapshot::getSampleBlockForColumns(const Names & column_names, const NameToNameMap & parameter_values) const { Block res; const auto & columns = getMetadataForQuery()->getColumns(); for (const auto & column_name : column_names) { + std::string substituted_column_name = column_name; + /// substituted_column_name is used for parameterized view (which are created using query parameters /// and SELECT is used with substitution of these query parameters ) - std::string substituted_column_name = column_name; - std::string::size_type pos = 0u; - for (const auto & parameter : parameter_values) - { - if ((pos = substituted_column_name.find("_CAST(" + parameter.second)) != std::string::npos) - { - substituted_column_name = substituted_column_name.substr(0,pos) + parameter.first + ")"; - break; - } - } + if (!parameter_values.empty()) + substituted_column_name = StorageView::replaceValueWithQueryParameter(column_name, parameter_values); auto column = columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, substituted_column_name); auto object_column = object_columns.tryGetColumnOrSubcolumn(GetColumnsOptions::All, substituted_column_name); diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index 2446659cebc..df74def509d 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -252,6 +252,43 @@ void StorageView::replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_ child = view_query; } +String StorageView::replaceQueryParameterWithValue(const String & column_name, const NameToNameMap & parameter_values, const NameToNameMap & parameter_types) +{ + std::string name = column_name; + std::string::size_type pos = 0u; + for (const auto & parameter : parameter_values) + { + if ((pos = name.find(parameter.first)) != std::string::npos) + { + auto parameter_datatype_iterator = parameter_types.find(parameter.first); + if (parameter_datatype_iterator != parameter_types.end()) + { + String parameter_name("_CAST(" + parameter.second + ", '" + parameter_datatype_iterator->second + "')"); + name.replace(pos, parameter.first.size(), parameter_name); + break; + } + else + throw Exception("Datatype not found for query parameter " + parameter.first, ErrorCodes::LOGICAL_ERROR); + } + } + return name; +} + +String StorageView::replaceValueWithQueryParameter(const String & column_name, const NameToNameMap & parameter_values) +{ + String name = column_name; + std::string::size_type pos = 0u; + for (const auto & parameter : parameter_values) + { + if ((pos = name.find("_CAST(" + parameter.second)) != std::string::npos) + { + name = name.substr(0,pos) + parameter.first + ")"; + break; + } + } + return name; +} + ASTPtr StorageView::restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name) { ASTTableExpression * table_expression = getFirstTableExpression(select_query); diff --git 
a/src/Storages/StorageView.h b/src/Storages/StorageView.h index 756106a95d1..6cd4bb171f5 100644 --- a/src/Storages/StorageView.h +++ b/src/Storages/StorageView.h @@ -44,6 +44,8 @@ public: static void replaceWithSubquery(ASTSelectQuery & outer_query, ASTPtr view_query, ASTPtr & view_name, const bool parameterized_view); static ASTPtr restoreViewName(ASTSelectQuery & select_query, const ASTPtr & view_name); + static String replaceQueryParameterWithValue (const String & column_name, const NameToNameMap & parameter_values, const NameToNameMap & parameter_types); + static String replaceValueWithQueryParameter (const String & column_name, const NameToNameMap & parameter_values); void setParameterValues (NameToNameMap parameter_values_) { diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference index 38355617601..da3ad8a9a3c 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.reference +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -9,9 +9,9 @@ SELECT FROM ( SELECT * - FROM default.Catalog + FROM default.test_02428_Catalog WHERE Price = _CAST(10, \'UInt64\') -) AS pv1 +) AS test_02428_pv1 50 10 20 diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index b2d4f99a5f1..feedaed0c44 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -1,87 +1,90 @@ -DROP VIEW IF EXISTS pv1; -DROP VIEW IF EXISTS pv2; -DROP VIEW IF EXISTS pv3; -DROP VIEW IF EXISTS pv4; -DROP VIEW IF EXISTS pv5; -DROP VIEW IF EXISTS pv6; -DROP VIEW IF EXISTS pv7; -DROP VIEW IF EXISTS v1; -DROP TABLE IF EXISTS Catalog; -DROP TABLE IF EXISTS system.pv1; -DROP TABLE IF EXISTS system.Catalog; +-- Tags: no-parallel -CREATE TABLE Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; +DROP VIEW IF EXISTS test_02428_pv1; +DROP VIEW IF EXISTS test_02428_pv2; +DROP VIEW IF EXISTS test_02428_pv3; +DROP VIEW IF EXISTS test_02428_pv4; +DROP VIEW IF EXISTS test_02428_pv5; +DROP VIEW IF EXISTS test_02428_pv6; +DROP VIEW IF EXISTS test_02428_pv7; +DROP VIEW IF EXISTS test_02428_v1; +DROP TABLE IF EXISTS test_02428_Catalog; +DROP TABLE IF EXISTS db_02428.pv1; +DROP TABLE IF EXISTS db_02428.Catalog; +DROP DATABASE IF EXISTS db_02428; -INSERT INTO Catalog VALUES ('Pen', 10, 3); -INSERT INTO Catalog VALUES ('Book', 50, 2); -INSERT INTO Catalog VALUES ('Paper', 20, 1); +CREATE TABLE test_02428_Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; -CREATE VIEW pv1 AS SELECT * FROM Catalog WHERE Price={price:UInt64}; -SELECT Price FROM pv1(price=20); -SELECT Price FROM `pv1`(price=20); +INSERT INTO test_02428_Catalog VALUES ('Pen', 10, 3); +INSERT INTO test_02428_Catalog VALUES ('Book', 50, 2); +INSERT INTO test_02428_Catalog VALUES ('Paper', 20, 1); + +CREATE VIEW test_02428_pv1 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64}; +SELECT Price FROM test_02428_pv1(price=20); +SELECT Price FROM `test_02428_pv1`(price=20); set param_p=10; -SELECT Price FROM pv1; -- { serverError UNKNOWN_QUERY_PARAMETER} -SELECT Price FROM pv1(price={p:UInt64}); +SELECT Price FROM test_02428_pv1; -- { serverError UNKNOWN_QUERY_PARAMETER} +SELECT Price FROM test_02428_pv1(price={p:UInt64}); set param_l=1; -SELECT Price FROM pv1(price=50) LIMIT ({l:UInt64}); +SELECT Price FROM test_02428_pv1(price=50) LIMIT ({l:UInt64}); -DETACH TABLE pv1; -ATTACH TABLE pv1; +DETACH TABLE 
test_02428_pv1; +ATTACH TABLE test_02428_pv1; -EXPLAIN SYNTAX SELECT * from pv1(price=10); +EXPLAIN SYNTAX SELECT * from test_02428_pv1(price=10); -INSERT INTO pv1 VALUES ('Bag', 50, 2); -- { serverError NOT_IMPLEMENTED} +INSERT INTO test_02428_pv1 VALUES ('Bag', 50, 2); -- { serverError NOT_IMPLEMENTED} SELECT Price FROM pv123(price=20); -- { serverError UNKNOWN_FUNCTION } -CREATE VIEW v1 AS SELECT * FROM Catalog WHERE Price=10; +CREATE VIEW test_02428_v1 AS SELECT * FROM test_02428_Catalog WHERE Price=10; -SELECT Price FROM v1(price=10); -- { serverError UNKNOWN_FUNCTION } +SELECT Price FROM test_02428_v1(price=10); -- { serverError UNKNOWN_FUNCTION } -CREATE VIEW pv2 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64}; -SELECT Price FROM pv2(price=50,quantity=2); +CREATE VIEW test_02428_pv2 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64}; +SELECT Price FROM test_02428_pv2(price=50,quantity=2); -SELECT Price FROM pv2(price=50); -- { serverError UNKNOWN_QUERY_PARAMETER} +SELECT Price FROM test_02428_pv2(price=50); -- { serverError UNKNOWN_QUERY_PARAMETER} -CREATE VIEW pv3 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity=3; -SELECT Price FROM pv3(price=10); +CREATE VIEW test_02428_pv3 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} AND Quantity=3; +SELECT Price FROM test_02428_pv3(price=10); -CREATE VIEW pv4 AS SELECT * FROM Catalog WHERE Price={price:UInt64} AND Quantity={price:UInt64}; -- {serverError DUPLICATE_COLUMN} +CREATE VIEW test_02428_pv4 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} AND Quantity={price:UInt64}; -- {serverError DUPLICATE_COLUMN} -CREATE DATABASE test_02428; +CREATE DATABASE db_02428; -CREATE TABLE test_02428.Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; +CREATE TABLE db_02428.Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; -INSERT INTO test_02428.Catalog VALUES ('Pen', 10, 3); -INSERT INTO test_02428.Catalog VALUES ('Book', 50, 2); -INSERT INTO test_02428.Catalog VALUES ('Paper', 20, 1); +INSERT INTO db_02428.Catalog VALUES ('Pen', 10, 3); +INSERT INTO db_02428.Catalog VALUES ('Book', 50, 2); +INSERT INTO db_02428.Catalog VALUES ('Paper', 20, 1); -CREATE VIEW test_02428.pv1 AS SELECT * FROM test_02428.Catalog WHERE Price={price:UInt64}; -SELECT Price FROM test_02428.pv1(price=20); -SELECT Price FROM `test_02428.pv1`(price=20); -- { serverError UNKNOWN_FUNCTION } +CREATE VIEW db_02428.pv1 AS SELECT * FROM db_02428.Catalog WHERE Price={price:UInt64}; +SELECT Price FROM db_02428.pv1(price=20); +SELECT Price FROM `db_02428.pv1`(price=20); -- { serverError UNKNOWN_FUNCTION } -INSERT INTO Catalog VALUES ('Book2', 30, 8); -INSERT INTO Catalog VALUES ('Book3', 30, 8); +INSERT INTO test_02428_Catalog VALUES ('Book2', 30, 8); +INSERT INTO test_02428_Catalog VALUES ('Book3', 30, 8); -CREATE VIEW pv5 AS SELECT Price FROM Catalog WHERE {price:UInt64} HAVING Quantity in (SELECT {quantity:UInt64}) LIMIT {limit:UInt64}; -SELECT Price FROM pv5(price=30, quantity=8,limit=1); +CREATE VIEW test_02428_pv5 AS SELECT Price FROM test_02428_Catalog WHERE {price:UInt64} HAVING Quantity in (SELECT {quantity:UInt64}) LIMIT {limit:UInt64}; +SELECT Price FROM test_02428_pv5(price=30, quantity=8,limit=1); -CREATE VIEW pv6 AS SELECT Price+{price:UInt64} FROM Catalog GROUP BY Price+{price:UInt64} ORDER BY Price+{price:UInt64}; -SELECT * FROM pv6(price=10); +CREATE VIEW test_02428_pv6 AS SELECT Price+{price:UInt64} FROM 
test_02428_Catalog GROUP BY Price+{price:UInt64} ORDER BY Price+{price:UInt64}; +SELECT * FROM test_02428_pv6(price=10); -CREATE VIEW pv7 AS SELECT Price/{price:UInt64} FROM Catalog ORDER BY Price; -SELECT * FROM pv7(price=10); +CREATE VIEW test_02428_pv7 AS SELECT Price/{price:UInt64} FROM test_02428_Catalog ORDER BY Price; +SELECT * FROM test_02428_pv7(price=10); -DROP VIEW pv1; -DROP VIEW pv2; -DROP VIEW pv3; -DROP VIEW pv5; -DROP VIEW pv6; -DROP VIEW pv7; -DROP VIEW v1; -DROP TABLE Catalog; -DROP TABLE test_02428.pv1; -DROP TABLE test_02428.Catalog; -DROP DATABASE test_02428; \ No newline at end of file +DROP VIEW test_02428_pv1; +DROP VIEW test_02428_pv2; +DROP VIEW test_02428_pv3; +DROP VIEW test_02428_pv5; +DROP VIEW test_02428_pv6; +DROP VIEW test_02428_pv7; +DROP VIEW test_02428_v1; +DROP TABLE test_02428_Catalog; +DROP TABLE db_02428.pv1; +DROP TABLE db_02428.Catalog; +DROP DATABASE db_02428; \ No newline at end of file From 11fa29d243fd45e0944b7e06b4c7f53a76238a07 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 3 Jan 2023 17:59:04 +0000 Subject: [PATCH 090/262] Get rid of recursion --- ...ptimizeRedundantFunctionsInOrderByPass.cpp | 101 ++++++++++-------- 1 file changed, 58 insertions(+), 43 deletions(-) diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp index 29724de0f20..8136052cbd5 100644 --- a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp @@ -15,42 +15,6 @@ namespace class OptimizeRedundantFunctionsInOrderByVisitor : public InDepthQueryTreeVisitor { - std::unordered_set existing_keys; - - bool isRedundantExpression(FunctionNode * function) - { - if (function->getArguments().getNodes().empty()) - return false; - const auto & function_base = function->getFunction(); - if (!function_base || !function_base->isDeterministicInScopeOfQuery()) - return false; - - // TODO: handle constants here - for (auto & arg : function->getArguments().getNodes()) - { - switch (arg->getNodeType()) - { - case QueryTreeNodeType::FUNCTION: - { - if (!isRedundantExpression(arg->as())) - return false; - break; - } - case QueryTreeNodeType::COLUMN: - { - auto * column = arg->as(); - if (!existing_keys.contains(column->getColumnName())) - return false; - break; - } - default: - return false; - } - } - - return true; - } - public: bool needChildVisit(QueryTreeNodePtr & node, QueryTreeNodePtr & /*parent*/) { @@ -82,14 +46,22 @@ public: for (auto & elem : order_by.getNodes()) { auto & order_by_expr = elem->as()->getExpression(); - if (auto * expr = order_by_expr->as()) + switch (order_by_expr->getNodeType()) { - if (isRedundantExpression(expr)) - continue; - } - else if (auto * column = order_by_expr->as()) - { - existing_keys.insert(column->getColumnName()); + case QueryTreeNodeType::FUNCTION: + { + if (isRedundantExpression(order_by_expr)) + continue; + break; + } + case QueryTreeNodeType::COLUMN: + { + auto * column = order_by_expr->as(); + existing_keys.insert(column->getColumnName()); + break; + } + default: + break; } new_order_by_nodes.push_back(elem); @@ -99,6 +71,49 @@ public: if (new_order_by_nodes.size() < order_by.getNodes().size()) order_by.getNodes() = std::move(new_order_by_nodes); } + +private: + std::unordered_set existing_keys; + + bool isRedundantExpression(QueryTreeNodePtr function) + { + QueryTreeNodes nodes_to_process{ function }; + while (!nodes_to_process.empty()) + { + auto node = 
nodes_to_process.back(); + nodes_to_process.pop_back(); + + // TODO: handle constants here + switch (node->getNodeType()) + { + case QueryTreeNodeType::FUNCTION: + { + auto * function_node = node->as(); + const auto & function_arguments = function_node->getArguments().getNodes(); + if (function_arguments.empty()) + return false; + const auto & function_base = function_node->getFunction(); + if (!function_base || !function_base->isDeterministicInScopeOfQuery()) + return false; + + // Process arguments in order + for (auto it = function_arguments.rbegin(); it != function_arguments.rend(); ++it) + nodes_to_process.push_back(*it); + break; + } + case QueryTreeNodeType::COLUMN: + { + auto * column = node->as(); + if (!existing_keys.contains(column->getColumnName())) + return false; + break; + } + default: + return false; + } + } + return true; + } }; } From 15f20cb4e87531e364bc4cd5e34f745fd5387245 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 3 Jan 2023 18:13:31 +0000 Subject: [PATCH 091/262] Fix column comparison --- ...ptimizeRedundantFunctionsInOrderByPass.cpp | 10 +++---- ..._redundant_functions_in_order_by.reference | 28 +++++++++++++++++++ .../01323_redundant_functions_in_order_by.sql | 9 ++++++ 3 files changed, 41 insertions(+), 6 deletions(-) diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp index 8136052cbd5..105fc0ef00a 100644 --- a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp @@ -1,7 +1,7 @@ -#include #include #include #include +#include #include #include #include @@ -56,8 +56,7 @@ public: } case QueryTreeNodeType::COLUMN: { - auto * column = order_by_expr->as(); - existing_keys.insert(column->getColumnName()); + existing_keys.insert(order_by_expr); break; } default: @@ -73,7 +72,7 @@ public: } private: - std::unordered_set existing_keys; + QueryTreeNodePtrWithHashSet existing_keys; bool isRedundantExpression(QueryTreeNodePtr function) { @@ -103,8 +102,7 @@ private: } case QueryTreeNodeType::COLUMN: { - auto * column = node->as(); - if (!existing_keys.contains(column->getColumnName())) + if (!existing_keys.contains(node)) return false; break; } diff --git a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference index ae160ed35d6..c69f8bb2c46 100644 --- a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference +++ b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.reference @@ -279,6 +279,34 @@ QUERY id: 0 SORT id: 10, sort_direction: ASCENDING, with_fill: 0 EXPRESSION COLUMN id: 2, column_name: key, result_type: UInt64, source_id: 3 +QUERY id: 0 + PROJECTION COLUMNS + t1.id UInt64 + t2.id UInt64 + PROJECTION + LIST id: 1, nodes: 2 + COLUMN id: 2, column_name: id, result_type: UInt64, source_id: 3 + COLUMN id: 4, column_name: id, result_type: UInt64, source_id: 5 + JOIN TREE + JOIN id: 6, strictness: ALL, kind: INNER + LEFT TABLE EXPRESSION + TABLE id: 3, table_name: default.t1 + RIGHT TABLE EXPRESSION + TABLE id: 5, table_name: default.t2 + JOIN EXPRESSION + FUNCTION id: 7, function_name: equals, function_type: ordinary, result_type: UInt8 + ARGUMENTS + LIST id: 8, nodes: 2 + COLUMN id: 9, column_name: id, result_type: UInt64, source_id: 3 + COLUMN id: 10, column_name: id, result_type: UInt64, source_id: 5 + ORDER BY + LIST id: 11, nodes: 2 + SORT id: 12, sort_direction: 
ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 13, column_name: id, result_type: UInt64, source_id: 3 + SORT id: 14, sort_direction: ASCENDING, with_fill: 0 + EXPRESSION + COLUMN id: 15, column_name: id, result_type: UInt64, source_id: 5 [0,1,2] [0,1,2] [0,1,2] diff --git a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql index 3573773b76c..5cdc4164d56 100644 --- a/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql +++ b/tests/queries/0_stateless/01323_redundant_functions_in_order_by.sql @@ -31,6 +31,13 @@ EXPLAIN SYNTAX SELECT key, a FROM test ORDER BY key, exp(key + a); EXPLAIN QUERY TREE run_passes=1 SELECT key, a FROM test ORDER BY key, exp(key + a); EXPLAIN QUERY TREE run_passes=1 SELECT key FROM test GROUP BY key ORDER BY avg(a), key; +DROP TABLE IF EXISTS t1; +DROP TABLE IF EXISTS t2; +CREATE TABLE t1 (id UInt64) ENGINE = MergeTree() ORDER BY id; +CREATE TABLE t2 (id UInt64) ENGINE = MergeTree() ORDER BY id; + +EXPLAIN QUERY TREE run_passes=1 SELECT * FROM t1 INNER JOIN t2 ON t1.id = t2.id ORDER BY t1.id, t2.id; + set optimize_redundant_functions_in_order_by = 0; SELECT groupArray(x) from (SELECT number as x FROM numbers(3) ORDER BY x, exp(x)); @@ -46,4 +53,6 @@ EXPLAIN SYNTAX SELECT * FROM (SELECT number + 2 AS key FROM numbers(4)) s FULL J EXPLAIN SYNTAX SELECT key, a FROM test ORDER BY key, a, exp(key + a); EXPLAIN SYNTAX SELECT key, a FROM test ORDER BY key, exp(key + a); +DROP TABLE t1; +DROP TABLE t2; DROP TABLE test; From 0e743254dfd253cba19c5772c23398cf969640ae Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Wed, 4 Jan 2023 08:19:15 +0100 Subject: [PATCH 092/262] Removed no-parallel tag from test and removed an exception from StorageView - 40907 Parameterized views as table functions --- src/Storages/StorageView.cpp | 2 -- tests/queries/0_stateless/02428_parameterized_view.sql | 2 -- 2 files changed, 4 deletions(-) diff --git a/src/Storages/StorageView.cpp b/src/Storages/StorageView.cpp index df74def509d..13202d8f782 100644 --- a/src/Storages/StorageView.cpp +++ b/src/Storages/StorageView.cpp @@ -267,8 +267,6 @@ String StorageView::replaceQueryParameterWithValue(const String & column_name, c name.replace(pos, parameter.first.size(), parameter_name); break; } - else - throw Exception("Datatype not found for query parameter " + parameter.first, ErrorCodes::LOGICAL_ERROR); } } return name; diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql index feedaed0c44..fbc1d8b2970 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ b/tests/queries/0_stateless/02428_parameterized_view.sql @@ -1,5 +1,3 @@ --- Tags: no-parallel - DROP VIEW IF EXISTS test_02428_pv1; DROP VIEW IF EXISTS test_02428_pv2; DROP VIEW IF EXISTS test_02428_pv3; From 6f0c0252f03dc504b454c6a36d792ea1f4da2363 Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 4 Jan 2023 14:55:32 +0000 Subject: [PATCH 093/262] Fix tests --- tests/queries/0_stateless/02500_numbers_inference.reference | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/02500_numbers_inference.reference b/tests/queries/0_stateless/02500_numbers_inference.reference index bff7211f66a..7e1bb6510bb 100644 --- a/tests/queries/0_stateless/02500_numbers_inference.reference +++ b/tests/queries/0_stateless/02500_numbers_inference.reference @@ -16,5 +16,5 @@ c1 Nullable(Float64) c1 Nullable(Float64) c1 
Array(Nullable(Float64)) c1 Array(Nullable(Float64)) -c1 Array(Nullable(Float64)) -c1 Array(Nullable(Float64)) +c1 Nullable(String) +c1 Nullable(String) From 1f3d75cbf256c493b248dcddfb729fd0e4fb55fc Mon Sep 17 00:00:00 2001 From: avogar Date: Wed, 4 Jan 2023 14:58:17 +0000 Subject: [PATCH 094/262] Better --- src/Formats/SchemaInferenceUtils.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Formats/SchemaInferenceUtils.cpp b/src/Formats/SchemaInferenceUtils.cpp index 5db9b04a6c1..6d0853f6169 100644 --- a/src/Formats/SchemaInferenceUtils.cpp +++ b/src/Formats/SchemaInferenceUtils.cpp @@ -98,14 +98,14 @@ namespace void transformIntegersAndFloatsToFloats(DataTypes & data_types, TypeIndexesSet & type_indexes) { bool have_floats = type_indexes.contains(TypeIndex::Float64); - bool have_integers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64); + bool have_integers = type_indexes.contains(TypeIndex::Int64) || type_indexes.contains(TypeIndex::UInt64); if (!have_integers || !have_floats) return; for (auto & type : data_types) { WhichDataType which(type); - if (which.isFloat64() || which.isInt64() || which.isUInt64()) + if (which.isInt64() || which.isUInt64()) type = std::make_shared(); } From 712de132d9a9927e8e93e079e81acccbb441cc6c Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Wed, 4 Jan 2023 16:05:05 +0000 Subject: [PATCH 095/262] Fix special build --- src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp index 105fc0ef00a..8c9db191bbd 100644 --- a/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp +++ b/src/Analyzer/Passes/OptimizeRedundantFunctionsInOrderByPass.cpp @@ -16,7 +16,7 @@ namespace class OptimizeRedundantFunctionsInOrderByVisitor : public InDepthQueryTreeVisitor { public: - bool needChildVisit(QueryTreeNodePtr & node, QueryTreeNodePtr & /*parent*/) + static bool needChildVisit(QueryTreeNodePtr & node, QueryTreeNodePtr & /*parent*/) { if (node->as()) return false; From baf6297f1d9686f7a1fe949ebab405317a1b9722 Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 4 Jan 2023 21:50:57 +0000 Subject: [PATCH 096/262] add fast and cancellable shared_mutex alternatives --- src/Common/ErrorCodes.cpp | 1 + src/Common/Threading.cpp | 511 +++++++++++++++++++++++++++ src/Common/Threading.h | 282 +++++++++++++++ src/Common/tests/gtest_threading.cpp | 369 +++++++++++++++++++ 4 files changed, 1163 insertions(+) create mode 100644 src/Common/Threading.cpp create mode 100644 src/Common/Threading.h create mode 100644 src/Common/tests/gtest_threading.cpp diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 95333eccbcd..dec63d114eb 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -643,6 +643,7 @@ M(672, INVALID_SCHEDULER_NODE) \ M(673, RESOURCE_ACCESS_DENIED) \ M(674, RESOURCE_NOT_FOUND) \ + M(675, THREAD_WAS_CANCELLED) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/Threading.cpp b/src/Common/Threading.cpp new file mode 100644 index 00000000000..4d135ef93a6 --- /dev/null +++ b/src/Common/Threading.cpp @@ -0,0 +1,511 @@ +#include +#include + +#ifdef OS_LINUX /// Because of 'sigqueue' functions, RT signals and futex. 
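// The futex wrappers defined below rely on the usual futex(2) contract (an informal sketch, not a full specification):
//   FUTEX_WAIT blocks only while the 32-bit word at `address` still holds `value`, and may return spuriously;
//   FUTEX_WAKE wakes at most `count` threads currently blocked on `address`.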
+ +#include + +#include + +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int THREAD_WAS_CANCELLED; +} + +namespace +{ + inline long futexWait(void * address, UInt32 value) + { + return syscall(SYS_futex, address, FUTEX_WAIT_PRIVATE, value, nullptr, nullptr, 0); + } + + inline long futexWake(void * address, int count) + { + return syscall(SYS_futex, address, FUTEX_WAKE_PRIVATE, count, nullptr, nullptr, 0); + } + + // inline void waitFetch(std::atomic & address, UInt32 & value) + // { + // futexWait(&address, value); + // value = address.load(); + // } + + // inline void wakeOne(std::atomic & address) + // { + // futexWake(&address, 1); + // } + + // inline void wakeAll(std::atomic & address) + // { + // futexWake(&address, INT_MAX); + // } + + inline constexpr UInt32 lowerValue(UInt64 value) + { + return UInt32(value & 0xffffffffull); + } + + inline constexpr UInt32 upperValue(UInt64 value) + { + return UInt32(value >> 32ull); + } + + inline UInt32 * lowerAddress(void * address) + { + return reinterpret_cast(address) + (std::endian::native == std::endian::big); + } + + inline UInt32 * upperAddress(void * address) + { + return reinterpret_cast(address) + (std::endian::native == std::endian::little); + } + + inline void waitLowerFetch(std::atomic & address, UInt64 & value) + { + futexWait(lowerAddress(&address), lowerValue(value)); + value = address.load(); + } + + inline bool cancellableWaitLowerFetch(std::atomic & address, UInt64 & value) + { + bool res = CancelToken::local().wait(lowerAddress(&address), lowerValue(value)); + value = address.load(); + return res; + } + + inline void wakeLowerOne(std::atomic & address) + { + syscall(SYS_futex, lowerAddress(&address), FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0); + } + + // inline void wakeLowerAll(std::atomic & address) + // { + // syscall(SYS_futex, lowerAddress(&address), FUTEX_WAKE_PRIVATE, INT_MAX, nullptr, nullptr, 0); + // } + + inline void waitUpperFetch(std::atomic & address, UInt64 & value) + { + futexWait(upperAddress(&address), upperValue(value)); + value = address.load(); + } + + inline bool cancellableWaitUpperFetch(std::atomic & address, UInt64 & value) + { + bool res = CancelToken::local().wait(upperAddress(&address), upperValue(value)); + value = address.load(); + return res; + } + + // inline void wakeUpperOne(std::atomic & address) + // { + // syscall(SYS_futex, upperAddress(&address), FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0); + // } + + inline void wakeUpperAll(std::atomic & address) + { + syscall(SYS_futex, upperAddress(&address), FUTEX_WAKE_PRIVATE, INT_MAX, nullptr, nullptr, 0); + } +} + +CancelToken::Registry::Registry() +{ + // setupCancelSignalHandler(); +} + +void CancelToken::Registry::insert(CancelToken * token) +{ + std::lock_guard lock(mutex); + threads[token->thread_id] = token; +} + +void CancelToken::Registry::remove(CancelToken * token) +{ + std::lock_guard lock(mutex); + threads.erase(token->thread_id); +} + +void CancelToken::Registry::signal(UInt64 tid) +{ + std::lock_guard lock(mutex); + if (auto it = threads.find(tid); it != threads.end()) + it->second->signalImpl(); +} + +void CancelToken::Registry::signal(UInt64 tid, int code, const String & message) +{ + std::lock_guard lock(mutex); + if (auto it = threads.find(tid); it != threads.end()) + it->second->signalImpl(code, message); +} + +CancelToken::Registry & CancelToken::Registry::instance() +{ + static Registry registry; + return registry; +} + +CancelToken::CancelToken() + : state(disabled) + , 
thread_id(getThreadId()) +{ + Registry::instance().insert(this); +} + +CancelToken::~CancelToken() +{ + Registry::instance().remove(this); +} + +void CancelToken::signal(UInt64 tid) +{ + Registry::instance().signal(tid); +} + +void CancelToken::signal(UInt64 tid, int code, const String & message) +{ + Registry::instance().signal(tid, code, message); +} + +bool CancelToken::wait(UInt32 * address, UInt32 value) +{ + chassert((reinterpret_cast(address) & canceled) == 0); // An `address` must be 2-byte aligned + if (value & signaled) // Can happen after spurious wake-up due to cancel of other thread + { + // static std::atomic x{0}; + // if (x++ > 5) + // sleep(3600); + return true; // Spin-wait unless signal is handled + } + + UInt64 s = state.load(); + while (true) + { + DBG("s={}", s); + if (s & disabled) + { + // Start non-cancellable wait on futex. Spurious wake-up is possible. + futexWait(address, value); + return true; // Disabled - true is forced + } + if (s & canceled) + return false; // Has already been canceled + if (state.compare_exchange_strong(s, reinterpret_cast(address))) + break; // This futex has been "acquired" by this token + } + + // Start cancellable wait. Spurious wake-up is possible. + DBG("start cancellable wait address={} value={}", static_cast(address), value); + futexWait(address, value); + + // "Release" futex and check for cancellation + s = state.load(); + while (true) + { + DBG("finish cancellable wait, s={}", s); + chassert((s & disabled) != disabled); // `disable()` must not be called from another thread + if (s & canceled) + { + if (s == canceled) + break; // Signaled; futex "release" has been done by the signaling thread + else + { + s = state.load(); + continue; // To avoid race (may lead to futex destruction) we have to wait for signaling thread to finish + } + } + if (state.compare_exchange_strong(s, 0)) + return true; // There was no cancellation; futex "released" + } + + // Reset signaled bit + reinterpret_cast *>(address)->fetch_and(~signaled); + return false; +} + +void CancelToken::raise() +{ + std::unique_lock lock(signal_mutex); + DBG("raise code={} msg={}", exception_code, exception_message); + if (exception_code != 0) + throw DB::Exception( + std::exchange(exception_code, 0), + std::exchange(exception_message, {})); + else + throw DB::Exception(ErrorCodes::THREAD_WAS_CANCELLED, "Thread was cancelled"); +} + +void CancelToken::notifyOne(UInt32 * address) +{ + futexWake(address, 1); +} + +void CancelToken::notifyAll(UInt32 * address) +{ + futexWake(address, INT_MAX); +} + +void CancelToken::signalImpl() +{ + signalImpl(0, {}); +} + +std::mutex CancelToken::signal_mutex; + +void CancelToken::signalImpl(int code, const String & message) +{ + // Serialize all signaling threads to avoid races due to concurrent signal()/raise() calls + std::unique_lock lock(signal_mutex); + + UInt64 s = state.load(); + while (true) + { + DBG("s={}", s); + if (s & canceled) + return; // Already cancelled - don't signal twice + if (state.compare_exchange_strong(s, s | canceled)) + break; // It is the cancelling thread - should deliver signal if necessary + } + + DBG("cancel tid={} code={} msg={}", thread_id, code, message); + exception_code = code; + exception_message = message; + + if ((s & disabled) == disabled) + return; // Cancellation is disabled - just signal token for later, but don't wake + std::atomic * address = reinterpret_cast *>(s & disabled); + DBG("address={}", static_cast(address)); + if (address == nullptr) + return; // Thread is currently not waiting on 
futex - wake-up not required + + // Set signaled bit + UInt32 value = address->load(); + while (true) + { + if (value & signaled) // Already signaled, just spin-wait until previous signal is handled by waiter + value = address->load(); + else if (address->compare_exchange_strong(value, value | signaled)) + break; + } + + // Wake all threads waiting on `address`, one of them will be cancelled and others will get spurious wake-ups + // Woken canceled thread will reset signaled bit + DBG("wake"); + futexWake(address, INT_MAX); + + // Signaling thread must remove address from state to notify canceled thread that `futexWake()` is done, thus `wake()` can return. + // Otherwise we may have race condition: signaling thread may try to wake futex that has been already destructed. + state.store(canceled); +} + +Cancellable::Cancellable() +{ + CancelToken::local().reset(); +} + +Cancellable::~Cancellable() +{ + CancelToken::local().disable(); +} + +NotCancellable::NotCancellable() +{ + CancelToken::local().disable(); +} + +NotCancellable::~NotCancellable() +{ + CancelToken::local().enable(); +} + +CancellableSharedMutex::CancellableSharedMutex() + : state(0) + , waiters(0) +{} + +void CancellableSharedMutex::lock() +{ + UInt64 value = state.load(); + while (true) + { + DBG("#A r={} w={} rs={} ws={}", value & readers, (value & writers) != 0, (value & readers_signaled) != 0, (value & writers_signaled) != 0); + if (value & writers) + { + waiters++; + if (!cancellableWaitUpperFetch(state, value)) + { + waiters--; + CancelToken::local().raise(); + } + else + waiters--; + } + else if (state.compare_exchange_strong(value, value | writers)) + break; + } + + value |= writers; + while (value & readers) + { + DBG("#B r={} w={} rs={} ws={}", value & readers, (value & writers) != 0, (value & readers_signaled) != 0, (value & writers_signaled) != 0); + if (!cancellableWaitLowerFetch(state, value)) + { + state.fetch_and(~writers); + wakeUpperAll(state); + CancelToken::local().raise(); + } + } +} + +bool CancellableSharedMutex::try_lock() +{ + UInt64 value = state.load(); + if ((value & (readers | writers)) == 0 && state.compare_exchange_strong(value, value | writers)) + return true; + return false; +} + +void CancellableSharedMutex::unlock() +{ + UInt64 value = state.fetch_and(~writers); + DBG("r={} w={} rs={} ws={}", value & readers, (value & writers) != 0, (value & readers_signaled) != 0, (value & writers_signaled) != 0); + if (waiters) + wakeUpperAll(state); +} + +void CancellableSharedMutex::lock_shared() +{ + UInt64 value = state.load(); + while (true) + { + DBG("r={} w={} rs={} ws={}", value & readers, (value & writers) != 0, (value & readers_signaled) != 0, (value & writers_signaled) != 0); + if (value & writers) + { + waiters++; + if (!cancellableWaitUpperFetch(state, value)) + { + waiters--; + CancelToken::local().raise(); + } + else + waiters--; + } + else if (state.compare_exchange_strong(value, value + 1)) // overflow is not realistic + break; + } +} + +bool CancellableSharedMutex::try_lock_shared() +{ + UInt64 value = state.load(); + if (!(value & writers) && state.compare_exchange_strong(value, value + 1)) // overflow is not realistic + return true; + return false; +} + +void CancellableSharedMutex::unlock_shared() +{ + UInt64 value = state.fetch_sub(1) - 1; + DBG("r={} w={} rs={} ws={}", value & readers, (value & writers) != 0, (value & readers_signaled) != 0, (value & writers_signaled) != 0); + if ((value & (writers | readers)) == writers) // If writer is waiting and no more readers + 
wakeLowerOne(state); // Wake writer +} + +FastSharedMutex::FastSharedMutex() + : state(0) + , waiters(0) +{} + +void FastSharedMutex::lock() +{ + UInt64 value = state.load(); + while (true) + { + if (value & writers) + { + waiters++; + waitUpperFetch(state, value); + waiters--; + } + else if (state.compare_exchange_strong(value, value | writers)) + break; + } + + value |= writers; + while (value & readers) + waitLowerFetch(state, value); +} + +bool FastSharedMutex::try_lock() +{ + UInt64 value = 0; + if (state.compare_exchange_strong(value, writers)) + return true; + return false; +} + +void FastSharedMutex::unlock() +{ + state.store(0); + if (waiters) + wakeUpperAll(state); +} + +void FastSharedMutex::lock_shared() +{ + UInt64 value = state.load(); + while (true) + { + if (value & writers) + { + waiters++; + waitUpperFetch(state, value); + waiters--; + } + else if (state.compare_exchange_strong(value, value + 1)) + break; + } +} + +bool FastSharedMutex::try_lock_shared() +{ + UInt64 value = state.load(); + if (!(value & writers) && state.compare_exchange_strong(value, value + 1)) + return true; + return false; +} + +void FastSharedMutex::unlock_shared() +{ + UInt64 value = state.fetch_sub(1) - 1; + if (value == writers) + wakeLowerOne(state); // Wake writer +} + +} + +#else + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int THREAD_WAS_CANCELLED; +} + +void CancelToken::raise() +{ + throw DB::Exception(ErrorCodes::THREAD_WAS_CANCELLED, "Thread was cancelled"); +} + +} + +#endif diff --git a/src/Common/Threading.h b/src/Common/Threading.h new file mode 100644 index 00000000000..14743def476 --- /dev/null +++ b/src/Common/Threading.h @@ -0,0 +1,282 @@ +#pragma once + +#include +#include + +#ifdef OS_LINUX /// Because of futex + +#include +#include +#include + + +// TODO(serxa): for debug only, remove it +#if 0 +#include +#include +#define DBG(...) std::cout << fmt::format("\033[01;3{}m[{}] {} {} {}\033[00m {}:{}\n", 1 + getThreadId() % 8, getThreadId(), reinterpret_cast(this), fmt::format(__VA_ARGS__), __PRETTY_FUNCTION__, __FILE__, __LINE__) +#else +#include +#define DBG(...) UNUSED(__VA_ARGS__) +#endif + +namespace DB +{ + +// Scoped object, enabling thread cancellation (cannot be nested) +struct Cancellable +{ + Cancellable(); + ~Cancellable(); +}; + +// Scoped object, disabling thread cancellation (cannot be nested; must be inside `Cancellable` region) +struct NotCancellable +{ + NotCancellable(); + ~NotCancellable(); +}; + +// Responsible for synchronization needed to deliver thread cancellation signal. +// Basic building block for cancallable synchronization primitives. +// Allows to perform cancellable wait on memory addresses (think futex) +class CancelToken +{ +public: + CancelToken(); + CancelToken(const CancelToken &) = delete; + CancelToken(CancelToken &&) = delete; + CancelToken & operator=(const CancelToken &) = delete; + ~CancelToken(); + + // Returns token for the current thread + static CancelToken & local() + { + static thread_local CancelToken token; + return token; + } + + // Cancellable wait on memory address (futex word). + // Thread will do atomic compare-and-sleep `*address == value`. Waiting will continue until `notify_one()` + // or `notify_all()` will be called with the same `address` or calling thread will be canceled using `signal()`. + // Note that spurious wake-ups are also possible due to cancellation of other waiters on the same `address`. + // WARNING: `address` must be 2-byte aligned and `value` highest bit must be zero. 
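    // Typical call pattern (a sketch of how the primitives below use it, not a required idiom):
    //     if (!CancelToken::local().wait(address, value))
    //         CancelToken::local().raise(); // rethrow the exception delivered by signal()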
+ // Return value: + // true - woken by either notify or spurious wakeup; + // false - iff cancelation signal has been received. + // Implementation details: + // It registers `address` inside token's `state` to allow other threads to wake this thread and deliver cancellation signal. + // Highest bit of `*address` is used for guarantied delivery of the signal, but is guaranteed to be zero on return due to cancellation. + // Intented to be called only by thread associated with this token. + bool wait(UInt32 * address, UInt32 value); + + // Throws `DB::Exception` received from `signal()`. Call it if `wait()` returned false. + // Intented to be called only by thread associated with this token. + [[noreturn]] void raise(); + + // Regular wake by address (futex word). It does not interact with token in any way. We have it here to complement `wait()`. + // Can be called from any thread. + static void notifyOne(UInt32 * address); + static void notifyAll(UInt32 * address); + + // Send cancel signal to thread with specified `tid`. + // If thread was waiting using `wait()` it will be woken up (unless cancellation is disabled). + // Can be called from any thread. + static void signal(UInt64 tid); + static void signal(UInt64 tid, int code, const String & message); + + // Flag used to deliver cancellation into memory address to wake a thread. + // Note that most significat bit at `addresses` to be used with `wait()` is reserved. + static constexpr UInt32 signaled = 1u << 31u; + +private: + friend struct Cancellable; + friend struct NotCancellable; + + // Restores initial state for token to be reused. See `Cancellable` struct. + // Intented to be called only by thread associated with this token. + void reset() + { + state.store(0); + } + + // Enable thread cancellation. See `NotCancellable` struct. + // Intented to be called only by thread associated with this token. + void enable() + { + chassert((state.load() & disabled) == disabled); + state.fetch_and(~disabled); + } + + // Disable thread cancellation. See `NotCancellable` struct. + // Intented to be called only by thread associated with this token. + void disable() + { + chassert((state.load() & disabled) == 0); + state.fetch_or(disabled); + } + + // Singleton. Maps thread IDs to tokens. + struct Registry; + friend struct Registry; + struct Registry + { + Registry(); + + std::mutex mutex; + std::unordered_map threads; // By thread ID + + void insert(CancelToken * token); + void remove(CancelToken * token); + void signal(UInt64 tid); + void signal(UInt64 tid, int code, const String & message); + + static Registry & instance(); + }; + + // Cancels this token and wakes thread if necessary. + // Can be called from any thread. + void signalImpl(); + void signalImpl(int code, const String & message); + + // Lower bit: cancel signal received flag + static constexpr UInt64 canceled = 1; + + // Upper bits - possible values: + // 1) all zeros: token is enabed, i.e. wait() call can return false, thread is not waiting on any address; + // 2) all ones: token is disabled, i.e. wait() call cannot be cancelled; + // 3) specific `address`: token is enabled and thread is currently waiting on this `address`. 
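    // Worked illustration of the encoding (example values, following the layout described above):
    //     state == 0x0000000000000000 -> enabled, not waiting, no cancel signal
    //     state == 0xFFFFFFFFFFFFFFFE -> disabled (i.e. ~canceled), no cancel signal
    //     state == UInt64(address)    -> enabled and waiting on `address` (2-byte aligned, so the low bit stays 0)
    //     state | canceled            -> the low bit marks a received cancel signal on top of any state above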
+ static constexpr UInt64 disabled = ~canceled; + static_assert(sizeof(UInt32 *) == sizeof(UInt64)); // State must be able to hold an address + + // All signal handling logic should be globally serialized using this mutex + static std::mutex signal_mutex; + + // Cancellation state + alignas(64) std::atomic state; + [[maybe_unused]] char padding[64 - sizeof(state)]; + + // Cancellation exception + int exception_code; + String exception_message; + + // Token is permanently attached to a single thread. There is one-to-one mapping between threads and tokens. + const UInt64 thread_id; +}; + +class CancellableSharedMutex +{ +public: + CancellableSharedMutex(); + ~CancellableSharedMutex() = default; + CancellableSharedMutex(const CancellableSharedMutex &) = delete; + CancellableSharedMutex & operator=(const CancellableSharedMutex &) = delete; + + // Exclusive ownership + void lock(); + bool try_lock(); + void unlock(); + + // Shared ownership + void lock_shared(); + bool try_lock_shared(); + void unlock_shared(); + +private: + // State 64-bits layout: + // 1b - 31b - 1b - 31b + // signaled - writers - signaled - readers + // 63------------------------------------0 + // Two 32-bit words are used for cancellable waiting, so each has its own separate signaled bit + static constexpr UInt64 readers = (1ull << 32ull) - 1ull - CancelToken::signaled; + static constexpr UInt64 readers_signaled = CancelToken::signaled; + static constexpr UInt64 writers = readers << 32ull; + static constexpr UInt64 writers_signaled = readers_signaled << 32ull; + + alignas(64) std::atomic state; + std::atomic waiters; +}; + +class FastSharedMutex +{ +public: + FastSharedMutex(); + ~FastSharedMutex() = default; + FastSharedMutex(const FastSharedMutex &) = delete; + FastSharedMutex & operator=(const FastSharedMutex &) = delete; + + // Exclusive ownership + void lock(); + bool try_lock(); + void unlock(); + + // Shared ownership + void lock_shared(); + bool try_lock_shared(); + void unlock_shared(); + +private: + static constexpr UInt64 readers = (1ull << 32ull) - 1ull; // Lower 32 bits of state + static constexpr UInt64 writers = ~readers; // Upper 32 bits of state + + alignas(64) std::atomic state; + std::atomic waiters; +}; + +} + +#else + +#include + +// WARNING: We support cancellable synchronization primitives only on linux for now + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int THREAD_WAS_CANCELLED; +} + +struct Cancellable +{ + Cancellable() = default; + ~Cancellable() = default; +}; + +struct NotCancellable +{ + NotCancellable() = default; + ~NotCancellable() = default; +}; + +class CancelToken +{ +public: + CancelToken() = default; + CancelToken(const CancelToken &) = delete; + CancelToken(CancelToken &&) = delete; + CancelToken & operator=(const CancelToken &) = delete; + ~CancelToken() = default; + + static CancelToken & local() + { + static CancelToken token; + return token; + } + + bool wait(UInt32 *, UInt32) { return true; } + [[noreturn]] void raise(); + static void notifyOne(UInt32 *) {} + static void notifyAll(UInt32 *) {} + static void signal(UInt64) {} + static void signal(UInt64, int, const String &) {} +}; + +using CancellableSharedMutex = std::shared_mutex; +using FastSharedMutex = std::shared_mutex; + +} + +#endif diff --git a/src/Common/tests/gtest_threading.cpp b/src/Common/tests/gtest_threading.cpp new file mode 100644 index 00000000000..d9cb8748eeb --- /dev/null +++ b/src/Common/tests/gtest_threading.cpp @@ -0,0 +1,369 @@ +#include + +#include +#include +#include +#include +#include 
+ +#include "Common/Exception.h" +#include +#include + +#include +#include + + +namespace DB +{ + namespace ErrorCodes + { + extern const int THREAD_WAS_CANCELLED; + } +} + +struct NoCancel {}; + +// for all PerfTests +static constexpr int requests = 512 * 1024; +static constexpr int max_threads = 16; + +template +void TestSharedMutex() +{ + // Test multiple readers can acquire lock + for (int readers = 1; readers <= 128; readers *= 2) + { + T sm; + std::atomic test(0); + std::barrier sync(readers + 1); + + std::vector threads; + threads.reserve(readers); + auto reader = [&] + { + [[maybe_unused]] Status status; + std::shared_lock lock(sm); + test++; + sync.arrive_and_wait(); + }; + + for (int i = 0; i < readers; i++) + threads.emplace_back(reader); + + { // writer + [[maybe_unused]] Status status; + sync.arrive_and_wait(); // wait for all reader to acquire lock to avoid blocking them + std::unique_lock lock(sm); + test++; + } + + for (auto & thread : threads) + thread.join(); + + ASSERT_EQ(test, readers + 1); + } + + // Test multiple writers cannot acquire lock simultaneously + for (int writers = 1; writers <= 128; writers *= 2) + { + T sm; + int test = 0; + std::barrier sync(writers); + std::vector threads; + + threads.reserve(writers); + auto writer = [&] + { + [[maybe_unused]] Status status; + sync.arrive_and_wait(); + std::unique_lock lock(sm); + test++; + }; + + for (int i = 0; i < writers; i++) + threads.emplace_back(writer); + + for (auto & thread : threads) + thread.join(); + + ASSERT_EQ(test, writers); + } +} + +template +void TestSharedMutexCancelReader() +{ + constexpr int readers = 8; + constexpr int tasks_per_reader = 32; + + T sm; + std::atomic successes(0); + std::atomic cancels(0); + std::barrier sync(readers + 1); + std::barrier cancel_sync(readers / 2 + 1); + std::vector threads; + + std::mutex m; + std::vector tids_to_cancel; + + threads.reserve(readers); + auto reader = [&] (int reader_id) + { + if (reader_id % 2 == 0) + { + std::unique_lock lock(m); + tids_to_cancel.emplace_back(getThreadId()); + } + for (int task = 0; task < tasks_per_reader; task++) { + try + { + [[maybe_unused]] Status status; + sync.arrive_and_wait(); // (A) sync with writer + sync.arrive_and_wait(); // (B) wait for writer to acquire unique_lock + std::shared_lock lock(sm); + successes++; + } + catch(DB::Exception & e) + { + ASSERT_EQ(e.code(), DB::ErrorCodes::THREAD_WAS_CANCELLED); + ASSERT_EQ(e.message(), "test"); + cancels++; + cancel_sync.arrive_and_wait(); // (C) sync with writer + } + } + }; + + for (int reader_id = 0; reader_id < readers; reader_id++) + threads.emplace_back(reader, reader_id); + + { // writer + [[maybe_unused]] Status status; + for (int task = 0; task < tasks_per_reader; task++) { + sync.arrive_and_wait(); // (A) wait for readers to finish previous task + ASSERT_EQ(cancels + successes, task * readers); + ASSERT_EQ(cancels, task * readers / 2); + ASSERT_EQ(successes, task * readers / 2); + std::unique_lock lock(sm); + sync.arrive_and_wait(); // (B) sync with readers + //std::unique_lock lock(m); // not needed, already synced using barrier + for (UInt64 tid : tids_to_cancel) + DB::CancelToken::signal(tid, DB::ErrorCodes::THREAD_WAS_CANCELLED, "test"); + + // This sync is crutial. It is needed to hold `lock` long enough. + // It guarantees that every cancelled thread will find `sm` blocked by writer, and thus will begin to wait. + // Wait() call is required for cancellation. Otherwise, fastpath acquire w/o wait will not generate exception. 
+ // And this is the desired behaviour. + cancel_sync.arrive_and_wait(); // (C) wait for cancellation to finish, before unlock. + } + } + + for (auto & thread : threads) + thread.join(); + + ASSERT_EQ(successes, tasks_per_reader * readers / 2); + ASSERT_EQ(cancels, tasks_per_reader * readers / 2); +} + +template +void TestSharedMutexCancelWriter() +{ + constexpr int writers = 8; + constexpr int tasks_per_writer = 32; + + T sm; + std::atomic successes(0); + std::atomic cancels(0); + std::barrier sync(writers); + std::vector threads; + + std::mutex m; + std::vector all_tids; + + threads.reserve(writers); + auto writer = [&] + { + { + std::unique_lock lock(m); + all_tids.emplace_back(getThreadId()); + } + for (int task = 0; task < tasks_per_writer; task++) { + try + { + [[maybe_unused]] Status status; + sync.arrive_and_wait(); // (A) sync all threads before race to acquire the lock + std::unique_lock lock(sm); + successes++; + // Thread that managed to acquire the lock cancels all other waiting writers + //std::unique_lock lock(m); // not needed, already synced using barrier + for (UInt64 tid : all_tids) + { + if (tid != getThreadId()) + DB::CancelToken::signal(tid, DB::ErrorCodes::THREAD_WAS_CANCELLED, "test"); + } + + // This sync is crutial. It is needed to hold `lock` long enough. + // It guarantees that every cancelled thread will find `sm` blocked, and thus will begin to wait. + // Wait() call is required for cancellation. Otherwise, fastpath acquire w/o wait will not generate exception. + // And this is the desired behaviour. + sync.arrive_and_wait(); // (B) wait for cancellation to finish, before unlock. + } + catch(DB::Exception & e) + { + ASSERT_EQ(e.code(), DB::ErrorCodes::THREAD_WAS_CANCELLED); + ASSERT_EQ(e.message(), "test"); + cancels++; + sync.arrive_and_wait(); // (B) sync with race winner + } + } + }; + + for (int writer_id = 0; writer_id < writers; writer_id++) + threads.emplace_back(writer); + + for (auto & thread : threads) + thread.join(); + + ASSERT_EQ(successes, tasks_per_writer); + ASSERT_EQ(cancels, tasks_per_writer * (writers - 1)); +} + +template +void PerfTestSharedMutexReadersOnly() +{ + std::cout << "*** " << demangle(typeid(T).name()) << "/" << demangle(typeid(Status).name()) << " ***" << std::endl; + + for (int thrs = 1; thrs <= max_threads; thrs *= 2) + { + T sm; + std::vector threads; + threads.reserve(thrs); + auto reader = [&] + { + [[maybe_unused]] Status status; + for (int request = requests / thrs; request; request--) + { + std::shared_lock lock(sm); + } + }; + + Stopwatch watch; + for (int i = 0; i < thrs; i++) + threads.emplace_back(reader); + + for (auto & thread : threads) + thread.join(); + + double ns = watch.elapsedNanoseconds(); + std::cout << "thrs = " << thrs << ":\t" << ns / requests << " ns\t" << requests * 1e9 / ns << " rps" << std::endl; + } +} + +template +void PerfTestSharedMutexWritersOnly() +{ + std::cout << "*** " << demangle(typeid(T).name()) << "/" << demangle(typeid(Status).name()) << " ***" << std::endl; + + for (int thrs = 1; thrs <= max_threads; thrs *= 2) + { + int counter = 0; + T sm; + std::vector threads; + threads.reserve(thrs); + auto writer = [&] + { + [[maybe_unused]] Status status; + for (int request = requests / thrs; request; request--) + { + std::unique_lock lock(sm); + ASSERT_TRUE(counter % 2 == 0); + counter++; + std::atomic_signal_fence(std::memory_order::seq_cst); // force complier to generate two separate increment instructions + counter++; + } + }; + + Stopwatch watch; + for (int i = 0; i < thrs; i++) + 
threads.emplace_back(writer); + + for (auto & thread : threads) + thread.join(); + + ASSERT_EQ(counter, requests * 2); + + double ns = watch.elapsedNanoseconds(); + std::cout << "thrs = " << thrs << ":\t" << ns / requests << " ns\t" << requests * 1e9 / ns << " rps" << std::endl; + } +} + +template +void PerfTestSharedMutexRW() +{ + std::cout << "*** " << demangle(typeid(T).name()) << "/" << demangle(typeid(Status).name()) << " ***" << std::endl; + + for (int thrs = 1; thrs <= max_threads; thrs *= 2) + { + int counter = 0; + T sm; + std::vector threads; + threads.reserve(thrs); + auto reader = [&] + { + [[maybe_unused]] Status status; + for (int request = requests / thrs / 2; request; request--) + { + { + std::shared_lock lock(sm); + ASSERT_TRUE(counter % 2 == 0); + } + { + std::unique_lock lock(sm); + ASSERT_TRUE(counter % 2 == 0); + counter++; + std::atomic_signal_fence(std::memory_order::seq_cst); // force complier to generate two separate increment instructions + counter++; + } + } + }; + + Stopwatch watch; + for (int i = 0; i < thrs; i++) + threads.emplace_back(reader); + + for (auto & thread : threads) + thread.join(); + + ASSERT_EQ(counter, requests); + + double ns = watch.elapsedNanoseconds(); + std::cout << "thrs = " << thrs << ":\t" << ns / requests << " ns\t" << requests * 1e9 / ns << " rps" << std::endl; + } +} + +TEST(Threading, SharedMutexSmokeCancellableEnabled) { TestSharedMutex(); } +TEST(Threading, SharedMutexSmokeCancellableDisabled) { TestSharedMutex(); } +TEST(Threading, SharedMutexSmokeFast) { TestSharedMutex(); } +TEST(Threading, SharedMutexSmokeStd) { TestSharedMutex(); } + +TEST(Threading, PerfTestSharedMutexReadersOnlyCancellableEnabled) { PerfTestSharedMutexReadersOnly(); } +TEST(Threading, PerfTestSharedMutexReadersOnlyCancellableDisabled) { PerfTestSharedMutexReadersOnly(); } +TEST(Threading, PerfTestSharedMutexReadersOnlyFast) { PerfTestSharedMutexReadersOnly(); } +TEST(Threading, PerfTestSharedMutexReadersOnlyStd) { PerfTestSharedMutexReadersOnly(); } + +TEST(Threading, PerfTestSharedMutexWritersOnlyCancellableEnabled) { PerfTestSharedMutexWritersOnly(); } +TEST(Threading, PerfTestSharedMutexWritersOnlyCancellableDisabled) { PerfTestSharedMutexWritersOnly(); } +TEST(Threading, PerfTestSharedMutexWritersOnlyFast) { PerfTestSharedMutexWritersOnly(); } +TEST(Threading, PerfTestSharedMutexWritersOnlyStd) { PerfTestSharedMutexWritersOnly(); } + +TEST(Threading, PerfTestSharedMutexRWCancellableEnabled) { PerfTestSharedMutexRW(); } +TEST(Threading, PerfTestSharedMutexRWCancellableDisabled) { PerfTestSharedMutexRW(); } +TEST(Threading, PerfTestSharedMutexRWFast) { PerfTestSharedMutexRW(); } +TEST(Threading, PerfTestSharedMutexRWStd) { PerfTestSharedMutexRW(); } + +#ifdef OS_LINUX /// These tests require cancellability + +TEST(Threading, SharedMutexCancelReaderCancellableEnabled) { TestSharedMutexCancelReader(); } +TEST(Threading, SharedMutexCancelWriterCancellableEnabled) { TestSharedMutexCancelWriter(); } + +#endif From 3ea04f0429bda92ebce56688caf5f21638a38a1b Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 4 Jan 2023 22:59:29 +0000 Subject: [PATCH 097/262] fix typos --- src/Common/Threading.cpp | 2 +- src/Common/Threading.h | 21 ++++++++------------- src/Common/tests/gtest_threading.cpp | 8 ++++---- 3 files changed, 13 insertions(+), 18 deletions(-) diff --git a/src/Common/Threading.cpp b/src/Common/Threading.cpp index 4d135ef93a6..0b0f347eb38 100644 --- a/src/Common/Threading.cpp +++ b/src/Common/Threading.cpp @@ -1,7 +1,7 @@ #include #include -#ifdef OS_LINUX /// 
Because of 'sigqueue' functions, RT signals and futex. +#ifdef OS_LINUX /// Because of futex #include diff --git a/src/Common/Threading.h b/src/Common/Threading.h index 14743def476..4d9e21193d3 100644 --- a/src/Common/Threading.h +++ b/src/Common/Threading.h @@ -63,15 +63,15 @@ public: // WARNING: `address` must be 2-byte aligned and `value` highest bit must be zero. // Return value: // true - woken by either notify or spurious wakeup; - // false - iff cancelation signal has been received. + // false - iff cancellation signal has been received. // Implementation details: // It registers `address` inside token's `state` to allow other threads to wake this thread and deliver cancellation signal. - // Highest bit of `*address` is used for guarantied delivery of the signal, but is guaranteed to be zero on return due to cancellation. - // Intented to be called only by thread associated with this token. + // Highest bit of `*address` is used for guaranteed delivery of the signal, but is guaranteed to be zero on return due to cancellation. + // Intended to be called only by thread associated with this token. bool wait(UInt32 * address, UInt32 value); // Throws `DB::Exception` received from `signal()`. Call it if `wait()` returned false. - // Intented to be called only by thread associated with this token. + // Intended to be called only by thread associated with this token. [[noreturn]] void raise(); // Regular wake by address (futex word). It does not interact with token in any way. We have it here to complement `wait()`. @@ -86,7 +86,7 @@ public: static void signal(UInt64 tid, int code, const String & message); // Flag used to deliver cancellation into memory address to wake a thread. - // Note that most significat bit at `addresses` to be used with `wait()` is reserved. + // Note that most significant bit at `addresses` to be used with `wait()` is reserved. static constexpr UInt32 signaled = 1u << 31u; private: @@ -94,14 +94,14 @@ private: friend struct NotCancellable; // Restores initial state for token to be reused. See `Cancellable` struct. - // Intented to be called only by thread associated with this token. + // Intended to be called only by thread associated with this token. void reset() { state.store(0); } // Enable thread cancellation. See `NotCancellable` struct. - // Intented to be called only by thread associated with this token. + // Intended to be called only by thread associated with this token. void enable() { chassert((state.load() & disabled) == disabled); @@ -109,7 +109,7 @@ private: } // Disable thread cancellation. See `NotCancellable` struct. - // Intented to be called only by thread associated with this token. + // Intended to be called only by thread associated with this token. void disable() { chassert((state.load() & disabled) == 0); @@ -234,11 +234,6 @@ private: namespace DB { -namespace ErrorCodes -{ - extern const int THREAD_WAS_CANCELLED; -} - struct Cancellable { Cancellable() = default; diff --git a/src/Common/tests/gtest_threading.cpp b/src/Common/tests/gtest_threading.cpp index d9cb8748eeb..5ac3cc35448 100644 --- a/src/Common/tests/gtest_threading.cpp +++ b/src/Common/tests/gtest_threading.cpp @@ -150,7 +150,7 @@ void TestSharedMutexCancelReader() for (UInt64 tid : tids_to_cancel) DB::CancelToken::signal(tid, DB::ErrorCodes::THREAD_WAS_CANCELLED, "test"); - // This sync is crutial. It is needed to hold `lock` long enough. + // This sync is crucial. It is needed to hold `lock` long enough. 
// It guarantees that every cancelled thread will find `sm` blocked by writer, and thus will begin to wait. // Wait() call is required for cancellation. Otherwise, fastpath acquire w/o wait will not generate exception. // And this is the desired behaviour. @@ -202,7 +202,7 @@ void TestSharedMutexCancelWriter() DB::CancelToken::signal(tid, DB::ErrorCodes::THREAD_WAS_CANCELLED, "test"); } - // This sync is crutial. It is needed to hold `lock` long enough. + // This sync is crucial. It is needed to hold `lock` long enough. // It guarantees that every cancelled thread will find `sm` blocked, and thus will begin to wait. // Wait() call is required for cancellation. Otherwise, fastpath acquire w/o wait will not generate exception. // And this is the desired behaviour. @@ -278,7 +278,7 @@ void PerfTestSharedMutexWritersOnly() std::unique_lock lock(sm); ASSERT_TRUE(counter % 2 == 0); counter++; - std::atomic_signal_fence(std::memory_order::seq_cst); // force complier to generate two separate increment instructions + std::atomic_signal_fence(std::memory_order::seq_cst); // force compiler to generate two separate increment instructions counter++; } }; @@ -321,7 +321,7 @@ void PerfTestSharedMutexRW() std::unique_lock lock(sm); ASSERT_TRUE(counter % 2 == 0); counter++; - std::atomic_signal_fence(std::memory_order::seq_cst); // force complier to generate two separate increment instructions + std::atomic_signal_fence(std::memory_order::seq_cst); // force compiler to generate two separate increment instructions counter++; } } From e9e3414ae1dababe0fda07ad5086c5d433584e4f Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 4 Jan 2023 23:04:04 +0000 Subject: [PATCH 098/262] remove debug print --- src/Common/Threading.cpp | 15 +-------------- src/Common/Threading.h | 11 ----------- 2 files changed, 1 insertion(+), 25 deletions(-) diff --git a/src/Common/Threading.cpp b/src/Common/Threading.cpp index 0b0f347eb38..860e26efc76 100644 --- a/src/Common/Threading.cpp +++ b/src/Common/Threading.cpp @@ -188,7 +188,6 @@ bool CancelToken::wait(UInt32 * address, UInt32 value) UInt64 s = state.load(); while (true) { - DBG("s={}", s); if (s & disabled) { // Start non-cancellable wait on futex. Spurious wake-up is possible. @@ -202,14 +201,12 @@ bool CancelToken::wait(UInt32 * address, UInt32 value) } // Start cancellable wait. Spurious wake-up is possible. 
- DBG("start cancellable wait address={} value={}", static_cast(address), value); futexWait(address, value); // "Release" futex and check for cancellation s = state.load(); while (true) { - DBG("finish cancellable wait, s={}", s); chassert((s & disabled) != disabled); // `disable()` must not be called from another thread if (s & canceled) { @@ -233,7 +230,6 @@ bool CancelToken::wait(UInt32 * address, UInt32 value) void CancelToken::raise() { std::unique_lock lock(signal_mutex); - DBG("raise code={} msg={}", exception_code, exception_message); if (exception_code != 0) throw DB::Exception( std::exchange(exception_code, 0), @@ -267,21 +263,18 @@ void CancelToken::signalImpl(int code, const String & message) UInt64 s = state.load(); while (true) { - DBG("s={}", s); if (s & canceled) return; // Already cancelled - don't signal twice if (state.compare_exchange_strong(s, s | canceled)) break; // It is the cancelling thread - should deliver signal if necessary } - DBG("cancel tid={} code={} msg={}", thread_id, code, message); exception_code = code; exception_message = message; if ((s & disabled) == disabled) return; // Cancellation is disabled - just signal token for later, but don't wake std::atomic * address = reinterpret_cast *>(s & disabled); - DBG("address={}", static_cast(address)); if (address == nullptr) return; // Thread is currently not waiting on futex - wake-up not required @@ -297,7 +290,6 @@ void CancelToken::signalImpl(int code, const String & message) // Wake all threads waiting on `address`, one of them will be cancelled and others will get spurious wake-ups // Woken canceled thread will reset signaled bit - DBG("wake"); futexWake(address, INT_MAX); // Signaling thread must remove address from state to notify canceled thread that `futexWake()` is done, thus `wake()` can return. 
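To make the wait/cancel contract spelled out in the comments above concrete, here is a minimal sketch (not part of the patch) of how a synchronization primitive can build a cancellable block-until-changed wait out of `CancelToken::wait()` and `CancelToken::raise()`. It assumes the `Common/Threading.h` API introduced in this series and a futex word that satisfies the documented requirements (2-byte aligned, highest bit reserved for signal delivery).

```cpp
#include <Common/Threading.h>   // DB::CancelToken, as introduced in this patch series
#include <base/types.h>         // UInt32
#include <atomic>

/// Block the calling thread while `word` still holds `expected`.
/// Returns normally after a real (or spurious) wake-up once the value has changed;
/// throws the DB::Exception delivered by CancelToken::signal() if this thread is cancelled.
inline void cancellableWaitWhile(std::atomic<UInt32> & word, UInt32 expected)
{
    while (word.load() == expected)
    {
        // wait() returns false only when a cancellation signal has been received ...
        if (!DB::CancelToken::local().wait(reinterpret_cast<UInt32 *>(&word), expected))
            DB::CancelToken::local().raise(); // ... and raise() rethrows the code/message passed to signal()
    }
}
```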
@@ -335,7 +327,6 @@ void CancellableSharedMutex::lock() UInt64 value = state.load(); while (true) { - DBG("#A r={} w={} rs={} ws={}", value & readers, (value & writers) != 0, (value & readers_signaled) != 0, (value & writers_signaled) != 0); if (value & writers) { waiters++; @@ -354,7 +345,6 @@ void CancellableSharedMutex::lock() value |= writers; while (value & readers) { - DBG("#B r={} w={} rs={} ws={}", value & readers, (value & writers) != 0, (value & readers_signaled) != 0, (value & writers_signaled) != 0); if (!cancellableWaitLowerFetch(state, value)) { state.fetch_and(~writers); @@ -374,8 +364,7 @@ bool CancellableSharedMutex::try_lock() void CancellableSharedMutex::unlock() { - UInt64 value = state.fetch_and(~writers); - DBG("r={} w={} rs={} ws={}", value & readers, (value & writers) != 0, (value & readers_signaled) != 0, (value & writers_signaled) != 0); + state.fetch_and(~writers); if (waiters) wakeUpperAll(state); } @@ -385,7 +374,6 @@ void CancellableSharedMutex::lock_shared() UInt64 value = state.load(); while (true) { - DBG("r={} w={} rs={} ws={}", value & readers, (value & writers) != 0, (value & readers_signaled) != 0, (value & writers_signaled) != 0); if (value & writers) { waiters++; @@ -413,7 +401,6 @@ bool CancellableSharedMutex::try_lock_shared() void CancellableSharedMutex::unlock_shared() { UInt64 value = state.fetch_sub(1) - 1; - DBG("r={} w={} rs={} ws={}", value & readers, (value & writers) != 0, (value & readers_signaled) != 0, (value & writers_signaled) != 0); if ((value & (writers | readers)) == writers) // If writer is waiting and no more readers wakeLowerOne(state); // Wake writer } diff --git a/src/Common/Threading.h b/src/Common/Threading.h index 4d9e21193d3..08f0242a1df 100644 --- a/src/Common/Threading.h +++ b/src/Common/Threading.h @@ -9,17 +9,6 @@ #include #include - -// TODO(serxa): for debug only, remove it -#if 0 -#include -#include -#define DBG(...) std::cout << fmt::format("\033[01;3{}m[{}] {} {} {}\033[00m {}:{}\n", 1 + getThreadId() % 8, getThreadId(), reinterpret_cast(this), fmt::format(__VA_ARGS__), __PRETTY_FUNCTION__, __FILE__, __LINE__) -#else -#include -#define DBG(...) 
UNUSED(__VA_ARGS__) -#endif - namespace DB { From f706cf8903cc0f93e47763c842f832144805b543 Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 4 Jan 2023 23:49:29 +0000 Subject: [PATCH 099/262] fix style --- src/Common/Threading.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/src/Common/Threading.cpp b/src/Common/Threading.cpp index 860e26efc76..2a013e6485e 100644 --- a/src/Common/Threading.cpp +++ b/src/Common/Threading.cpp @@ -1,6 +1,14 @@ #include #include +namespace DB +{ +namespace ErrorCodes +{ + extern const int THREAD_WAS_CANCELLED; +} +} + #ifdef OS_LINUX /// Because of futex #include @@ -15,11 +23,6 @@ namespace DB { -namespace ErrorCodes -{ - extern const int THREAD_WAS_CANCELLED; -} - namespace { inline long futexWait(void * address, UInt32 value) @@ -483,11 +486,6 @@ void FastSharedMutex::unlock_shared() namespace DB { -namespace ErrorCodes -{ - extern const int THREAD_WAS_CANCELLED; -} - void CancelToken::raise() { throw DB::Exception(ErrorCodes::THREAD_WAS_CANCELLED, "Thread was cancelled"); From c4e896f73a0a71ee4c8fb048856a0b9d1ac9011e Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 5 Jan 2023 00:23:10 +0000 Subject: [PATCH 100/262] cleanup --- src/Common/Threading.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/Common/Threading.cpp b/src/Common/Threading.cpp index 2a013e6485e..caf255cab64 100644 --- a/src/Common/Threading.cpp +++ b/src/Common/Threading.cpp @@ -181,12 +181,7 @@ bool CancelToken::wait(UInt32 * address, UInt32 value) { chassert((reinterpret_cast(address) & canceled) == 0); // An `address` must be 2-byte aligned if (value & signaled) // Can happen after spurious wake-up due to cancel of other thread - { - // static std::atomic x{0}; - // if (x++ > 5) - // sleep(3600); return true; // Spin-wait unless signal is handled - } UInt64 s = state.load(); while (true) From abf63d0c3365ef421bc0e68ad28c108d07dec2bd Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 5 Jan 2023 00:26:28 +0000 Subject: [PATCH 101/262] cleanup --- src/Common/Threading.cpp | 5 ----- src/Common/Threading.h | 2 -- 2 files changed, 7 deletions(-) diff --git a/src/Common/Threading.cpp b/src/Common/Threading.cpp index caf255cab64..7a32af405de 100644 --- a/src/Common/Threading.cpp +++ b/src/Common/Threading.cpp @@ -118,11 +118,6 @@ namespace } } -CancelToken::Registry::Registry() -{ - // setupCancelSignalHandler(); -} - void CancelToken::Registry::insert(CancelToken * token) { std::lock_guard lock(mutex); diff --git a/src/Common/Threading.h b/src/Common/Threading.h index 08f0242a1df..1398e5b1e0e 100644 --- a/src/Common/Threading.h +++ b/src/Common/Threading.h @@ -110,8 +110,6 @@ private: friend struct Registry; struct Registry { - Registry(); - std::mutex mutex; std::unordered_map threads; // By thread ID From c507d7ecef6840a84e32888528f315c69f230d84 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Thu, 5 Jan 2023 15:27:21 +0100 Subject: [PATCH 102/262] Update src/Common/tests/gtest_threading.cpp Co-authored-by: Antonio Andelic --- src/Common/tests/gtest_threading.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/tests/gtest_threading.cpp b/src/Common/tests/gtest_threading.cpp index 5ac3cc35448..fd84e4f0633 100644 --- a/src/Common/tests/gtest_threading.cpp +++ b/src/Common/tests/gtest_threading.cpp @@ -208,7 +208,7 @@ void TestSharedMutexCancelWriter() // And this is the desired behaviour. sync.arrive_and_wait(); // (B) wait for cancellation to finish, before unlock. 
} - catch(DB::Exception & e) + catch (DB::Exception & e) { ASSERT_EQ(e.code(), DB::ErrorCodes::THREAD_WAS_CANCELLED); ASSERT_EQ(e.message(), "test"); From 985dff1dbfe2db124f3919ec6143fc8b312b33a6 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Thu, 5 Jan 2023 15:27:32 +0100 Subject: [PATCH 103/262] Update src/Common/tests/gtest_threading.cpp Co-authored-by: Antonio Andelic --- src/Common/tests/gtest_threading.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/tests/gtest_threading.cpp b/src/Common/tests/gtest_threading.cpp index fd84e4f0633..62a9085cdbe 100644 --- a/src/Common/tests/gtest_threading.cpp +++ b/src/Common/tests/gtest_threading.cpp @@ -124,7 +124,7 @@ void TestSharedMutexCancelReader() std::shared_lock lock(sm); successes++; } - catch(DB::Exception & e) + catch (DB::Exception & e) { ASSERT_EQ(e.code(), DB::ErrorCodes::THREAD_WAS_CANCELLED); ASSERT_EQ(e.message(), "test"); From 3f87e6cd776ad2c99d6c4733d98cdfdc1049ad0f Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Thu, 5 Jan 2023 15:27:51 +0100 Subject: [PATCH 104/262] Update src/Common/Threading.h Co-authored-by: Igor Nikonov <954088+devcrafter@users.noreply.github.com> --- src/Common/Threading.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Common/Threading.h b/src/Common/Threading.h index 1398e5b1e0e..4b6a372ce2d 100644 --- a/src/Common/Threading.h +++ b/src/Common/Threading.h @@ -27,7 +27,7 @@ struct NotCancellable }; // Responsible for synchronization needed to deliver thread cancellation signal. -// Basic building block for cancallable synchronization primitives. +// Basic building block for cancellable synchronization primitives. // Allows to perform cancellable wait on memory addresses (think futex) class CancelToken { From d217136edea8910dcee40cda2535fa8928506154 Mon Sep 17 00:00:00 2001 From: serxa Date: Thu, 5 Jan 2023 20:41:36 +0000 Subject: [PATCH 105/262] review fixes --- src/Common/Threading.cpp | 8 ++++---- src/Common/Threading.h | 18 +++++++++--------- src/Common/tests/gtest_threading.cpp | 10 +++++----- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/src/Common/Threading.cpp b/src/Common/Threading.cpp index 7a32af405de..0e073162bb8 100644 --- a/src/Common/Threading.cpp +++ b/src/Common/Threading.cpp @@ -53,12 +53,12 @@ namespace inline constexpr UInt32 lowerValue(UInt64 value) { - return UInt32(value & 0xffffffffull); + return static_cast(value & 0xffffffffull); } inline constexpr UInt32 upperValue(UInt64 value) { - return UInt32(value >> 32ull); + return static_cast(value >> 32ull); } inline UInt32 * lowerAddress(void * address) @@ -300,12 +300,12 @@ Cancellable::~Cancellable() CancelToken::local().disable(); } -NotCancellable::NotCancellable() +NonCancellable::NonCancellable() { CancelToken::local().disable(); } -NotCancellable::~NotCancellable() +NonCancellable::~NonCancellable() { CancelToken::local().enable(); } diff --git a/src/Common/Threading.h b/src/Common/Threading.h index 4b6a372ce2d..e21fc5608b2 100644 --- a/src/Common/Threading.h +++ b/src/Common/Threading.h @@ -20,10 +20,10 @@ struct Cancellable }; // Scoped object, disabling thread cancellation (cannot be nested; must be inside `Cancellable` region) -struct NotCancellable +struct NonCancellable { - NotCancellable(); - ~NotCancellable(); + NonCancellable(); + ~NonCancellable(); }; // Responsible for synchronization needed to deliver thread cancellation signal. 
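For orientation, a short usage sketch (not part of the patch) showing how the renamed scoped guards are meant to compose: `Cancellable` opts a thread into cancellation, `NonCancellable` protects a nested region, and another thread delivers the signal with `CancelToken::signal()`. It follows the pattern of the gtest above and assumes the `THREAD_WAS_CANCELLED` error code is declared the same way as in the tests.

```cpp
#include <Common/Threading.h>   // CancellableSharedMutex, Cancellable, NonCancellable, CancelToken
#include <Common/Exception.h>
#include <shared_mutex>         // std::shared_lock
#include <iostream>

namespace DB::ErrorCodes { extern const int THREAD_WAS_CANCELLED; } // declared as in the gtest

void cancellableReader(DB::CancellableSharedMutex & sm)
{
    DB::Cancellable cancellable;   // opt this thread into cancellation (resets its CancelToken)
    try
    {
        std::shared_lock lock(sm); // may throw if CancelToken::signal() arrives while we wait behind a writer
        DB::NonCancellable guard;  // nested region that must not be interrupted
        // ... read the shared state ...
    }
    catch (DB::Exception & e)      // carries the code and message passed to CancelToken::signal()
    {
        std::cout << "reader cancelled: " << e.message() << " (code " << e.code() << ")\n";
    }
}

// Elsewhere, knowing the reader's thread id (e.g. collected via getThreadId()):
//     DB::CancelToken::signal(reader_tid, DB::ErrorCodes::THREAD_WAS_CANCELLED, "shutting down");
```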
@@ -80,7 +80,7 @@ public: private: friend struct Cancellable; - friend struct NotCancellable; + friend struct NonCancellable; // Restores initial state for token to be reused. See `Cancellable` struct. // Intended to be called only by thread associated with this token. @@ -89,7 +89,7 @@ private: state.store(0); } - // Enable thread cancellation. See `NotCancellable` struct. + // Enable thread cancellation. See `NonCancellable` struct. // Intended to be called only by thread associated with this token. void enable() { @@ -97,7 +97,7 @@ private: state.fetch_and(~disabled); } - // Disable thread cancellation. See `NotCancellable` struct. + // Disable thread cancellation. See `NonCancellable` struct. // Intended to be called only by thread associated with this token. void disable() { @@ -227,10 +227,10 @@ struct Cancellable ~Cancellable() = default; }; -struct NotCancellable +struct NonCancellable { - NotCancellable() = default; - ~NotCancellable() = default; + NonCancellable() = default; + ~NonCancellable() = default; }; class CancelToken diff --git a/src/Common/tests/gtest_threading.cpp b/src/Common/tests/gtest_threading.cpp index 62a9085cdbe..767739deb46 100644 --- a/src/Common/tests/gtest_threading.cpp +++ b/src/Common/tests/gtest_threading.cpp @@ -44,8 +44,8 @@ void TestSharedMutex() { [[maybe_unused]] Status status; std::shared_lock lock(sm); - test++; sync.arrive_and_wait(); + test++; }; for (int i = 0; i < readers; i++) @@ -94,8 +94,8 @@ void TestSharedMutex() template void TestSharedMutexCancelReader() { - constexpr int readers = 8; - constexpr int tasks_per_reader = 32; + static constexpr int readers = 8; + static constexpr int tasks_per_reader = 32; T sm; std::atomic successes(0); @@ -168,8 +168,8 @@ void TestSharedMutexCancelReader() template void TestSharedMutexCancelWriter() { - constexpr int writers = 8; - constexpr int tasks_per_writer = 32; + static constexpr int writers = 8; + static constexpr int tasks_per_writer = 32; T sm; std::atomic successes(0); From 5cde7762ad0574c2c01d8f0e988c3f2f63ba27a0 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Thu, 5 Jan 2023 15:54:44 -0500 Subject: [PATCH 106/262] WIP --- .../table-engines/integrations/deltalake.md | 33 ++++ .../table-functions/deltalake.md | 184 ++++++++++++++++++ 2 files changed, 217 insertions(+) create mode 100644 docs/en/engines/table-engines/integrations/deltalake.md create mode 100644 docs/en/sql-reference/table-functions/deltalake.md diff --git a/docs/en/engines/table-engines/integrations/deltalake.md b/docs/en/engines/table-engines/integrations/deltalake.md new file mode 100644 index 00000000000..44407e34e38 --- /dev/null +++ b/docs/en/engines/table-engines/integrations/deltalake.md @@ -0,0 +1,33 @@ +--- +slug: /en/engines/table-engines/integrations/deltalake +sidebar_label: DeltaLake +--- + +# DeltaLake Table Engine + +This engine provides a read-only integration with existing Delta Lake tables in Amazon S3. + +## Create Table + +Note that the Delta Lake table must already exist in S3, this command does not take DDL parameters to create a new table. + +``` sql +CREATE TABLE deltalake + ENGINE = DeltaLake(path, [aws_access_key_id, aws_secret_access_key,]) +``` + +**Engine parameters** + +- `path` — Bucket url with path to the existing Delta Lake table. +- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. 
If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3). + +**Example** + +```sql +CREATE TABLE deltalake ENGINE=DeltaLake('http://mars-doc-test.s3.amazonaws.com/clickhouse-bucket-3/test_table/', 'ABC123', 'Abc+123') +``` + +## See also + +- [DeltaLake table function](../../../sql-reference/table-functions/deltalake.md) + diff --git a/docs/en/sql-reference/table-functions/deltalake.md b/docs/en/sql-reference/table-functions/deltalake.md new file mode 100644 index 00000000000..7e3fffe4d8b --- /dev/null +++ b/docs/en/sql-reference/table-functions/deltalake.md @@ -0,0 +1,184 @@ +--- +slug: /en/sql-reference/table-functions/deltalake +sidebar_label: DeltLake +--- + +# DeltaLake Table Function + +Provides a read-only table-like interface to [Delta Lake](https://github.com/delta-io/delta) tables in [Amazon S3](https://aws.amazon.com/s3/). + +For example, to query an existing Delta Lake table named `deltalake` in S3: +```sql +CREATE TABLE dl_hits + ENGINE = DeltaLake('https://clickhouse-public-datasets.s3.amazonaws.com/delta_lake/hits/','',''); + +SHOW TABLES; + +DESCRIBE dl_hits; + +SELECT URL, Referer, UserAgent FROM dl_hits WHERE URL IS NOT NULL LIMIT 10; + +SELECT URL, Referer, UserAgent FROM deltaLake('https://clickhouse-public-datasets.s3.amazonaws.com/delta_lake/hits/') WHERE URL IS NOT NULL LIMIT 10; + +``` + +**Syntax** + +``` sql +s3(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) +``` + +**Arguments** + +- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path). +- `format` — The [format](../../interfaces/formats.md#formats) of the file. +- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. +- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension. + +**Returned value** + +A table with the specified structure for reading or writing data in the specified file. 
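Before the generic examples below, a minimal sketch of calling the table function directly; it reuses the public dataset URL and column names from the example at the top of this page (credentials are omitted because that bucket is public; for private buckets they can be passed the same way as in the `CREATE TABLE dl_hits` example above).

```sql
-- Read a few rows straight from the Delta Lake table without creating a table object first
SELECT URL, UserAgent
FROM deltaLake('https://clickhouse-public-datasets.s3.amazonaws.com/delta_lake/hits/')
WHERE URL IS NOT NULL
LIMIT 5;
```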
+ +**Examples** + +Selecting the first two rows from the table from S3 file `https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv`: + +``` sql +SELECT * +FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') +LIMIT 2; +``` + +``` text +┌─column1─┬─column2─┬─column3─┐ +│ 1 │ 2 │ 3 │ +│ 3 │ 2 │ 1 │ +└─────────┴─────────┴─────────┘ +``` + +The similar but from file with `gzip` compression: + +``` sql +SELECT * +FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') +LIMIT 2; +``` + +``` text +┌─column1─┬─column2─┬─column3─┐ +│ 1 │ 2 │ 3 │ +│ 3 │ 2 │ 1 │ +└─────────┴─────────┴─────────┘ +``` + +## Usage + +Suppose that we have several files with following URIs on S3: + +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_1.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_2.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_3.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_4.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_1.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_2.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_3.csv' +- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_4.csv' + +Count the amount of rows in files ending with numbers from 1 to 3: + +``` sql +SELECT count(*) +FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32') +``` + +``` text +┌─count()─┐ +│ 18 │ +└─────────┘ +``` + +Count the total amount of rows in all files in these two directories: + +``` sql +SELECT count(*) +FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32') +``` + +``` text +┌─count()─┐ +│ 24 │ +└─────────┘ +``` + +:::warning +If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. +::: + +Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: + +``` sql +SELECT count(*) +FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32'); +``` + +``` text +┌─count()─┐ +│ 12 │ +└─────────┘ +``` + +Insert data into file `test-data.csv.gz`: + +``` sql +INSERT INTO FUNCTION s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +VALUES ('test-data', 1), ('test-data-2', 2); +``` + +Insert data into file `test-data.csv.gz` from existing table: + +``` sql +INSERT INTO FUNCTION s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') +SELECT name, value FROM existing_table; +``` + +Glob ** can be used for recursive directory traversal. 
Consider the below example, it will fetch all files from `my-test-bucket-768` directory recursively: + +``` sql +SELECT * FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**', 'CSV', 'name String, value UInt32', 'gzip'); +``` + +The below get data from all `test-data.csv.gz` files from any folder inside `my-test-bucket` directory recursively: + +``` sql +SELECT * FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); +``` + +## Partitioned Write + +If you specify `PARTITION BY` expression when inserting data into `S3` table, a separate file is created for each partition value. Splitting the data into separate files helps to improve reading operations efficiency. + +**Examples** + +1. Using partition ID in a key creates separate files: + +```sql +INSERT INTO TABLE FUNCTION + s3('http://bucket.amazonaws.com/my_bucket/file_{_partition_id}.csv', 'CSV', 'a String, b UInt32, c UInt32') + PARTITION BY a VALUES ('x', 2, 3), ('x', 4, 5), ('y', 11, 12), ('y', 13, 14), ('z', 21, 22), ('z', 23, 24); +``` +As a result, the data is written into three files: `file_x.csv`, `file_y.csv`, and `file_z.csv`. + +2. Using partition ID in a bucket name creates files in different buckets: + +```sql +INSERT INTO TABLE FUNCTION + s3('http://bucket.amazonaws.com/my_bucket_{_partition_id}/file.csv', 'CSV', 'a UInt32, b UInt32, c UInt32') + PARTITION BY a VALUES (1, 2, 3), (1, 4, 5), (10, 11, 12), (10, 13, 14), (20, 21, 22), (20, 23, 24); +``` +As a result, the data is written into three files in different buckets: `my_bucket_1/file.csv`, `my_bucket_10/file.csv`, and `my_bucket_20/file.csv`. + +**See Also** + +- [S3 engine](../../engines/table-engines/integrations/s3.md) + +[Original article](https://clickhouse.com/docs/en/sql-reference/table-functions/s3/) From a67afdff6a1ef47fcb7c70ffcb83f34dedfc4f46 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Thu, 5 Jan 2023 22:43:41 +0000 Subject: [PATCH 107/262] Fix: insert delay calculation --- src/Storages/MergeTree/MergeTreeData.cpp | 103 ++++++++++++++--------- src/Storages/MergeTree/MergeTreeData.h | 4 +- 2 files changed, 65 insertions(+), 42 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 30d0570ff11..89d90011398 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3701,7 +3701,7 @@ std::pair MergeTreeData::getMaxPartsCountAndSizeForPartition() c } -size_t MergeTreeData::getMaxInactivePartsCountForPartition() const +size_t MergeTreeData::getMaxOutdatedPartsCountForPartition() const { return getMaxPartsCountAndSizeForPartitionWithState(DataPartState::Outdated).first; } @@ -3722,70 +3722,93 @@ std::optional MergeTreeData::getMinPartDataVersion() const } -void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, ContextPtr query_context) const +void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const ContextPtr & query_context) const { const auto settings = getSettings(); const auto & query_settings = query_context->getSettingsRef(); const size_t parts_count_in_total = getPartsCount(); + + /// check if have too many parts in total if (parts_count_in_total >= settings->max_parts_in_total) { ProfileEvents::increment(ProfileEvents::RejectedInserts); - throw Exception("Too many parts (" + toString(parts_count_in_total) + ") in all partitions in total. This indicates wrong choice of partition key. 
The threshold can be modified with 'max_parts_in_total' setting in element in config.xml or with per-table setting.", ErrorCodes::TOO_MANY_PARTS); + throw Exception( + ErrorCodes::TOO_MANY_PARTS, + "Too many parts ({}) in all partitions in total. This indicates wrong choice of partition key. The threshold can be modified " + "with 'max_parts_in_total' setting in element in config.xml or with per-table setting.", + toString(parts_count_in_total)); } - auto [parts_count_in_partition, size_of_partition] = getMaxPartsCountAndSizeForPartition(); - ssize_t k_inactive = -1; - if (settings->inactive_parts_to_throw_insert > 0 || settings->inactive_parts_to_delay_insert > 0) + size_t outdated_parts_over_threshold = [&]() -> size_t { - size_t inactive_parts_count_in_partition = getMaxInactivePartsCountForPartition(); - if (settings->inactive_parts_to_throw_insert > 0 && inactive_parts_count_in_partition >= settings->inactive_parts_to_throw_insert) + size_t outdated_parts_count_in_partition = 0; + if (settings->inactive_parts_to_throw_insert > 0 || settings->inactive_parts_to_delay_insert > 0) + outdated_parts_count_in_partition = getMaxOutdatedPartsCountForPartition(); + + if (settings->inactive_parts_to_throw_insert > 0 && outdated_parts_count_in_partition >= settings->inactive_parts_to_throw_insert) { ProfileEvents::increment(ProfileEvents::RejectedInserts); throw Exception( ErrorCodes::TOO_MANY_PARTS, "Too many inactive parts ({}). Parts cleaning are processing significantly slower than inserts", - inactive_parts_count_in_partition); + outdated_parts_count_in_partition); } - k_inactive = static_cast(inactive_parts_count_in_partition) - static_cast(settings->inactive_parts_to_delay_insert); - } + if (settings->inactive_parts_to_delay_insert > 0 && outdated_parts_count_in_partition >= settings->inactive_parts_to_delay_insert) + return outdated_parts_count_in_partition - settings->inactive_parts_to_delay_insert + 1; - auto parts_to_delay_insert = query_settings.parts_to_delay_insert ? query_settings.parts_to_delay_insert : settings->parts_to_delay_insert; - auto parts_to_throw_insert = query_settings.parts_to_throw_insert ? query_settings.parts_to_throw_insert : settings->parts_to_throw_insert; + return 0; + }(); + auto [parts_count_in_partition, size_of_partition] = getMaxPartsCountAndSizeForPartition(); size_t average_part_size = parts_count_in_partition ? size_of_partition / parts_count_in_partition : 0; - bool parts_are_large_enough_in_average = settings->max_avg_part_size_for_too_many_parts - && average_part_size > settings->max_avg_part_size_for_too_many_parts; - - if (parts_count_in_partition >= parts_to_throw_insert && !parts_are_large_enough_in_average) + const auto active_parts_to_delay_insert + = query_settings.parts_to_delay_insert ? query_settings.parts_to_delay_insert : settings->parts_to_delay_insert; + const auto active_parts_to_throw_insert + = query_settings.parts_to_throw_insert ? query_settings.parts_to_throw_insert : settings->parts_to_throw_insert; + size_t active_parts_over_threshold = [&](size_t parts_count) -> size_t { - ProfileEvents::increment(ProfileEvents::RejectedInserts); - throw Exception( - ErrorCodes::TOO_MANY_PARTS, - "Too many parts ({} with average size of {}). 
Merges are processing significantly slower than inserts", - parts_count_in_partition, ReadableSize(average_part_size)); - } + bool parts_are_large_enough_in_average + = settings->max_avg_part_size_for_too_many_parts && average_part_size > settings->max_avg_part_size_for_too_many_parts; - if (k_inactive < 0 && (parts_count_in_partition < parts_to_delay_insert || parts_are_large_enough_in_average)) + if (parts_count >= active_parts_to_throw_insert && !parts_are_large_enough_in_average) + { + ProfileEvents::increment(ProfileEvents::RejectedInserts); + throw Exception( + ErrorCodes::TOO_MANY_PARTS, + "Too many parts ({} with average size of {}). Merges are processing significantly slower than inserts", + parts_count, + ReadableSize(average_part_size)); + } + if (active_parts_to_delay_insert > 0 && parts_count >= active_parts_to_delay_insert && !parts_are_large_enough_in_average) + /// if parts_count == parts_to_delay_insert -> we're 1 part over threshold + return parts_count - active_parts_to_delay_insert + 1; + + return 0; + }(parts_count_in_partition); + + /// no need for delay + if (!active_parts_over_threshold && !outdated_parts_over_threshold) return; - const ssize_t k_active = ssize_t(parts_count_in_partition) - ssize_t(parts_to_delay_insert); - size_t max_k; - size_t k; - if (k_active > k_inactive) + const UInt64 delay_milliseconds = [&]() -> UInt64 { - max_k = parts_to_throw_insert - parts_to_delay_insert; - k = k_active + 1; - } - else - { - max_k = settings->inactive_parts_to_throw_insert - settings->inactive_parts_to_delay_insert; - k = k_inactive + 1; - } + size_t parts_over_threshold = std::max(active_parts_over_threshold, outdated_parts_over_threshold); + size_t allowed_parts_over_threshold = 1; + if (active_parts_over_threshold >= outdated_parts_over_threshold) + allowed_parts_over_threshold = active_parts_to_throw_insert - active_parts_to_delay_insert; + else + allowed_parts_over_threshold + = (settings->inactive_parts_to_throw_insert > 0 + ? settings->inactive_parts_to_throw_insert - settings->inactive_parts_to_delay_insert + : outdated_parts_over_threshold); - const UInt64 max_delay_milliseconds = (settings->max_delay_to_insert > 0 ? settings->max_delay_to_insert * 1000 : 1000); - /// min() as a save guard here - const UInt64 delay_milliseconds - = std::min(max_delay_milliseconds, static_cast(::pow(max_delay_milliseconds, static_cast(k) / max_k))); + chassert(parts_over_threshold <= allowed_parts_over_threshold); + + const UInt64 max_delay_milliseconds = (settings->max_delay_to_insert > 0 ? settings->max_delay_to_insert * 1000 : 1000); + double delay_factor = static_cast(parts_over_threshold) / allowed_parts_over_threshold; + /// min() as a save guard here + return std::min(max_delay_milliseconds, static_cast(max_delay_milliseconds * delay_factor)); + }(); ProfileEvents::increment(ProfileEvents::DelayedInserts); ProfileEvents::increment(ProfileEvents::DelayedInsertsMilliseconds, delay_milliseconds); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 670c755cf72..f846ba5e184 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -533,7 +533,7 @@ public: std::pair getMaxPartsCountAndSizeForPartitionWithState(DataPartState state) const; std::pair getMaxPartsCountAndSizeForPartition() const; - size_t getMaxInactivePartsCountForPartition() const; + size_t getMaxOutdatedPartsCountForPartition() const; /// Get min value of part->info.getDataVersion() for all active parts. 
/// Makes sense only for ordinary MergeTree engines because for them block numbering doesn't depend on partition. @@ -553,7 +553,7 @@ public: /// If the table contains too many active parts, sleep for a while to give them time to merge. /// If until is non-null, wake up from the sleep earlier if the event happened. - void delayInsertOrThrowIfNeeded(Poco::Event * until, ContextPtr query_context) const; + void delayInsertOrThrowIfNeeded(Poco::Event * until, const ContextPtr & query_context) const; /// Renames temporary part to a permanent part and adds it to the parts set. /// It is assumed that the part does not intersect with existing parts. From 00b2c96ce8bb27fec49589ea86ab4255580557b5 Mon Sep 17 00:00:00 2001 From: serxa Date: Fri, 6 Jan 2023 22:48:19 +0000 Subject: [PATCH 108/262] fix special builds --- src/Common/Threading.cpp | 5 ++--- src/Common/Threading.h | 2 ++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Common/Threading.cpp b/src/Common/Threading.cpp index 0e073162bb8..cc7d119fa8c 100644 --- a/src/Common/Threading.cpp +++ b/src/Common/Threading.cpp @@ -1,5 +1,4 @@ #include -#include namespace DB { @@ -25,12 +24,12 @@ namespace DB namespace { - inline long futexWait(void * address, UInt32 value) + inline Int64 futexWait(void * address, UInt32 value) { return syscall(SYS_futex, address, FUTEX_WAIT_PRIVATE, value, nullptr, nullptr, 0); } - inline long futexWake(void * address, int count) + inline Int64 futexWake(void * address, int count) { return syscall(SYS_futex, address, FUTEX_WAKE_PRIVATE, count, nullptr, nullptr, 0); } diff --git a/src/Common/Threading.h b/src/Common/Threading.h index e21fc5608b2..7a656b05ff1 100644 --- a/src/Common/Threading.h +++ b/src/Common/Threading.h @@ -3,6 +3,8 @@ #include #include +#include + #ifdef OS_LINUX /// Because of futex #include From 705c8f01affcdaff9a1abef8e3c955dae1eb0881 Mon Sep 17 00:00:00 2001 From: serxa Date: Sat, 7 Jan 2023 00:31:53 +0000 Subject: [PATCH 109/262] fix tests --- src/Common/Threading.cpp | 13 +++++++------ src/Common/Threading.h | 6 +++++- 2 files changed, 12 insertions(+), 7 deletions(-) diff --git a/src/Common/Threading.cpp b/src/Common/Threading.cpp index cc7d119fa8c..714f45d8c90 100644 --- a/src/Common/Threading.cpp +++ b/src/Common/Threading.cpp @@ -143,32 +143,33 @@ void CancelToken::Registry::signal(UInt64 tid, int code, const String & message) it->second->signalImpl(code, message); } -CancelToken::Registry & CancelToken::Registry::instance() +const std::shared_ptr & CancelToken::Registry::instance() { - static Registry registry; + static std::shared_ptr registry{new Registry()}; // shared_ptr is used to enforce correct destruction order of tokens and registry return registry; } CancelToken::CancelToken() : state(disabled) , thread_id(getThreadId()) + , registry(Registry::instance()) { - Registry::instance().insert(this); + registry->insert(this); } CancelToken::~CancelToken() { - Registry::instance().remove(this); + registry->remove(this); } void CancelToken::signal(UInt64 tid) { - Registry::instance().signal(tid); + Registry::instance()->signal(tid); } void CancelToken::signal(UInt64 tid, int code, const String & message) { - Registry::instance().signal(tid, code, message); + Registry::instance()->signal(tid, code, message); } bool CancelToken::wait(UInt32 * address, UInt32 value) diff --git a/src/Common/Threading.h b/src/Common/Threading.h index 7a656b05ff1..d5d32e73b67 100644 --- a/src/Common/Threading.h +++ b/src/Common/Threading.h @@ -10,6 +10,7 @@ #include #include #include +#include 
namespace DB { @@ -120,7 +121,7 @@ private: void signal(UInt64 tid); void signal(UInt64 tid, int code, const String & message); - static Registry & instance(); + static const std::shared_ptr & instance(); }; // Cancels this token and wakes thread if necessary. @@ -151,6 +152,9 @@ private: // Token is permanently attached to a single thread. There is one-to-one mapping between threads and tokens. const UInt64 thread_id; + + // To avoid `Registry` destruction before last `Token` destruction + const std::shared_ptr registry; }; class CancellableSharedMutex From bb71ec7f0e50ad15b18ad47f7d9cab9a6510fccf Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Jan 2023 02:47:32 +0100 Subject: [PATCH 110/262] Supposedly fix the "Download script failed" error --- tests/ci/get_previous_release_tag.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tests/ci/get_previous_release_tag.py b/tests/ci/get_previous_release_tag.py index b9ad51379d2..373d1656a18 100755 --- a/tests/ci/get_previous_release_tag.py +++ b/tests/ci/get_previous_release_tag.py @@ -6,6 +6,7 @@ import logging import requests # type: ignore CLICKHOUSE_TAGS_URL = "https://api.github.com/repos/ClickHouse/ClickHouse/tags" +CLICKHOUSE_PACKAGE_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-common-static_{version}_amd64.deb" VERSION_PATTERN = r"(v(?:\d+\.)?(?:\d+\.)?(?:\d+\.)?\d+-[a-zA-Z]*)" @@ -42,7 +43,29 @@ def find_previous_release(server_version, releases): for release in releases: if release.version < server_version: - return True, release + + # Check if the artifact exists on GitHub. + # It can be not true for a short period of time + # after creating a tag for a new release before uploading the packages. 
+ if ( + requests.head( + CLICKHOUSE_PACKAGE_URL.format( + version=release.version, type=release.type + ), + total=10, + read=10, + connect=10, + backoff_factor=0.3, + ).status_code + != 404 + ): + return True, release + else: + print( + "The tag {version}-{type} exists bug the package is not yet available on GitHub".format( + version=release.version, type=release.type + ) + ) return False, None From 8ca1740c37d7b4f19cc00ec1ee108d5ba9c51bad Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Sat, 7 Jan 2023 04:48:42 +0300 Subject: [PATCH 111/262] Update get_previous_release_tag.py --- tests/ci/get_previous_release_tag.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/get_previous_release_tag.py b/tests/ci/get_previous_release_tag.py index 373d1656a18..6551ba80ecd 100755 --- a/tests/ci/get_previous_release_tag.py +++ b/tests/ci/get_previous_release_tag.py @@ -62,7 +62,7 @@ def find_previous_release(server_version, releases): return True, release else: print( - "The tag {version}-{type} exists bug the package is not yet available on GitHub".format( + "The tag {version}-{type} exists but the package is not yet available on GitHub".format( version=release.version, type=release.type ) ) From a4470dd1b27bf5b1f5e251434bb017d39b12eb27 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Mon, 9 Jan 2023 09:01:44 +0100 Subject: [PATCH 112/262] Update src/Common/Threading.cpp Co-authored-by: Antonio Andelic --- src/Common/Threading.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/Common/Threading.cpp b/src/Common/Threading.cpp index 714f45d8c90..ae32a1a1052 100644 --- a/src/Common/Threading.cpp +++ b/src/Common/Threading.cpp @@ -350,9 +350,7 @@ void CancellableSharedMutex::lock() bool CancellableSharedMutex::try_lock() { UInt64 value = state.load(); - if ((value & (readers | writers)) == 0 && state.compare_exchange_strong(value, value | writers)) - return true; - return false; + return (value & (readers | writers)) == 0 && state.compare_exchange_strong(value, value | writers); } void CancellableSharedMutex::unlock() From 20c7c0b1eff01457fcc9ad4933c64f40c84341f6 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 9 Jan 2023 18:21:31 +0800 Subject: [PATCH 113/262] change as request --- .../functions/date-time-functions.md | 11 ++++++-- src/Common/DateLUTImpl.h | 26 +++++++++++++++++++ src/Functions/DateTimeTransforms.h | 20 ++++++++------ src/Functions/toDayOfWeek.cpp | 5 ++-- .../02521_to_custom_day_of_week.reference | 7 +++++ .../02521_to_custom_day_of_week.sql | 10 +++++++ 6 files changed, 67 insertions(+), 12 deletions(-) create mode 100644 tests/queries/0_stateless/02521_to_custom_day_of_week.reference create mode 100644 tests/queries/0_stateless/02521_to_custom_day_of_week.sql diff --git a/docs/en/sql-reference/functions/date-time-functions.md b/docs/en/sql-reference/functions/date-time-functions.md index 89fa72de8bf..4ff89414e0a 100644 --- a/docs/en/sql-reference/functions/date-time-functions.md +++ b/docs/en/sql-reference/functions/date-time-functions.md @@ -207,9 +207,16 @@ Converts a date or date with time to a UInt8 number containing the number of the Aliases: `DAYOFMONTH`, `DAY`. -## toDayOfWeek +## toDayOfWeek(date\[,mode\]) -Converts a date or date with time to a UInt8 number containing the number of the day of the week (Monday is 1, and Sunday is 7). +Converts a date or date with time to a UInt8 number containing the number of the day of the week. 
The two-argument form of toDayOfWeek() enables you to specify whether the week starts on Monday or Sunday, and whether the return value should be in the range from 0 to 6 or from 1-7. If the mode argument is ommited, the default mode is 0. + +| Mode | First day of week | Range | +|------|-------------------|------------------------------------------------| +| 0 | Monday | 1-7, Monday = 1, Tuesday = 2, ..., Sunday = 7 | +| 1 | Monday | 0-6, Monday = 0, Tuesday = 1, ..., Sunday = 6 | +| 2 | Sunday | 0-6, Sunday = 0, Monday = 1, ..., Saturday = 6 | +| 3 | Sunday | 1-7, Sunday = 1, Monday = 2, ..., Saturday = 7 | Alias: `DAYOFWEEK`. diff --git a/src/Common/DateLUTImpl.h b/src/Common/DateLUTImpl.h index 84f063f9555..6bf530008dc 100644 --- a/src/Common/DateLUTImpl.h +++ b/src/Common/DateLUTImpl.h @@ -622,6 +622,25 @@ public: template inline UInt8 toDayOfWeek(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_week; } + template + inline UInt8 toDayOfWeek(DateOrTime v, UInt8 week_day_mode) const + { + /// 0: Sun = 7, Mon = 1 + /// 1: Sun = 6, Mon = 0 + /// 2: Sun = 0, Mon = 1 + /// 3: Sun = 1, Mon = 2 + week_day_mode = check_week_day_mode(week_day_mode); + auto res = toDayOfWeek(v); + + bool start_from_sunday = week_day_mode & (1 << 1); + bool zero_based = (week_day_mode == 1 || week_day_mode == 2); + if (start_from_sunday) + res = res % 7 + 1; + if (zero_based) + --res; + return res; + } + template inline UInt8 toDayOfMonth(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_month; } @@ -844,6 +863,13 @@ public: return week_format; } + /// Check and change mode to effective. + inline UInt8 check_week_day_mode(UInt8 mode) const /// NOLINT + { + return mode & 3; + } + + /** Calculate weekday from d. * Returns 0 for monday, 1 for tuesday... */ diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index f4163a336ef..56e4a0e2668 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -786,21 +786,25 @@ struct ToDayOfWeekImpl { static constexpr auto name = "toDayOfWeek"; - static inline UInt8 execute(Int64 t, const DateLUTImpl & time_zone) + static inline UInt8 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(t); } + static inline UInt8 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(t); } + static inline UInt8 execute(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(ExtendedDayNum(d)); } + static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(DayNum(d)); } + static inline UInt8 execute(Int64 t, UInt8 week_day_mode, const DateLUTImpl & time_zone) { - return time_zone.toDayOfWeek(t); + return time_zone.toDayOfWeek(t, week_day_mode); } - static inline UInt8 execute(UInt32 t, const DateLUTImpl & time_zone) + static inline UInt8 execute(UInt32 t, UInt8 week_day_mode, const DateLUTImpl & time_zone) { - return time_zone.toDayOfWeek(t); + return time_zone.toDayOfWeek(t, week_day_mode); } - static inline UInt8 execute(Int32 d, const DateLUTImpl & time_zone) + static inline UInt8 execute(Int32 d, UInt8 week_day_mode, const DateLUTImpl & time_zone) { - return time_zone.toDayOfWeek(ExtendedDayNum(d)); + return time_zone.toDayOfWeek(ExtendedDayNum(d), week_day_mode); } - static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone) + static inline UInt8 execute(UInt16 d, UInt8 week_day_mode, const DateLUTImpl & time_zone) { - return time_zone.toDayOfWeek(DayNum(d)); + return time_zone.toDayOfWeek(DayNum(d), 
week_day_mode); } using FactorTransform = ToMondayImpl; diff --git a/src/Functions/toDayOfWeek.cpp b/src/Functions/toDayOfWeek.cpp index 354d4dea894..09271cbe55d 100644 --- a/src/Functions/toDayOfWeek.cpp +++ b/src/Functions/toDayOfWeek.cpp @@ -1,13 +1,14 @@ #include #include -#include #include +#include + namespace DB { -using FunctionToDayOfWeek = FunctionDateOrDateTimeToSomething; +using FunctionToDayOfWeek = FunctionCustomWeekToSomething; REGISTER_FUNCTION(ToDayOfWeek) { diff --git a/tests/queries/0_stateless/02521_to_custom_day_of_week.reference b/tests/queries/0_stateless/02521_to_custom_day_of_week.reference new file mode 100644 index 00000000000..660dff37b72 --- /dev/null +++ b/tests/queries/0_stateless/02521_to_custom_day_of_week.reference @@ -0,0 +1,7 @@ +1 7 +1 7 +0 6 +1 0 +2 1 +1 7 +0 6 diff --git a/tests/queries/0_stateless/02521_to_custom_day_of_week.sql b/tests/queries/0_stateless/02521_to_custom_day_of_week.sql new file mode 100644 index 00000000000..5475e15a984 --- /dev/null +++ b/tests/queries/0_stateless/02521_to_custom_day_of_week.sql @@ -0,0 +1,10 @@ + +with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon), toDayOfWeek(date_sun); +with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 0), toDayOfWeek(date_sun, 0); +with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 1), toDayOfWeek(date_sun, 1); +with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 2), toDayOfWeek(date_sun, 2); +with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 3), toDayOfWeek(date_sun, 3); +with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 4), toDayOfWeek(date_sun, 4); +with toDate('2023-01-09') as date_mon, date_mon - 1 as date_sun select toDayOfWeek(date_mon, 5), toDayOfWeek(date_sun, 5); + +select toDayOfWeek(today(), -1); -- { serverError 43 } From 10aa2207b312c3a713b611693730cd7b2fa32bca Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 3 Jan 2023 12:55:12 +0100 Subject: [PATCH 114/262] Add typing to stopwatch.py --- tests/ci/stopwatch.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/ci/stopwatch.py b/tests/ci/stopwatch.py index db174550c03..1ab6737530c 100644 --- a/tests/ci/stopwatch.py +++ b/tests/ci/stopwatch.py @@ -9,9 +9,9 @@ class Stopwatch: self.start_time_str_value = self.start_time.strftime("%Y-%m-%d %H:%M:%S") @property - def duration_seconds(self): + def duration_seconds(self) -> float: return (datetime.datetime.utcnow() - self.start_time).total_seconds() @property - def start_time_str(self): + def start_time_str(self) -> str: return self.start_time_str_value From db96f9e3db4309fb51bad74986fcca144c8c7d17 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Tue, 3 Jan 2023 15:23:19 +0100 Subject: [PATCH 115/262] Rework test_results list/tuple/whatever into class --- tests/ci/ast_fuzzer_check.py | 20 ++-- tests/ci/bugfix_validate_check.py | 23 ++-- tests/ci/clickhouse_helper.py | 42 ++++---- tests/ci/codebrowser_check.py | 9 +- tests/ci/compatibility_check.py | 67 +++++++----- tests/ci/docker_images_check.py | 90 ++++++++-------- tests/ci/docker_manifests_merge.py | 5 +- tests/ci/docker_server.py | 17 +-- tests/ci/docker_test.py | 40 ++++--- tests/ci/docs_check.py | 41 ++++--- tests/ci/docs_release.py | 33 +++--- tests/ci/fast_test_check.py | 44 ++++---- tests/ci/functional_test_check.py | 45 ++++---- tests/ci/integration_test_check.py | 47 ++++---- tests/ci/jepsen_check.py | 31 +++--- tests/ci/report.py | 168 ++++++++++++++++++++--------- tests/ci/sqlancer_check.py | 37 +++---- tests/ci/stress_check.py | 35 +++--- tests/ci/style_check.py | 20 ++-- tests/ci/unit_tests_check.py | 38 ++++--- tests/ci/upload_result_helper.py | 55 +++++----- 21 files changed, 512 insertions(+), 395 deletions(-) diff --git a/tests/ci/ast_fuzzer_check.py b/tests/ci/ast_fuzzer_check.py index 096edeed149..2a7dc0ad947 100644 --- a/tests/ci/ast_fuzzer_check.py +++ b/tests/ci/ast_fuzzer_check.py @@ -7,6 +7,10 @@ import sys from github import Github +from build_download_helper import get_build_name_for_check, read_build_urls +from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from commit_status_helper import post_commit_status +from docker_pull_helper import get_image_with_version from env_helper import ( GITHUB_REPOSITORY, GITHUB_RUN_URL, @@ -14,15 +18,12 @@ from env_helper import ( REPO_COPY, TEMP_PATH, ) -from s3_helper import S3Helper from get_robot_token import get_best_robot_token from pr_info import PRInfo -from build_download_helper import get_build_name_for_check, read_build_urls -from docker_pull_helper import get_image_with_version -from commit_status_helper import post_commit_status -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from stopwatch import Stopwatch +from report import TestResult from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch IMAGE_NAME = "clickhouse/fuzzer" @@ -148,16 +149,15 @@ if __name__ == "__main__": status = "failure" description = "Task failed: $?=" + str(retcode) + test_result = TestResult(description, "OK") if "fail" in status: - test_result = [(description, "FAIL")] - else: - test_result = [(description, "OK")] + test_result.status = "FAIL" ch_helper = ClickHouseHelper() prepared_events = prepare_tests_results_for_clickhouse( pr_info, - test_result, + [test_result], status, stopwatch.duration_seconds, stopwatch.start_time_str, diff --git a/tests/ci/bugfix_validate_check.py b/tests/ci/bugfix_validate_check.py index 6bdf3b1f7d2..14ea58500bc 100644 --- a/tests/ci/bugfix_validate_check.py +++ b/tests/ci/bugfix_validate_check.py @@ -1,18 +1,19 @@ #!/usr/bin/env python3 +from typing import List, Tuple import argparse import csv -import itertools import logging import os from github import Github -from s3_helper import S3Helper +from commit_status_helper import post_commit_status from get_robot_token import get_best_robot_token from pr_info import PRInfo +from report import TestResults, TestResult +from s3_helper import S3Helper from upload_result_helper import upload_results -from commit_status_helper import post_commit_status def parse_args(): @@ -21,11 +22,9 @@ def parse_args(): return 
parser.parse_args() -def post_commit_status_from_file(file_path): - res = [] +def post_commit_status_from_file(file_path: str) -> List[str]: with open(file_path, "r", encoding="utf-8") as f: - fin = csv.reader(f, delimiter="\t") - res = list(itertools.islice(fin, 1)) + res = list(csv.reader(f, delimiter="\t")) if len(res) < 1: raise Exception(f'Can\'t read from "{file_path}"') if len(res[0]) != 3: @@ -33,22 +32,22 @@ def post_commit_status_from_file(file_path): return res[0] -def process_result(file_path): - test_results = [] +def process_result(file_path: str) -> Tuple[bool, TestResults]: + test_results = [] # type: TestResults state, report_url, description = post_commit_status_from_file(file_path) prefix = os.path.basename(os.path.dirname(file_path)) is_ok = state == "success" if is_ok and report_url == "null": - return is_ok, None + return is_ok, test_results status = f'OK: Bug reproduced (Report)' if not is_ok: status = f'Bug is not reproduced (Report)' - test_results.append([f"{prefix}: {description}", status]) + test_results.append(TestResult(f"{prefix}: {description}", status)) return is_ok, test_results -def process_all_results(file_paths): +def process_all_results(file_paths: str) -> Tuple[bool, TestResults]: any_ok = False all_results = [] for status_path in file_paths: diff --git a/tests/ci/clickhouse_helper.py b/tests/ci/clickhouse_helper.py index c82d9da05e9..f914bb42d99 100644 --- a/tests/ci/clickhouse_helper.py +++ b/tests/ci/clickhouse_helper.py @@ -1,10 +1,14 @@ #!/usr/bin/env python3 -import time -import logging +from typing import List import json +import logging +import time import requests # type: ignore + from get_robot_token import get_parameter_from_ssm +from pr_info import PRInfo +from report import TestResults class InsertException(Exception): @@ -129,14 +133,14 @@ class ClickHouseHelper: def prepare_tests_results_for_clickhouse( - pr_info, - test_results, - check_status, - check_duration, - check_start_time, - report_url, - check_name, -): + pr_info: PRInfo, + test_results: TestResults, + check_status: str, + check_duration: float, + check_start_time: str, + report_url: str, + check_name: str, +) -> List[dict]: pull_request_url = "https://github.com/ClickHouse/ClickHouse/commits/master" base_ref = "master" @@ -172,13 +176,11 @@ def prepare_tests_results_for_clickhouse( result = [common_properties] for test_result in test_results: current_row = common_properties.copy() - test_name = test_result[0] - test_status = test_result[1] + test_name = test_result.name + test_status = test_result.status - test_time = 0 - if len(test_result) > 2 and test_result[2]: - test_time = test_result[2] - current_row["test_duration_ms"] = int(float(test_time) * 1000) + test_time = test_result.time or 0 + current_row["test_duration_ms"] = int(test_time * 1000) current_row["test_name"] = test_name current_row["test_status"] = test_status result.append(current_row) @@ -186,7 +188,9 @@ def prepare_tests_results_for_clickhouse( return result -def mark_flaky_tests(clickhouse_helper, check_name, test_results): +def mark_flaky_tests( + clickhouse_helper: ClickHouseHelper, check_name: str, test_results: TestResults +) -> None: try: query = f"""SELECT DISTINCT test_name FROM checks @@ -202,7 +206,7 @@ WHERE logging.info("Found flaky tests: %s", ", ".join(master_failed_tests)) for test_result in test_results: - if test_result[1] == "FAIL" and test_result[0] in master_failed_tests: - test_result[1] = "FLAKY" + if test_result.status == "FAIL" and test_result.name in master_failed_tests: + 
test_result.status = "FLAKY" except Exception as ex: logging.error("Exception happened during flaky tests fetch %s", ex) diff --git a/tests/ci/codebrowser_check.py b/tests/ci/codebrowser_check.py index a86749c794c..9fa202a357c 100644 --- a/tests/ci/codebrowser_check.py +++ b/tests/ci/codebrowser_check.py @@ -7,6 +7,8 @@ import logging from github import Github +from commit_status_helper import post_commit_status +from docker_pull_helper import get_image_with_version from env_helper import ( IMAGES_PATH, REPO_COPY, @@ -14,10 +16,9 @@ from env_helper import ( S3_TEST_REPORTS_BUCKET, TEMP_PATH, ) -from commit_status_helper import post_commit_status -from docker_pull_helper import get_image_with_version from get_robot_token import get_best_robot_token from pr_info import PRInfo +from report import TestResult from s3_helper import S3Helper from stopwatch import Stopwatch from tee_popen import TeePopen @@ -80,9 +81,9 @@ if __name__ == "__main__": "HTML report" ) - test_results = [(index_html, "Look at the report")] + test_result = TestResult(index_html, "Look at the report") - report_url = upload_results(s3_helper, 0, pr_info.sha, test_results, [], NAME) + report_url = upload_results(s3_helper, 0, pr_info.sha, [test_result], [], NAME) print(f"::notice ::Report url: {report_url}") diff --git a/tests/ci/compatibility_check.py b/tests/ci/compatibility_check.py index 2b61501a0dd..7d8086973bb 100644 --- a/tests/ci/compatibility_check.py +++ b/tests/ci/compatibility_check.py @@ -1,6 +1,7 @@ #!/usr/bin/env python3 from distutils.version import StrictVersion +from typing import List, Tuple import logging import os import subprocess @@ -8,21 +9,22 @@ import sys from github import Github -from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token -from pr_info import PRInfo from build_download_helper import download_builds_filter -from upload_result_helper import upload_results -from docker_pull_helper import get_images_with_versions -from commit_status_helper import post_commit_status from clickhouse_helper import ( ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse, ) -from stopwatch import Stopwatch +from commit_status_helper import post_commit_status +from docker_pull_helper import get_images_with_versions +from env_helper import TEMP_PATH, REPORTS_PATH +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from report import TestResults, TestResult from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch +from upload_result_helper import upload_results IMAGE_UBUNTU = "clickhouse/test-old-ubuntu" IMAGE_CENTOS = "clickhouse/test-old-centos" @@ -31,18 +33,18 @@ DOWNLOAD_RETRIES_COUNT = 5 CHECK_NAME = "Compatibility check" -def process_os_check(log_path): +def process_os_check(log_path: str) -> TestResult: name = os.path.basename(log_path) with open(log_path, "r") as log: line = log.read().split("\n")[0].strip() if line != "OK": - return (name, "FAIL") + return TestResult(name, "FAIL") else: - return (name, "OK") + return TestResult(name, "OK") -def process_glibc_check(log_path): - bad_lines = [] +def process_glibc_check(log_path: str) -> TestResults: + test_results = [] # type: TestResults with open(log_path, "r") as log: for line in log: if line.strip(): @@ -50,32 +52,36 @@ def process_glibc_check(log_path): symbol_with_glibc = columns[-2] # sysconf@GLIBC_2.2.5 _, version = symbol_with_glibc.split("@GLIBC_") if version == "PRIVATE": 
- bad_lines.append((symbol_with_glibc, "FAIL")) + test_results.append(TestResult(symbol_with_glibc, "FAIL")) elif StrictVersion(version) > MAX_GLIBC_VERSION: - bad_lines.append((symbol_with_glibc, "FAIL")) - if not bad_lines: - bad_lines.append(("glibc check", "OK")) - return bad_lines + test_results.append(TestResult(symbol_with_glibc, "FAIL")) + if not test_results: + test_results.append(TestResult("glibc check", "OK")) + return test_results -def process_result(result_folder, server_log_folder): - summary = process_glibc_check(os.path.join(result_folder, "glibc.log")) +def process_result( + result_folder: str, server_log_folder: str +) -> Tuple[str, str, TestResults, List[str]]: + test_results = process_glibc_check(os.path.join(result_folder, "glibc.log")) status = "success" description = "Compatibility check passed" - if len(summary) > 1 or summary[0][1] != "OK": + if len(test_results) > 1 or test_results[0].status != "OK": status = "failure" description = "glibc check failed" if status == "success": for operating_system in ("ubuntu:12.04", "centos:5"): - result = process_os_check(os.path.join(result_folder, operating_system)) - if result[1] != "OK": + test_result = process_os_check( + os.path.join(result_folder, operating_system) + ) + if test_result.status != "OK": status = "failure" description = f"Old {operating_system} failed" - summary += [result] + test_results += [test_result] break - summary += [result] + test_results += [test_result] server_log_path = os.path.join(server_log_folder, "clickhouse-server.log") stderr_log_path = os.path.join(server_log_folder, "stderr.log") @@ -90,7 +96,7 @@ def process_result(result_folder, server_log_folder): if os.path.exists(client_stderr_log_path): result_logs.append(client_stderr_log_path) - return status, description, summary, result_logs + return status, description, test_results, result_logs def get_run_commands( @@ -109,13 +115,12 @@ def get_run_commands( ] -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() temp_path = TEMP_PATH - repo_path = REPO_COPY reports_path = REPORTS_PATH pr_info = PRInfo() @@ -201,5 +206,9 @@ if __name__ == "__main__": ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - if state == "error": + if state == "failure": sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/docker_images_check.py b/tests/ci/docker_images_check.py index 51cbbf6f0af..f5b707be48f 100644 --- a/tests/ci/docker_images_check.py +++ b/tests/ci/docker_images_check.py @@ -8,6 +8,7 @@ import shutil import subprocess import time import sys +from pathlib import Path from typing import Any, Dict, List, Optional, Set, Tuple, Union from github import Github @@ -17,6 +18,7 @@ from commit_status_helper import post_commit_status from env_helper import GITHUB_WORKSPACE, RUNNER_TEMP, GITHUB_RUN_URL from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo +from report import TestResults, TestResult from s3_helper import S3Helper from stopwatch import Stopwatch from upload_result_helper import upload_results @@ -182,11 +184,12 @@ def build_and_push_dummy_image( image: DockerImage, version_string: str, push: bool, -) -> Tuple[bool, str]: +) -> Tuple[bool, Path]: dummy_source = "ubuntu:20.04" logging.info("Building docker image %s as %s", image.repo, dummy_source) - build_log = os.path.join( - TEMP_PATH, f"build_and_push_log_{image.repo.replace('/', '_')}_{version_string}" + build_log = ( + Path(TEMP_PATH) + / 
f"build_and_push_log_{image.repo.replace('/', '_')}_{version_string}.log" ) with open(build_log, "wb") as bl: cmd = ( @@ -213,7 +216,7 @@ def build_and_push_one_image( additional_cache: str, push: bool, child: bool, -) -> Tuple[bool, str]: +) -> Tuple[bool, Path]: if image.only_amd64 and platform.machine() not in ["amd64", "x86_64"]: return build_and_push_dummy_image(image, version_string, push) logging.info( @@ -222,8 +225,9 @@ def build_and_push_one_image( version_string, image.full_path, ) - build_log = os.path.join( - TEMP_PATH, f"build_and_push_log_{image.repo.replace('/', '_')}_{version_string}" + build_log = ( + Path(TEMP_PATH) + / f"build_and_push_log_{image.repo.replace('/', '_')}_{version_string}.log" ) push_arg = "" if push: @@ -273,27 +277,42 @@ def process_single_image( additional_cache: str, push: bool, child: bool, -) -> List[Tuple[str, str, str]]: +) -> TestResults: logging.info("Image will be pushed with versions %s", ", ".join(versions)) - result = [] + results = [] # type: TestResults for ver in versions: + stopwatch = Stopwatch() for i in range(5): success, build_log = build_and_push_one_image( image, ver, additional_cache, push, child ) if success: - result.append((image.repo + ":" + ver, build_log, "OK")) + results.append( + TestResult( + image.repo + ":" + ver, + "OK", + stopwatch.duration_seconds, + [build_log], + ) + ) break logging.info( "Got error will retry %s time and sleep for %s seconds", i, i * 5 ) time.sleep(i * 5) else: - result.append((image.repo + ":" + ver, build_log, "FAIL")) + results.append( + TestResult( + image.repo + ":" + ver, + "FAIL", + stopwatch.duration_seconds, + [build_log], + ) + ) logging.info("Processing finished") image.built = True - return result + return results def process_image_with_parents( @@ -302,41 +321,19 @@ def process_image_with_parents( additional_cache: str, push: bool, child: bool = False, -) -> List[Tuple[str, str, str]]: - result = [] # type: List[Tuple[str,str,str]] +) -> TestResults: + results = [] # type: TestResults if image.built: - return result + return results if image.parent is not None: - result += process_image_with_parents( + results += process_image_with_parents( image.parent, versions, additional_cache, push, False ) child = True - result += process_single_image(image, versions, additional_cache, push, child) - return result - - -def process_test_results( - s3_client: S3Helper, test_results: List[Tuple[str, str, str]], s3_path_prefix: str -) -> Tuple[str, List[Tuple[str, str]]]: - overall_status = "success" - processed_test_results = [] - for image, build_log, status in test_results: - if status != "OK": - overall_status = "failure" - url_part = "" - if build_log is not None and os.path.exists(build_log): - build_url = s3_client.upload_test_report_to_s3( - build_log, s3_path_prefix + "/" + os.path.basename(build_log) - ) - url_part += f'build_log' - if url_part: - test_name = image + " (" + url_part + ")" - else: - test_name = image - processed_test_results.append((test_name, status)) - return overall_status, processed_test_results + results += process_single_image(image, versions, additional_cache, push, child) + return results def parse_args() -> argparse.Namespace: @@ -440,7 +437,7 @@ def main(): image_versions, result_version = gen_versions(pr_info, args.suffix) result_images = {} - images_processing_result = [] + test_results = [] # type: TestResults additional_cache = "" if pr_info.release_pr or pr_info.merged_pr: additional_cache = str(pr_info.release_pr or pr_info.merged_pr) @@ -448,7 +445,7 @@ def 
main(): for image in changed_images: # If we are in backport PR, then pr_info.release_pr is defined # We use it as tag to reduce rebuilding time - images_processing_result += process_image_with_parents( + test_results += process_image_with_parents( image, image_versions, additional_cache, args.push ) result_images[image.repo] = result_version @@ -466,12 +463,9 @@ def main(): s3_helper = S3Helper() - s3_path_prefix = ( - str(pr_info.number) + "/" + pr_info.sha + "/" + NAME.lower().replace(" ", "_") - ) - status, test_results = process_test_results( - s3_helper, images_processing_result, s3_path_prefix - ) + status = "success" + if [r for r in test_results if r.status != "OK"]: + status = "failure" url = upload_results(s3_helper, pr_info.number, pr_info.sha, test_results, [], NAME) @@ -495,7 +489,7 @@ def main(): ch_helper = ClickHouseHelper() ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - if status == "error": + if status == "failure": sys.exit(1) diff --git a/tests/ci/docker_manifests_merge.py b/tests/ci/docker_manifests_merge.py index e39731c9ff3..9a77a91647e 100644 --- a/tests/ci/docker_manifests_merge.py +++ b/tests/ci/docker_manifests_merge.py @@ -14,6 +14,7 @@ from commit_status_helper import post_commit_status from env_helper import RUNNER_TEMP from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo +from report import TestResults, TestResult from s3_helper import S3Helper from stopwatch import Stopwatch from upload_result_helper import upload_results @@ -189,11 +190,11 @@ def main(): merged = merge_images(to_merge) status = "success" - test_results = [] # type: List[Tuple[str, str]] + test_results = [] # type: TestResults for image, versions in merged.items(): for tags in versions: manifest, test_result = create_manifest(image, tags, args.push) - test_results.append((manifest, test_result)) + test_results.append(TestResult(manifest, test_result)) if test_result != "OK": status = "failure" diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index fd28e5a1890..544ab4e0a90 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -8,7 +8,7 @@ import subprocess import sys import time from os import path as p, makedirs -from typing import List, Tuple +from typing import List from github import Github @@ -20,6 +20,7 @@ from env_helper import CI, GITHUB_RUN_URL, RUNNER_TEMP, S3_BUILDS_BUCKET, S3_DOW from get_robot_token import get_best_robot_token, get_parameter_from_ssm from git_helper import Git from pr_info import PRInfo +from report import TestResults, TestResult from s3_helper import S3Helper from stopwatch import Stopwatch from upload_result_helper import upload_results @@ -235,8 +236,8 @@ def build_and_push_image( os: str, tag: str, version: ClickHouseVersion, -) -> List[Tuple[str, str]]: - result = [] +) -> TestResults: + result = [] # type: TestResults if os != "ubuntu": tag += f"-{os}" init_args = ["docker", "buildx", "build", "--build-arg BUILDKIT_INLINE_CACHE=1"] @@ -270,9 +271,9 @@ def build_and_push_image( cmd = " ".join(cmd_args) logging.info("Building image %s:%s for arch %s: %s", image.repo, tag, arch, cmd) if retry_popen(cmd) != 0: - result.append((f"{image.repo}:{tag}-{arch}", "FAIL")) + result.append(TestResult(f"{image.repo}:{tag}-{arch}", "FAIL")) return result - result.append((f"{image.repo}:{tag}-{arch}", "OK")) + result.append(TestResult(f"{image.repo}:{tag}-{arch}", "OK")) with open(metadata_path, "rb") as m: metadata = json.load(m) 
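As an aside, a minimal sketch of the status-aggregation idiom the docker checks switch to once results are TestResult objects instead of tuples. The image names and timings are made up, and the import assumes tests/ci/report.py is importable.

``` python
from report import TestResult, TestResults  # added to tests/ci/report.py by this patch

# Invented example data: one entry per pushed tag/arch
test_results = [
    TestResult("clickhouse/clickhouse-server:head-amd64", "OK", 12.3),
    TestResult("clickhouse/clickhouse-server:head-arm64", "FAIL", 7.1),
]  # type: TestResults

# The overall check state flips to "failure" as soon as any entry is not OK
status = "success"
if any(result.status != "OK" for result in test_results):
    status = "failure"

print(status)  # prints: failure
```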
digests.append(metadata["containerimage.digest"]) @@ -283,7 +284,7 @@ def build_and_push_image( ) logging.info("Pushing merged %s:%s image: %s", image.repo, tag, cmd) if retry_popen(cmd) != 0: - result.append((f"{image.repo}:{tag}", "FAIL")) + result.append(TestResult(f"{image.repo}:{tag}", "FAIL")) return result else: logging.info( @@ -323,7 +324,7 @@ def main(): logging.info("Following tags will be created: %s", ", ".join(tags)) status = "success" - test_results = [] # type: List[Tuple[str, str]] + test_results = [] # type: TestResults for os in args.os: for tag in tags: test_results.extend( @@ -331,7 +332,7 @@ def main(): image, args.push, args.bucket_prefix, os, tag, args.version ) ) - if test_results[-1][1] != "OK": + if test_results[-1].status != "OK": status = "failure" pr_info = pr_info or PRInfo() diff --git a/tests/ci/docker_test.py b/tests/ci/docker_test.py index 8b18a580ed7..e7b54652272 100644 --- a/tests/ci/docker_test.py +++ b/tests/ci/docker_test.py @@ -3,9 +3,11 @@ import os import unittest from unittest.mock import patch, MagicMock +from pathlib import Path from env_helper import GITHUB_RUN_URL from pr_info import PRInfo +from report import TestResult import docker_images_check as di with patch("git_helper.Git"): @@ -223,40 +225,48 @@ class TestDockerImageCheck(unittest.TestCase): @patch("docker_images_check.build_and_push_one_image") def test_process_image_with_parents(self, mock_build): - mock_build.side_effect = lambda v, w, x, y, z: (True, f"{v.repo}_{w}.log") + mock_build.side_effect = lambda v, w, x, y, z: (True, Path(f"{v.repo}_{w}.log")) im1 = di.DockerImage("path1", "repo1", False) im2 = di.DockerImage("path2", "repo2", False, im1) im3 = di.DockerImage("path3", "repo3", False, im2) im4 = di.DockerImage("path4", "repo4", False, im1) # We use list to have determined order of image builgings images = [im4, im1, im3, im2, im1] - results = [ + test_results = [ di.process_image_with_parents(im, ["v1", "v2", "latest"], "", True) for im in images ] + # The time is random, so we check it's not None and greater than 0, + # and then set to 1 + for results in test_results: + for result in results: + self.assertIsNotNone(result.time) + self.assertGreater(result.time, 0) # type: ignore + result.time = 1 + self.maxDiff = None expected = [ [ # repo4 -> repo1 - ("repo1:v1", "repo1_v1.log", "OK"), - ("repo1:v2", "repo1_v2.log", "OK"), - ("repo1:latest", "repo1_latest.log", "OK"), - ("repo4:v1", "repo4_v1.log", "OK"), - ("repo4:v2", "repo4_v2.log", "OK"), - ("repo4:latest", "repo4_latest.log", "OK"), + TestResult("repo1:v1", "OK", 1, [Path("repo1_v1.log")]), + TestResult("repo1:v2", "OK", 1, [Path("repo1_v2.log")]), + TestResult("repo1:latest", "OK", 1, [Path("repo1_latest.log")]), + TestResult("repo4:v1", "OK", 1, [Path("repo4_v1.log")]), + TestResult("repo4:v2", "OK", 1, [Path("repo4_v2.log")]), + TestResult("repo4:latest", "OK", 1, [Path("repo4_latest.log")]), ], [], # repo1 is built [ # repo3 -> repo2 -> repo1 - ("repo2:v1", "repo2_v1.log", "OK"), - ("repo2:v2", "repo2_v2.log", "OK"), - ("repo2:latest", "repo2_latest.log", "OK"), - ("repo3:v1", "repo3_v1.log", "OK"), - ("repo3:v2", "repo3_v2.log", "OK"), - ("repo3:latest", "repo3_latest.log", "OK"), + TestResult("repo2:v1", "OK", 1, [Path("repo2_v1.log")]), + TestResult("repo2:v2", "OK", 1, [Path("repo2_v2.log")]), + TestResult("repo2:latest", "OK", 1, [Path("repo2_latest.log")]), + TestResult("repo3:v1", "OK", 1, [Path("repo3_v1.log")]), + TestResult("repo3:v2", "OK", 1, [Path("repo3_v2.log")]), + TestResult("repo3:latest", 
"OK", 1, [Path("repo3_latest.log")]), ], [], # repo2 -> repo1 are built [], # repo1 is built ] - self.assertEqual(results, expected) + self.assertEqual(test_results, expected) class TestDockerServer(unittest.TestCase): diff --git a/tests/ci/docs_check.py b/tests/ci/docs_check.py index cac1c3aea7c..4378c857afe 100644 --- a/tests/ci/docs_check.py +++ b/tests/ci/docs_check.py @@ -4,24 +4,27 @@ import logging import subprocess import os import sys + from github import Github -from env_helper import TEMP_PATH, REPO_COPY -from s3_helper import S3Helper -from pr_info import PRInfo -from get_robot_token import get_best_robot_token -from upload_result_helper import upload_results -from docker_pull_helper import get_image_with_version -from commit_status_helper import post_commit_status, get_commit from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from stopwatch import Stopwatch +from commit_status_helper import post_commit_status, get_commit +from docker_pull_helper import get_image_with_version +from env_helper import TEMP_PATH, REPO_COPY +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from report import TestResults, TestResult from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch from tee_popen import TeePopen +from upload_result_helper import upload_results NAME = "Docs Check" -if __name__ == "__main__": + +def main(): parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter, description="Script to check the docs integrity", @@ -98,7 +101,7 @@ if __name__ == "__main__": subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) files = os.listdir(test_output) - lines = [] + test_results = [] # type: TestResults additional_files = [] if not files: logging.error("No output files after docs check") @@ -111,27 +114,27 @@ if __name__ == "__main__": with open(path, "r", encoding="utf-8") as check_file: for line in check_file: if "ERROR" in line: - lines.append((line.split(":")[-1], "FAIL")) - if lines: + test_results.append(TestResult(line.split(":")[-1], "FAIL")) + if test_results: status = "failure" description = "Found errors in docs" elif status != "failure": - lines.append(("No errors found", "OK")) + test_results.append(TestResult("No errors found", "OK")) else: - lines.append(("Non zero exit code", "FAIL")) + test_results.append(TestResult("Non zero exit code", "FAIL")) s3_helper = S3Helper() ch_helper = ClickHouseHelper() report_url = upload_results( - s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME + s3_helper, pr_info.number, pr_info.sha, test_results, additional_files, NAME ) print("::notice ::Report url: {report_url}") post_commit_status(gh, pr_info.sha, NAME, description, status, report_url) prepared_events = prepare_tests_results_for_clickhouse( pr_info, - lines, + test_results, status, stopwatch.duration_seconds, stopwatch.start_time_str, @@ -140,5 +143,9 @@ if __name__ == "__main__": ) ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - if status == "error": + if status == "failure": sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/docs_release.py b/tests/ci/docs_release.py index f1f420318be..1b93aba99ba 100644 --- a/tests/ci/docs_release.py +++ b/tests/ci/docs_release.py @@ -7,16 +7,17 @@ import sys from github import Github -from env_helper import TEMP_PATH, REPO_COPY, CLOUDFLARE_TOKEN -from s3_helper import S3Helper -from pr_info import 
PRInfo -from get_robot_token import get_best_robot_token -from ssh import SSHKey -from upload_result_helper import upload_results -from docker_pull_helper import get_image_with_version from commit_status_helper import get_commit +from docker_pull_helper import get_image_with_version +from env_helper import TEMP_PATH, REPO_COPY, CLOUDFLARE_TOKEN +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from report import TestResults, TestResult from rerun_helper import RerunHelper +from s3_helper import S3Helper +from ssh import SSHKey from tee_popen import TeePopen +from upload_result_helper import upload_results NAME = "Docs Release" @@ -32,7 +33,7 @@ def parse_args() -> argparse.Namespace: return parser.parse_args() -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) args = parse_args() @@ -84,7 +85,7 @@ if __name__ == "__main__": subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) files = os.listdir(test_output) - lines = [] + test_results = [] # type: TestResults additional_files = [] if not files: logging.error("No output files after docs release") @@ -97,19 +98,19 @@ if __name__ == "__main__": with open(path, "r", encoding="utf-8") as check_file: for line in check_file: if "ERROR" in line: - lines.append((line.split(":")[-1], "FAIL")) - if lines: + test_results.append(TestResult(line.split(":")[-1], "FAIL")) + if test_results: status = "failure" description = "Found errors in docs" elif status != "failure": - lines.append(("No errors found", "OK")) + test_results.append(TestResult("No errors found", "OK")) else: - lines.append(("Non zero exit code", "FAIL")) + test_results.append(TestResult("Non zero exit code", "FAIL")) s3_helper = S3Helper() report_url = upload_results( - s3_helper, pr_info.number, pr_info.sha, lines, additional_files, NAME + s3_helper, pr_info.number, pr_info.sha, test_results, additional_files, NAME ) print("::notice ::Report url: {report_url}") commit = get_commit(gh, pr_info.sha) @@ -119,3 +120,7 @@ if __name__ == "__main__": if status == "failure": sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/fast_test_check.py b/tests/ci/fast_test_check.py index 0f4c1b19707..7a87a93c26d 100644 --- a/tests/ci/fast_test_check.py +++ b/tests/ci/fast_test_check.py @@ -6,29 +6,31 @@ import os import csv import sys import atexit +from pathlib import Path from typing import List, Tuple from github import Github -from env_helper import CACHES_PATH, TEMP_PATH -from pr_info import FORCE_TESTS_LABEL, PRInfo -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token -from upload_result_helper import upload_results -from docker_pull_helper import get_image_with_version -from commit_status_helper import ( - post_commit_status, - update_mergeable_check, -) +from ccache_utils import get_ccache_if_not_exists, upload_ccache from clickhouse_helper import ( ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse, ) -from stopwatch import Stopwatch +from commit_status_helper import ( + post_commit_status, + update_mergeable_check, +) +from docker_pull_helper import get_image_with_version +from env_helper import CACHES_PATH, TEMP_PATH +from get_robot_token import get_best_robot_token +from pr_info import FORCE_TESTS_LABEL, PRInfo +from report import TestResults, read_test_results from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch from tee_popen import TeePopen -from ccache_utils import 
get_ccache_if_not_exists, upload_ccache +from upload_result_helper import upload_results NAME = "Fast test" @@ -53,8 +55,8 @@ def get_fasttest_cmd( def process_results( result_folder: str, -) -> Tuple[str, str, List[Tuple[str, str]], List[str]]: - test_results = [] # type: List[Tuple[str, str]] +) -> Tuple[str, str, TestResults, List[str]]: + test_results = [] # type: TestResults additional_files = [] # Just upload all files from result_folder. # If task provides processed results, then it's responsible for content of @@ -78,17 +80,15 @@ def process_results( return "error", "Invalid check_status.tsv", test_results, additional_files state, description = status[0][0], status[0][1] - results_path = os.path.join(result_folder, "test_results.tsv") - if os.path.exists(results_path): - with open(results_path, "r", encoding="utf-8") as results_file: - test_results = list(csv.reader(results_file, delimiter="\t")) # type: ignore + results_path = Path(result_folder) / "test_results.tsv" + test_results = read_test_results(results_path) if len(test_results) == 0: return "error", "Empty test_results.tsv", test_results, additional_files return state, description, test_results, additional_files -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() @@ -175,7 +175,6 @@ if __name__ == "__main__": "test_log.txt" in test_output_files or "test_result.txt" in test_output_files ) test_result_exists = "test_results.tsv" in test_output_files - test_results = [] # type: List[Tuple[str, str]] if "submodule_log.txt" not in test_output_files: description = "Cannot clone repository" state = "failure" @@ -210,7 +209,6 @@ if __name__ == "__main__": test_results, [run_log_path] + additional_logs, NAME, - True, ) print(f"::notice ::Report url: {report_url}") post_commit_status(gh, pr_info.sha, NAME, description, state, report_url) @@ -232,3 +230,7 @@ if __name__ == "__main__": print(f"'{FORCE_TESTS_LABEL}' enabled, will report success") else: sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/functional_test_check.py b/tests/ci/functional_test_check.py index e7689a198cd..3653aefeb77 100644 --- a/tests/ci/functional_test_check.py +++ b/tests/ci/functional_test_check.py @@ -7,18 +7,17 @@ import os import subprocess import sys import atexit +from pathlib import Path from typing import List, Tuple from github import Github -from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token -from pr_info import FORCE_TESTS_LABEL, PRInfo from build_download_helper import download_all_deb_packages -from download_release_packages import download_last_release -from upload_result_helper import upload_results -from docker_pull_helper import get_image_with_version +from clickhouse_helper import ( + ClickHouseHelper, + mark_flaky_tests, + prepare_tests_results_for_clickhouse, +) from commit_status_helper import ( post_commit_status, get_commit, @@ -26,14 +25,17 @@ from commit_status_helper import ( post_commit_status_to_file, update_mergeable_check, ) -from clickhouse_helper import ( - ClickHouseHelper, - mark_flaky_tests, - prepare_tests_results_for_clickhouse, -) -from stopwatch import Stopwatch +from docker_pull_helper import get_image_with_version +from download_release_packages import download_last_release +from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH +from get_robot_token import get_best_robot_token +from pr_info import FORCE_TESTS_LABEL, PRInfo +from report import 
TestResults, read_test_results from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch from tee_popen import TeePopen +from upload_result_helper import upload_results NO_CHANGES_MSG = "Nothing to run" @@ -126,8 +128,8 @@ def get_tests_to_run(pr_info): def process_results( result_folder: str, server_log_path: str, -) -> Tuple[str, str, List[Tuple[str, str]], List[str]]: - test_results = [] # type: List[Tuple[str, str]] +) -> Tuple[str, str, TestResults, List[str]]: + test_results = [] # type: TestResults additional_files = [] # Just upload all files from result_folder. # If task provides processed results, then it's responsible for content of result_folder. @@ -161,16 +163,15 @@ def process_results( return "error", "Invalid check_status.tsv", test_results, additional_files state, description = status[0][0], status[0][1] - results_path = os.path.join(result_folder, "test_results.tsv") + results_path = Path(result_folder) / "test_results.tsv" - if os.path.exists(results_path): + if results_path.exists(): logging.info("Found test_results.tsv") else: logging.info("Files in result folder %s", os.listdir(result_folder)) return "error", "Not found test_results.tsv", test_results, additional_files - with open(results_path, "r", encoding="utf-8") as results_file: - test_results = list(csv.reader(results_file, delimiter="\t")) # type: ignore + test_results = read_test_results(results_path) if len(test_results) == 0: return "error", "Empty test_results.tsv", test_results, additional_files @@ -195,7 +196,7 @@ def parse_args(): return parser.parse_args() -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() @@ -377,3 +378,7 @@ if __name__ == "__main__": print(f"'{FORCE_TESTS_LABEL}' enabled, will report success") else: sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/integration_test_check.py b/tests/ci/integration_test_check.py index a6935e22091..85933e27309 100644 --- a/tests/ci/integration_test_check.py +++ b/tests/ci/integration_test_check.py @@ -7,31 +7,33 @@ import logging import os import subprocess import sys +from pathlib import Path from typing import List, Tuple from github import Github -from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token -from pr_info import PRInfo from build_download_helper import download_all_deb_packages -from download_release_packages import download_last_release -from upload_result_helper import upload_results -from docker_pull_helper import get_images_with_versions -from commit_status_helper import ( - post_commit_status, - override_status, - post_commit_status_to_file, -) from clickhouse_helper import ( ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse, ) -from stopwatch import Stopwatch +from commit_status_helper import ( + post_commit_status, + override_status, + post_commit_status_to_file, +) +from docker_pull_helper import get_images_with_versions +from download_release_packages import download_last_release +from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from report import TestResults, read_test_results from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch from tee_popen import TeePopen +from upload_result_helper import upload_results # When update, update @@ -90,8 +92,8 @@ def 
get_env_for_runner(build_path, repo_path, result_path, work_path): def process_results( result_folder: str, -) -> Tuple[str, str, List[Tuple[str, str]], List[str]]: - test_results = [] # type: List[Tuple[str, str]] +) -> Tuple[str, str, TestResults, List[str]]: + test_results = [] # type: TestResults additional_files = [] # Just upload all files from result_folder. # If task provides processed results, then it's responsible for content of result_folder. @@ -115,10 +117,8 @@ def process_results( return "error", "Invalid check_status.tsv", test_results, additional_files state, description = status[0][0], status[0][1] - results_path = os.path.join(result_folder, "test_results.tsv") - if os.path.exists(results_path): - with open(results_path, "r", encoding="utf-8") as results_file: - test_results = list(csv.reader(results_file, delimiter="\t")) # type: ignore + results_path = Path(result_folder) / "test_results.tsv" + test_results = read_test_results(results_path, False) if len(test_results) == 0: return "error", "Empty test_results.tsv", test_results, additional_files @@ -142,7 +142,7 @@ def parse_args(): return parser.parse_args() -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() @@ -271,7 +271,6 @@ if __name__ == "__main__": test_results, [output_path_log] + additional_logs, check_name_with_group, - False, ) print(f"::notice:: {check_name} Report url: {report_url}") @@ -303,5 +302,9 @@ if __name__ == "__main__": ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - if state == "error": + if state == "failure": sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/jepsen_check.py b/tests/ci/jepsen_check.py index 3ddc0089791..fc18cc4a5ca 100644 --- a/tests/ci/jepsen_check.py +++ b/tests/ci/jepsen_check.py @@ -11,20 +11,21 @@ import boto3 # type: ignore import requests # type: ignore from github import Github +from build_download_helper import get_build_name_for_check +from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from commit_status_helper import post_commit_status +from compress_files import compress_fast from env_helper import REPO_COPY, TEMP_PATH, S3_BUILDS_BUCKET, S3_DOWNLOAD -from stopwatch import Stopwatch -from upload_result_helper import upload_results -from s3_helper import S3Helper from get_robot_token import get_best_robot_token, get_parameter_from_ssm from pr_info import PRInfo -from compress_files import compress_fast -from commit_status_helper import post_commit_status -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from version_helper import get_version_from_repo -from tee_popen import TeePopen -from ssh import SSHKey -from build_download_helper import get_build_name_for_check +from report import TestResults, TestResult from rerun_helper import RerunHelper +from s3_helper import S3Helper +from ssh import SSHKey +from stopwatch import Stopwatch +from tee_popen import TeePopen +from upload_result_helper import upload_results +from version_helper import get_version_from_repo JEPSEN_GROUP_NAME = "jepsen_group" @@ -44,8 +45,8 @@ CRASHED_TESTS_ANCHOR = "# Crashed tests" FAILED_TESTS_ANCHOR = "# Failed tests" -def _parse_jepsen_output(path): - test_results = [] +def _parse_jepsen_output(path: str) -> TestResults: + test_results = [] # type: TestResults current_type = "" with open(path, "r") as f: for line in f: @@ -59,7 +60,7 @@ def _parse_jepsen_output(path): if ( line.startswith("store/clickhouse") or 
line.startswith("clickhouse") ) and current_type: - test_results.append((line.strip(), current_type)) + test_results.append(TestResult(line.strip(), current_type)) return test_results @@ -266,7 +267,7 @@ if __name__ == "__main__": additional_data = [] try: test_result = _parse_jepsen_output(jepsen_log_path) - if any(r[1] == "FAIL" for r in test_result): + if any(r.status == "FAIL" for r in test_result): status = "failure" description = "Found invalid analysis (ノಥ益ಥ)ノ ┻━┻" @@ -279,7 +280,7 @@ if __name__ == "__main__": print("Exception", ex) status = "failure" description = "No Jepsen output log" - test_result = [("No Jepsen output log", "FAIL")] + test_result = [TestResult("No Jepsen output log", "FAIL")] s3_helper = S3Helper() report_url = upload_results( diff --git a/tests/ci/report.py b/tests/ci/report.py index 6c152c927ef..2409d1ba6d8 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -1,4 +1,9 @@ # -*- coding: utf-8 -*- +from ast import literal_eval +from dataclasses import dataclass +from pathlib import Path +from typing import List, Optional, Tuple +import csv import os import datetime @@ -167,6 +172,66 @@ HTML_TEST_PART = """ BASE_HEADERS = ["Test name", "Test status"] +@dataclass +class TestResult: + name: str + status: str + # the following fields are optional + time: Optional[float] = None + log_files: Optional[List[Path]] = None + raw_logs: Optional[str] = None + # the field for uploaded logs URLs + log_urls: Optional[List[str]] = None + + def set_raw_logs(self, raw_logs: str) -> None: + self.raw_logs = raw_logs + + def set_log_files(self, log_files_literal: str) -> None: + self.log_files = [] + log_paths = literal_eval(log_files_literal) + if not isinstance(log_paths, list): + raise ValueError( + f"Malformed input: must be a list literal: {log_files_literal}" + ) + for log_path in log_paths: + file = Path(log_path) + assert file.exists() + self.log_files.append(file) + + +TestResults = List[TestResult] + + +def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestResults: + results = [] # type: TestResults + with open(results_path, "r", encoding="utf-8") as descriptor: + reader = csv.reader(descriptor, delimiter="\t") + for line in reader: + name = line[0] + status = line[1] + time = None + if len(line) >= 3 and line[2]: + # The value can be emtpy, but when it's not, + # it's the time spent on the test + try: + time = float(line[2]) + except ValueError: + pass + + result = TestResult(name, status, time) + if len(line) == 4 and line[3]: + # The value can be emtpy, but when it's not, + # the 4th value is a pythonic list, e.g. 
['file1', 'file2'] + if with_raw_logs: + result.set_raw_logs(line[3]) + else: + result.set_log_files(line[3]) + + results.append(result) + + return results + + class ReportColorTheme: class ReportColor: yellow = "#FFB400" @@ -178,6 +243,9 @@ class ReportColorTheme: bugfixcheck = (ReportColor.yellow, ReportColor.blue, ReportColor.blue) +ColorTheme = Tuple[str, str, str] + + def _format_header(header, branch_name, branch_url=None): result = " ".join([w.capitalize() for w in header.split(" ")]) result = result.replace("Clickhouse", "ClickHouse") @@ -192,7 +260,7 @@ def _format_header(header, branch_name, branch_url=None): return result -def _get_status_style(status, colortheme=None): +def _get_status_style(status: str, colortheme: Optional[ColorTheme] = None) -> str: ok_statuses = ("OK", "success", "PASSED") fail_statuses = ("FAIL", "failure", "error", "FAILED", "Timeout") @@ -230,80 +298,80 @@ def _get_html_url(url): def create_test_html_report( - header, - test_result, - raw_log_url, - task_url, - job_url, - branch_url, - branch_name, - commit_url, - additional_urls=None, - with_raw_logs=False, - statuscolors=None, -): + header: str, + test_results: TestResults, + raw_log_url: str, + task_url: str, + job_url: str, + branch_url: str, + branch_name: str, + commit_url: str, + additional_urls: Optional[List[str]] = None, + statuscolors: Optional[ColorTheme] = None, +) -> str: if additional_urls is None: additional_urls = [] - if test_result: + if test_results: rows_part = "" num_fails = 0 has_test_time = False - has_test_logs = False + has_log_urls = False - if with_raw_logs: - # Display entires with logs at the top (they correspond to failed tests) - test_result.sort(key=lambda result: len(result) <= 3) + # Display entires with logs at the top (they correspond to failed tests) + test_results.sort( + key=lambda result: result.raw_logs is not None + or result.log_files is not None + ) - for result in test_result: - test_name = result[0] - test_status = result[1] - - test_logs = None - test_time = None - if len(result) > 2: - test_time = result[2] - has_test_time = True - - if len(result) > 3: - test_logs = result[3] - has_test_logs = True + for test_result in test_results: + colspan = 0 + if test_result.log_files is not None: + has_log_urls = True row = "" - is_fail = test_status in ("FAIL", "FLAKY") - if is_fail and with_raw_logs and test_logs is not None: + is_fail = test_result.status in ("FAIL", "FLAKY") + if is_fail and test_result.raw_logs is not None: row = '' - row += "" + test_name + "" - style = _get_status_style(test_status, colortheme=statuscolors) + row += "" + test_result.name + "" + colspan += 1 + style = _get_status_style(test_result.status, colortheme=statuscolors) # Allow to quickly scroll to the first failure. - is_fail_id = "" + fail_id = "" if is_fail: num_fails = num_fails + 1 - is_fail_id = 'id="fail' + str(num_fails) + '" ' + fail_id = f'id="fail{num_fails}" ' - row += f'{test_status}' + row += f'{test_result.status}' + colspan += 1 - if test_time is not None: - row += "" + test_time + "" + if test_result.time is not None: + has_test_time = True + row += f"{test_result.time}" + colspan += 1 - if test_logs is not None and not with_raw_logs: - test_logs_html = "
".join([_get_html_url(url) for url in test_logs]) + if test_result.log_urls is not None: + test_logs_html = "
".join( + [_get_html_url(url) for url in test_result.log_urls] + ) row += "" + test_logs_html + "" + colspan += 1 row += "" rows_part += row - if test_logs is not None and with_raw_logs: - row = '' - # TODO: compute colspan too - row += '
' + test_logs + "
" - row += "" + if test_result.raw_logs is not None: + row = ( + '' + f'
{test_result.raw_logs}
' + "" + ) rows_part += row headers = BASE_HEADERS if has_test_time: headers.append("Test time, sec.") - if has_test_logs and not with_raw_logs: + if has_log_urls: headers.append("Logs") headers_html = "".join(["" + h + "" for h in headers]) @@ -319,7 +387,7 @@ def create_test_html_report( if "?" in raw_log_name: raw_log_name = raw_log_name.split("?")[0] - result = HTML_BASE_TEST_TEMPLATE.format( + html = HTML_BASE_TEST_TEMPLATE.format( title=_format_header(header, branch_name), header=_format_header(header, branch_name, branch_url), raw_log_name=raw_log_name, @@ -331,7 +399,7 @@ def create_test_html_report( commit_url=commit_url, additional_urls=additional_html_urls, ) - return result + return html HTML_BASE_BUILD_TEMPLATE = """ diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py index ce6d89a7267..66a61ae9991 100644 --- a/tests/ci/sqlancer_check.py +++ b/tests/ci/sqlancer_check.py @@ -4,27 +4,27 @@ import logging import subprocess import os import sys -from typing import List, Tuple +from typing import List from github import Github +from build_download_helper import get_build_name_for_check, read_build_urls +from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse +from commit_status_helper import post_commit_status +from docker_pull_helper import get_image_with_version from env_helper import ( GITHUB_REPOSITORY, GITHUB_RUN_URL, REPORTS_PATH, - REPO_COPY, TEMP_PATH, ) -from s3_helper import S3Helper from get_robot_token import get_best_robot_token from pr_info import PRInfo -from build_download_helper import get_build_name_for_check, read_build_urls -from docker_pull_helper import get_image_with_version -from commit_status_helper import post_commit_status -from clickhouse_helper import ClickHouseHelper, prepare_tests_results_for_clickhouse -from upload_result_helper import upload_results -from stopwatch import Stopwatch +from report import TestResults, TestResult from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch +from upload_result_helper import upload_results IMAGE_NAME = "clickhouse/sqlancer-test" @@ -48,13 +48,12 @@ def get_commit(gh, commit_sha): return commit -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() temp_path = TEMP_PATH - repo_path = REPO_COPY reports_path = REPORTS_PATH check_name = sys.argv[1] @@ -108,11 +107,6 @@ if __name__ == "__main__": subprocess.check_call(f"sudo chown -R ubuntu:ubuntu {temp_path}", shell=True) - check_name_lower = ( - check_name.lower().replace("(", "").replace(")", "").replace(" ", "") - ) - s3_prefix = f"{pr_info.number}/{pr_info.sha}/{check_name_lower}/" - tests = [ "TLPGroupBy", "TLPHaving", @@ -138,7 +132,7 @@ if __name__ == "__main__": report_url = GITHUB_RUN_URL status = "success" - test_results = [] # type: List[Tuple[str, str]] + test_results = [] # type: TestResults # Try to get status message saved by the SQLancer try: # with open( @@ -146,13 +140,13 @@ if __name__ == "__main__": # ) as status_f: # status = status_f.readline().rstrip("\n") if os.path.exists(os.path.join(workspace_path, "server_crashed.log")): - test_results.append(("Server crashed", "FAIL")) + test_results.append(TestResult("Server crashed", "FAIL")) with open( os.path.join(workspace_path, "summary.tsv"), "r", encoding="utf-8" ) as summary_f: for line in summary_f: l = line.rstrip("\n").split("\t") - test_results.append((l[0], l[1])) + test_results.append(TestResult(l[0], l[1])) with open( os.path.join(workspace_path, 
"description.txt"), "r", encoding="utf-8" @@ -169,7 +163,6 @@ if __name__ == "__main__": test_results, paths, check_name, - False, ) post_commit_status(gh, pr_info.sha, check_name, description, status, report_url) @@ -192,3 +185,7 @@ if __name__ == "__main__": print(f"::notice Result: '{status}', '{description}', '{report_url}'") post_commit_status(gh, pr_info.sha, check_name, description, status, report_url) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/stress_check.py b/tests/ci/stress_check.py index 37277538867..4116dbc52ce 100644 --- a/tests/ci/stress_check.py +++ b/tests/ci/stress_check.py @@ -5,26 +5,28 @@ import logging import subprocess import os import sys +from pathlib import Path from typing import List, Tuple from github import Github -from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token -from pr_info import PRInfo from build_download_helper import download_all_deb_packages -from upload_result_helper import upload_results -from docker_pull_helper import get_image_with_version -from commit_status_helper import post_commit_status from clickhouse_helper import ( ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse, ) -from stopwatch import Stopwatch +from commit_status_helper import post_commit_status +from docker_pull_helper import get_image_with_version +from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from report import TestResults, read_test_results from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch from tee_popen import TeePopen +from upload_result_helper import upload_results def get_run_command( @@ -48,8 +50,8 @@ def get_run_command( def process_results( result_folder: str, server_log_path: str, run_log_path: str -) -> Tuple[str, str, List[Tuple[str, str]], List[str]]: - test_results = [] # type: List[Tuple[str, str]] +) -> Tuple[str, str, TestResults, List[str]]: + test_results = [] # type: TestResults additional_files = [] # Just upload all files from result_folder. 
# If task provides processed results, then it's responsible for content @@ -91,16 +93,15 @@ def process_results( return "error", "Invalid check_status.tsv", test_results, additional_files state, description = status[0][0], status[0][1] - results_path = os.path.join(result_folder, "test_results.tsv") - with open(results_path, "r", encoding="utf-8") as results_file: - test_results = list(csv.reader(results_file, delimiter="\t")) # type: ignore + results_path = Path(result_folder) / "test_results.tsv" + test_results = read_test_results(results_path, False) if len(test_results) == 0: raise Exception("Empty results") return state, description, test_results, additional_files -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() @@ -185,5 +186,9 @@ if __name__ == "__main__": ) ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - if state == "error": + if state == "failure": sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/style_check.py b/tests/ci/style_check.py index 78c98813a72..9350785b33b 100644 --- a/tests/ci/style_check.py +++ b/tests/ci/style_check.py @@ -6,7 +6,7 @@ import logging import os import subprocess import sys - +from pathlib import Path from typing import List, Tuple @@ -22,6 +22,7 @@ from get_robot_token import get_best_robot_token from github_helper import GitHub from git_helper import git_runner from pr_info import PRInfo +from report import TestResults, read_test_results from rerun_helper import RerunHelper from s3_helper import S3Helper from ssh import SSHKey @@ -40,8 +41,8 @@ GIT_PREFIX = ( # All commits to remote are done as robot-clickhouse def process_result( result_folder: str, -) -> Tuple[str, str, List[Tuple[str, str]], List[str]]: - test_results = [] # type: List[Tuple[str, str]] +) -> Tuple[str, str, TestResults, List[str]]: + test_results = [] # type: TestResults additional_files = [] # Just upload all files from result_folder. 
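For completeness, a small sketch of the check_status.tsv contract these process_result helpers validate: exactly one tab-separated row of state and description. The path and contents below are illustrative only.

``` python
import csv
from pathlib import Path

status_path = Path("/tmp/check_status.tsv")  # invented path
status_path.write_text("success\tStyle check passed\n", encoding="utf-8")

with open(status_path, "r", encoding="utf-8") as status_file:
    status = list(csv.reader(status_file, delimiter="\t"))

# The same validation the CI scripts perform before trusting the file
if len(status) != 1 or len(status[0]) != 2:
    state, description = "error", "Invalid check_status.tsv"
else:
    state, description = status[0][0], status[0][1]

print(state, description)
```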
# If task provides processed results, then it's responsible @@ -57,7 +58,7 @@ def process_result( status = [] status_path = os.path.join(result_folder, "check_status.tsv") if os.path.exists(status_path): - logging.info("Found test_results.tsv") + logging.info("Found check_status.tsv") with open(status_path, "r", encoding="utf-8") as status_file: status = list(csv.reader(status_file, delimiter="\t")) if len(status) != 1 or len(status[0]) != 2: @@ -66,9 +67,8 @@ def process_result( state, description = status[0][0], status[0][1] try: - results_path = os.path.join(result_folder, "test_results.tsv") - with open(results_path, "r", encoding="utf-8") as fd: - test_results = list(csv.reader(fd, delimiter="\t")) # type: ignore + results_path = Path(result_folder) / "test_results.tsv" + test_results = read_test_results(results_path) if len(test_results) == 0: raise Exception("Empty results") @@ -134,7 +134,7 @@ def commit_push_staged(pr_info: PRInfo) -> None: git_runner(push_cmd) -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) logging.getLogger("git_helper").setLevel(logging.DEBUG) args = parse_args() @@ -205,3 +205,7 @@ if __name__ == "__main__": if state in ["error", "failure"]: sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/unit_tests_check.py b/tests/ci/unit_tests_check.py index 7c4fa0e9fe4..915a77f3d48 100644 --- a/tests/ci/unit_tests_check.py +++ b/tests/ci/unit_tests_check.py @@ -9,22 +9,23 @@ from typing import List, Tuple from github import Github -from env_helper import TEMP_PATH, REPO_COPY, REPORTS_PATH -from s3_helper import S3Helper -from get_robot_token import get_best_robot_token -from pr_info import PRInfo from build_download_helper import download_unit_tests -from upload_result_helper import upload_results -from docker_pull_helper import get_image_with_version -from commit_status_helper import post_commit_status, update_mergeable_check from clickhouse_helper import ( ClickHouseHelper, mark_flaky_tests, prepare_tests_results_for_clickhouse, ) -from stopwatch import Stopwatch +from commit_status_helper import post_commit_status, update_mergeable_check +from docker_pull_helper import get_image_with_version +from env_helper import TEMP_PATH, REPORTS_PATH +from get_robot_token import get_best_robot_token +from pr_info import PRInfo +from report import TestResults, TestResult from rerun_helper import RerunHelper +from s3_helper import S3Helper +from stopwatch import Stopwatch from tee_popen import TeePopen +from upload_result_helper import upload_results IMAGE_NAME = "clickhouse/unit-test" @@ -40,20 +41,20 @@ def get_test_name(line): def process_results( result_folder: str, -) -> Tuple[str, str, List[Tuple[str, str]], List[str]]: +) -> Tuple[str, str, TestResults, List[str]]: OK_SIGN = "OK ]" FAILED_SIGN = "FAILED ]" SEGFAULT = "Segmentation fault" SIGNAL = "received signal SIG" PASSED = "PASSED" - summary = [] # type: List[Tuple[str, str]] + test_results = [] # type: TestResults total_counter = 0 failed_counter = 0 result_log_path = f"{result_folder}/test_result.txt" if not os.path.exists(result_log_path): logging.info("No output log on path %s", result_log_path) - return "error", "No output log", summary, [] + return "error", "No output log", test_results, [] status = "success" description = "" @@ -64,13 +65,13 @@ def process_results( logging.info("Found ok line: '%s'", line) test_name = get_test_name(line.strip()) logging.info("Test name: '%s'", test_name) - summary.append((test_name, "OK")) + 
test_results.append(TestResult(test_name, "OK")) total_counter += 1 elif FAILED_SIGN in line and "listed below" not in line and "ms)" in line: logging.info("Found fail line: '%s'", line) test_name = get_test_name(line.strip()) logging.info("Test name: '%s'", test_name) - summary.append((test_name, "FAIL")) + test_results.append(TestResult(test_name, "FAIL")) total_counter += 1 failed_counter += 1 elif SEGFAULT in line: @@ -99,16 +100,15 @@ def process_results( f"fail: {failed_counter}, passed: {total_counter - failed_counter}" ) - return status, description, summary, [result_log_path] + return status, description, test_results, [result_log_path] -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) stopwatch = Stopwatch() temp_path = TEMP_PATH - repo_path = REPO_COPY reports_path = REPORTS_PATH check_name = sys.argv[1] @@ -182,5 +182,9 @@ if __name__ == "__main__": ch_helper.insert_events_into(db="default", table="checks", events=prepared_events) - if state == "error": + if state == "failure": sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/tests/ci/upload_result_helper.py b/tests/ci/upload_result_helper.py index 9fcd3733acb..d6476865bba 100644 --- a/tests/ci/upload_result_helper.py +++ b/tests/ci/upload_result_helper.py @@ -1,6 +1,6 @@ +from typing import List import os import logging -import ast from env_helper import ( GITHUB_JOB_URL, @@ -8,34 +8,35 @@ from env_helper import ( GITHUB_RUN_URL, GITHUB_SERVER_URL, ) -from report import ReportColorTheme, create_test_html_report +from report import ReportColorTheme, TestResults, create_test_html_report +from s3_helper import S3Helper def process_logs( - s3_client, additional_logs, s3_path_prefix, test_results, with_raw_logs -): + s3_client: S3Helper, + additional_logs: List[str], + s3_path_prefix: str, + test_results: TestResults, +) -> List[str]: logging.info("Upload files to s3 %s", additional_logs) processed_logs = {} # type: ignore # Firstly convert paths of logs from test_results to urls to s3. for test_result in test_results: - if len(test_result) <= 3 or with_raw_logs: + if test_result.log_files is None: continue # Convert from string repr of list to list. 
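A short, hypothetical illustration of TestResult.set_log_files, which takes over the literal_eval conversion that used to happen inline here. The log names are invented and are created first because set_log_files asserts the paths exist.

``` python
from pathlib import Path

from report import TestResult  # tests/ci/report.py from this patch

for name in ("query_run.log", "server_stderr.log"):  # invented log names
    Path("/tmp", name).touch()

result = TestResult("some_failed_test", "FAIL")
result.set_log_files("['/tmp/query_run.log', '/tmp/server_stderr.log']")

print(result.log_files)
# [PosixPath('/tmp/query_run.log'), PosixPath('/tmp/server_stderr.log')]
```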
- test_log_paths = ast.literal_eval(test_result[3]) - test_log_urls = [] - for log_path in test_log_paths: - if log_path in processed_logs: - test_log_urls.append(processed_logs[log_path]) - elif log_path: + test_result.log_urls = [] + for path in test_result.log_files: + if path.as_posix() in processed_logs: + test_result.log_urls.append(processed_logs[path]) + elif path: url = s3_client.upload_test_report_to_s3( - log_path, s3_path_prefix + "/" + os.path.basename(log_path) + path.as_posix(), s3_path_prefix + "/" + path.name ) - test_log_urls.append(url) - processed_logs[log_path] = url - - test_result[3] = test_log_urls + test_result.log_urls.append(url) + processed_logs[path] = url additional_urls = [] for log_path in additional_logs: @@ -50,20 +51,18 @@ def process_logs( def upload_results( - s3_client, - pr_number, - commit_sha, - test_results, - additional_files, - check_name, - with_raw_logs=True, - statuscolors=None, -): + s3_client: S3Helper, + pr_number: int, + commit_sha: str, + test_results: TestResults, + additional_files: List[str], + check_name: str, +) -> str: s3_path_prefix = f"{pr_number}/{commit_sha}/" + check_name.lower().replace( " ", "_" ).replace("(", "_").replace(")", "_").replace(",", "_") additional_urls = process_logs( - s3_client, additional_files, s3_path_prefix, test_results, with_raw_logs + s3_client, additional_files, s3_path_prefix, test_results ) branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commits/master" @@ -74,8 +73,7 @@ def upload_results( commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{commit_sha}" if additional_urls: - raw_log_url = additional_urls[0] - additional_urls.pop(0) + raw_log_url = additional_urls.pop(0) else: raw_log_url = GITHUB_JOB_URL() @@ -93,7 +91,6 @@ def upload_results( branch_name, commit_url, additional_urls, - with_raw_logs, statuscolors=statuscolors, ) with open("report.html", "w", encoding="utf-8") as f: From 84861c2b7c590b6689972e403e92162cced9c91a Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Tue, 3 Jan 2023 23:47:46 +0100 Subject: [PATCH 116/262] Add TODO to style-check image --- docker/test/style/process_style_check_result.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docker/test/style/process_style_check_result.py b/docker/test/style/process_style_check_result.py index 2edf6ba3591..bc06df1af31 100755 --- a/docker/test/style/process_style_check_result.py +++ b/docker/test/style/process_style_check_result.py @@ -6,6 +6,8 @@ import argparse import csv +# TODO: add typing and log files to the fourth column, think about launching +# everything from the python and not bash def process_result(result_folder): status = "success" description = "" From 36e402b10db0be81ad064ddc2ea0fb62c3fa870f Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 4 Jan 2023 12:18:53 +0100 Subject: [PATCH 117/262] Add typing to create_build_html_report --- tests/ci/build_report_check.py | 35 ++++++++-------------------------- tests/ci/report.py | 30 ++++++++++++++++++++--------- 2 files changed, 29 insertions(+), 36 deletions(-) diff --git a/tests/ci/build_report_check.py b/tests/ci/build_report_check.py index 1de401cde9c..0bdfb7c9ac0 100644 --- a/tests/ci/build_report_check.py +++ b/tests/ci/build_report_check.py @@ -10,13 +10,14 @@ from typing import Dict, List, Tuple from github import Github from env_helper import ( + GITHUB_JOB_URL, GITHUB_REPOSITORY, GITHUB_RUN_URL, GITHUB_SERVER_URL, REPORTS_PATH, TEMP_PATH, ) -from report import create_build_html_report +from report import create_build_html_report, BuildResult, BuildResults from s3_helper import S3Helper from get_robot_token import get_best_robot_token from pr_info import NeedsDataType, PRInfo @@ -31,24 +32,6 @@ from rerun_helper import RerunHelper NEEDS_DATA_PATH = os.getenv("NEEDS_DATA_PATH", "") -class BuildResult: - def __init__( - self, - compiler, - build_type, - sanitizer, - status, - elapsed_seconds, - with_coverage, - ): - self.compiler = compiler - self.build_type = build_type - self.sanitizer = sanitizer - self.status = status - self.elapsed_seconds = elapsed_seconds - self.with_coverage = with_coverage - - def group_by_artifacts(build_urls: List[str]) -> Dict[str, List[str]]: groups = { "apk": [], @@ -81,7 +64,7 @@ def group_by_artifacts(build_urls: List[str]) -> Dict[str, List[str]]: def get_failed_report( job_name: str, -) -> Tuple[List[BuildResult], List[List[str]], List[str]]: +) -> Tuple[BuildResults, List[List[str]], List[str]]: message = f"{job_name} failed" build_result = BuildResult( compiler="unknown", @@ -89,14 +72,13 @@ def get_failed_report( sanitizer="unknown", status=message, elapsed_seconds=0, - with_coverage=False, ) return [build_result], [[""]], [GITHUB_RUN_URL] def process_report( build_report: dict, -) -> Tuple[List[BuildResult], List[List[str]], List[str]]: +) -> Tuple[BuildResults, List[List[str]], List[str]]: build_config = build_report["build_config"] build_result = BuildResult( compiler=build_config["compiler"], @@ -104,7 +86,6 @@ def process_report( sanitizer=build_config["sanitizer"], status="success" if build_report["status"] else "failure", elapsed_seconds=build_report["elapsed_seconds"], - with_coverage=False, ) build_results = [] build_urls = [] @@ -207,9 +188,9 @@ def main(): logging.info("Got exactly %s builds", len(builds_report_map)) # Group build artifacts by groups - build_results = [] # type: List[BuildResult] - build_artifacts = [] # - build_logs = [] + build_results = [] # type: BuildResults + build_artifacts = [] # type: List[List[str]] + build_logs = [] # type: List[str] for build_report in build_reports: _build_results, build_artifacts_url, build_logs_url = process_report( @@ -244,7 +225,7 @@ def main(): branch_name = f"PR #{pr_info.number}" branch_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/pull/{pr_info.number}" commit_url = f"{GITHUB_SERVER_URL}/{GITHUB_REPOSITORY}/commit/{pr_info.sha}" - task_url = GITHUB_RUN_URL + task_url = GITHUB_JOB_URL() report = create_build_html_report( build_check_name, build_results, diff --git a/tests/ci/report.py b/tests/ci/report.py index 2409d1ba6d8..d7f6e1e71d0 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -232,6 +232,18 @@ def read_test_results(results_path: Path, with_raw_logs: bool = True) -> TestRes return results +@dataclass +class BuildResult: + compiler: 
str + build_type: str + sanitizer: str + status: str + elapsed_seconds: int + + +BuildResults = List[BuildResult] + + class ReportColorTheme: class ReportColor: yellow = "#FFB400" @@ -447,15 +459,15 @@ LINK_TEMPLATE = '{text}' def create_build_html_report( - header, - build_results, - build_logs_urls, - artifact_urls_list, - task_url, - branch_url, - branch_name, - commit_url, -): + header: str, + build_results: BuildResults, + build_logs_urls: List[str], + artifact_urls_list: List[List[str]], + task_url: str, + branch_url: str, + branch_name: str, + commit_url: str, +) -> str: rows = "" for (build_result, build_log_url, artifact_urls) in zip( build_results, build_logs_urls, artifact_urls_list From fc4d6e41cff684e1960e5a24082575be326aed51 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 4 Jan 2023 16:52:32 +0100 Subject: [PATCH 118/262] Fix possible issue in process_logs by strict typing --- tests/ci/report.py | 2 +- tests/ci/upload_result_helper.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/ci/report.py b/tests/ci/report.py index d7f6e1e71d0..95f60794448 100644 --- a/tests/ci/report.py +++ b/tests/ci/report.py @@ -187,7 +187,7 @@ class TestResult: self.raw_logs = raw_logs def set_log_files(self, log_files_literal: str) -> None: - self.log_files = [] + self.log_files = [] # type: Optional[List[Path]] log_paths = literal_eval(log_files_literal) if not isinstance(log_paths, list): raise ValueError( diff --git a/tests/ci/upload_result_helper.py b/tests/ci/upload_result_helper.py index d6476865bba..b988e240b0e 100644 --- a/tests/ci/upload_result_helper.py +++ b/tests/ci/upload_result_helper.py @@ -1,4 +1,5 @@ -from typing import List +from pathlib import Path +from typing import Dict, List import os import logging @@ -20,7 +21,7 @@ def process_logs( ) -> List[str]: logging.info("Upload files to s3 %s", additional_logs) - processed_logs = {} # type: ignore + processed_logs = {} # type: Dict[Path, str] # Firstly convert paths of logs from test_results to urls to s3. for test_result in test_results: if test_result.log_files is None: @@ -29,7 +30,7 @@ def process_logs( # Convert from string repr of list to list. test_result.log_urls = [] for path in test_result.log_files: - if path.as_posix() in processed_logs: + if path in processed_logs: test_result.log_urls.append(processed_logs[path]) elif path: url = s3_client.upload_test_report_to_s3( From 425f7459d3f740cbb22d04cf6769451e33e7d420 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 5 Jan 2023 13:54:31 +0100 Subject: [PATCH 119/262] Improve report for docker_server.py --- tests/ci/docker_server.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 544ab4e0a90..031b7bb61ab 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -251,7 +251,9 @@ def build_and_push_image( # `docker buildx build --load` does not support multiple images currently # images must be built separately and merged together with `docker manifest` digests = [] + multiplatform_sw = Stopwatch() for arch in BUCKETS: + single_sw = Stopwatch() arch_tag = f"{tag}-{arch}" metadata_path = p.join(TEMP_PATH, arch_tag) dockerfile = p.join(image.full_path, f"Dockerfile.{os}") @@ -271,9 +273,15 @@ def build_and_push_image( cmd = " ".join(cmd_args) logging.info("Building image %s:%s for arch %s: %s", image.repo, tag, arch, cmd) if retry_popen(cmd) != 0: - result.append(TestResult(f"{image.repo}:{tag}-{arch}", "FAIL")) + result.append( + TestResult( + f"{image.repo}:{tag}-{arch}", "FAIL", single_sw.duration_seconds + ) + ) return result - result.append(TestResult(f"{image.repo}:{tag}-{arch}", "OK")) + result.append( + TestResult(f"{image.repo}:{tag}-{arch}", "OK", single_sw.duration_seconds) + ) with open(metadata_path, "rb") as m: metadata = json.load(m) digests.append(metadata["containerimage.digest"]) @@ -284,8 +292,15 @@ def build_and_push_image( ) logging.info("Pushing merged %s:%s image: %s", image.repo, tag, cmd) if retry_popen(cmd) != 0: - result.append(TestResult(f"{image.repo}:{tag}", "FAIL")) + result.append( + TestResult( + f"{image.repo}:{tag}", "FAIL", multiplatform_sw.duration_seconds + ) + ) return result + result.append( + TestResult(f"{image.repo}:{tag}", "OK", multiplatform_sw.duration_seconds) + ) else: logging.info( "Merging is available only on push, separate %s images are created", From c38bb5ec00b8d3d39209eb921407235a96b080db Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Thu, 5 Jan 2023 14:16:07 +0100 Subject: [PATCH 120/262] Add Path as an option for TeePopen log_file --- tests/ci/tee_popen.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/ci/tee_popen.py b/tests/ci/tee_popen.py index b74069c16ab..f80678fe8ba 100644 --- a/tests/ci/tee_popen.py +++ b/tests/ci/tee_popen.py @@ -1,10 +1,11 @@ #!/usr/bin/env python3 from io import TextIOWrapper +from pathlib import Path from subprocess import Popen, PIPE, STDOUT from threading import Thread from time import sleep -from typing import Optional +from typing import Optional, Union import logging import os import sys @@ -18,7 +19,7 @@ class TeePopen: def __init__( self, command: str, - log_file: str, + log_file: Union[str, Path], env: Optional[dict] = None, timeout: Optional[int] = None, ): @@ -63,7 +64,7 @@ class TeePopen: self.wait() self.log_file.close() - def wait(self): + def wait(self) -> int: if self.process.stdout is not None: for line in self.process.stdout: sys.stdout.write(line) From 64bbdee8248e87c436f5af0447e608f9fb68e48c Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 9 Jan 2023 19:36:51 +0800 Subject: [PATCH 121/262] fix stule --- src/Functions/toDayOfWeek.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/Functions/toDayOfWeek.cpp b/src/Functions/toDayOfWeek.cpp index 09271cbe55d..06343714b9d 100644 --- a/src/Functions/toDayOfWeek.cpp +++ b/src/Functions/toDayOfWeek.cpp @@ -3,8 +3,6 @@ #include #include - - namespace DB { From 7764fd9ac9f2388cc94d382909a49f76e98fbf83 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 9 Jan 2023 12:47:55 +0100 Subject: [PATCH 122/262] Fix possible cannot-read-all-data --- src/Storages/FileLog/StorageFileLog.cpp | 130 ++++++++++++------------ src/Storages/FileLog/StorageFileLog.h | 10 +- 2 files changed, 74 insertions(+), 66 deletions(-) diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index 94d5f7441ec..b1b54a1700a 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -224,76 +224,48 @@ void StorageFileLog::loadFiles() void StorageFileLog::serialize() const { for (const auto & [inode, meta] : file_infos.meta_by_inode) - { - auto full_name = getFullMetaPath(meta.file_name); - if (!disk->exists(full_name)) - { - disk->createFile(full_name); - } - else - { - checkOffsetIsValid(full_name, meta.last_writen_position); - } - auto out = disk->writeFile(full_name); - writeIntText(inode, *out); - writeChar('\n', *out); - writeIntText(meta.last_writen_position, *out); - } + serialize(inode, meta); } void StorageFileLog::serialize(UInt64 inode, const FileMeta & file_meta) const { - auto full_name = getFullMetaPath(file_meta.file_name); - if (!disk->exists(full_name)) + auto full_path = getFullMetaPath(file_meta.file_name); + if (disk->exists(full_path)) { - disk->createFile(full_name); + checkOffsetIsValid(file_meta.file_name, file_meta.last_writen_position); } else { - checkOffsetIsValid(full_name, file_meta.last_writen_position); + disk->createFile(full_path); + } + + try + { + auto out = disk->writeFile(full_path); + writeIntText(inode, *out); + writeChar('\n', *out); + writeIntText(file_meta.last_writen_position, *out); + } + catch (...) 
+ { + disk->removeFile(full_path); + throw; } - auto out = disk->writeFile(full_name); - writeIntText(inode, *out); - writeChar('\n', *out); - writeIntText(file_meta.last_writen_position, *out); } void StorageFileLog::deserialize() { if (!disk->exists(metadata_base_path)) return; + /// In case of single file (not a watched directory), /// iterated directory always has one file inside. for (const auto dir_iter = disk->iterateDirectory(metadata_base_path); dir_iter->isValid(); dir_iter->next()) { - auto full_name = getFullMetaPath(dir_iter->name()); - if (!disk->isFile(full_name)) - { - throw Exception( - ErrorCodes::BAD_FILE_TYPE, - "The file {} under {} is not a regular file when deserializing meta files", - dir_iter->name(), - metadata_base_path); - } - - auto in = disk->readFile(full_name); - FileMeta meta; - UInt64 inode, last_written_pos; - - if (!tryReadIntText(inode, *in)) - { - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed", dir_iter->path()); - } - assertChar('\n', *in); - if (!tryReadIntText(last_written_pos, *in)) - { - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed", dir_iter->path()); - } - - meta.file_name = dir_iter->name(); - meta.last_writen_position = last_written_pos; - - file_infos.meta_by_inode.emplace(inode, meta); + auto [metadata, inode] = readMetadata(dir_iter->name()); + if (!metadata) + continue; + file_infos.meta_by_inode.emplace(inode, metadata); } } @@ -488,23 +460,51 @@ void StorageFileLog::storeMetas(size_t start, size_t end) } } -void StorageFileLog::checkOffsetIsValid(const String & full_name, UInt64 offset) const +void StorageFileLog::checkOffsetIsValid(const String & filename, UInt64 offset) const { - auto in = disk->readFile(full_name); - UInt64 _, last_written_pos; - - if (!tryReadIntText(_, *in)) + auto [metadata, _] = readMetadata(filename); + if (metadata.last_writen_position > offset) { - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed", full_name); - } - assertChar('\n', *in); - if (!tryReadIntText(last_written_pos, *in)) - { - throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed", full_name); - } - if (last_written_pos > offset) throw Exception( - ErrorCodes::LOGICAL_ERROR, "Last stored last_written_pos in meta file {} is bigger than current last_written_pos", full_name); + ErrorCodes::LOGICAL_ERROR, + "Last stored last_written_position in meta file {} is bigger than current last_written_pos ({} > {})", + filename, metadata.last_writen_position, offset); + } +} + +StorageFileLog::ReadMetadataResult StorageFileLog::readMetadata(const String & filename) const +{ + auto full_path = getFullMetaPath(filename); + if (!disk->isFile(full_path)) + { + throw Exception( + ErrorCodes::BAD_FILE_TYPE, + "The file {} under {} is not a regular file", + filename, metadata_base_path); + } + + auto in = disk->readFile(full_path); + FileMeta metadata; + UInt64 inode, last_written_pos; + + if (in->eof()) /// File is empty. 
+ { + disk->removeFile(full_path); + return {}; + } + + if (!tryReadIntText(inode, *in)) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed (1)", full_path); + + if (!checkChar('\n', *in)) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed (2)", full_path); + + if (!tryReadIntText(last_written_pos, *in)) + throw Exception(ErrorCodes::CANNOT_READ_ALL_DATA, "Read meta file {} failed (3)", full_path); + + metadata.file_name = filename; + metadata.last_writen_position = last_written_pos; + return { metadata, inode }; } size_t StorageFileLog::getMaxBlockSize() const diff --git a/src/Storages/FileLog/StorageFileLog.h b/src/Storages/FileLog/StorageFileLog.h index 9737c31acb6..c0c5ac904b5 100644 --- a/src/Storages/FileLog/StorageFileLog.h +++ b/src/Storages/FileLog/StorageFileLog.h @@ -82,6 +82,7 @@ public: String file_name; UInt64 last_writen_position = 0; UInt64 last_open_end = 0; + bool operator!() const { return file_name.empty(); } }; using InodeToFileMeta = std::unordered_map; @@ -202,7 +203,14 @@ private: void serialize(UInt64 inode, const FileMeta & file_meta) const; void deserialize(); - void checkOffsetIsValid(const String & full_name, UInt64 offset) const; + void checkOffsetIsValid(const String & filename, UInt64 offset) const; + + struct ReadMetadataResult + { + FileMeta metadata; + UInt64 inode = 0; + }; + ReadMetadataResult readMetadata(const String & filename) const; }; } From 1e4fe038f562029fc24a0e7a33e5d428ea0474f9 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Thu, 5 Jan 2023 14:16:31 +0100 Subject: [PATCH 123/262] Add logs to docker_server reports --- tests/ci/docker_server.py | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/tests/ci/docker_server.py b/tests/ci/docker_server.py index 031b7bb61ab..fbe934367b4 100644 --- a/tests/ci/docker_server.py +++ b/tests/ci/docker_server.py @@ -7,6 +7,7 @@ import logging import subprocess import sys import time +from pathlib import Path from os import path as p, makedirs from typing import List @@ -23,6 +24,7 @@ from pr_info import PRInfo from report import TestResults, TestResult from s3_helper import S3Helper from stopwatch import Stopwatch +from tee_popen import TeePopen from upload_result_helper import upload_results from version_helper import ( ClickHouseVersion, @@ -117,7 +119,7 @@ def parse_args() -> argparse.Namespace: return parser.parse_args() -def retry_popen(cmd: str) -> int: +def retry_popen(cmd: str, log_file: Path) -> int: max_retries = 5 for retry in range(max_retries): # From time to time docker build may failed. 
Curl issues, or even push @@ -130,18 +132,14 @@ def retry_popen(cmd: str) -> int: cmd, ) time.sleep(progressive_sleep) - with subprocess.Popen( + with TeePopen( cmd, - shell=True, - stderr=subprocess.STDOUT, - stdout=subprocess.PIPE, - universal_newlines=True, + log_file=log_file, ) as process: - for line in process.stdout: # type: ignore - print(line, end="") retcode = process.wait() if retcode == 0: return 0 + return retcode @@ -272,15 +270,24 @@ def build_and_push_image( ) cmd = " ".join(cmd_args) logging.info("Building image %s:%s for arch %s: %s", image.repo, tag, arch, cmd) - if retry_popen(cmd) != 0: + log_file = Path(TEMP_PATH) / f"{image.repo.replace('/', '__')}:{tag}-{arch}.log" + if retry_popen(cmd, log_file) != 0: result.append( TestResult( - f"{image.repo}:{tag}-{arch}", "FAIL", single_sw.duration_seconds + f"{image.repo}:{tag}-{arch}", + "FAIL", + single_sw.duration_seconds, + [log_file], ) ) return result result.append( - TestResult(f"{image.repo}:{tag}-{arch}", "OK", single_sw.duration_seconds) + TestResult( + f"{image.repo}:{tag}-{arch}", + "OK", + single_sw.duration_seconds, + [log_file], + ) ) with open(metadata_path, "rb") as m: metadata = json.load(m) @@ -291,7 +298,7 @@ def build_and_push_image( f"--tag {image.repo}:{tag} {' '.join(digests)}" ) logging.info("Pushing merged %s:%s image: %s", image.repo, tag, cmd) - if retry_popen(cmd) != 0: + if retry_popen(cmd, Path("/dev/null")) != 0: result.append( TestResult( f"{image.repo}:{tag}", "FAIL", multiplatform_sw.duration_seconds From 4bc2c614068055df675262eeb1fc9f2f56b8c635 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 9 Jan 2023 20:37:16 +0800 Subject: [PATCH 124/262] change as request --- src/Common/DateLUTImpl.h | 23 ++++++++++++++--------- src/Functions/DateTimeTransforms.h | 4 ---- src/Functions/dateName.cpp | 2 +- src/Functions/formatDateTime.cpp | 8 ++++---- 4 files changed, 19 insertions(+), 18 deletions(-) diff --git a/src/Common/DateLUTImpl.h b/src/Common/DateLUTImpl.h index 6bf530008dc..1e7f11d3c9e 100644 --- a/src/Common/DateLUTImpl.h +++ b/src/Common/DateLUTImpl.h @@ -39,6 +39,15 @@ enum class WeekModeFlag : UInt8 }; using YearWeek = std::pair; +/// Modes for toDayOfWeek() function. +enum class WeekDayMode +{ + WeekStartsMonday1 = 0, + WeekStartsMonday0 = 1, + WeekStartsSunday0 = 2, + WeekStartsSunday1 = 3 +}; + /** Lookup table to conversion of time to date, and to month / year / day of week / day of month and so on. * First time was implemented for OLAPServer, that needed to do billions of such transformations. */ @@ -625,15 +634,11 @@ public: template inline UInt8 toDayOfWeek(DateOrTime v, UInt8 week_day_mode) const { - /// 0: Sun = 7, Mon = 1 - /// 1: Sun = 6, Mon = 0 - /// 2: Sun = 0, Mon = 1 - /// 3: Sun = 1, Mon = 2 - week_day_mode = check_week_day_mode(week_day_mode); + WeekDayMode mode = check_week_day_mode(week_day_mode); auto res = toDayOfWeek(v); - bool start_from_sunday = week_day_mode & (1 << 1); - bool zero_based = (week_day_mode == 1 || week_day_mode == 2); + bool start_from_sunday = (mode == WeekDayMode::WeekStartsSunday0 || mode == WeekDayMode::WeekStartsSunday1); + bool zero_based = (mode == WeekDayMode::WeekStartsMonday0 || mode == WeekDayMode::WeekStartsSunday0); if (start_from_sunday) res = res % 7 + 1; if (zero_based) @@ -864,9 +869,9 @@ public: } /// Check and change mode to effective. 
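// Illustration added by the editor, not part of the patch: the removed inline comment documented the
// four modes; with the conversion above, a Monday maps to 1, 0, 1, 2 and a Sunday to 7, 6, 0, 1 for
// modes 0..3 respectively. A hypothetical spot-check (helper usage is an assumption, not from this diff):
//
//   const auto & lut = DateLUT::instance("UTC");
//   ExtendedDayNum monday(19723);                    // 2024-01-01 is a Monday
//   chassert(lut.toDayOfWeek(monday, 0) == 1);       // WeekStartsMonday1: Mon = 1 ... Sun = 7
//   chassert(lut.toDayOfWeek(monday, 1) == 0);       // WeekStartsMonday0: Mon = 0 ... Sun = 6
//   chassert(lut.toDayOfWeek(monday, 2) == 1);       // WeekStartsSunday0: Sun = 0 ... Sat = 6
//   chassert(lut.toDayOfWeek(monday, 3) == 2);       // WeekStartsSunday1: Sun = 1 ... Sat = 7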
- inline UInt8 check_week_day_mode(UInt8 mode) const /// NOLINT + inline WeekDayMode check_week_day_mode(UInt8 mode) const /// NOLINT { - return mode & 3; + return static_cast(mode & 3); } diff --git a/src/Functions/DateTimeTransforms.h b/src/Functions/DateTimeTransforms.h index 56e4a0e2668..56a7a960ac9 100644 --- a/src/Functions/DateTimeTransforms.h +++ b/src/Functions/DateTimeTransforms.h @@ -786,10 +786,6 @@ struct ToDayOfWeekImpl { static constexpr auto name = "toDayOfWeek"; - static inline UInt8 execute(Int64 t, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(t); } - static inline UInt8 execute(UInt32 t, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(t); } - static inline UInt8 execute(Int32 d, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(ExtendedDayNum(d)); } - static inline UInt8 execute(UInt16 d, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(DayNum(d)); } static inline UInt8 execute(Int64 t, UInt8 week_day_mode, const DateLUTImpl & time_zone) { return time_zone.toDayOfWeek(t, week_day_mode); diff --git a/src/Functions/dateName.cpp b/src/Functions/dateName.cpp index 36c0be49190..bfb190b9a08 100644 --- a/src/Functions/dateName.cpp +++ b/src/Functions/dateName.cpp @@ -276,7 +276,7 @@ private: { static inline void write(WriteBuffer & buffer, Time source, const DateLUTImpl & timezone) { - const auto day = ToDayOfWeekImpl::execute(source, timezone); + const auto day = ToDayOfWeekImpl::execute(source, 0, timezone); static constexpr std::string_view day_names[] = { "Monday", diff --git a/src/Functions/formatDateTime.cpp b/src/Functions/formatDateTime.cpp index e7c9a1b5103..c01f32f68ae 100644 --- a/src/Functions/formatDateTime.cpp +++ b/src/Functions/formatDateTime.cpp @@ -344,13 +344,13 @@ private: static size_t mysqlDayOfWeek(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - *dest = '0' + ToDayOfWeekImpl::execute(source, timezone); + *dest = '0' + ToDayOfWeekImpl::execute(source, 0, timezone); return 1; } static size_t mysqlDayOfWeek0To6(char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - auto day = ToDayOfWeekImpl::execute(source, timezone); + auto day = ToDayOfWeekImpl::execute(source, 0, timezone); *dest = '0' + (day == 7 ? 
0 : day); return 1; } @@ -499,13 +499,13 @@ private: static size_t jodaDayOfWeek1Based(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - auto week_day = ToDayOfWeekImpl::execute(source, timezone); + auto week_day = ToDayOfWeekImpl::execute(source, 0, timezone); return writeNumberWithPadding(dest, week_day, min_represent_digits); } static size_t jodaDayOfWeekText(size_t min_represent_digits, char * dest, Time source, UInt64, UInt32, const DateLUTImpl & timezone) { - auto week_day = ToDayOfWeekImpl::execute(source, timezone); + auto week_day = ToDayOfWeekImpl::execute(source, 0, timezone); if (week_day == 7) week_day = 0; From a8da7b4c20964a08eccb83b56afaa56df41146f2 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Mon, 9 Jan 2023 20:39:46 +0800 Subject: [PATCH 125/262] change as request --- src/Common/DateLUTImpl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Common/DateLUTImpl.h b/src/Common/DateLUTImpl.h index 1e7f11d3c9e..3d496e088bb 100644 --- a/src/Common/DateLUTImpl.h +++ b/src/Common/DateLUTImpl.h @@ -628,6 +628,7 @@ public: template inline Int16 toYear(DateOrTime v) const { return lut[toLUTIndex(v)].year; } + /// 1-based, starts on Monday template inline UInt8 toDayOfWeek(DateOrTime v) const { return lut[toLUTIndex(v)].day_of_week; } @@ -635,7 +636,7 @@ public: inline UInt8 toDayOfWeek(DateOrTime v, UInt8 week_day_mode) const { WeekDayMode mode = check_week_day_mode(week_day_mode); - auto res = toDayOfWeek(v); + UInt8 res = toDayOfWeek(v); bool start_from_sunday = (mode == WeekDayMode::WeekStartsSunday0 || mode == WeekDayMode::WeekStartsSunday1); bool zero_based = (mode == WeekDayMode::WeekStartsMonday0 || mode == WeekDayMode::WeekStartsSunday0); From 88c3c2946b172de296824ac5dd6cbbfb3ed8b380 Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 9 Jan 2023 14:58:44 +0000 Subject: [PATCH 126/262] review fixes + split into files --- src/Common/CancelToken.cpp | 243 +++++++++++ src/Common/{Threading.h => CancelToken.h} | 135 ++---- src/Common/CancelableSharedMutex.cpp | 115 +++++ src/Common/CancelableSharedMutex.h | 62 +++ src/Common/ErrorCodes.cpp | 2 +- src/Common/SharedMutex.cpp | 80 ++++ src/Common/SharedMutex.h | 47 +++ src/Common/Threading.cpp | 484 ---------------------- src/Common/futex.h | 97 +++++ src/Common/tests/gtest_threading.cpp | 56 +-- 10 files changed, 710 insertions(+), 611 deletions(-) create mode 100644 src/Common/CancelToken.cpp rename src/Common/{Threading.h => CancelToken.h} (61%) create mode 100644 src/Common/CancelableSharedMutex.cpp create mode 100644 src/Common/CancelableSharedMutex.h create mode 100644 src/Common/SharedMutex.cpp create mode 100644 src/Common/SharedMutex.h delete mode 100644 src/Common/Threading.cpp create mode 100644 src/Common/futex.h diff --git a/src/Common/CancelToken.cpp b/src/Common/CancelToken.cpp new file mode 100644 index 00000000000..87bcdc26bd4 --- /dev/null +++ b/src/Common/CancelToken.cpp @@ -0,0 +1,243 @@ +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int THREAD_WAS_CANCELED; +} +} + +#ifdef OS_LINUX /// Because of futex + +#include + +#include +#include +#include +#include + +namespace DB +{ + +namespace +{ + inline Int64 futexWait(void * address, UInt32 value) + { + return syscall(SYS_futex, address, FUTEX_WAIT_PRIVATE, value, nullptr, nullptr, 0); + } + + inline Int64 futexWake(void * address, int count) + { + return syscall(SYS_futex, address, FUTEX_WAKE_PRIVATE, count, nullptr, nullptr, 0); + } +} + +void 
CancelToken::Registry::insert(CancelToken * token) +{ + std::lock_guard lock(mutex); + threads[token->thread_id] = token; +} + +void CancelToken::Registry::remove(CancelToken * token) +{ + std::lock_guard lock(mutex); + threads.erase(token->thread_id); +} + +void CancelToken::Registry::signal(UInt64 tid) +{ + std::lock_guard lock(mutex); + if (auto it = threads.find(tid); it != threads.end()) + it->second->signalImpl(); +} + +void CancelToken::Registry::signal(UInt64 tid, int code, const String & message) +{ + std::lock_guard lock(mutex); + if (auto it = threads.find(tid); it != threads.end()) + it->second->signalImpl(code, message); +} + +const std::shared_ptr & CancelToken::Registry::instance() +{ + static std::shared_ptr registry{new Registry()}; // shared_ptr is used to enforce correct destruction order of tokens and registry + return registry; +} + +CancelToken::CancelToken() + : state(disabled) + , thread_id(getThreadId()) + , registry(Registry::instance()) +{ + registry->insert(this); +} + +CancelToken::~CancelToken() +{ + registry->remove(this); +} + +void CancelToken::signal(UInt64 tid) +{ + Registry::instance()->signal(tid); +} + +void CancelToken::signal(UInt64 tid, int code, const String & message) +{ + Registry::instance()->signal(tid, code, message); +} + +bool CancelToken::wait(UInt32 * address, UInt32 value) +{ + chassert((reinterpret_cast(address) & canceled) == 0); // An `address` must be 2-byte aligned + if (value & signaled) // Can happen after spurious wake-up due to cancel of other thread + return true; // Spin-wait unless signal is handled + + UInt64 s = state.load(); + while (true) + { + if (s & disabled) + { + // Start non-cancelable wait on futex. Spurious wake-up is possible. + futexWait(address, value); + return true; // Disabled - true is forced + } + if (s & canceled) + return false; // Has already been canceled + if (state.compare_exchange_strong(s, reinterpret_cast(address))) + break; // This futex has been "acquired" by this token + } + + // Start cancelable wait. Spurious wake-up is possible. 
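// Note added by the editor, not part of the patch: "spurious" here covers two cases: notifyAll() calls
// futexWake(address, INT_MAX), and so does signalImpl() when canceling some other thread parked on the
// same address, so a woken thread may find neither a notification nor its own cancelation and is
// expected to re-check its predicate and call wait() again.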
+ futexWait(address, value); + + // "Release" futex and check for cancelation + s = state.load(); + while (true) + { + chassert((s & disabled) != disabled); // `disable()` must not be called from another thread + if (s & canceled) + { + if (s == canceled) + break; // Signaled; futex "release" has been done by the signaling thread + else + { + s = state.load(); + continue; // To avoid race (may lead to futex destruction) we have to wait for signaling thread to finish + } + } + if (state.compare_exchange_strong(s, 0)) + return true; // There was no cancelation; futex "released" + } + + // Reset signaled bit + reinterpret_cast *>(address)->fetch_and(~signaled); + return false; +} + +void CancelToken::raise() +{ + std::unique_lock lock(signal_mutex); + if (exception_code != 0) + throw DB::Exception( + std::exchange(exception_code, 0), + std::exchange(exception_message, {})); + else + throw DB::Exception(ErrorCodes::THREAD_WAS_CANCELED, "Thread was canceled"); +} + +void CancelToken::notifyOne(UInt32 * address) +{ + futexWake(address, 1); +} + +void CancelToken::notifyAll(UInt32 * address) +{ + futexWake(address, INT_MAX); +} + +void CancelToken::signalImpl() +{ + signalImpl(0, {}); +} + +std::mutex CancelToken::signal_mutex; + +void CancelToken::signalImpl(int code, const String & message) +{ + // Serialize all signaling threads to avoid races due to concurrent signal()/raise() calls + std::unique_lock lock(signal_mutex); + + UInt64 s = state.load(); + while (true) + { + if (s & canceled) + return; // Already canceled - don't signal twice + if (state.compare_exchange_strong(s, s | canceled)) + break; // It is the canceling thread - should deliver signal if necessary + } + + exception_code = code; + exception_message = message; + + if ((s & disabled) == disabled) + return; // Cancelation is disabled - just signal token for later, but don't wake + std::atomic * address = reinterpret_cast *>(s & disabled); + if (address == nullptr) + return; // Thread is currently not waiting on futex - wake-up not required + + // Set signaled bit + UInt32 value = address->load(); + while (true) + { + if (value & signaled) // Already signaled, just spin-wait until previous signal is handled by waiter + value = address->load(); + else if (address->compare_exchange_strong(value, value | signaled)) + break; + } + + // Wake all threads waiting on `address`, one of them will be canceled and others will get spurious wake-ups + // Woken canceled thread will reset signaled bit + futexWake(address, INT_MAX); + + // Signaling thread must remove address from state to notify canceled thread that `futexWake()` is done, thus `wake()` can return. + // Otherwise we may have race condition: signaling thread may try to wake futex that has been already destructed. 
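// Worked example added by the editor, not part of the patch: the interleaving that the ordering around
// the store below rules out:
//   1. signaler: futexWake(address, INT_MAX) above runs while *address is still guaranteed to be alive;
//   2. waiter:   returns from futexWait(), sees the canceled bit, but keeps re-reading `state` until it
//                equals exactly `canceled` (the matching loop in wait() above);
//   3. signaler: the store below publishes "done touching *address";
//   4. waiter:   observes state == canceled, clears the `signaled` bit and returns false from wait(),
//                after which the futex word (e.g. a field of a mutex object) may safely be destroyed.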
+ state.store(canceled); +} + +Cancelable::Cancelable() +{ + CancelToken::local().reset(); +} + +Cancelable::~Cancelable() +{ + CancelToken::local().disable(); +} + +NonCancelable::NonCancelable() +{ + CancelToken::local().disable(); +} + +NonCancelable::~NonCancelable() +{ + CancelToken::local().enable(); +} + +} + +#else + +namespace DB +{ + +void CancelToken::raise() +{ + throw DB::Exception(ErrorCodes::THREAD_WAS_CANCELED, "Thread was canceled"); +} + +} + +#endif diff --git a/src/Common/Threading.h b/src/Common/CancelToken.h similarity index 61% rename from src/Common/Threading.h rename to src/Common/CancelToken.h index d5d32e73b67..27b9d41f0f3 100644 --- a/src/Common/Threading.h +++ b/src/Common/CancelToken.h @@ -15,23 +15,25 @@ namespace DB { -// Scoped object, enabling thread cancellation (cannot be nested) -struct Cancellable +// Scoped object, enabling thread cancelation (cannot be nested). +// Intended to be used once per cancelable task. It erases any previously held cancelation signal. +// Note that by default thread is not cancelable. +struct Cancelable { - Cancellable(); - ~Cancellable(); + Cancelable(); + ~Cancelable(); }; -// Scoped object, disabling thread cancellation (cannot be nested; must be inside `Cancellable` region) -struct NonCancellable +// Scoped object, disabling thread cancelation (cannot be nested; must be inside `Cancelable` region) +struct NonCancelable { - NonCancellable(); - ~NonCancellable(); + NonCancelable(); + ~NonCancelable(); }; -// Responsible for synchronization needed to deliver thread cancellation signal. -// Basic building block for cancellable synchronization primitives. -// Allows to perform cancellable wait on memory addresses (think futex) +// Responsible for synchronization needed to deliver thread cancelation signal. +// Basic building block for cancelable synchronization primitives. +// Allows to perform cancelable wait on memory addresses (think futex) class CancelToken { public: @@ -39,6 +41,7 @@ public: CancelToken(const CancelToken &) = delete; CancelToken(CancelToken &&) = delete; CancelToken & operator=(const CancelToken &) = delete; + CancelToken & operator=(CancelToken &&) = delete; ~CancelToken(); // Returns token for the current thread @@ -48,17 +51,17 @@ public: return token; } - // Cancellable wait on memory address (futex word). + // Cancelable wait on memory address (futex word). // Thread will do atomic compare-and-sleep `*address == value`. Waiting will continue until `notify_one()` // or `notify_all()` will be called with the same `address` or calling thread will be canceled using `signal()`. - // Note that spurious wake-ups are also possible due to cancellation of other waiters on the same `address`. + // Note that spurious wake-ups are also possible due to cancelation of other waiters on the same `address`. // WARNING: `address` must be 2-byte aligned and `value` highest bit must be zero. // Return value: // true - woken by either notify or spurious wakeup; - // false - iff cancellation signal has been received. + // false - iff cancelation signal has been received. // Implementation details: - // It registers `address` inside token's `state` to allow other threads to wake this thread and deliver cancellation signal. - // Highest bit of `*address` is used for guaranteed delivery of the signal, but is guaranteed to be zero on return due to cancellation. + // It registers `address` inside token's `state` to allow other threads to wake this thread and deliver cancelation signal. 
+ // Highest bit of `*address` is used for guaranteed delivery of the signal, but is guaranteed to be zero on return due to cancelation. // Intended to be called only by thread associated with this token. bool wait(UInt32 * address, UInt32 value); @@ -72,27 +75,27 @@ public: static void notifyAll(UInt32 * address); // Send cancel signal to thread with specified `tid`. - // If thread was waiting using `wait()` it will be woken up (unless cancellation is disabled). + // If thread was waiting using `wait()` it will be woken up (unless cancelation is disabled). // Can be called from any thread. static void signal(UInt64 tid); static void signal(UInt64 tid, int code, const String & message); - // Flag used to deliver cancellation into memory address to wake a thread. + // Flag used to deliver cancelation into memory address to wake a thread. // Note that most significant bit at `addresses` to be used with `wait()` is reserved. static constexpr UInt32 signaled = 1u << 31u; private: - friend struct Cancellable; - friend struct NonCancellable; + friend struct Cancelable; + friend struct NonCancelable; - // Restores initial state for token to be reused. See `Cancellable` struct. + // Restores initial state for token to be reused. See `Cancelable` struct. // Intended to be called only by thread associated with this token. void reset() { state.store(0); } - // Enable thread cancellation. See `NonCancellable` struct. + // Enable thread cancelation. See `NonCancelable` struct. // Intended to be called only by thread associated with this token. void enable() { @@ -100,7 +103,7 @@ private: state.fetch_and(~disabled); } - // Disable thread cancellation. See `NonCancellable` struct. + // Disable thread cancelation. See `NonCancelable` struct. // Intended to be called only by thread associated with this token. void disable() { @@ -109,8 +112,6 @@ private: } // Singleton. Maps thread IDs to tokens. - struct Registry; - friend struct Registry; struct Registry { std::mutex mutex; @@ -134,7 +135,7 @@ private: // Upper bits - possible values: // 1) all zeros: token is enabed, i.e. wait() call can return false, thread is not waiting on any address; - // 2) all ones: token is disabled, i.e. wait() call cannot be cancelled; + // 2) all ones: token is disabled, i.e. wait() call cannot be canceled; // 3) specific `address`: token is enabled and thread is currently waiting on this `address`. 
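// Usage sketch added by the editor, not part of the patch; it relies only on the declarations in this
// header (wait/notifyAll/signal/raise and the Cancelable scope) plus getThreadId(); `flag` and
// `waiter_tid` are illustrative names.
//
//   std::atomic<UInt32> flag{0};                     // futex word; highest bit is reserved for `signaled`
//
//   // Waiting thread (assume it published its getThreadId() as waiter_tid):
//   DB::Cancelable cancelable_scope;                 // makes this thread cancelable
//   while (flag.load() == 0)
//   {
//       if (!DB::CancelToken::local().wait(reinterpret_cast<UInt32 *>(&flag), 0))
//           DB::CancelToken::local().raise();        // rethrows the code/message passed to signal()
//   }
//
//   // Another thread, normal wake-up path:
//   flag.store(1);
//   DB::CancelToken::notifyAll(reinterpret_cast<UInt32 *>(&flag));
//
//   // Another thread, cancelation path:
//   DB::CancelToken::signal(waiter_tid, error_code, "canceled");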
static constexpr UInt64 disabled = ~canceled; static_assert(sizeof(UInt32 *) == sizeof(UInt64)); // State must be able to hold an address @@ -142,11 +143,11 @@ private: // All signal handling logic should be globally serialized using this mutex static std::mutex signal_mutex; - // Cancellation state + // Cancelation state alignas(64) std::atomic state; [[maybe_unused]] char padding[64 - sizeof(state)]; - // Cancellation exception + // Cancelation exception int exception_code; String exception_message; @@ -157,86 +158,25 @@ private: const std::shared_ptr registry; }; -class CancellableSharedMutex -{ -public: - CancellableSharedMutex(); - ~CancellableSharedMutex() = default; - CancellableSharedMutex(const CancellableSharedMutex &) = delete; - CancellableSharedMutex & operator=(const CancellableSharedMutex &) = delete; - - // Exclusive ownership - void lock(); - bool try_lock(); - void unlock(); - - // Shared ownership - void lock_shared(); - bool try_lock_shared(); - void unlock_shared(); - -private: - // State 64-bits layout: - // 1b - 31b - 1b - 31b - // signaled - writers - signaled - readers - // 63------------------------------------0 - // Two 32-bit words are used for cancellable waiting, so each has its own separate signaled bit - static constexpr UInt64 readers = (1ull << 32ull) - 1ull - CancelToken::signaled; - static constexpr UInt64 readers_signaled = CancelToken::signaled; - static constexpr UInt64 writers = readers << 32ull; - static constexpr UInt64 writers_signaled = readers_signaled << 32ull; - - alignas(64) std::atomic state; - std::atomic waiters; -}; - -class FastSharedMutex -{ -public: - FastSharedMutex(); - ~FastSharedMutex() = default; - FastSharedMutex(const FastSharedMutex &) = delete; - FastSharedMutex & operator=(const FastSharedMutex &) = delete; - - // Exclusive ownership - void lock(); - bool try_lock(); - void unlock(); - - // Shared ownership - void lock_shared(); - bool try_lock_shared(); - void unlock_shared(); - -private: - static constexpr UInt64 readers = (1ull << 32ull) - 1ull; // Lower 32 bits of state - static constexpr UInt64 writers = ~readers; // Upper 32 bits of state - - alignas(64) std::atomic state; - std::atomic waiters; -}; - } #else -#include - -// WARNING: We support cancellable synchronization primitives only on linux for now +// WARNING: We support cancelable synchronization primitives only on linux for now namespace DB { -struct Cancellable +struct Cancelable { - Cancellable() = default; - ~Cancellable() = default; + Cancelable() = default; + ~Cancelable() = default; }; -struct NonCancellable +struct NonCancelable { - NonCancellable() = default; - ~NonCancellable() = default; + NonCancelable() = default; + ~NonCancelable() = default; }; class CancelToken @@ -262,9 +202,6 @@ public: static void signal(UInt64, int, const String &) {} }; -using CancellableSharedMutex = std::shared_mutex; -using FastSharedMutex = std::shared_mutex; - } #endif diff --git a/src/Common/CancelableSharedMutex.cpp b/src/Common/CancelableSharedMutex.cpp new file mode 100644 index 00000000000..c8ca93309ee --- /dev/null +++ b/src/Common/CancelableSharedMutex.cpp @@ -0,0 +1,115 @@ +#include + +#ifdef OS_LINUX /// Because of futex + +#include + +namespace DB +{ + +namespace +{ + inline bool cancelableWaitUpperFetch(std::atomic & address, UInt64 & value) + { + bool res = CancelToken::local().wait(upperHalfAddress(&address), upperHalf(value)); + value = address.load(); + return res; + } + + inline bool cancelableWaitLowerFetch(std::atomic & address, UInt64 & value) + { + 
bool res = CancelToken::local().wait(lowerHalfAddress(&address), lowerHalf(value)); + value = address.load(); + return res; + } +} + +CancelableSharedMutex::CancelableSharedMutex() + : state(0) + , waiters(0) +{} + +void CancelableSharedMutex::lock() +{ + UInt64 value = state.load(); + while (true) + { + if (value & writers) + { + waiters++; + if (!cancelableWaitUpperFetch(state, value)) + { + waiters--; + CancelToken::local().raise(); + } + else + waiters--; + } + else if (state.compare_exchange_strong(value, value | writers)) + break; + } + + value |= writers; + while (value & readers) + { + if (!cancelableWaitLowerFetch(state, value)) + { + state.fetch_and(~writers); + futexWakeUpperAll(state); + CancelToken::local().raise(); + } + } +} + +bool CancelableSharedMutex::try_lock() +{ + UInt64 value = state.load(); + return (value & (readers | writers)) == 0 && state.compare_exchange_strong(value, value | writers); +} + +void CancelableSharedMutex::unlock() +{ + state.fetch_and(~writers); + if (waiters) + futexWakeUpperAll(state); +} + +void CancelableSharedMutex::lock_shared() +{ + UInt64 value = state.load(); + while (true) + { + if (value & writers) + { + waiters++; + if (!cancelableWaitUpperFetch(state, value)) + { + waiters--; + CancelToken::local().raise(); + } + else + waiters--; + } + else if (state.compare_exchange_strong(value, value + 1)) // overflow is not realistic + break; + } +} + +bool CancelableSharedMutex::try_lock_shared() +{ + UInt64 value = state.load(); + if (!(value & writers) && state.compare_exchange_strong(value, value + 1)) // overflow is not realistic + return true; + return false; +} + +void CancelableSharedMutex::unlock_shared() +{ + UInt64 value = state.fetch_sub(1) - 1; + if ((value & (writers | readers)) == writers) // If writer is waiting and no more readers + futexWakeLowerOne(state); // Wake writer +} + +} + +#endif diff --git a/src/Common/CancelableSharedMutex.h b/src/Common/CancelableSharedMutex.h new file mode 100644 index 00000000000..f989e8d5beb --- /dev/null +++ b/src/Common/CancelableSharedMutex.h @@ -0,0 +1,62 @@ +#pragma once + +#ifdef OS_LINUX /// Because of futex + +#include +#include +#include +#include // for std::unique_lock and std::shared_lock + +namespace DB +{ + +// Reimplementation of `std::shared_mutex` that can interoperate with thread cancelation via `CancelToken::signal()`. +// It has cancelation point on waiting during `lock()` and `shared_lock()`. +// NOTE: It has NO cancelation points on fast code path, when locking does not require waiting. 
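// Usage sketch added by the editor, not part of the patch, modeled on the unit test changes later in this
// patch; `reader_tid` is an illustrative name (obtained via getThreadId() on the reader thread).
//
//   DB::CancelableSharedMutex sm;
//
//   // Reader thread: a blocked lock_shared() is a cancelation point.
//   try
//   {
//       DB::Cancelable cancelable_scope;             // cancelation is disabled by default
//       std::shared_lock lock(sm);                   // blocks while a writer holds the mutex
//       // ... read the shared state ...
//   }
//   catch (DB::Exception & e)
//   {
//       // e.code() == DB::ErrorCodes::THREAD_WAS_CANCELED when the wait was canceled
//   }
//
//   // Any other thread:
//   DB::CancelToken::signal(reader_tid, DB::ErrorCodes::THREAD_WAS_CANCELED, "canceled");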
+class CancelableSharedMutex +{ +public: + CancelableSharedMutex(); + ~CancelableSharedMutex() = default; + CancelableSharedMutex(const CancelableSharedMutex &) = delete; + CancelableSharedMutex & operator=(const CancelableSharedMutex &) = delete; + + // Exclusive ownership + void lock(); + bool try_lock(); + void unlock(); + + // Shared ownership + void lock_shared(); + bool try_lock_shared(); + void unlock_shared(); + +private: + // State 64-bits layout: + // 1b - 31b - 1b - 31b + // signaled - writers - signaled - readers + // 63------------------------------------0 + // Two 32-bit words are used for cancelable waiting, so each has its own separate signaled bit + static constexpr UInt64 readers = (1ull << 32ull) - 1ull - CancelToken::signaled; + static constexpr UInt64 readers_signaled = CancelToken::signaled; + static constexpr UInt64 writers = readers << 32ull; + static constexpr UInt64 writers_signaled = readers_signaled << 32ull; + + alignas(64) std::atomic state; + std::atomic waiters; +}; + +} + +#else + +// WARNING: We support cancelable synchronization primitives only on linux for now + +namespace DB +{ + +using CancelableSharedMutex = std::shared_mutex; + +} + +#endif diff --git a/src/Common/ErrorCodes.cpp b/src/Common/ErrorCodes.cpp index 6dbeefe1823..0ad4cbb9e6f 100644 --- a/src/Common/ErrorCodes.cpp +++ b/src/Common/ErrorCodes.cpp @@ -645,7 +645,7 @@ M(674, RESOURCE_NOT_FOUND) \ M(675, CANNOT_PARSE_IPV4) \ M(676, CANNOT_PARSE_IPV6) \ - M(677, THREAD_WAS_CANCELLED) \ + M(677, THREAD_WAS_CANCELED) \ \ M(999, KEEPER_EXCEPTION) \ M(1000, POCO_EXCEPTION) \ diff --git a/src/Common/SharedMutex.cpp b/src/Common/SharedMutex.cpp new file mode 100644 index 00000000000..3a69c106800 --- /dev/null +++ b/src/Common/SharedMutex.cpp @@ -0,0 +1,80 @@ +#include + +#ifdef OS_LINUX /// Because of futex + +#include + +#include + +namespace DB +{ + +void SharedMutex::lock() +{ + UInt64 value = state.load(); + while (true) + { + if (value & writers) + { + waiters++; + futexWaitUpperFetch(state, value); + waiters--; + } + else if (state.compare_exchange_strong(value, value | writers)) + break; + } + + value |= writers; + while (value & readers) + futexWaitLowerFetch(state, value); +} + +bool SharedMutex::try_lock() +{ + UInt64 value = 0; + if (state.compare_exchange_strong(value, writers)) + return true; + return false; +} + +void SharedMutex::unlock() +{ + state.store(0); + if (waiters) + futexWakeUpperAll(state); +} + +void SharedMutex::lock_shared() +{ + UInt64 value = state.load(); + while (true) + { + if (value & writers) + { + waiters++; + futexWaitUpperFetch(state, value); + waiters--; + } + else if (state.compare_exchange_strong(value, value + 1)) + break; + } +} + +bool SharedMutex::try_lock_shared() +{ + UInt64 value = state.load(); + if (!(value & writers) && state.compare_exchange_strong(value, value + 1)) + return true; + return false; +} + +void SharedMutex::unlock_shared() +{ + UInt64 value = state.fetch_sub(1) - 1; + if (value == writers) + futexWakeLowerOne(state); // Wake writer +} + +} + +#endif diff --git a/src/Common/SharedMutex.h b/src/Common/SharedMutex.h new file mode 100644 index 00000000000..ebe730ca419 --- /dev/null +++ b/src/Common/SharedMutex.h @@ -0,0 +1,47 @@ +#pragma once + +#ifdef OS_LINUX /// Because of futex + +#include +#include +#include // for std::unique_lock and std::shared_lock + +namespace DB +{ + +// Faster implementation of `std::shared_mutex` based on a pair of futexes +class SharedMutex +{ +public: + SharedMutex(); + ~SharedMutex() = default; + 
SharedMutex(const SharedMutex &) = delete; + SharedMutex & operator=(const SharedMutex &) = delete; + + // Exclusive ownership + void lock(); + bool try_lock(); + void unlock(); + + // Shared ownership + void lock_shared(); + bool try_lock_shared(); + void unlock_shared(); + +private: + static constexpr UInt64 readers = (1ull << 32ull) - 1ull; // Lower 32 bits of state + static constexpr UInt64 writers = ~readers; // Upper 32 bits of state + + alignas(64) std::atomic state; + std::atomic waiters; +}; + +} + +#else + +using SharedMutex = std::shared_mutex; + +} + +#endif diff --git a/src/Common/Threading.cpp b/src/Common/Threading.cpp deleted file mode 100644 index ae32a1a1052..00000000000 --- a/src/Common/Threading.cpp +++ /dev/null @@ -1,484 +0,0 @@ -#include - -namespace DB -{ -namespace ErrorCodes -{ - extern const int THREAD_WAS_CANCELLED; -} -} - -#ifdef OS_LINUX /// Because of futex - -#include - -#include - -#include -#include -#include -#include - -namespace DB -{ - -namespace -{ - inline Int64 futexWait(void * address, UInt32 value) - { - return syscall(SYS_futex, address, FUTEX_WAIT_PRIVATE, value, nullptr, nullptr, 0); - } - - inline Int64 futexWake(void * address, int count) - { - return syscall(SYS_futex, address, FUTEX_WAKE_PRIVATE, count, nullptr, nullptr, 0); - } - - // inline void waitFetch(std::atomic & address, UInt32 & value) - // { - // futexWait(&address, value); - // value = address.load(); - // } - - // inline void wakeOne(std::atomic & address) - // { - // futexWake(&address, 1); - // } - - // inline void wakeAll(std::atomic & address) - // { - // futexWake(&address, INT_MAX); - // } - - inline constexpr UInt32 lowerValue(UInt64 value) - { - return static_cast(value & 0xffffffffull); - } - - inline constexpr UInt32 upperValue(UInt64 value) - { - return static_cast(value >> 32ull); - } - - inline UInt32 * lowerAddress(void * address) - { - return reinterpret_cast(address) + (std::endian::native == std::endian::big); - } - - inline UInt32 * upperAddress(void * address) - { - return reinterpret_cast(address) + (std::endian::native == std::endian::little); - } - - inline void waitLowerFetch(std::atomic & address, UInt64 & value) - { - futexWait(lowerAddress(&address), lowerValue(value)); - value = address.load(); - } - - inline bool cancellableWaitLowerFetch(std::atomic & address, UInt64 & value) - { - bool res = CancelToken::local().wait(lowerAddress(&address), lowerValue(value)); - value = address.load(); - return res; - } - - inline void wakeLowerOne(std::atomic & address) - { - syscall(SYS_futex, lowerAddress(&address), FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0); - } - - // inline void wakeLowerAll(std::atomic & address) - // { - // syscall(SYS_futex, lowerAddress(&address), FUTEX_WAKE_PRIVATE, INT_MAX, nullptr, nullptr, 0); - // } - - inline void waitUpperFetch(std::atomic & address, UInt64 & value) - { - futexWait(upperAddress(&address), upperValue(value)); - value = address.load(); - } - - inline bool cancellableWaitUpperFetch(std::atomic & address, UInt64 & value) - { - bool res = CancelToken::local().wait(upperAddress(&address), upperValue(value)); - value = address.load(); - return res; - } - - // inline void wakeUpperOne(std::atomic & address) - // { - // syscall(SYS_futex, upperAddress(&address), FUTEX_WAKE_PRIVATE, 1, nullptr, nullptr, 0); - // } - - inline void wakeUpperAll(std::atomic & address) - { - syscall(SYS_futex, upperAddress(&address), FUTEX_WAKE_PRIVATE, INT_MAX, nullptr, nullptr, 0); - } -} - -void CancelToken::Registry::insert(CancelToken * 
token) -{ - std::lock_guard lock(mutex); - threads[token->thread_id] = token; -} - -void CancelToken::Registry::remove(CancelToken * token) -{ - std::lock_guard lock(mutex); - threads.erase(token->thread_id); -} - -void CancelToken::Registry::signal(UInt64 tid) -{ - std::lock_guard lock(mutex); - if (auto it = threads.find(tid); it != threads.end()) - it->second->signalImpl(); -} - -void CancelToken::Registry::signal(UInt64 tid, int code, const String & message) -{ - std::lock_guard lock(mutex); - if (auto it = threads.find(tid); it != threads.end()) - it->second->signalImpl(code, message); -} - -const std::shared_ptr & CancelToken::Registry::instance() -{ - static std::shared_ptr registry{new Registry()}; // shared_ptr is used to enforce correct destruction order of tokens and registry - return registry; -} - -CancelToken::CancelToken() - : state(disabled) - , thread_id(getThreadId()) - , registry(Registry::instance()) -{ - registry->insert(this); -} - -CancelToken::~CancelToken() -{ - registry->remove(this); -} - -void CancelToken::signal(UInt64 tid) -{ - Registry::instance()->signal(tid); -} - -void CancelToken::signal(UInt64 tid, int code, const String & message) -{ - Registry::instance()->signal(tid, code, message); -} - -bool CancelToken::wait(UInt32 * address, UInt32 value) -{ - chassert((reinterpret_cast(address) & canceled) == 0); // An `address` must be 2-byte aligned - if (value & signaled) // Can happen after spurious wake-up due to cancel of other thread - return true; // Spin-wait unless signal is handled - - UInt64 s = state.load(); - while (true) - { - if (s & disabled) - { - // Start non-cancellable wait on futex. Spurious wake-up is possible. - futexWait(address, value); - return true; // Disabled - true is forced - } - if (s & canceled) - return false; // Has already been canceled - if (state.compare_exchange_strong(s, reinterpret_cast(address))) - break; // This futex has been "acquired" by this token - } - - // Start cancellable wait. Spurious wake-up is possible. 
- futexWait(address, value); - - // "Release" futex and check for cancellation - s = state.load(); - while (true) - { - chassert((s & disabled) != disabled); // `disable()` must not be called from another thread - if (s & canceled) - { - if (s == canceled) - break; // Signaled; futex "release" has been done by the signaling thread - else - { - s = state.load(); - continue; // To avoid race (may lead to futex destruction) we have to wait for signaling thread to finish - } - } - if (state.compare_exchange_strong(s, 0)) - return true; // There was no cancellation; futex "released" - } - - // Reset signaled bit - reinterpret_cast *>(address)->fetch_and(~signaled); - return false; -} - -void CancelToken::raise() -{ - std::unique_lock lock(signal_mutex); - if (exception_code != 0) - throw DB::Exception( - std::exchange(exception_code, 0), - std::exchange(exception_message, {})); - else - throw DB::Exception(ErrorCodes::THREAD_WAS_CANCELLED, "Thread was cancelled"); -} - -void CancelToken::notifyOne(UInt32 * address) -{ - futexWake(address, 1); -} - -void CancelToken::notifyAll(UInt32 * address) -{ - futexWake(address, INT_MAX); -} - -void CancelToken::signalImpl() -{ - signalImpl(0, {}); -} - -std::mutex CancelToken::signal_mutex; - -void CancelToken::signalImpl(int code, const String & message) -{ - // Serialize all signaling threads to avoid races due to concurrent signal()/raise() calls - std::unique_lock lock(signal_mutex); - - UInt64 s = state.load(); - while (true) - { - if (s & canceled) - return; // Already cancelled - don't signal twice - if (state.compare_exchange_strong(s, s | canceled)) - break; // It is the cancelling thread - should deliver signal if necessary - } - - exception_code = code; - exception_message = message; - - if ((s & disabled) == disabled) - return; // Cancellation is disabled - just signal token for later, but don't wake - std::atomic * address = reinterpret_cast *>(s & disabled); - if (address == nullptr) - return; // Thread is currently not waiting on futex - wake-up not required - - // Set signaled bit - UInt32 value = address->load(); - while (true) - { - if (value & signaled) // Already signaled, just spin-wait until previous signal is handled by waiter - value = address->load(); - else if (address->compare_exchange_strong(value, value | signaled)) - break; - } - - // Wake all threads waiting on `address`, one of them will be cancelled and others will get spurious wake-ups - // Woken canceled thread will reset signaled bit - futexWake(address, INT_MAX); - - // Signaling thread must remove address from state to notify canceled thread that `futexWake()` is done, thus `wake()` can return. - // Otherwise we may have race condition: signaling thread may try to wake futex that has been already destructed. 
- state.store(canceled); -} - -Cancellable::Cancellable() -{ - CancelToken::local().reset(); -} - -Cancellable::~Cancellable() -{ - CancelToken::local().disable(); -} - -NonCancellable::NonCancellable() -{ - CancelToken::local().disable(); -} - -NonCancellable::~NonCancellable() -{ - CancelToken::local().enable(); -} - -CancellableSharedMutex::CancellableSharedMutex() - : state(0) - , waiters(0) -{} - -void CancellableSharedMutex::lock() -{ - UInt64 value = state.load(); - while (true) - { - if (value & writers) - { - waiters++; - if (!cancellableWaitUpperFetch(state, value)) - { - waiters--; - CancelToken::local().raise(); - } - else - waiters--; - } - else if (state.compare_exchange_strong(value, value | writers)) - break; - } - - value |= writers; - while (value & readers) - { - if (!cancellableWaitLowerFetch(state, value)) - { - state.fetch_and(~writers); - wakeUpperAll(state); - CancelToken::local().raise(); - } - } -} - -bool CancellableSharedMutex::try_lock() -{ - UInt64 value = state.load(); - return (value & (readers | writers)) == 0 && state.compare_exchange_strong(value, value | writers); -} - -void CancellableSharedMutex::unlock() -{ - state.fetch_and(~writers); - if (waiters) - wakeUpperAll(state); -} - -void CancellableSharedMutex::lock_shared() -{ - UInt64 value = state.load(); - while (true) - { - if (value & writers) - { - waiters++; - if (!cancellableWaitUpperFetch(state, value)) - { - waiters--; - CancelToken::local().raise(); - } - else - waiters--; - } - else if (state.compare_exchange_strong(value, value + 1)) // overflow is not realistic - break; - } -} - -bool CancellableSharedMutex::try_lock_shared() -{ - UInt64 value = state.load(); - if (!(value & writers) && state.compare_exchange_strong(value, value + 1)) // overflow is not realistic - return true; - return false; -} - -void CancellableSharedMutex::unlock_shared() -{ - UInt64 value = state.fetch_sub(1) - 1; - if ((value & (writers | readers)) == writers) // If writer is waiting and no more readers - wakeLowerOne(state); // Wake writer -} - -FastSharedMutex::FastSharedMutex() - : state(0) - , waiters(0) -{} - -void FastSharedMutex::lock() -{ - UInt64 value = state.load(); - while (true) - { - if (value & writers) - { - waiters++; - waitUpperFetch(state, value); - waiters--; - } - else if (state.compare_exchange_strong(value, value | writers)) - break; - } - - value |= writers; - while (value & readers) - waitLowerFetch(state, value); -} - -bool FastSharedMutex::try_lock() -{ - UInt64 value = 0; - if (state.compare_exchange_strong(value, writers)) - return true; - return false; -} - -void FastSharedMutex::unlock() -{ - state.store(0); - if (waiters) - wakeUpperAll(state); -} - -void FastSharedMutex::lock_shared() -{ - UInt64 value = state.load(); - while (true) - { - if (value & writers) - { - waiters++; - waitUpperFetch(state, value); - waiters--; - } - else if (state.compare_exchange_strong(value, value + 1)) - break; - } -} - -bool FastSharedMutex::try_lock_shared() -{ - UInt64 value = state.load(); - if (!(value & writers) && state.compare_exchange_strong(value, value + 1)) - return true; - return false; -} - -void FastSharedMutex::unlock_shared() -{ - UInt64 value = state.fetch_sub(1) - 1; - if (value == writers) - wakeLowerOne(state); // Wake writer -} - -} - -#else - -namespace DB -{ - -void CancelToken::raise() -{ - throw DB::Exception(ErrorCodes::THREAD_WAS_CANCELLED, "Thread was cancelled"); -} - -} - -#endif diff --git a/src/Common/futex.h b/src/Common/futex.h new file mode 100644 index 
00000000000..33279ff4831 --- /dev/null +++ b/src/Common/futex.h @@ -0,0 +1,97 @@ +#pragma once + +#ifdef OS_LINUX + +#include + +#include + +#include +#include +#include +#include + +namespace DB +{ + +inline Int64 futexWait(void * address, UInt32 value) +{ + return syscall(SYS_futex, address, FUTEX_WAIT_PRIVATE, value, nullptr, nullptr, 0); +} + +inline Int64 futexWake(void * address, int count) +{ + return syscall(SYS_futex, address, FUTEX_WAKE_PRIVATE, count, nullptr, nullptr, 0); +} + +inline void futexWaitFetch(std::atomic & address, UInt32 & value) +{ + futexWait(&address, value); + value = address.load(); +} + +inline void futexWakeOne(std::atomic & address) +{ + futexWake(&address, 1); +} + +inline void futexWakeAll(std::atomic & address) +{ + futexWake(&address, INT_MAX); +} + +inline constexpr UInt32 lowerHalf(UInt64 value) +{ + return static_cast(value & 0xffffffffull); +} + +inline constexpr UInt32 upperHalf(UInt64 value) +{ + return static_cast(value >> 32ull); +} + +inline UInt32 * lowerHalfAddress(void * address) +{ + return reinterpret_cast(address) + (std::endian::native == std::endian::big); +} + +inline UInt32 * upperHalfAddress(void * address) +{ + return reinterpret_cast(address) + (std::endian::native == std::endian::little); +} + +inline void futexWaitLowerFetch(std::atomic & address, UInt64 & value) +{ + futexWait(lowerHalfAddress(&address), lowerHalf(value)); + value = address.load(); +} + +inline void futexWakeLowerOne(std::atomic & address) +{ + futexWake(lowerHalfAddress(&address), 1); +} + +inline void futexWakeLowerAll(std::atomic & address) +{ + futexWake(lowerHalfAddress(&address), INT_MAX); +} + +inline void futexWaitUpperFetch(std::atomic & address, UInt64 & value) +{ + futexWait(upperHalfAddress(&address), upperHalf(value)); + value = address.load(); +} + +inline void futexWakeUpperOne(std::atomic & address) +{ + futexWake(upperHalfAddress(&address), 1); +} + +inline void futexWakeUpperAll(std::atomic & address) +{ + futexWake(upperHalfAddress(&address), INT_MAX); +} + +} + +#endif diff --git a/src/Common/tests/gtest_threading.cpp b/src/Common/tests/gtest_threading.cpp index 767739deb46..a4ed7ad6642 100644 --- a/src/Common/tests/gtest_threading.cpp +++ b/src/Common/tests/gtest_threading.cpp @@ -7,7 +7,9 @@ #include #include "Common/Exception.h" -#include +#include +#include +#include #include #include @@ -18,7 +20,7 @@ namespace DB { namespace ErrorCodes { - extern const int THREAD_WAS_CANCELLED; + extern const int THREAD_WAS_CANCELED; } } @@ -126,7 +128,7 @@ void TestSharedMutexCancelReader() } catch (DB::Exception & e) { - ASSERT_EQ(e.code(), DB::ErrorCodes::THREAD_WAS_CANCELLED); + ASSERT_EQ(e.code(), DB::ErrorCodes::THREAD_WAS_CANCELED); ASSERT_EQ(e.message(), "test"); cancels++; cancel_sync.arrive_and_wait(); // (C) sync with writer @@ -148,13 +150,13 @@ void TestSharedMutexCancelReader() sync.arrive_and_wait(); // (B) sync with readers //std::unique_lock lock(m); // not needed, already synced using barrier for (UInt64 tid : tids_to_cancel) - DB::CancelToken::signal(tid, DB::ErrorCodes::THREAD_WAS_CANCELLED, "test"); + DB::CancelToken::signal(tid, DB::ErrorCodes::THREAD_WAS_CANCELED, "test"); // This sync is crucial. It is needed to hold `lock` long enough. - // It guarantees that every cancelled thread will find `sm` blocked by writer, and thus will begin to wait. - // Wait() call is required for cancellation. Otherwise, fastpath acquire w/o wait will not generate exception. 
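Aside, not part of the patch: the `lowerHalfAddress`/`upperHalfAddress` helpers in the new `futex.h` above pick which 32-bit word of a 64-bit state to hand to the futex syscall, and the `+ (std::endian::native == std::endian::big)` offset is the subtle part. A minimal standalone check of that indexing logic:

```cpp
#include <bit>
#include <cstdint>
#include <cstdio>

int main()
{
    // Upper half = 0x11111111, lower half = 0x22222222, regardless of byte order.
    uint64_t state = 0x1111111122222222ull;
    auto * words = reinterpret_cast<uint32_t *>(&state);

    // On little-endian the low 32 bits live in words[0], on big-endian in words[1],
    // matching the "+ (std::endian::native == std::endian::big)" adjustment above.
    const size_t lower = (std::endian::native == std::endian::big) ? 1 : 0;
    std::printf("lower=%08x upper=%08x\n", words[lower], words[1 - lower]);
    return 0;
}
```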
+ // It guarantees that every canceled thread will find `sm` blocked by writer, and thus will begin to wait. + // Wait() call is required for cancelation. Otherwise, fastpath acquire w/o wait will not generate exception. // And this is the desired behaviour. - cancel_sync.arrive_and_wait(); // (C) wait for cancellation to finish, before unlock. + cancel_sync.arrive_and_wait(); // (C) wait for cancelation to finish, before unlock. } } @@ -199,18 +201,18 @@ void TestSharedMutexCancelWriter() for (UInt64 tid : all_tids) { if (tid != getThreadId()) - DB::CancelToken::signal(tid, DB::ErrorCodes::THREAD_WAS_CANCELLED, "test"); + DB::CancelToken::signal(tid, DB::ErrorCodes::THREAD_WAS_CANCELED, "test"); } // This sync is crucial. It is needed to hold `lock` long enough. - // It guarantees that every cancelled thread will find `sm` blocked, and thus will begin to wait. - // Wait() call is required for cancellation. Otherwise, fastpath acquire w/o wait will not generate exception. + // It guarantees that every canceled thread will find `sm` blocked, and thus will begin to wait. + // Wait() call is required for cancelation. Otherwise, fastpath acquire w/o wait will not generate exception. // And this is the desired behaviour. - sync.arrive_and_wait(); // (B) wait for cancellation to finish, before unlock. + sync.arrive_and_wait(); // (B) wait for cancelation to finish, before unlock. } catch (DB::Exception & e) { - ASSERT_EQ(e.code(), DB::ErrorCodes::THREAD_WAS_CANCELLED); + ASSERT_EQ(e.code(), DB::ErrorCodes::THREAD_WAS_CANCELED); ASSERT_EQ(e.message(), "test"); cancels++; sync.arrive_and_wait(); // (B) sync with race winner @@ -341,29 +343,29 @@ void PerfTestSharedMutexRW() } } -TEST(Threading, SharedMutexSmokeCancellableEnabled) { TestSharedMutex(); } -TEST(Threading, SharedMutexSmokeCancellableDisabled) { TestSharedMutex(); } -TEST(Threading, SharedMutexSmokeFast) { TestSharedMutex(); } +TEST(Threading, SharedMutexSmokeCancelableEnabled) { TestSharedMutex(); } +TEST(Threading, SharedMutexSmokeCancelableDisabled) { TestSharedMutex(); } +TEST(Threading, SharedMutexSmokeFast) { TestSharedMutex(); } TEST(Threading, SharedMutexSmokeStd) { TestSharedMutex(); } -TEST(Threading, PerfTestSharedMutexReadersOnlyCancellableEnabled) { PerfTestSharedMutexReadersOnly(); } -TEST(Threading, PerfTestSharedMutexReadersOnlyCancellableDisabled) { PerfTestSharedMutexReadersOnly(); } -TEST(Threading, PerfTestSharedMutexReadersOnlyFast) { PerfTestSharedMutexReadersOnly(); } +TEST(Threading, PerfTestSharedMutexReadersOnlyCancelableEnabled) { PerfTestSharedMutexReadersOnly(); } +TEST(Threading, PerfTestSharedMutexReadersOnlyCancelableDisabled) { PerfTestSharedMutexReadersOnly(); } +TEST(Threading, PerfTestSharedMutexReadersOnlyFast) { PerfTestSharedMutexReadersOnly(); } TEST(Threading, PerfTestSharedMutexReadersOnlyStd) { PerfTestSharedMutexReadersOnly(); } -TEST(Threading, PerfTestSharedMutexWritersOnlyCancellableEnabled) { PerfTestSharedMutexWritersOnly(); } -TEST(Threading, PerfTestSharedMutexWritersOnlyCancellableDisabled) { PerfTestSharedMutexWritersOnly(); } -TEST(Threading, PerfTestSharedMutexWritersOnlyFast) { PerfTestSharedMutexWritersOnly(); } +TEST(Threading, PerfTestSharedMutexWritersOnlyCancelableEnabled) { PerfTestSharedMutexWritersOnly(); } +TEST(Threading, PerfTestSharedMutexWritersOnlyCancelableDisabled) { PerfTestSharedMutexWritersOnly(); } +TEST(Threading, PerfTestSharedMutexWritersOnlyFast) { PerfTestSharedMutexWritersOnly(); } TEST(Threading, PerfTestSharedMutexWritersOnlyStd) { 
PerfTestSharedMutexWritersOnly(); } -TEST(Threading, PerfTestSharedMutexRWCancellableEnabled) { PerfTestSharedMutexRW(); } -TEST(Threading, PerfTestSharedMutexRWCancellableDisabled) { PerfTestSharedMutexRW(); } -TEST(Threading, PerfTestSharedMutexRWFast) { PerfTestSharedMutexRW(); } +TEST(Threading, PerfTestSharedMutexRWCancelableEnabled) { PerfTestSharedMutexRW(); } +TEST(Threading, PerfTestSharedMutexRWCancelableDisabled) { PerfTestSharedMutexRW(); } +TEST(Threading, PerfTestSharedMutexRWFast) { PerfTestSharedMutexRW(); } TEST(Threading, PerfTestSharedMutexRWStd) { PerfTestSharedMutexRW(); } -#ifdef OS_LINUX /// These tests require cancellability +#ifdef OS_LINUX /// These tests require cancelability -TEST(Threading, SharedMutexCancelReaderCancellableEnabled) { TestSharedMutexCancelReader(); } -TEST(Threading, SharedMutexCancelWriterCancellableEnabled) { TestSharedMutexCancelWriter(); } +TEST(Threading, SharedMutexCancelReaderCancelableEnabled) { TestSharedMutexCancelReader(); } +TEST(Threading, SharedMutexCancelWriterCancelableEnabled) { TestSharedMutexCancelWriter(); } #endif From 534db794c1953e9fe89d2fae6517504ca86be93d Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 9 Jan 2023 15:05:41 +0000 Subject: [PATCH 127/262] more review fixes --- src/Common/CancelToken.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Common/CancelToken.cpp b/src/Common/CancelToken.cpp index 87bcdc26bd4..0d80bdcb5b2 100644 --- a/src/Common/CancelToken.cpp +++ b/src/Common/CancelToken.cpp @@ -138,7 +138,7 @@ bool CancelToken::wait(UInt32 * address, UInt32 value) void CancelToken::raise() { - std::unique_lock lock(signal_mutex); + std::unique_lock lock(signal_mutex); if (exception_code != 0) throw DB::Exception( std::exchange(exception_code, 0), @@ -167,7 +167,7 @@ std::mutex CancelToken::signal_mutex; void CancelToken::signalImpl(int code, const String & message) { // Serialize all signaling threads to avoid races due to concurrent signal()/raise() calls - std::unique_lock lock(signal_mutex); + std::unique_lock lock(signal_mutex); UInt64 s = state.load(); while (true) From 4236bc32ee29c34af6b6d25d4a64c547208fe3f7 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Thu, 22 Dec 2022 11:06:38 +0100 Subject: [PATCH 128/262] Analyzer duplicate alias crash fix --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 17 ++++------------- ...analyzer_duplicate_alias_crash_fix.reference | 0 ...02513_analyzer_duplicate_alias_crash_fix.sql | 4 ++++ 3 files changed, 8 insertions(+), 13 deletions(-) create mode 100644 tests/queries/0_stateless/02513_analyzer_duplicate_alias_crash_fix.reference create mode 100644 tests/queries/0_stateless/02513_analyzer_duplicate_alias_crash_fix.sql diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index e93548d34ed..d7a686d4dfa 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -2020,7 +2020,10 @@ QueryTreeNodePtr QueryAnalyzer::tryResolveTableIdentifierFromDatabaseCatalog(con StorageID storage_id(database_name, table_name); storage_id = context->resolveStorageID(storage_id); - auto storage = DatabaseCatalog::instance().getTable(storage_id, context); + auto storage = DatabaseCatalog::instance().tryGetTable(storage_id, context); + if (!storage) + return {}; + auto storage_lock = storage->lockForShare(context->getInitialQueryId(), context->getSettingsRef().lock_acquire_timeout); auto storage_snapshot = storage->getStorageSnapshot(storage->getInMemoryMetadataPtr(), context); 
@@ -4084,8 +4087,6 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi auto & in_second_argument = function_in_arguments_nodes[1]; auto * table_node = in_second_argument->as(); auto * table_function_node = in_second_argument->as(); - auto * query_node = in_second_argument->as(); - auto * union_node = in_second_argument->as(); if (table_node && dynamic_cast(table_node->getStorage().get()) != nullptr) { @@ -4118,16 +4119,6 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi in_second_argument = std::move(in_second_argument_query_node); } - else if (query_node || union_node) - { - IdentifierResolveScope subquery_scope(in_second_argument, &scope /*parent_scope*/); - subquery_scope.subquery_depth = scope.subquery_depth + 1; - - if (query_node) - resolveQuery(in_second_argument, subquery_scope); - else if (union_node) - resolveUnion(in_second_argument, subquery_scope); - } } /// Initialize function argument columns diff --git a/tests/queries/0_stateless/02513_analyzer_duplicate_alias_crash_fix.reference b/tests/queries/0_stateless/02513_analyzer_duplicate_alias_crash_fix.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/02513_analyzer_duplicate_alias_crash_fix.sql b/tests/queries/0_stateless/02513_analyzer_duplicate_alias_crash_fix.sql new file mode 100644 index 00000000000..fb50ea2c4ca --- /dev/null +++ b/tests/queries/0_stateless/02513_analyzer_duplicate_alias_crash_fix.sql @@ -0,0 +1,4 @@ +SET allow_experimental_analyzer = 1; + +SELECT toUInt64(NULL) AS x FROM (SELECT 1) HAVING x IN + (SELECT NULL FROM (SELECT x IN (SELECT x IN (SELECT 1), x IN (SELECT 1) FROM (SELECT 1 WHERE x IN (SELECT NULL FROM (SELECT NULL)))))); From ae56ac1b56d3258ab7af1afaae1222c31367745a Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 9 Jan 2023 16:46:07 +0000 Subject: [PATCH 129/262] add TSA support --- src/Common/CancelableSharedMutex.h | 15 ++++++++------- src/Common/SharedMutex.h | 15 ++++++++------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/src/Common/CancelableSharedMutex.h b/src/Common/CancelableSharedMutex.h index f989e8d5beb..0e5f48b4a93 100644 --- a/src/Common/CancelableSharedMutex.h +++ b/src/Common/CancelableSharedMutex.h @@ -4,6 +4,7 @@ #include #include +#include #include #include // for std::unique_lock and std::shared_lock @@ -13,7 +14,7 @@ namespace DB // Reimplementation of `std::shared_mutex` that can interoperate with thread cancelation via `CancelToken::signal()`. // It has cancelation point on waiting during `lock()` and `shared_lock()`. // NOTE: It has NO cancelation points on fast code path, when locking does not require waiting. 
-class CancelableSharedMutex +class TSA_CAPABILITY("CancelableSharedMutex") CancelableSharedMutex { public: CancelableSharedMutex(); @@ -22,14 +23,14 @@ public: CancelableSharedMutex & operator=(const CancelableSharedMutex &) = delete; // Exclusive ownership - void lock(); - bool try_lock(); - void unlock(); + void lock() TSA_ACQUIRE(); + bool try_lock() TSA_TRY_ACQUIRE(true); + void unlock() TSA_RELEASE(); // Shared ownership - void lock_shared(); - bool try_lock_shared(); - void unlock_shared(); + void lock_shared() TSA_ACQUIRE_SHARED(); + bool try_lock_shared() TSA_TRY_ACQUIRE_SHARED(true); + void unlock_shared() TSA_RELEASE_SHARED(); private: // State 64-bits layout: diff --git a/src/Common/SharedMutex.h b/src/Common/SharedMutex.h index ebe730ca419..26c649c6fa8 100644 --- a/src/Common/SharedMutex.h +++ b/src/Common/SharedMutex.h @@ -3,6 +3,7 @@ #ifdef OS_LINUX /// Because of futex #include +#include #include #include // for std::unique_lock and std::shared_lock @@ -10,7 +11,7 @@ namespace DB { // Faster implementation of `std::shared_mutex` based on a pair of futexes -class SharedMutex +class TSA_CAPABILITY("SharedMutex") SharedMutex { public: SharedMutex(); @@ -19,14 +20,14 @@ public: SharedMutex & operator=(const SharedMutex &) = delete; // Exclusive ownership - void lock(); - bool try_lock(); - void unlock(); + void lock() TSA_ACQUIRE(); + bool try_lock() TSA_TRY_ACQUIRE(true); + void unlock() TSA_RELEASE(); // Shared ownership - void lock_shared(); - bool try_lock_shared(); - void unlock_shared(); + void lock_shared() TSA_ACQUIRE_SHARED(); + bool try_lock_shared() TSA_TRY_ACQUIRE_SHARED(true); + void unlock_shared() TSA_RELEASE_SHARED(); private: static constexpr UInt64 readers = (1ull << 32ull) - 1ull; // Lower 32 bits of state From d561f66419868928769e4dbf47b8d751e6d47e26 Mon Sep 17 00:00:00 2001 From: serxa Date: Mon, 9 Jan 2023 16:48:39 +0000 Subject: [PATCH 130/262] more TSA support --- base/base/defines.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/base/base/defines.h b/base/base/defines.h index 52310362991..a516e4f575c 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -144,6 +144,13 @@ # define TSA_REQUIRES_SHARED(...) __attribute__((requires_shared_capability(__VA_ARGS__))) /// thread needs shared possession of given capability # define TSA_ACQUIRED_AFTER(...) __attribute__((acquired_after(__VA_ARGS__))) /// annotated lock must be locked after given lock # define TSA_NO_THREAD_SAFETY_ANALYSIS __attribute__((no_thread_safety_analysis)) /// disable TSA for a function +# define TSA_CAPABILITY(...) __attribute__((capability(__VA_ARGS__))) /// object of a class can be used as capability +# define TSA_ACQUIRE(...) __attribute__((acquire_capability(__VA_ARGS__))) /// function acquires a capability, but does not release it +# define TSA_TRY_ACQUIRE(...) __attribute__((try_acquire_capability(__VA_ARGS__))) /// function tries to acquire a capability and returns a boolean value indicating success or failure +# define TSA_RELEASE(...) __attribute__((release_capability(__VA_ARGS__))) /// function releases the given capability +# define TSA_ACQUIRE_SHARED(...) __attribute__((acquire_shared_capability(__VA_ARGS__))) /// function acquires a shared capability, but does not release it +# define TSA_TRY_ACQUIRE_SHARED(...) __attribute__((try_acquire_shared_capability(__VA_ARGS__))) /// function tries to acquire a shared capability and returns a boolean value indicating success or failure +# define TSA_RELEASE_SHARED(...) 
__attribute__((release_shared_capability(__VA_ARGS__))) /// function releases the given shared capability /// Macros for suppressing TSA warnings for specific reads/writes (instead of suppressing it for the whole function) /// They use a lambda function to apply function attribute to a single statement. This enable us to suppress warnings locally instead of From b1407b1070e5eb44daac8cd8df40ff7d54e1d888 Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 10 Jan 2023 01:19:42 +0000 Subject: [PATCH 131/262] fix TSA support --- base/base/defines.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/base/base/defines.h b/base/base/defines.h index a516e4f575c..391e97ab406 100644 --- a/base/base/defines.h +++ b/base/base/defines.h @@ -171,6 +171,13 @@ # define TSA_REQUIRES(...) # define TSA_REQUIRES_SHARED(...) # define TSA_NO_THREAD_SAFETY_ANALYSIS +# define TSA_CAPABILITY(...) +# define TSA_ACQUIRE(...) +# define TSA_TRY_ACQUIRE(...) +# define TSA_RELEASE(...) +# define TSA_ACQUIRE_SHARED(...) +# define TSA_TRY_ACQUIRE_SHARED(...) +# define TSA_RELEASE_SHARED(...) # define TSA_SUPPRESS_WARNING_FOR_READ(x) (x) # define TSA_SUPPRESS_WARNING_FOR_WRITE(x) (x) From 0f80ad6e069a68117487f3108022d0bfe2abe4dd Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 10 Jan 2023 01:26:59 +0000 Subject: [PATCH 132/262] make style-check happy --- src/Common/CancelToken.cpp | 6 +++--- src/Common/CancelToken.h | 28 ++++++++++++++-------------- src/Common/CancelableSharedMutex.h | 6 +++--- src/Common/tests/gtest_threading.cpp | 10 +++++----- 4 files changed, 25 insertions(+), 25 deletions(-) diff --git a/src/Common/CancelToken.cpp b/src/Common/CancelToken.cpp index 0d80bdcb5b2..f1d2b9d119f 100644 --- a/src/Common/CancelToken.cpp +++ b/src/Common/CancelToken.cpp @@ -112,7 +112,7 @@ bool CancelToken::wait(UInt32 * address, UInt32 value) // Start cancelable wait. Spurious wake-up is possible. futexWait(address, value); - // "Release" futex and check for cancelation + // "Release" futex and check for cancellation s = state.load(); while (true) { @@ -128,7 +128,7 @@ bool CancelToken::wait(UInt32 * address, UInt32 value) } } if (state.compare_exchange_strong(s, 0)) - return true; // There was no cancelation; futex "released" + return true; // There was no cancellation; futex "released" } // Reset signaled bit @@ -182,7 +182,7 @@ void CancelToken::signalImpl(int code, const String & message) exception_message = message; if ((s & disabled) == disabled) - return; // Cancelation is disabled - just signal token for later, but don't wake + return; // cancellation is disabled - just signal token for later, but don't wake std::atomic * address = reinterpret_cast *>(s & disabled); if (address == nullptr) return; // Thread is currently not waiting on futex - wake-up not required diff --git a/src/Common/CancelToken.h b/src/Common/CancelToken.h index 27b9d41f0f3..22afdfe38f4 100644 --- a/src/Common/CancelToken.h +++ b/src/Common/CancelToken.h @@ -15,8 +15,8 @@ namespace DB { -// Scoped object, enabling thread cancelation (cannot be nested). -// Intended to be used once per cancelable task. It erases any previously held cancelation signal. +// Scoped object, enabling thread cancellation (cannot be nested). +// Intended to be used once per cancelable task. It erases any previously held cancellation signal. // Note that by default thread is not cancelable. 
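For context, not part of the patch: the `TSA_*` capability macros added to `base/base/defines.h` feed clang's `-Wthread-safety` analysis, which is what the annotations on `SharedMutex` and `CancelableSharedMutex` above enable. A minimal sketch of a guarded counter; the include paths and the pre-existing `TSA_GUARDED_BY` macro are assumptions:

```cpp
#include <Common/SharedMutex.h>   // the mutex annotated in this patch (assumed include path)
#include <base/defines.h>         // TSA_* macros (assumed include path)

// Compile with clang and -Wthread-safety to see the diagnostics described in the comments.
class Counter
{
public:
    void increment()
    {
        mutex.lock();
        ++value;            // OK: the analysis sees the exclusive capability of `mutex` held here
        mutex.unlock();
    }

    void incrementBroken()
    {
        ++value;            // clang would warn: writing 'value' requires holding 'mutex' exclusively
    }

private:
    DB::SharedMutex mutex;
    int value TSA_GUARDED_BY(mutex) = 0;
};
```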
struct Cancelable { @@ -24,14 +24,14 @@ struct Cancelable ~Cancelable(); }; -// Scoped object, disabling thread cancelation (cannot be nested; must be inside `Cancelable` region) +// Scoped object, disabling thread cancellation (cannot be nested; must be inside `Cancelable` region) struct NonCancelable { NonCancelable(); ~NonCancelable(); }; -// Responsible for synchronization needed to deliver thread cancelation signal. +// Responsible for synchronization needed to deliver thread cancellation signal. // Basic building block for cancelable synchronization primitives. // Allows to perform cancelable wait on memory addresses (think futex) class CancelToken @@ -54,14 +54,14 @@ public: // Cancelable wait on memory address (futex word). // Thread will do atomic compare-and-sleep `*address == value`. Waiting will continue until `notify_one()` // or `notify_all()` will be called with the same `address` or calling thread will be canceled using `signal()`. - // Note that spurious wake-ups are also possible due to cancelation of other waiters on the same `address`. + // Note that spurious wake-ups are also possible due to cancellation of other waiters on the same `address`. // WARNING: `address` must be 2-byte aligned and `value` highest bit must be zero. // Return value: // true - woken by either notify or spurious wakeup; - // false - iff cancelation signal has been received. + // false - iff cancellation signal has been received. // Implementation details: - // It registers `address` inside token's `state` to allow other threads to wake this thread and deliver cancelation signal. - // Highest bit of `*address` is used for guaranteed delivery of the signal, but is guaranteed to be zero on return due to cancelation. + // It registers `address` inside token's `state` to allow other threads to wake this thread and deliver cancellation signal. + // Highest bit of `*address` is used for guaranteed delivery of the signal, but is guaranteed to be zero on return due to cancellation. // Intended to be called only by thread associated with this token. bool wait(UInt32 * address, UInt32 value); @@ -75,12 +75,12 @@ public: static void notifyAll(UInt32 * address); // Send cancel signal to thread with specified `tid`. - // If thread was waiting using `wait()` it will be woken up (unless cancelation is disabled). + // If thread was waiting using `wait()` it will be woken up (unless cancellation is disabled). // Can be called from any thread. static void signal(UInt64 tid); static void signal(UInt64 tid, int code, const String & message); - // Flag used to deliver cancelation into memory address to wake a thread. + // Flag used to deliver cancellation into memory address to wake a thread. // Note that most significant bit at `addresses` to be used with `wait()` is reserved. static constexpr UInt32 signaled = 1u << 31u; @@ -95,7 +95,7 @@ private: state.store(0); } - // Enable thread cancelation. See `NonCancelable` struct. + // Enable thread cancellation. See `NonCancelable` struct. // Intended to be called only by thread associated with this token. void enable() { @@ -103,7 +103,7 @@ private: state.fetch_and(~disabled); } - // Disable thread cancelation. See `NonCancelable` struct. + // Disable thread cancellation. See `NonCancelable` struct. // Intended to be called only by thread associated with this token. 
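Aside, not part of the patch: the `wait()` contract documented above (true on wake-up or spurious wake-up, false only when a cancellation signal was delivered) is meant to be used in a re-check loop, with `raise()` turning the delivered signal into an exception. A minimal sketch under assumed include paths, keeping the highest bit of the waited-on word clear as the warning requires:

```cpp
#include <Common/CancelToken.h>
#include <base/types.h>   // UInt32 (assumed)
#include <atomic>

// Block until `flag` becomes non-zero, or rethrow the cancellation signal.
// The calling thread should be inside a Cancelable scope for signals to interrupt the wait.
void waitForFlagOrCancel(std::atomic<UInt32> & flag)
{
    UInt32 observed = flag.load();
    while (observed == 0)
    {
        if (!DB::CancelToken::local().wait(reinterpret_cast<UInt32 *>(&flag), observed))
            DB::CancelToken::local().raise();   // cancellation received: throw the stored exception
        observed = flag.load();                 // spurious wake-ups are possible, so re-check
    }
}
```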
void disable() { @@ -143,11 +143,11 @@ private: // All signal handling logic should be globally serialized using this mutex static std::mutex signal_mutex; - // Cancelation state + // Cancellation state alignas(64) std::atomic state; [[maybe_unused]] char padding[64 - sizeof(state)]; - // Cancelation exception + // Cancellation exception int exception_code; String exception_message; diff --git a/src/Common/CancelableSharedMutex.h b/src/Common/CancelableSharedMutex.h index 0e5f48b4a93..dfd9631c564 100644 --- a/src/Common/CancelableSharedMutex.h +++ b/src/Common/CancelableSharedMutex.h @@ -11,9 +11,9 @@ namespace DB { -// Reimplementation of `std::shared_mutex` that can interoperate with thread cancelation via `CancelToken::signal()`. -// It has cancelation point on waiting during `lock()` and `shared_lock()`. -// NOTE: It has NO cancelation points on fast code path, when locking does not require waiting. +// Reimplementation of `std::shared_mutex` that can interoperate with thread cancellation via `CancelToken::signal()`. +// It has cancellation point on waiting during `lock()` and `shared_lock()`. +// NOTE: It has NO cancellation points on fast code path, when locking does not require waiting. class TSA_CAPABILITY("CancelableSharedMutex") CancelableSharedMutex { public: diff --git a/src/Common/tests/gtest_threading.cpp b/src/Common/tests/gtest_threading.cpp index a4ed7ad6642..8662e93e81b 100644 --- a/src/Common/tests/gtest_threading.cpp +++ b/src/Common/tests/gtest_threading.cpp @@ -154,9 +154,9 @@ void TestSharedMutexCancelReader() // This sync is crucial. It is needed to hold `lock` long enough. // It guarantees that every canceled thread will find `sm` blocked by writer, and thus will begin to wait. - // Wait() call is required for cancelation. Otherwise, fastpath acquire w/o wait will not generate exception. + // Wait() call is required for cancellation. Otherwise, fastpath acquire w/o wait will not generate exception. // And this is the desired behaviour. - cancel_sync.arrive_and_wait(); // (C) wait for cancelation to finish, before unlock. + cancel_sync.arrive_and_wait(); // (C) wait for cancellation to finish, before unlock. } } @@ -206,9 +206,9 @@ void TestSharedMutexCancelWriter() // This sync is crucial. It is needed to hold `lock` long enough. // It guarantees that every canceled thread will find `sm` blocked, and thus will begin to wait. - // Wait() call is required for cancelation. Otherwise, fastpath acquire w/o wait will not generate exception. + // Wait() call is required for cancellation. Otherwise, fastpath acquire w/o wait will not generate exception. // And this is the desired behaviour. - sync.arrive_and_wait(); // (B) wait for cancelation to finish, before unlock. + sync.arrive_and_wait(); // (B) wait for cancellation to finish, before unlock. 
} catch (DB::Exception & e) { @@ -363,7 +363,7 @@ TEST(Threading, PerfTestSharedMutexRWCancelableDisabled) { PerfTestSharedMutexRW TEST(Threading, PerfTestSharedMutexRWFast) { PerfTestSharedMutexRW(); } TEST(Threading, PerfTestSharedMutexRWStd) { PerfTestSharedMutexRW(); } -#ifdef OS_LINUX /// These tests require cancelability +#ifdef OS_LINUX /// These tests require cancellability TEST(Threading, SharedMutexCancelReaderCancelableEnabled) { TestSharedMutexCancelReader(); } TEST(Threading, SharedMutexCancelWriterCancelableEnabled) { TestSharedMutexCancelWriter(); } From 09c1cecb01f2b4ad339f4d0641351b3c0363ad3a Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Tue, 10 Jan 2023 10:56:13 +0800 Subject: [PATCH 133/262] fix build error --- src/Functions/dateDiff.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Functions/dateDiff.cpp b/src/Functions/dateDiff.cpp index 60668f81edf..d43ef2d4caf 100644 --- a/src/Functions/dateDiff.cpp +++ b/src/Functions/dateDiff.cpp @@ -225,8 +225,8 @@ public: } else if constexpr (std::is_same_v>>) { - auto x_day_of_week = TransformDateTime64(transform_x.getScaleMultiplier()).execute(x, timezone_x); - auto y_day_of_week = TransformDateTime64(transform_y.getScaleMultiplier()).execute(y, timezone_y); + auto x_day_of_week = TransformDateTime64(transform_x.getScaleMultiplier()).execute(x, 0, timezone_x); + auto y_day_of_week = TransformDateTime64(transform_y.getScaleMultiplier()).execute(y, 0, timezone_y); if ((x_day_of_week > y_day_of_week) || ((x_day_of_week == y_day_of_week) && (a_comp.time.hour > b_comp.time.hour)) || ((a_comp.time.hour == b_comp.time.hour) && ((a_comp.time.minute > b_comp.time.minute) From 7aef7d95de16d6e69d603f14c2fae31d30f98e17 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 10 Jan 2023 09:15:07 +0000 Subject: [PATCH 134/262] Small fixes for keeper_map tests --- tests/integration/test_keeper_map/test.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_keeper_map/test.py b/tests/integration/test_keeper_map/test.py index 71f6343101a..d2a3d4f3748 100644 --- a/tests/integration/test_keeper_map/test.py +++ b/tests/integration/test_keeper_map/test.py @@ -47,13 +47,25 @@ def remove_children(client, path): def test_create_keeper_map(started_cluster): + node.query("DROP TABLE IF EXISTS test_keeper_map SYNC") + node.query("DROP TABLE IF EXISTS test_keeper_map_another SYNC") + node.query( "CREATE TABLE test_keeper_map (key UInt64, value UInt64) ENGINE = KeeperMap('/test1') PRIMARY KEY(key);" ) zk_client = get_genuine_zk() def assert_children_size(path, expected_size): - assert len(zk_client.get_children(path)) == expected_size + children_size = 0 + # 4 secs should be more than enough for replica to sync + for _ in range(10): + children_size = len(zk_client.get_children(path)) + if children_size == expected_size: + return + sleep(0.4) + assert ( + False + ), f"Invalid number of children for '{path}': actual {children_size}, expected {expected_size}" def assert_root_children_size(expected_size): assert_children_size("/test_keeper_map/test1", expected_size) @@ -138,6 +150,8 @@ def test_create_drop_keeper_map_concurrent(started_cluster): def test_keeper_map_without_zk(started_cluster): + node.query("DROP TABLE IF EXISTS test_keeper_map SYNC") + def assert_keeper_exception_after_partition(query): with PartitionManager() as pm: pm.drop_instance_zk_connections(node) From 74ba9d1f2bdced9ea69093f4745f44d1336c3298 Mon Sep 17 00:00:00 2001 From: Smita 
Kulkarni Date: Tue, 10 Jan 2023 10:15:38 +0100 Subject: [PATCH 135/262] Updated test to .sh to use unique database name - 40907 Parameterized views as table functions --- .../02428_parameterized_view.reference | 7 ++ .../0_stateless/02428_parameterized_view.sh | 88 +++++++++++++++++++ .../0_stateless/02428_parameterized_view.sql | 88 ------------------- 3 files changed, 95 insertions(+), 88 deletions(-) create mode 100755 tests/queries/0_stateless/02428_parameterized_view.sh delete mode 100644 tests/queries/0_stateless/02428_parameterized_view.sql diff --git a/tests/queries/0_stateless/02428_parameterized_view.reference b/tests/queries/0_stateless/02428_parameterized_view.reference index da3ad8a9a3c..db3ffd0b01e 100644 --- a/tests/queries/0_stateless/02428_parameterized_view.reference +++ b/tests/queries/0_stateless/02428_parameterized_view.reference @@ -1,5 +1,6 @@ 20 20 +ERROR 10 50 SELECT @@ -12,9 +13,15 @@ FROM FROM default.test_02428_Catalog WHERE Price = _CAST(10, \'UInt64\') ) AS test_02428_pv1 +ERROR +ERROR +ERROR 50 +ERROR 10 +ERROR 20 +ERROR 30 20 30 diff --git a/tests/queries/0_stateless/02428_parameterized_view.sh b/tests/queries/0_stateless/02428_parameterized_view.sh new file mode 100755 index 00000000000..44c1976a654 --- /dev/null +++ b/tests/queries/0_stateless/02428_parameterized_view.sh @@ -0,0 +1,88 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + +export CLICKHOUSE_TEST_UNIQUE_NAME="${CLICKHOUSE_TEST_NAME}_${CLICKHOUSE_DATABASE}" + +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv1" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv2" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv3" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv4" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv5" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv6" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_pv7" +$CLICKHOUSE_CLIENT -q "DROP VIEW IF EXISTS test_02428_v1" +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_02428_Catalog" +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ${CLICKHOUSE_TEST_UNIQUE_NAME}.pv1" +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog" +$CLICKHOUSE_CLIENT -q "DROP DATABASE IF EXISTS ${CLICKHOUSE_TEST_UNIQUE_NAME}" + +$CLICKHOUSE_CLIENT -q "CREATE TABLE test_02428_Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory" + +$CLICKHOUSE_CLIENT -q "INSERT INTO test_02428_Catalog VALUES ('Pen', 10, 3)" +$CLICKHOUSE_CLIENT -q "INSERT INTO test_02428_Catalog VALUES ('Book', 50, 2)" +$CLICKHOUSE_CLIENT -q "INSERT INTO test_02428_Catalog VALUES ('Paper', 20, 1)" + +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv1 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64}" +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv1(price=20)" +$CLICKHOUSE_CLIENT -q "SELECT Price FROM \`test_02428_pv1\`(price=20)" + +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv1" 2>&1 | grep -Fq "UNKNOWN_QUERY_PARAMETER" && echo 'ERROR' || echo 'OK' +$CLICKHOUSE_CLIENT --param_p 10 -q "SELECT Price FROM test_02428_pv1(price={p:UInt64})" + +$CLICKHOUSE_CLIENT --param_l 1 -q "SELECT Price FROM test_02428_pv1(price=50) LIMIT ({l:UInt64})" +$CLICKHOUSE_CLIENT -q "DETACH TABLE test_02428_pv1" +$CLICKHOUSE_CLIENT -q "ATTACH TABLE test_02428_pv1" + +$CLICKHOUSE_CLIENT -q "EXPLAIN SYNTAX SELECT * from test_02428_pv1(price=10)" + +$CLICKHOUSE_CLIENT -q "INSERT INTO test_02428_pv1 VALUES 
('Bag', 50, 2)" 2>&1 | grep -Fq "NOT_IMPLEMENTED" && echo 'ERROR' || echo 'OK' + +$CLICKHOUSE_CLIENT -q "SELECT Price FROM pv123(price=20)" 2>&1 | grep -Fq "UNKNOWN_FUNCTION" && echo 'ERROR' || echo 'OK' + +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_v1 AS SELECT * FROM test_02428_Catalog WHERE Price=10" + +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_v1(price=10)" 2>&1 | grep -Fq "UNKNOWN_FUNCTION" && echo 'ERROR' || echo 'OK' + +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv2 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64}" + +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv2(price=50,quantity=2)" +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv2(price=50)" 2>&1 | grep -Fq "UNKNOWN_QUERY_PARAMETER" && echo 'ERROR' || echo 'OK' + +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv3 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} AND Quantity=3" +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv3(price=10)" + +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv4 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} AND Quantity={price:UInt64}" 2>&1 | grep -Fq "DUPLICATE_COLUMN" && echo 'ERROR' || echo 'OK' + +$CLICKHOUSE_CLIENT -q "CREATE DATABASE ${CLICKHOUSE_TEST_UNIQUE_NAME}" +$CLICKHOUSE_CLIENT -q "CREATE TABLE ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory" +$CLICKHOUSE_CLIENT -q "INSERT INTO ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog VALUES ('Pen', 10, 3)" +$CLICKHOUSE_CLIENT -q "INSERT INTO ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog VALUES ('Book', 50, 2)" +$CLICKHOUSE_CLIENT -q "INSERT INTO ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog VALUES ('Paper', 20, 1)" +$CLICKHOUSE_CLIENT -q "CREATE VIEW ${CLICKHOUSE_TEST_UNIQUE_NAME}.pv1 AS SELECT * FROM ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog WHERE Price={price:UInt64}" +$CLICKHOUSE_CLIENT -q "SELECT Price FROM ${CLICKHOUSE_TEST_UNIQUE_NAME}.pv1(price=20)" +$CLICKHOUSE_CLIENT -q "SELECT Price FROM \`${CLICKHOUSE_TEST_UNIQUE_NAME}.pv1\`(price=20)" 2>&1 | grep -Fq "UNKNOWN_FUNCTION" && echo 'ERROR' || echo 'OK' + + +$CLICKHOUSE_CLIENT -q "INSERT INTO test_02428_Catalog VALUES ('Book2', 30, 8)" +$CLICKHOUSE_CLIENT -q "INSERT INTO test_02428_Catalog VALUES ('Book3', 30, 8)" +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv5 AS SELECT Price FROM test_02428_Catalog WHERE {price:UInt64} HAVING Quantity in (SELECT {quantity:UInt64}) LIMIT {limit:UInt64}" +$CLICKHOUSE_CLIENT -q "SELECT Price FROM test_02428_pv5(price=30, quantity=8, limit=1)" +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv6 AS SELECT Price+{price:UInt64} FROM test_02428_Catalog GROUP BY Price+{price:UInt64} ORDER BY Price+{price:UInt64}" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02428_pv6(price=10)" +$CLICKHOUSE_CLIENT -q "CREATE VIEW test_02428_pv7 AS SELECT Price/{price:UInt64} FROM test_02428_Catalog ORDER BY Price" +$CLICKHOUSE_CLIENT -q "SELECT * FROM test_02428_pv7(price=10)" + +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv1" +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv2" +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv3" +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv5" +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv6" +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_pv7" +$CLICKHOUSE_CLIENT -q "DROP VIEW test_02428_v1" +$CLICKHOUSE_CLIENT -q "DROP TABLE test_02428_Catalog" +$CLICKHOUSE_CLIENT -q "DROP TABLE ${CLICKHOUSE_TEST_UNIQUE_NAME}.pv1" +$CLICKHOUSE_CLIENT -q "DROP TABLE ${CLICKHOUSE_TEST_UNIQUE_NAME}.Catalog" +$CLICKHOUSE_CLIENT -q 
"DROP DATABASE ${CLICKHOUSE_TEST_UNIQUE_NAME}" \ No newline at end of file diff --git a/tests/queries/0_stateless/02428_parameterized_view.sql b/tests/queries/0_stateless/02428_parameterized_view.sql deleted file mode 100644 index fbc1d8b2970..00000000000 --- a/tests/queries/0_stateless/02428_parameterized_view.sql +++ /dev/null @@ -1,88 +0,0 @@ -DROP VIEW IF EXISTS test_02428_pv1; -DROP VIEW IF EXISTS test_02428_pv2; -DROP VIEW IF EXISTS test_02428_pv3; -DROP VIEW IF EXISTS test_02428_pv4; -DROP VIEW IF EXISTS test_02428_pv5; -DROP VIEW IF EXISTS test_02428_pv6; -DROP VIEW IF EXISTS test_02428_pv7; -DROP VIEW IF EXISTS test_02428_v1; -DROP TABLE IF EXISTS test_02428_Catalog; -DROP TABLE IF EXISTS db_02428.pv1; -DROP TABLE IF EXISTS db_02428.Catalog; -DROP DATABASE IF EXISTS db_02428; - -CREATE TABLE test_02428_Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; - -INSERT INTO test_02428_Catalog VALUES ('Pen', 10, 3); -INSERT INTO test_02428_Catalog VALUES ('Book', 50, 2); -INSERT INTO test_02428_Catalog VALUES ('Paper', 20, 1); - -CREATE VIEW test_02428_pv1 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64}; -SELECT Price FROM test_02428_pv1(price=20); -SELECT Price FROM `test_02428_pv1`(price=20); - -set param_p=10; -SELECT Price FROM test_02428_pv1; -- { serverError UNKNOWN_QUERY_PARAMETER} -SELECT Price FROM test_02428_pv1(price={p:UInt64}); - -set param_l=1; -SELECT Price FROM test_02428_pv1(price=50) LIMIT ({l:UInt64}); - -DETACH TABLE test_02428_pv1; -ATTACH TABLE test_02428_pv1; - -EXPLAIN SYNTAX SELECT * from test_02428_pv1(price=10); - -INSERT INTO test_02428_pv1 VALUES ('Bag', 50, 2); -- { serverError NOT_IMPLEMENTED} - -SELECT Price FROM pv123(price=20); -- { serverError UNKNOWN_FUNCTION } - -CREATE VIEW test_02428_v1 AS SELECT * FROM test_02428_Catalog WHERE Price=10; - -SELECT Price FROM test_02428_v1(price=10); -- { serverError UNKNOWN_FUNCTION } - -CREATE VIEW test_02428_pv2 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} AND Quantity={quantity:UInt64}; -SELECT Price FROM test_02428_pv2(price=50,quantity=2); - -SELECT Price FROM test_02428_pv2(price=50); -- { serverError UNKNOWN_QUERY_PARAMETER} - -CREATE VIEW test_02428_pv3 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} AND Quantity=3; -SELECT Price FROM test_02428_pv3(price=10); - -CREATE VIEW test_02428_pv4 AS SELECT * FROM test_02428_Catalog WHERE Price={price:UInt64} AND Quantity={price:UInt64}; -- {serverError DUPLICATE_COLUMN} - -CREATE DATABASE db_02428; - -CREATE TABLE db_02428.Catalog (Name String, Price UInt64, Quantity UInt64) ENGINE = Memory; - -INSERT INTO db_02428.Catalog VALUES ('Pen', 10, 3); -INSERT INTO db_02428.Catalog VALUES ('Book', 50, 2); -INSERT INTO db_02428.Catalog VALUES ('Paper', 20, 1); - -CREATE VIEW db_02428.pv1 AS SELECT * FROM db_02428.Catalog WHERE Price={price:UInt64}; -SELECT Price FROM db_02428.pv1(price=20); -SELECT Price FROM `db_02428.pv1`(price=20); -- { serverError UNKNOWN_FUNCTION } - -INSERT INTO test_02428_Catalog VALUES ('Book2', 30, 8); -INSERT INTO test_02428_Catalog VALUES ('Book3', 30, 8); - -CREATE VIEW test_02428_pv5 AS SELECT Price FROM test_02428_Catalog WHERE {price:UInt64} HAVING Quantity in (SELECT {quantity:UInt64}) LIMIT {limit:UInt64}; -SELECT Price FROM test_02428_pv5(price=30, quantity=8,limit=1); - -CREATE VIEW test_02428_pv6 AS SELECT Price+{price:UInt64} FROM test_02428_Catalog GROUP BY Price+{price:UInt64} ORDER BY Price+{price:UInt64}; -SELECT * FROM test_02428_pv6(price=10); - -CREATE VIEW 
test_02428_pv7 AS SELECT Price/{price:UInt64} FROM test_02428_Catalog ORDER BY Price; -SELECT * FROM test_02428_pv7(price=10); - -DROP VIEW test_02428_pv1; -DROP VIEW test_02428_pv2; -DROP VIEW test_02428_pv3; -DROP VIEW test_02428_pv5; -DROP VIEW test_02428_pv6; -DROP VIEW test_02428_pv7; -DROP VIEW test_02428_v1; -DROP TABLE test_02428_Catalog; -DROP TABLE db_02428.pv1; -DROP TABLE db_02428.Catalog; -DROP DATABASE db_02428; \ No newline at end of file From 613ff3387dc885677f61d2a4250100c91dc06425 Mon Sep 17 00:00:00 2001 From: kssenii Date: Mon, 9 Jan 2023 16:33:26 +0100 Subject: [PATCH 136/262] Better --- src/Storages/FileLog/StorageFileLog.cpp | 30 +++++++++++++++++++------ 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/src/Storages/FileLog/StorageFileLog.cpp b/src/Storages/FileLog/StorageFileLog.cpp index b1b54a1700a..5835dc3294f 100644 --- a/src/Storages/FileLog/StorageFileLog.cpp +++ b/src/Storages/FileLog/StorageFileLog.cpp @@ -47,6 +47,8 @@ namespace const auto MAX_THREAD_WORK_DURATION_MS = 60000; } +static constexpr auto TMP_SUFFIX = ".tmp"; + StorageFileLog::StorageFileLog( const StorageID & table_id_, ContextPtr context_, @@ -234,23 +236,24 @@ void StorageFileLog::serialize(UInt64 inode, const FileMeta & file_meta) const { checkOffsetIsValid(file_meta.file_name, file_meta.last_writen_position); } - else - { - disk->createFile(full_path); - } + + std::string tmp_path = full_path + TMP_SUFFIX; + disk->removeFileIfExists(tmp_path); try { - auto out = disk->writeFile(full_path); + disk->createFile(tmp_path); + auto out = disk->writeFile(tmp_path); writeIntText(inode, *out); writeChar('\n', *out); writeIntText(file_meta.last_writen_position, *out); } catch (...) { - disk->removeFile(full_path); + disk->removeFileIfExists(tmp_path); throw; } + disk->replaceFile(tmp_path, full_path); } void StorageFileLog::deserialize() @@ -258,15 +261,28 @@ void StorageFileLog::deserialize() if (!disk->exists(metadata_base_path)) return; + std::vector files_to_remove; + /// In case of single file (not a watched directory), /// iterated directory always has one file inside. 
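Aside, not part of the patch: the `StorageFileLog::serialize()` change above switches to the usual safe-write pattern, writing to a `.tmp` sibling and then replacing the target, expressed there through the `IDisk` interface. A generic standalone sketch of the same pattern with the standard library, for illustration only:

```cpp
#include <filesystem>
#include <fstream>
#include <string>

// Write `contents` to `path` so that readers never observe a half-written file:
// on failure the old file (if any) is left untouched, on success it is replaced.
// A fully crash-safe variant would also fsync the file and directory before the rename.
void atomicWrite(const std::filesystem::path & path, const std::string & contents)
{
    const auto tmp_path = path.string() + ".tmp";
    {
        std::ofstream out(tmp_path, std::ios::binary | std::ios::trunc);
        out.exceptions(std::ios::failbit | std::ios::badbit);
        out << contents;
        out.flush();
    }
    // rename() within one filesystem replaces the destination atomically on POSIX.
    std::filesystem::rename(tmp_path, path);
}
```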
for (const auto dir_iter = disk->iterateDirectory(metadata_base_path); dir_iter->isValid(); dir_iter->next()) { - auto [metadata, inode] = readMetadata(dir_iter->name()); + const auto & filename = dir_iter->name(); + if (filename.ends_with(TMP_SUFFIX)) + { + files_to_remove.push_back(getFullMetaPath(filename)); + continue; + } + + auto [metadata, inode] = readMetadata(filename); if (!metadata) continue; + file_infos.meta_by_inode.emplace(inode, metadata); } + + for (const auto & file : files_to_remove) + disk->removeFile(file); } UInt64 StorageFileLog::getInode(const String & file_name) From c95925a5abae54412ea20746beae712d838d2825 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 10 Jan 2023 11:16:29 +0100 Subject: [PATCH 137/262] Fixed tests --- src/Analyzer/Passes/QueryAnalysisPass.cpp | 51 ++++++++++++------- .../02337_analyzer_columns_basic.sql | 2 +- 2 files changed, 33 insertions(+), 20 deletions(-) diff --git a/src/Analyzer/Passes/QueryAnalysisPass.cpp b/src/Analyzer/Passes/QueryAnalysisPass.cpp index d7a686d4dfa..4aa6422b6b8 100644 --- a/src/Analyzer/Passes/QueryAnalysisPass.cpp +++ b/src/Analyzer/Passes/QueryAnalysisPass.cpp @@ -2870,7 +2870,10 @@ IdentifierResolveResult QueryAnalyzer::tryResolveIdentifierInParentScopes(const if (resolved_identifier) { - bool is_cte = resolved_identifier->as() && resolved_identifier->as()->isCTE(); + auto * subquery_node = resolved_identifier->as(); + auto * union_node = resolved_identifier->as(); + + bool is_cte = (subquery_node && subquery_node->isCTE()) || (union_node && union_node->isCTE()); /** From parent scopes we can resolve table identifiers only as CTE. * Example: SELECT (SELECT 1 FROM a) FROM test_table AS a; @@ -4119,6 +4122,10 @@ ProjectionNames QueryAnalyzer::resolveFunction(QueryTreeNodePtr & node, Identifi in_second_argument = std::move(in_second_argument_query_node); } + else + { + resolveExpressionNode(in_second_argument, scope, false /*allow_lambda_expression*/, true /*allow_table_expression*/); + } } /// Initialize function argument columns @@ -4708,13 +4715,29 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id { node = tryResolveIdentifier({unresolved_identifier, IdentifierLookupContext::TABLE_EXPRESSION}, scope).resolved_identifier; - /// If table identifier is resolved as CTE clone it - bool resolved_as_cte = node && node->as() && node->as()->isCTE(); + /// If table identifier is resolved as CTE clone it and resolve + auto * subquery_node = node->as(); + auto * union_node = node->as(); + bool resolved_as_cte = (subquery_node && subquery_node->isCTE()) || (union_node && union_node->isCTE()); if (resolved_as_cte) { node = node->clone(); - node->as().setIsCTE(false); + subquery_node = node->as(); + union_node = node->as(); + + if (subquery_node) + subquery_node->setIsCTE(false); + else + union_node->setIsCTE(false); + + IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/); + subquery_scope.subquery_depth = scope.subquery_depth + 1; + + if (subquery_node) + resolveQuery(node, subquery_scope); + else + resolveUnion(node, subquery_scope); } } @@ -4830,6 +4853,9 @@ ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id IdentifierResolveScope subquery_scope(node, &scope /*parent_scope*/); subquery_scope.subquery_depth = scope.subquery_depth + 1; + ++subquery_counter; + std::string projection_name = "_subquery_" + std::to_string(subquery_counter); + if (node_type == QueryTreeNodeType::QUERY) resolveQuery(node, subquery_scope); else @@ -4838,9 +4864,8 @@ 
ProjectionNames QueryAnalyzer::resolveExpressionNode(QueryTreeNodePtr & node, Id if (!allow_table_expression) evaluateScalarSubqueryIfNeeded(node, subquery_scope.subquery_depth, subquery_scope.context); - ++subquery_counter; if (result_projection_names.empty()) - result_projection_names.push_back("_subquery_" + std::to_string(subquery_counter)); + result_projection_names.push_back(std::move(projection_name)); break; } @@ -5187,11 +5212,6 @@ void QueryAnalyzer::initializeQueryJoinTreeNode(QueryTreeNodePtr & join_tree_nod if (resolved_identifier_query_node || resolved_identifier_union_node) { - if (resolved_identifier_query_node) - resolved_identifier_query_node->setIsCTE(false); - else - resolved_identifier_union_node->setIsCTE(false); - if (table_expression_modifiers.has_value()) { throw Exception(ErrorCodes::UNSUPPORTED_METHOD, @@ -5428,14 +5448,7 @@ void QueryAnalyzer::resolveQueryJoinTreeNode(QueryTreeNodePtr & join_tree_node, [[fallthrough]]; case QueryTreeNodeType::UNION: { - IdentifierResolveScope subquery_scope(join_tree_node, &scope); - subquery_scope.subquery_depth = scope.subquery_depth + 1; - - if (from_node_type == QueryTreeNodeType::QUERY) - resolveQuery(join_tree_node, subquery_scope); - else if (from_node_type == QueryTreeNodeType::UNION) - resolveUnion(join_tree_node, subquery_scope); - + resolveExpressionNode(join_tree_node, scope, false /*allow_lambda_expression*/, true /*allow_table_expression*/); break; } case QueryTreeNodeType::TABLE_FUNCTION: diff --git a/tests/queries/0_stateless/02337_analyzer_columns_basic.sql b/tests/queries/0_stateless/02337_analyzer_columns_basic.sql index 76f9f8b25e4..368a5670d17 100644 --- a/tests/queries/0_stateless/02337_analyzer_columns_basic.sql +++ b/tests/queries/0_stateless/02337_analyzer_columns_basic.sql @@ -31,7 +31,7 @@ INSERT INTO test_table VALUES (0, 'Value'); SELECT 'Table access without table name qualification'; SELECT test_id FROM test_table; -- { serverError 47 } -SELECT test_id FROM test_unknown_table; -- { serverError 60 } +SELECT test_id FROM test_unknown_table; -- { serverError 47 } DESCRIBE (SELECT id FROM test_table); SELECT id FROM test_table; From d7ca0c04fac6a421c4e0570bae85dd38d9ea153a Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 10 Jan 2023 11:35:02 +0100 Subject: [PATCH 138/262] remove drop if exists --- tests/integration/test_keeper_map/test.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/integration/test_keeper_map/test.py b/tests/integration/test_keeper_map/test.py index d2a3d4f3748..859481de188 100644 --- a/tests/integration/test_keeper_map/test.py +++ b/tests/integration/test_keeper_map/test.py @@ -47,9 +47,6 @@ def remove_children(client, path): def test_create_keeper_map(started_cluster): - node.query("DROP TABLE IF EXISTS test_keeper_map SYNC") - node.query("DROP TABLE IF EXISTS test_keeper_map_another SYNC") - node.query( "CREATE TABLE test_keeper_map (key UInt64, value UInt64) ENGINE = KeeperMap('/test1') PRIMARY KEY(key);" ) @@ -150,8 +147,6 @@ def test_create_drop_keeper_map_concurrent(started_cluster): def test_keeper_map_without_zk(started_cluster): - node.query("DROP TABLE IF EXISTS test_keeper_map SYNC") - def assert_keeper_exception_after_partition(query): with PartitionManager() as pm: pm.drop_instance_zk_connections(node) From 134cc3e2736887cc41e5d4bfb7914031e774da00 Mon Sep 17 00:00:00 2001 From: Antonio Andelic Date: Tue, 10 Jan 2023 14:16:28 +0100 Subject: [PATCH 139/262] Change table name --- tests/integration/test_keeper_map/test.py | 22 +++++++++++----------- 1 
file changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/integration/test_keeper_map/test.py b/tests/integration/test_keeper_map/test.py index 859481de188..2e80ada963f 100644 --- a/tests/integration/test_keeper_map/test.py +++ b/tests/integration/test_keeper_map/test.py @@ -154,35 +154,35 @@ def test_keeper_map_without_zk(started_cluster): assert "Coordination::Exception" in error assert_keeper_exception_after_partition( - "CREATE TABLE test_keeper_map (key UInt64, value UInt64) ENGINE = KeeperMap('/test1') PRIMARY KEY(key);" + "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_without_zk') PRIMARY KEY(key);" ) node.query( - "CREATE TABLE test_keeper_map (key UInt64, value UInt64) ENGINE = KeeperMap('/test1') PRIMARY KEY(key);" + "CREATE TABLE test_keeper_map_without_zk (key UInt64, value UInt64) ENGINE = KeeperMap('/test_without_zk') PRIMARY KEY(key);" ) assert_keeper_exception_after_partition( - "INSERT INTO test_keeper_map VALUES (1, 11)" + "INSERT INTO test_keeper_map_without_zk VALUES (1, 11)" ) - node.query("INSERT INTO test_keeper_map VALUES (1, 11)") + node.query("INSERT INTO test_keeper_map_without_zk VALUES (1, 11)") - assert_keeper_exception_after_partition("SELECT * FROM test_keeper_map") - node.query("SELECT * FROM test_keeper_map") + assert_keeper_exception_after_partition("SELECT * FROM test_keeper_map_without_zk") + node.query("SELECT * FROM test_keeper_map_without_zk") with PartitionManager() as pm: pm.drop_instance_zk_connections(node) node.restart_clickhouse(60) - error = node.query_and_get_error("SELECT * FROM test_keeper_map") + error = node.query_and_get_error("SELECT * FROM test_keeper_map_without_zk") assert "Failed to activate table because of connection issues" in error - node.query("SELECT * FROM test_keeper_map") + node.query("SELECT * FROM test_keeper_map_without_zk") client = get_genuine_zk() - remove_children(client, "/test_keeper_map/test1") + remove_children(client, "/test_keeper_map/test_without_zk") node.restart_clickhouse(60) - error = node.query_and_get_error("SELECT * FROM test_keeper_map") + error = node.query_and_get_error("SELECT * FROM test_keeper_map_without_zk") assert "Failed to activate table because of invalid metadata in ZooKeeper" in error - node.query("DETACH TABLE test_keeper_map") + node.query("DETACH TABLE test_keeper_map_without_zk") client.stop() From 4673b3fe1de44a030ca53ced88bd8d0efe9f94d6 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 10 Jan 2023 16:31:01 +0100 Subject: [PATCH 140/262] Revert "Revert "Custom reading for mutation"" --- src/Interpreters/Context.h | 6 - src/Interpreters/MutationsInterpreter.cpp | 418 +++++++++++++----- src/Interpreters/MutationsInterpreter.h | 72 ++- .../QueryPlan/ReadFromMergeTree.cpp | 1 - .../Sources/ThrowingExceptionSource.h | 32 ++ src/Storages/IStorage.h | 2 + src/Storages/MergeTree/MergeTreeData.h | 2 + .../MergeTree/MergeTreeDataSelectExecutor.h | 14 +- .../MergeTree/MergeTreeSequentialSource.cpp | 112 ++++- .../MergeTree/MergeTreeSequentialSource.h | 13 + src/Storages/MergeTree/MutateTask.cpp | 9 +- 11 files changed, 539 insertions(+), 142 deletions(-) create mode 100644 src/Processors/Sources/ThrowingExceptionSource.h diff --git a/src/Interpreters/Context.h b/src/Interpreters/Context.h index 4b7d0685ba3..58478ab79b8 100644 --- a/src/Interpreters/Context.h +++ b/src/Interpreters/Context.h @@ -377,9 +377,6 @@ private: inline static ContextPtr global_context_instance; - /// A flag, used to mark if reader needs to apply deleted rows 
mask. - bool apply_deleted_mask = true; - /// Temporary data for query execution accounting. TemporaryDataOnDiskScopePtr temp_data_on_disk; public: @@ -973,9 +970,6 @@ public: bool isInternalQuery() const { return is_internal_query; } void setInternalQuery(bool internal) { is_internal_query = internal; } - bool applyDeletedMask() const { return apply_deleted_mask; } - void setApplyDeletedMask(bool apply) { apply_deleted_mask = apply; } - ActionLocksManagerPtr getActionLocksManager() const; enum class ApplicationType diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index 3960e0759d6..f8627f1ff85 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -30,6 +30,9 @@ #include #include #include +#include +#include +#include namespace DB @@ -190,7 +193,8 @@ ColumnDependencies getAllColumnDependencies(const StorageMetadataPtr & metadata_ bool isStorageTouchedByMutations( - const StoragePtr & storage, + MergeTreeData & storage, + MergeTreeData::DataPartPtr source_part, const StorageMetadataPtr & metadata_snapshot, const std::vector & commands, ContextMutablePtr context_copy) @@ -199,19 +203,15 @@ bool isStorageTouchedByMutations( return false; bool all_commands_can_be_skipped = true; - auto storage_from_merge_tree_data_part = std::dynamic_pointer_cast(storage); for (const MutationCommand & command : commands) { if (!command.predicate) /// The command touches all rows. return true; - if (command.partition && !storage_from_merge_tree_data_part) - throw Exception("ALTER UPDATE/DELETE ... IN PARTITION is not supported for non-MergeTree tables", ErrorCodes::NOT_IMPLEMENTED); - - if (command.partition && storage_from_merge_tree_data_part) + if (command.partition) { - const String partition_id = storage_from_merge_tree_data_part->getPartitionIDFromQuery(command.partition, context_copy); - if (partition_id == storage_from_merge_tree_data_part->getPartitionId()) + const String partition_id = storage.getPartitionIDFromQuery(command.partition, context_copy); + if (partition_id == source_part->info.partition_id) all_commands_can_be_skipped = false; } else @@ -229,13 +229,15 @@ bool isStorageTouchedByMutations( context_copy->setSetting("allow_asynchronous_read_from_io_pool_for_merge_tree", false); context_copy->setSetting("max_streams_for_merge_tree_reading", Field(0)); - ASTPtr select_query = prepareQueryAffectedAST(commands, storage, context_copy); + ASTPtr select_query = prepareQueryAffectedAST(commands, storage.shared_from_this(), context_copy); + + auto storage_from_part = std::make_shared(source_part); /// Interpreter must be alive, when we use result of execute() method. /// For some reason it may copy context and give it into ExpressionTransform /// after that we will use context from destroyed stack frame in our stream. InterpreterSelectQuery interpreter( - select_query, context_copy, storage, metadata_snapshot, SelectQueryOptions().ignoreLimits().ignoreProjections()); + select_query, context_copy, storage_from_part, metadata_snapshot, SelectQueryOptions().ignoreLimits().ignoreProjections()); auto io = interpreter.execute(); PullingPipelineExecutor executor(io.pipeline); @@ -288,6 +290,57 @@ ASTPtr getPartitionAndPredicateExpressionForMutationCommand( return command.predicate ? 
command.predicate->clone() : partition_predicate_as_ast_func; } +MutationsInterpreter::Source::Source(StoragePtr storage_) : storage(std::move(storage_)) +{ +} + +MutationsInterpreter::Source::Source(MergeTreeData & storage_, MergeTreeData::DataPartPtr source_part_) + : data(&storage_), part(std::move(source_part_)) +{ +} + +StorageSnapshotPtr MutationsInterpreter::Source::getStorageSnapshot(const StorageMetadataPtr & snapshot_, const ContextPtr & context_) const +{ + if (data) + return data->getStorageSnapshot(snapshot_, context_); + + return storage->getStorageSnapshot(snapshot_, context_); +} + +StoragePtr MutationsInterpreter::Source::getStorage() const +{ + if (data) + return data->shared_from_this(); + + return storage; +} + +const MergeTreeData * MutationsInterpreter::Source::getMergeTreeData() const +{ + if (data) + return data; + + return dynamic_cast(storage.get()); +} + +bool MutationsInterpreter::Source::supportsLightweightDelete() const +{ + if (part) + return part->supportLightweightDeleteMutate(); + + return storage->supportsLightweightDelete(); +} + + +bool MutationsInterpreter::Source::hasLightweightDeleteMask() const +{ + return part && part->hasLightweightDelete(); +} + +bool MutationsInterpreter::Source::materializeTTLRecalculateOnly() const +{ + return data && data->getSettings()->materialize_ttl_recalculate_only; +} MutationsInterpreter::MutationsInterpreter( StoragePtr storage_, @@ -297,7 +350,45 @@ MutationsInterpreter::MutationsInterpreter( bool can_execute_, bool return_all_columns_, bool return_deleted_rows_) - : storage(std::move(storage_)) + : MutationsInterpreter( + Source(std::move(storage_)), + metadata_snapshot_, std::move(commands_), std::move(context_), + can_execute_, return_all_columns_, return_deleted_rows_) +{ + if (can_execute_ && dynamic_cast(source.getStorage().get())) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Cannot execute mutation for {}. 
Mutation should be applied to every part separately.", + source.getStorage()->getName()); + } +} + +MutationsInterpreter::MutationsInterpreter( + MergeTreeData & storage_, + MergeTreeData::DataPartPtr source_part_, + const StorageMetadataPtr & metadata_snapshot_, + MutationCommands commands_, + ContextPtr context_, + bool can_execute_, + bool return_all_columns_, + bool return_deleted_rows_) + : MutationsInterpreter( + Source(storage_, std::move(source_part_)), + metadata_snapshot_, std::move(commands_), std::move(context_), + can_execute_, return_all_columns_, return_deleted_rows_) +{ +} + +MutationsInterpreter::MutationsInterpreter( + Source source_, + const StorageMetadataPtr & metadata_snapshot_, + MutationCommands commands_, + ContextPtr context_, + bool can_execute_, + bool return_all_columns_, + bool return_deleted_rows_) + : source(std::move(source_)) , metadata_snapshot(metadata_snapshot_) , commands(std::move(commands_)) , context(Context::createCopy(context_)) @@ -306,12 +397,12 @@ MutationsInterpreter::MutationsInterpreter( , return_all_columns(return_all_columns_) , return_deleted_rows(return_deleted_rows_) { - mutation_ast = prepare(!can_execute); + prepare(!can_execute); } -static NameSet getKeyColumns(const StoragePtr & storage, const StorageMetadataPtr & metadata_snapshot) +static NameSet getKeyColumns(const MutationsInterpreter::Source & source, const StorageMetadataPtr & metadata_snapshot) { - const MergeTreeData * merge_tree_data = dynamic_cast(storage.get()); + const MergeTreeData * merge_tree_data = source.getMergeTreeData(); if (!merge_tree_data) return {}; @@ -333,21 +424,12 @@ static NameSet getKeyColumns(const StoragePtr & storage, const StorageMetadataPt return key_columns; } -static bool materializeTTLRecalculateOnly(const StoragePtr & storage) -{ - auto storage_from_merge_tree_data_part = std::dynamic_pointer_cast(storage); - if (!storage_from_merge_tree_data_part) - return false; - - return storage_from_merge_tree_data_part->materializeTTLRecalculateOnly(); -} - static void validateUpdateColumns( - const StoragePtr & storage, + const MutationsInterpreter::Source & source, const StorageMetadataPtr & metadata_snapshot, const NameSet & updated_columns, const std::unordered_map & column_to_affected_materialized) { - NameSet key_columns = getKeyColumns(storage, metadata_snapshot); + NameSet key_columns = getKeyColumns(source, metadata_snapshot); for (const String & column_name : updated_columns) { @@ -364,7 +446,7 @@ static void validateUpdateColumns( /// Allow to override value of lightweight delete filter virtual column if (!found && column_name == LightweightDeleteDescription::FILTER_COLUMN.name) { - if (!storage->supportsLightweightDelete()) + if (!source.supportsLightweightDelete()) throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Lightweight delete is not supported for table"); found = true; } @@ -427,7 +509,7 @@ static std::optional> getExpressionsOfUpdatedNestedSubcolumn return res; } -ASTPtr MutationsInterpreter::prepare(bool dry_run) +void MutationsInterpreter::prepare(bool dry_run) { if (is_prepared) throw Exception("MutationsInterpreter is already prepared. 
It is a bug.", ErrorCodes::LOGICAL_ERROR); @@ -448,7 +530,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) } NameSet updated_columns; - bool materialize_ttl_recalculate_only = materializeTTLRecalculateOnly(storage); + bool materialize_ttl_recalculate_only = source.materializeTTLRecalculateOnly(); for (const MutationCommand & command : commands) { @@ -481,7 +563,7 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) } } - validateUpdateColumns(storage, metadata_snapshot, updated_columns, column_to_affected_materialized); + validateUpdateColumns(source, metadata_snapshot, updated_columns, column_to_affected_materialized); } dependencies = getAllColumnDependencies(metadata_snapshot, updated_columns); @@ -778,15 +860,10 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) stages_copy.back().filters = stage.filters; } - const ASTPtr select_query = prepareInterpreterSelectQuery(stages_copy, /* dry_run = */ true); - InterpreterSelectQuery interpreter{ - select_query, context, storage, metadata_snapshot, - SelectQueryOptions().analyze(/* dry_run = */ false).ignoreLimits().ignoreProjections()}; + prepareMutationStages(stages_copy, true); - auto first_stage_header = interpreter.getSampleBlock(); QueryPlan plan; - auto source = std::make_shared(first_stage_header); - plan.addStep(std::make_unique(Pipe(std::move(source)))); + initQueryPlan(stages_copy.front(), plan); auto pipeline = addStreamsForLaterStages(stages_copy, plan); updated_header = std::make_unique(pipeline.getHeader()); } @@ -801,21 +878,18 @@ ASTPtr MutationsInterpreter::prepare(bool dry_run) is_prepared = true; - return prepareInterpreterSelectQuery(stages, dry_run); + prepareMutationStages(stages, dry_run); } -ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & prepared_stages, bool dry_run) +void MutationsInterpreter::prepareMutationStages(std::vector & prepared_stages, bool dry_run) { - auto storage_snapshot = storage->getStorageSnapshot(metadata_snapshot, context); + auto storage_snapshot = source.getStorageSnapshot(metadata_snapshot, context); auto options = GetColumnsOptions(GetColumnsOptions::AllPhysical).withExtendedObjects(); auto all_columns = storage_snapshot->getColumns(options); /// Add _row_exists column if it is present in the part - if (auto part_storage = dynamic_pointer_cast(storage)) - { - if (part_storage->hasLightweightDeletedMask()) - all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN}); - } + if (source.hasLightweightDeleteMask()) + all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN}); /// Next, for each stage calculate columns changed by this and previous stages. for (size_t i = 0; i < prepared_stages.size(); ++i) @@ -839,7 +913,7 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & /// Now, calculate `expressions_chain` for each stage except the first. /// Do it backwards to propagate information about columns required as input for a stage to the previous stage. - for (size_t i = prepared_stages.size() - 1; i > 0; --i) + for (int64_t i = prepared_stages.size() - 1; i >= 0; --i) { auto & stage = prepared_stages[i]; @@ -859,7 +933,7 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & /// e.g. 
ALTER referencing the same table in scalar subquery bool execute_scalar_subqueries = !dry_run; auto syntax_result = TreeRewriter(context).analyze( - all_asts, all_columns, storage, storage_snapshot, + all_asts, all_columns, source.getStorage(), storage_snapshot, false, true, execute_scalar_subqueries); if (execute_scalar_subqueries && context->hasQueryContext()) @@ -897,6 +971,9 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & } } + if (i == 0 && actions_chain.steps.empty()) + actions_chain.lastStep(syntax_result->required_source_columns); + /// Remove all intermediate columns. actions_chain.addStep(); actions_chain.getLastStep().required_output.clear(); @@ -908,49 +985,198 @@ ASTPtr MutationsInterpreter::prepareInterpreterSelectQuery(std::vector & actions_chain.finalize(); - /// Propagate information about columns needed as input. - for (const auto & column : actions_chain.steps.front()->getRequiredColumns()) - prepared_stages[i - 1].output_columns.insert(column.name); - } - - /// Execute first stage as a SELECT statement. - - auto select = std::make_shared(); - - select->setExpression(ASTSelectQuery::Expression::SELECT, std::make_shared()); - for (const auto & column_name : prepared_stages[0].output_columns) - select->select()->children.push_back(std::make_shared(column_name)); - - /// Don't let select list be empty. - if (select->select()->children.empty()) - select->select()->children.push_back(std::make_shared(Field(0))); - - if (!prepared_stages[0].filters.empty()) - { - ASTPtr where_expression; - if (prepared_stages[0].filters.size() == 1) - where_expression = prepared_stages[0].filters[0]; - else + if (i) { - auto coalesced_predicates = std::make_shared(); - coalesced_predicates->name = "and"; - coalesced_predicates->arguments = std::make_shared(); - coalesced_predicates->children.push_back(coalesced_predicates->arguments); - coalesced_predicates->arguments->children = prepared_stages[0].filters; - where_expression = std::move(coalesced_predicates); + /// Propagate information about columns needed as input. + for (const auto & column : actions_chain.steps.front()->getRequiredColumns()) + prepared_stages[i - 1].output_columns.insert(column.name); + } + } +} + +/// This structure re-implements adding virtual columns while reading from MergeTree part. +/// It would be good to unify it with IMergeTreeSelectAlgorithm. 
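+/// Here only two virtual columns are handled: the lightweight delete mask (_row_exists) and _partition_id.
+/// A requested virtual column that is not physically stored in the part is removed from the list of
+/// columns to read and re-added after the reading step as a constant column at its original position
+/// (see addVirtuals()).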
+struct VirtualColumns +{ + struct ColumnAndPosition + { + ColumnWithTypeAndName column; + size_t position; + }; + + using Columns = std::vector; + + Columns virtuals; + Names columns_to_read; + + VirtualColumns(Names required_columns, const MergeTreeData::DataPartPtr & part) : columns_to_read(std::move(required_columns)) + { + for (size_t i = 0; i < columns_to_read.size(); ++i) + { + if (columns_to_read[i] == LightweightDeleteDescription::FILTER_COLUMN.name) + { + LoadedMergeTreeDataPartInfoForReader part_info_reader(part); + if (!part_info_reader.getColumns().contains(LightweightDeleteDescription::FILTER_COLUMN.name)) + { + ColumnWithTypeAndName mask_column; + mask_column.type = LightweightDeleteDescription::FILTER_COLUMN.type; + mask_column.column = mask_column.type->createColumnConst(0, 1); + mask_column.name = std::move(columns_to_read[i]); + + virtuals.emplace_back(ColumnAndPosition{.column = std::move(mask_column), .position = i}); + } + } + else if (columns_to_read[i] == "_partition_id") + { + ColumnWithTypeAndName column; + column.type = std::make_shared(); + column.column = column.type->createColumnConst(0, part->info.partition_id); + column.name = std::move(columns_to_read[i]); + + virtuals.emplace_back(ColumnAndPosition{.column = std::move(column), .position = i}); + } + } + + if (!virtuals.empty()) + { + Names columns_no_virtuals; + columns_no_virtuals.reserve(columns_to_read.size()); + size_t next_virtual = 0; + for (size_t i = 0; i < columns_to_read.size(); ++i) + { + if (next_virtual < virtuals.size() && i == virtuals[next_virtual].position) + ++next_virtual; + else + columns_no_virtuals.emplace_back(std::move(columns_to_read[i])); + } + + columns_to_read.swap(columns_no_virtuals); } - select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); } - return select; + void addVirtuals(QueryPlan & plan) + { + auto dag = std::make_unique(plan.getCurrentDataStream().header.getColumnsWithTypeAndName()); + + for (auto & column : virtuals) + { + const auto & adding_const = dag->addColumn(std::move(column.column)); + auto & outputs = dag->getOutputs(); + outputs.insert(outputs.begin() + column.position, &adding_const); + } + + auto step = std::make_unique(plan.getCurrentDataStream(), std::move(dag)); + plan.addStep(std::move(step)); + } +}; + +void MutationsInterpreter::Source::read( + Stage & first_stage, + QueryPlan & plan, + const StorageMetadataPtr & snapshot_, + const ContextPtr & context_, + bool apply_deleted_mask_, + bool can_execute_) const +{ + auto required_columns = first_stage.expressions_chain.steps.front()->getRequiredColumns().getNames(); + auto storage_snapshot = getStorageSnapshot(snapshot_, context_); + + if (!can_execute_) + { + auto header = storage_snapshot->getSampleBlockForColumns(required_columns); + auto callback = []() + { + return DB::Exception(ErrorCodes::LOGICAL_ERROR, "Cannot execute a mutation because can_execute flag set to false"); + }; + + Pipe pipe(std::make_shared(header, callback)); + + auto read_from_pipe = std::make_unique(std::move(pipe)); + plan.addStep(std::move(read_from_pipe)); + return; + } + + if (data) + { + const auto & steps = first_stage.expressions_chain.steps; + const auto & names = first_stage.filter_column_names; + size_t num_filters = names.size(); + + ActionsDAGPtr filter; + if (!first_stage.filter_column_names.empty()) + { + + ActionsDAG::NodeRawConstPtrs nodes(num_filters); + for (size_t i = 0; i < num_filters; ++i) + nodes[i] = &steps[i]->actions()->findInOutputs(names[i]); + + filter = 
ActionsDAG::buildFilterActionsDAG(nodes, {}, context_); + } + + VirtualColumns virtual_columns(std::move(required_columns), part); + + createMergeTreeSequentialSource( + plan, *data, storage_snapshot, part, std::move(virtual_columns.columns_to_read), apply_deleted_mask_, filter, context_, + &Poco::Logger::get("MutationsInterpreter")); + + virtual_columns.addVirtuals(plan); + } + else + { + auto select = std::make_shared(); + + select->setExpression(ASTSelectQuery::Expression::SELECT, std::make_shared()); + for (const auto & column_name : first_stage.output_columns) + select->select()->children.push_back(std::make_shared(column_name)); + + /// Don't let select list be empty. + if (select->select()->children.empty()) + select->select()->children.push_back(std::make_shared(Field(0))); + + if (!first_stage.filters.empty()) + { + ASTPtr where_expression; + if (first_stage.filters.size() == 1) + where_expression = first_stage.filters[0]; + else + { + auto coalesced_predicates = std::make_shared(); + coalesced_predicates->name = "and"; + coalesced_predicates->arguments = std::make_shared(); + coalesced_predicates->children.push_back(coalesced_predicates->arguments); + coalesced_predicates->arguments->children = first_stage.filters; + where_expression = std::move(coalesced_predicates); + } + select->setExpression(ASTSelectQuery::Expression::WHERE, std::move(where_expression)); + } + + SelectQueryInfo query_info; + query_info.query = std::move(select); + + size_t max_block_size = context_->getSettingsRef().max_block_size; + size_t max_streams = 1; + storage->read(plan, required_columns, storage_snapshot, query_info, context_, QueryProcessingStage::FetchColumns, max_block_size, max_streams); + + if (!plan.isInitialized()) + { + /// It may be possible when there is nothing to read from storage. 
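+ /// In that case substitute an empty source with the expected header,
+ /// so that the rest of the mutation pipeline can still be built on top of this plan.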
+ auto header = storage_snapshot->getSampleBlockForColumns(required_columns); + auto read_from_pipe = std::make_unique(Pipe(std::make_shared(header))); + plan.addStep(std::move(read_from_pipe)); + } + } +} + +void MutationsInterpreter::initQueryPlan(Stage & first_stage, QueryPlan & plan) +{ + source.read(first_stage, plan, metadata_snapshot, context, apply_deleted_mask, can_execute); + addCreatingSetsStep(plan, first_stage.analyzer->getPreparedSets(), context); } QueryPipelineBuilder MutationsInterpreter::addStreamsForLaterStages(const std::vector & prepared_stages, QueryPlan & plan) const { - for (size_t i_stage = 1; i_stage < prepared_stages.size(); ++i_stage) + for (const Stage & stage : prepared_stages) { - const Stage & stage = prepared_stages[i_stage]; - for (size_t i = 0; i < stage.expressions_chain.steps.size(); ++i) { const auto & step = stage.expressions_chain.steps[i]; @@ -988,14 +1214,11 @@ QueryPipelineBuilder MutationsInterpreter::addStreamsForLaterStages(const std::v void MutationsInterpreter::validate() { - if (!select_interpreter) - select_interpreter = std::make_unique(mutation_ast, context, storage, metadata_snapshot, select_limits); - const Settings & settings = context->getSettingsRef(); /// For Replicated* storages mutations cannot employ non-deterministic functions /// because that produces inconsistencies between replicas - if (startsWith(storage->getName(), "Replicated") && !settings.allow_nondeterministic_mutations) + if (startsWith(source.getStorage()->getName(), "Replicated") && !settings.allow_nondeterministic_mutations) { for (const auto & command : commands) { @@ -1012,7 +1235,7 @@ void MutationsInterpreter::validate() } QueryPlan plan; - select_interpreter->buildQueryPlan(plan); + initQueryPlan(stages.front(), plan); auto pipeline = addStreamsForLaterStages(stages, plan); } @@ -1021,23 +1244,8 @@ QueryPipelineBuilder MutationsInterpreter::execute() if (!can_execute) throw Exception("Cannot execute mutations interpreter because can_execute flag set to false", ErrorCodes::LOGICAL_ERROR); - if (!select_interpreter) - { - /// Skip to apply deleted mask for MutateSomePartColumn cases when part has lightweight delete. - if (!apply_deleted_mask) - { - auto context_for_reading = Context::createCopy(context); - context_for_reading->setApplyDeletedMask(apply_deleted_mask); - select_interpreter = std::make_unique(mutation_ast, context_for_reading, storage, metadata_snapshot, select_limits); - } - else - select_interpreter = std::make_unique(mutation_ast, context, storage, metadata_snapshot, select_limits); - } - - QueryPlan plan; - select_interpreter->buildQueryPlan(plan); - + initQueryPlan(stages.front(), plan); auto builder = addStreamsForLaterStages(stages, plan); /// Sometimes we update just part of columns (for example UPDATE mutation) @@ -1069,11 +1277,7 @@ const ColumnDependencies & MutationsInterpreter::getColumnDependencies() const size_t MutationsInterpreter::evaluateCommandsSize() { - for (const MutationCommand & command : commands) - if (unlikely(!command.predicate && !command.partition)) /// The command touches all rows. 
- return mutation_ast->size(); - - return std::max(prepareQueryAffectedAST(commands, storage, context)->size(), mutation_ast->size()); + return prepareQueryAffectedAST(commands, source.getStorage(), context)->size(); } std::optional MutationsInterpreter::getStorageSortDescriptionIfPossible(const Block & header) const @@ -1096,7 +1300,7 @@ std::optional MutationsInterpreter::getStorageSortDescriptionIf ASTPtr MutationsInterpreter::getPartitionAndPredicateExpressionForMutationCommand(const MutationCommand & command) const { - return DB::getPartitionAndPredicateExpressionForMutationCommand(command, storage, context); + return DB::getPartitionAndPredicateExpressionForMutationCommand(command, source.getStorage(), context); } bool MutationsInterpreter::Stage::isAffectingAllColumns(const Names & storage_columns) const diff --git a/src/Interpreters/MutationsInterpreter.h b/src/Interpreters/MutationsInterpreter.h index 336c5f11162..fbcb56fac6f 100644 --- a/src/Interpreters/MutationsInterpreter.h +++ b/src/Interpreters/MutationsInterpreter.h @@ -19,7 +19,8 @@ using QueryPipelineBuilderPtr = std::unique_ptr; /// Return false if the data isn't going to be changed by mutations. bool isStorageTouchedByMutations( - const StoragePtr & storage, + MergeTreeData & storage, + MergeTreeData::DataPartPtr source_part, const StorageMetadataPtr & metadata_snapshot, const std::vector & commands, ContextMutablePtr context_copy @@ -35,6 +36,8 @@ ASTPtr getPartitionAndPredicateExpressionForMutationCommand( /// to this data. class MutationsInterpreter { + struct Stage; + public: /// Storage to mutate, array of mutations commands and context. If you really want to execute mutation /// use can_execute = true, in other cases (validation, amount of commands) it can be false @@ -47,8 +50,18 @@ public: bool return_all_columns_ = false, bool return_deleted_rows_ = false); - void validate(); + /// Special case for MergeTree + MutationsInterpreter( + MergeTreeData & storage_, + MergeTreeData::DataPartPtr source_part_, + const StorageMetadataPtr & metadata_snapshot_, + MutationCommands commands_, + ContextPtr context_, + bool can_execute_, + bool return_all_columns_ = false, + bool return_deleted_rows_ = false); + void validate(); size_t evaluateCommandsSize(); /// The resulting stream will return blocks containing only changed columns and columns, that we need to recalculate indices. @@ -82,19 +95,60 @@ public: void setApplyDeletedMask(bool apply) { apply_deleted_mask = apply; } + /// Internal class which represents a data part for MergeTree + /// or just storage for other storages. + /// The main idea is to create a dedicated reading from MergeTree part. + /// Additionally we propagate some storage properties. + struct Source + { + StorageSnapshotPtr getStorageSnapshot(const StorageMetadataPtr & snapshot_, const ContextPtr & context_) const; + StoragePtr getStorage() const; + const MergeTreeData * getMergeTreeData() const; + + bool supportsLightweightDelete() const; + bool hasLightweightDeleteMask() const; + bool materializeTTLRecalculateOnly() const; + + void read( + Stage & first_stage, + QueryPlan & plan, + const StorageMetadataPtr & snapshot_, + const ContextPtr & context_, + bool apply_deleted_mask_, + bool can_execute_) const; + + explicit Source(StoragePtr storage_); + Source(MergeTreeData & storage_, MergeTreeData::DataPartPtr source_part_); + + private: + StoragePtr storage; + + /// Special case for MergeTree. 
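+ /// If 'data' and 'part' are set, reading is done directly from the single data part;
+ /// otherwise the generic 'storage' interface is used (see Source::read()).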
+ MergeTreeData * data = nullptr; + MergeTreeData::DataPartPtr part; + }; + private: - ASTPtr prepare(bool dry_run); + MutationsInterpreter( + Source source_, + const StorageMetadataPtr & metadata_snapshot_, + MutationCommands commands_, + ContextPtr context_, + bool can_execute_, + bool return_all_columns_, + bool return_deleted_rows_); - struct Stage; + void prepare(bool dry_run); - ASTPtr prepareInterpreterSelectQuery(std::vector &prepared_stages, bool dry_run); + void initQueryPlan(Stage & first_stage, QueryPlan & query_plan); + void prepareMutationStages(std::vector &prepared_stages, bool dry_run); QueryPipelineBuilder addStreamsForLaterStages(const std::vector & prepared_stages, QueryPlan & plan) const; std::optional getStorageSortDescriptionIfPossible(const Block & header) const; ASTPtr getPartitionAndPredicateExpressionForMutationCommand(const MutationCommand & command) const; - StoragePtr storage; + Source source; StorageMetadataPtr metadata_snapshot; MutationCommands commands; ContextPtr context; @@ -103,12 +157,6 @@ private: bool apply_deleted_mask = true; - ASTPtr mutation_ast; - - /// We have to store interpreter because it use own copy of context - /// and some streams from execute method may use it. - std::unique_ptr select_interpreter; - /// A sequence of mutation commands is executed as a sequence of stages. Each stage consists of several /// filters, followed by updating values of some columns. Commands can reuse expressions calculated by the /// previous commands in the same stage, but at the end of each stage intermediate columns are thrown away diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 0d8fe84f9d3..4765b2cbfbe 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -64,7 +64,6 @@ static MergeTreeReaderSettings getMergeTreeReaderSettings( .save_marks_in_cache = true, .checksum_on_read = settings.checksum_on_read, .read_in_order = query_info.input_order_info != nullptr, - .apply_deleted_mask = context->applyDeletedMask(), .use_asynchronous_read_from_pool = settings.allow_asynchronous_read_from_io_pool_for_merge_tree && (settings.max_streams_to_max_threads_ratio > 1 || settings.max_streams_for_merge_tree_reading > 1), }; diff --git a/src/Processors/Sources/ThrowingExceptionSource.h b/src/Processors/Sources/ThrowingExceptionSource.h new file mode 100644 index 00000000000..5abebd89d07 --- /dev/null +++ b/src/Processors/Sources/ThrowingExceptionSource.h @@ -0,0 +1,32 @@ +#pragma once +#include + + +namespace DB +{ + +/// This source is throwing exception at the first attempt to read from it. +/// Can be used as a additional check that pipeline (or its part) is never executed. +class ThrowingExceptionSource : public ISource +{ +public: + + using CallBack = std::function; + + explicit ThrowingExceptionSource(Block header, CallBack callback_) + : ISource(std::move(header)) + , callback(std::move(callback_)) + {} + + String getName() const override { return "ThrowingExceptionSource"; } + +protected: + Chunk generate() override + { + throw callback(); + } + + CallBack callback; +}; + +} diff --git a/src/Storages/IStorage.h b/src/Storages/IStorage.h index cdf273b47df..7d927b51e5f 100644 --- a/src/Storages/IStorage.h +++ b/src/Storages/IStorage.h @@ -110,6 +110,8 @@ public: /// The name of the table. 
StorageID getStorageID() const; + virtual bool isMergeTree() const { return false; } + /// Returns true if the storage receives data from a remote server or servers. virtual bool isRemote() const { return false; } diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index 670c755cf72..19efd8f908a 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -424,6 +424,8 @@ public: StoragePolicyPtr getStoragePolicy() const override; + bool isMergeTree() const override { return true; } + bool supportsPrewhere() const override { return true; } bool supportsFinal() const override; diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h index e302663597d..30d09312245 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.h @@ -66,6 +66,13 @@ public: size_t num_streams, std::shared_ptr max_block_numbers_to_read = nullptr) const; + static MarkRanges markRangesFromPKRange( + const MergeTreeData::DataPartPtr & part, + const StorageMetadataPtr & metadata_snapshot, + const KeyCondition & key_condition, + const Settings & settings, + Poco::Logger * log); + private: const MergeTreeData & data; Poco::Logger * log; @@ -78,13 +85,6 @@ private: const Settings & settings, Poco::Logger * log); - static MarkRanges markRangesFromPKRange( - const MergeTreeData::DataPartPtr & part, - const StorageMetadataPtr & metadata_snapshot, - const KeyCondition & key_condition, - const Settings & settings, - Poco::Logger * log); - static MarkRanges filterMarksUsingIndex( MergeTreeIndexPtr index_helper, MergeTreeIndexConditionPtr condition, diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp index 9e0c96fd88a..4539e0b36c5 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.cpp +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.cpp @@ -1,9 +1,14 @@ #include #include #include +#include #include +#include +#include #include #include +#include +#include namespace DB { @@ -25,6 +30,8 @@ public: const StorageSnapshotPtr & storage_snapshot_, MergeTreeData::DataPartPtr data_part_, Names columns_to_read_, + std::optional mark_ranges_, + bool apply_deleted_mask, bool read_with_direct_io_, bool take_column_types_from_storage, bool quiet = false); @@ -56,6 +63,8 @@ private: Poco::Logger * log = &Poco::Logger::get("MergeTreeSequentialSource"); + std::optional mark_ranges; + std::shared_ptr mark_cache; using MergeTreeReaderPtr = std::unique_ptr; MergeTreeReaderPtr reader; @@ -76,6 +85,8 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( const StorageSnapshotPtr & storage_snapshot_, MergeTreeData::DataPartPtr data_part_, Names columns_to_read_, + std::optional mark_ranges_, + bool apply_deleted_mask, bool read_with_direct_io_, bool take_column_types_from_storage, bool quiet) @@ -85,6 +96,7 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( , data_part(std::move(data_part_)) , columns_to_read(std::move(columns_to_read_)) , read_with_direct_io(read_with_direct_io_) + , mark_ranges(std::move(mark_ranges_)) , mark_cache(storage.getContext()->getMarkCache()) { if (!quiet) @@ -126,11 +138,15 @@ MergeTreeSequentialSource::MergeTreeSequentialSource( MergeTreeReaderSettings reader_settings = { .read_settings = read_settings, - .save_marks_in_cache = false + .save_marks_in_cache = false, + .apply_deleted_mask = apply_deleted_mask, }; + if 
(!mark_ranges) + mark_ranges.emplace(MarkRanges{MarkRange(0, data_part->getMarksCount())}); + reader = data_part->getReader(columns_for_reader, storage_snapshot->metadata, - MarkRanges{MarkRange(0, data_part->getMarksCount())}, + *mark_ranges, /* uncompressed_cache = */ nullptr, mark_cache.get(), reader_settings, {}, {}); } @@ -224,8 +240,10 @@ Pipe createMergeTreeSequentialSource( if (need_to_filter_deleted_rows) columns.emplace_back(LightweightDeleteDescription::FILTER_COLUMN.name); + bool apply_deleted_mask = false; + auto column_part_source = std::make_shared( - storage, storage_snapshot, data_part, columns, read_with_direct_io, take_column_types_from_storage, quiet); + storage, storage_snapshot, data_part, columns, std::optional{}, apply_deleted_mask, read_with_direct_io, take_column_types_from_storage, quiet); Pipe pipe(std::move(column_part_source)); @@ -242,4 +260,92 @@ Pipe createMergeTreeSequentialSource( return pipe; } +/// A Query Plan step to read from a single Merge Tree part +/// using Merge Tree Sequential Source (which reads strictly sequentially in a single thread). +/// This step is used for mutations because the usual reading is too tricky. +/// Previously, sequential reading was achieved by changing some settings like max_threads, +/// however, this approach lead to data corruption after some new settings were introduced. +class ReadFromPart final : public ISourceStep +{ +public: + ReadFromPart( + const MergeTreeData & storage_, + const StorageSnapshotPtr & storage_snapshot_, + MergeTreeData::DataPartPtr data_part_, + Names columns_to_read_, + bool apply_deleted_mask_, + ActionsDAGPtr filter_, + ContextPtr context_, + Poco::Logger * log_) + : ISourceStep(DataStream{.header = storage_snapshot_->getSampleBlockForColumns(columns_to_read_)}) + , storage(storage_) + , storage_snapshot(storage_snapshot_) + , data_part(std::move(data_part_)) + , columns_to_read(std::move(columns_to_read_)) + , apply_deleted_mask(apply_deleted_mask_) + , filter(std::move(filter_)) + , context(std::move(context_)) + , log(log_) + { + } + + String getName() const override { return fmt::format("ReadFromPart({})", data_part->name); } + + void initializePipeline(QueryPipelineBuilder & pipeline, const BuildQueryPipelineSettings &) override + { + std::optional mark_ranges; + + const auto & metadata_snapshot = storage_snapshot->metadata; + if (filter && metadata_snapshot->hasPrimaryKey()) + { + const auto & primary_key = storage_snapshot->metadata->getPrimaryKey(); + const Names & primary_key_column_names = primary_key.column_names; + KeyCondition key_condition(filter, context, primary_key_column_names, primary_key.expression, NameSet{}); + LOG_DEBUG(log, "Key condition: {}", key_condition.toString()); + + if (!key_condition.alwaysFalse()) + mark_ranges = MergeTreeDataSelectExecutor::markRangesFromPKRange( + data_part, metadata_snapshot, key_condition, context->getSettingsRef(), log); + + if (mark_ranges && mark_ranges->empty()) + { + pipeline.init(Pipe(std::make_unique(output_stream->header))); + return; + } + } + + auto source = std::make_unique( + storage, storage_snapshot, data_part, columns_to_read, std::move(mark_ranges), apply_deleted_mask, false, true); + + pipeline.init(Pipe(std::move(source))); + } + +private: + const MergeTreeData & storage; + StorageSnapshotPtr storage_snapshot; + MergeTreeData::DataPartPtr data_part; + Names columns_to_read; + bool apply_deleted_mask; + ActionsDAGPtr filter; + ContextPtr context; + Poco::Logger * log; +}; + +void createMergeTreeSequentialSource( + 
QueryPlan & plan, + const MergeTreeData & storage, + const StorageSnapshotPtr & storage_snapshot, + MergeTreeData::DataPartPtr data_part, + Names columns_to_read, + bool apply_deleted_mask, + ActionsDAGPtr filter, + ContextPtr context, + Poco::Logger * log) +{ + auto reading = std::make_unique( + storage, storage_snapshot, std::move(data_part), std::move(columns_to_read), apply_deleted_mask, filter, std::move(context), log); + + plan.addStep(std::move(reading)); +} + } diff --git a/src/Storages/MergeTree/MergeTreeSequentialSource.h b/src/Storages/MergeTree/MergeTreeSequentialSource.h index c6c29f9d49a..fb249568e8f 100644 --- a/src/Storages/MergeTree/MergeTreeSequentialSource.h +++ b/src/Storages/MergeTree/MergeTreeSequentialSource.h @@ -20,4 +20,17 @@ Pipe createMergeTreeSequentialSource( bool quiet, std::shared_ptr> filtered_rows_count); +class QueryPlan; + +void createMergeTreeSequentialSource( + QueryPlan & plan, + const MergeTreeData & storage, + const StorageSnapshotPtr & storage_snapshot, + MergeTreeData::DataPartPtr data_part, + Names columns_to_read, + bool apply_deleted_mask, + ActionsDAGPtr filter, + ContextPtr context, + Poco::Logger * log); + } diff --git a/src/Storages/MergeTree/MutateTask.cpp b/src/Storages/MergeTree/MutateTask.cpp index de68cb6f0ba..3ecb790243d 100644 --- a/src/Storages/MergeTree/MutateTask.cpp +++ b/src/Storages/MergeTree/MutateTask.cpp @@ -714,8 +714,6 @@ struct MutationContext FutureMergedMutatedPartPtr future_part; MergeTreeData::DataPartPtr source_part; - - StoragePtr storage_from_source_part; StorageMetadataPtr metadata_snapshot; MutationCommandsConstPtr commands; @@ -1478,10 +1476,9 @@ MutateTask::MutateTask( ctx->storage_columns = metadata_snapshot_->getColumns().getAllPhysical(); ctx->txn = txn; ctx->source_part = ctx->future_part->parts[0]; - ctx->storage_from_source_part = std::make_shared(ctx->source_part); ctx->need_prefix = need_prefix_; - auto storage_snapshot = ctx->storage_from_source_part->getStorageSnapshot(ctx->metadata_snapshot, context_); + auto storage_snapshot = ctx->data->getStorageSnapshot(ctx->metadata_snapshot, context_); extendObjectColumns(ctx->storage_columns, storage_snapshot->object_columns, /*with_subcolumns=*/ false); } @@ -1554,7 +1551,7 @@ bool MutateTask::prepare() } if (ctx->source_part->isStoredOnDisk() && !isStorageTouchedByMutations( - ctx->storage_from_source_part, ctx->metadata_snapshot, ctx->commands_for_part, Context::createCopy(context_for_reading))) + *ctx->data, ctx->source_part, ctx->metadata_snapshot, ctx->commands_for_part, Context::createCopy(context_for_reading))) { NameSet files_to_copy_instead_of_hardlinks; auto settings_ptr = ctx->data->getSettings(); @@ -1597,7 +1594,7 @@ bool MutateTask::prepare() if (!ctx->for_interpreter.empty()) { ctx->interpreter = std::make_unique( - ctx->storage_from_source_part, ctx->metadata_snapshot, ctx->for_interpreter, context_for_reading, true); + *ctx->data, ctx->source_part, ctx->metadata_snapshot, ctx->for_interpreter, context_for_reading, true); ctx->materialized_indices = ctx->interpreter->grabMaterializedIndices(); ctx->materialized_projections = ctx->interpreter->grabMaterializedProjections(); ctx->mutation_kind = ctx->interpreter->getMutationKind(); From d945b72d6fe01c216e32f505c5bd85220382b4fb Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 10 Jan 2023 15:40:31 +0000 Subject: [PATCH 141/262] Pull SQLancer image before check run --- tests/ci/sqlancer_check.py | 21 +++++++++++++++++++++ 1 file changed, 21 
insertions(+) diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py index ce6d89a7267..b286d1a63bc 100644 --- a/tests/ci/sqlancer_check.py +++ b/tests/ci/sqlancer_check.py @@ -29,6 +29,11 @@ from rerun_helper import RerunHelper IMAGE_NAME = "clickhouse/sqlancer-test" +def get_pull_command(docker_image): + return ( + f"docker pull --network=host {docker_image}" + ) + def get_run_command(download_url, workspace_path, image): return ( f"docker run " @@ -92,6 +97,21 @@ if __name__ == "__main__": if not os.path.exists(workspace_path): os.makedirs(workspace_path) + pull_command = get_pull_command(docker_image) + + logging.info("Going to pull image %s", pull_command) + + pull_log_path = os.path.join(workspace_path, "pull.log") + with open(pull_log_path, "w", encoding="utf-8") as log: + with subprocess.Popen( + pull_command, shell=True, stderr=log, stdout=log + ) as process: + retcode = process.wait() + if retcode == 0: + logging.info("Pull successfully") + else: + logging.info("Pull failed") + run_command = get_run_command(build_url, workspace_path, docker_image) logging.info("Going to run %s", run_command) @@ -124,6 +144,7 @@ if __name__ == "__main__": paths = [ run_log_path, + pull_log_path, os.path.join(workspace_path, "clickhouse-server.log"), os.path.join(workspace_path, "stderr.log"), os.path.join(workspace_path, "stdout.log"), From fbba28b31e349450456f144704a3795f78d53707 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Mon, 9 Jan 2023 11:34:47 +0100 Subject: [PATCH 142/262] Analyzer aggregation without column fix --- src/Interpreters/ExpressionActions.cpp | 8 +-- src/Interpreters/ExpressionActions.h | 2 +- src/Interpreters/TreeRewriter.cpp | 2 +- src/Planner/PlannerJoinTree.cpp | 61 ++++++++++++++++++- .../QueryPlan/ReadFromMergeTree.cpp | 2 +- src/Storages/HDFS/StorageHDFS.cpp | 2 +- src/Storages/StorageFile.cpp | 2 +- src/Storages/StorageMerge.cpp | 4 +- ...lyzer_aggregation_without_column.reference | 1 + ...21_analyzer_aggregation_without_column.sql | 15 +++++ 10 files changed, 85 insertions(+), 14 deletions(-) create mode 100644 tests/queries/0_stateless/02521_analyzer_aggregation_without_column.reference create mode 100644 tests/queries/0_stateless/02521_analyzer_aggregation_without_column.sql diff --git a/src/Interpreters/ExpressionActions.cpp b/src/Interpreters/ExpressionActions.cpp index d89be9f3e2e..5ea29615942 100644 --- a/src/Interpreters/ExpressionActions.cpp +++ b/src/Interpreters/ExpressionActions.cpp @@ -790,10 +790,10 @@ void ExpressionActions::assertDeterministic() const } -std::string ExpressionActions::getSmallestColumn(const NamesAndTypesList & columns) +NameAndTypePair ExpressionActions::getSmallestColumn(const NamesAndTypesList & columns) { std::optional min_size; - String res; + NameAndTypePair result; for (const auto & column : columns) { @@ -807,14 +807,14 @@ std::string ExpressionActions::getSmallestColumn(const NamesAndTypesList & colum if (!min_size || size < *min_size) { min_size = size; - res = column.name; + result = column; } } if (!min_size) throw Exception("No available columns", ErrorCodes::LOGICAL_ERROR); - return res; + return result; } std::string ExpressionActions::dumpActions() const diff --git a/src/Interpreters/ExpressionActions.h b/src/Interpreters/ExpressionActions.h index be63b9e0d78..faefe0985f7 100644 --- a/src/Interpreters/ExpressionActions.h +++ b/src/Interpreters/ExpressionActions.h @@ -111,7 +111,7 @@ public: std::string dumpActions() const; JSONBuilder::ItemPtr toTree() const; - static std::string getSmallestColumn(const 
NamesAndTypesList & columns); + static NameAndTypePair getSmallestColumn(const NamesAndTypesList & columns); /// Check if column is always zero. True if it's definite, false if we can't say for sure. /// Call it only after subqueries for sets were executed. diff --git a/src/Interpreters/TreeRewriter.cpp b/src/Interpreters/TreeRewriter.cpp index 20c14b8d7b6..a1b3c8011cd 100644 --- a/src/Interpreters/TreeRewriter.cpp +++ b/src/Interpreters/TreeRewriter.cpp @@ -1146,7 +1146,7 @@ void TreeRewriterResult::collectUsedColumns(const ASTPtr & query, bool is_select required.insert(std::min_element(columns.begin(), columns.end())->name); else if (!source_columns.empty()) /// If we have no information about columns sizes, choose a column of minimum size of its data type. - required.insert(ExpressionActions::getSmallestColumn(source_columns)); + required.insert(ExpressionActions::getSmallestColumn(source_columns).name); } else if (is_select && storage_snapshot && !columns_context.has_array_join) { diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 3584c9d4caa..999aa32d850 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -81,6 +81,63 @@ void checkAccessRights(const TableNode & table_node, const Names & column_names, query_context->checkAccess(AccessType::SELECT, storage_id, column_names); } +NameAndTypePair chooseSmallestColumnToReadFromStorage(const StoragePtr & storage, const StorageSnapshotPtr & storage_snapshot) +{ + /** We need to read at least one column to find the number of rows. + * We will find a column with minimum . + * Because it is the column that is cheapest to read. + */ + class ColumnWithSize + { + public: + ColumnWithSize(NameAndTypePair column_, ColumnSize column_size_) + : column(std::move(column_)) + , compressed_size(column_size_.data_compressed) + , uncompressed_size(column_size_.data_uncompressed) + , type_size(column.type->haveMaximumSizeOfValue() ? 
column.type->getMaximumSizeOfValueInMemory() : 100) + { + } + + bool operator<(const ColumnWithSize & rhs) const + { + return std::tie(compressed_size, type_size, uncompressed_size) + < std::tie(rhs.compressed_size, rhs.type_size, rhs.uncompressed_size); + } + + NameAndTypePair column; + size_t compressed_size = 0; + size_t uncompressed_size = 0; + size_t type_size = 0; + }; + + std::vector columns_with_sizes; + + auto column_sizes = storage->getColumnSizes(); + auto column_names_and_types = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::AllPhysical).withSubcolumns()); + + if (!column_sizes.empty()) + { + for (auto & column_name_and_type : column_names_and_types) + { + auto it = column_sizes.find(column_name_and_type.name); + if (it == column_sizes.end()) + continue; + + columns_with_sizes.emplace_back(column_name_and_type, it->second); + } + } + + NameAndTypePair result; + + if (!columns_with_sizes.empty()) + result = std::min_element(columns_with_sizes.begin(), columns_with_sizes.end())->column; + else + /// If we have no information about columns sizes, choose a column of minimum size of its data type + result = ExpressionActions::getSmallestColumn(column_names_and_types); + + return result; +} + QueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression, SelectQueryInfo & select_query_info, const SelectQueryOptions & select_query_options, @@ -127,9 +184,7 @@ QueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression, if (columns_names.empty()) { - auto column_names_and_types = storage_snapshot->getColumns(GetColumnsOptions(GetColumnsOptions::All).withSubcolumns()); - auto additional_column_to_read = column_names_and_types.front(); - + auto additional_column_to_read = chooseSmallestColumnToReadFromStorage(storage, storage_snapshot); const auto & column_identifier = planner_context->getGlobalPlannerContext()->createColumnIdentifier(additional_column_to_read, table_expression); columns_names.push_back(additional_column_to_read.name); table_expression_data.addColumn(additional_column_to_read, column_identifier); diff --git a/src/Processors/QueryPlan/ReadFromMergeTree.cpp b/src/Processors/QueryPlan/ReadFromMergeTree.cpp index 0d8fe84f9d3..e5ad2729e6c 100644 --- a/src/Processors/QueryPlan/ReadFromMergeTree.cpp +++ b/src/Processors/QueryPlan/ReadFromMergeTree.cpp @@ -1023,7 +1023,7 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToReadImpl( if (result.column_names_to_read.empty()) { NamesAndTypesList available_real_columns = metadata_snapshot->getColumns().getAllPhysical(); - result.column_names_to_read.push_back(ExpressionActions::getSmallestColumn(available_real_columns)); + result.column_names_to_read.push_back(ExpressionActions::getSmallestColumn(available_real_columns).name); } // storage_snapshot->check(result.column_names_to_read); diff --git a/src/Storages/HDFS/StorageHDFS.cpp b/src/Storages/HDFS/StorageHDFS.cpp index bbabd523c45..c7008a317c3 100644 --- a/src/Storages/HDFS/StorageHDFS.cpp +++ b/src/Storages/HDFS/StorageHDFS.cpp @@ -599,7 +599,7 @@ Pipe StorageHDFS::read( { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); + fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); columns_description = 
storage_snapshot->getDescriptionForColumns(fetch_columns); block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); diff --git a/src/Storages/StorageFile.cpp b/src/Storages/StorageFile.cpp index 95bd0e7c53e..922754c2d8c 100644 --- a/src/Storages/StorageFile.cpp +++ b/src/Storages/StorageFile.cpp @@ -706,7 +706,7 @@ Pipe StorageFile::read( }); if (fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); + fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); } else diff --git a/src/Storages/StorageMerge.cpp b/src/Storages/StorageMerge.cpp index 79efab9e9d7..3e279b408d7 100644 --- a/src/Storages/StorageMerge.cpp +++ b/src/Storages/StorageMerge.cpp @@ -488,7 +488,7 @@ void ReadFromMerge::initializePipeline(QueryPipelineBuilder & pipeline, const Bu column_names_as_aliases = alias_actions->getRequiredColumns().getNames(); if (column_names_as_aliases.empty()) - column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_metadata_snapshot->getColumns().getAllPhysical())); + column_names_as_aliases.push_back(ExpressionActions::getSmallestColumn(storage_metadata_snapshot->getColumns().getAllPhysical()).name); } auto source_pipeline = createSources( @@ -574,7 +574,7 @@ QueryPipelineBuilderPtr ReadFromMerge::createSources( { /// If there are only virtual columns in query, you must request at least one other column. if (real_column_names.empty()) - real_column_names.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); + real_column_names.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); QueryPlan plan; if (StorageView * view = dynamic_cast(storage.get())) diff --git a/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.reference b/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.reference new file mode 100644 index 00000000000..d00491fd7e5 --- /dev/null +++ b/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.reference @@ -0,0 +1 @@ +1 diff --git a/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.sql b/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.sql new file mode 100644 index 00000000000..105bce6711c --- /dev/null +++ b/tests/queries/0_stateless/02521_analyzer_aggregation_without_column.sql @@ -0,0 +1,15 @@ +SET allow_experimental_analyzer = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + c0 String ALIAS c1, + c1 String, + c2 String, +) ENGINE = MergeTree ORDER BY c1; + +INSERT INTO test_table VALUES ('a', 'b'); + +SELECT MAX(1) FROM test_table; + +DROP TABLE test_table; From 4571c74fdd4524fb4c7b92ff3b21e40765c4c8fb Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Tue, 10 Jan 2023 12:22:33 +0100 Subject: [PATCH 143/262] Fixed build --- src/Storages/StorageS3.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Storages/StorageS3.cpp b/src/Storages/StorageS3.cpp index edd60a364af..9cb992bd24f 100644 --- a/src/Storages/StorageS3.cpp +++ b/src/Storages/StorageS3.cpp @@ -1057,7 +1057,7 @@ Pipe StorageS3::read( { return std::any_of(virtuals.begin(), virtuals.end(), [&](const NameAndTypePair & virtual_col){ return col == virtual_col.name; }); }); if 
(fetch_columns.empty()) - fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical())); + fetch_columns.push_back(ExpressionActions::getSmallestColumn(storage_snapshot->metadata->getColumns().getAllPhysical()).name); columns_description = storage_snapshot->getDescriptionForColumns(fetch_columns); block_for_format = storage_snapshot->getSampleBlockForColumns(columns_description.getNamesOfPhysical()); From ee86afb1256567bb9259106ce1b2116169925a6d Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Tue, 10 Jan 2023 11:14:12 -0500 Subject: [PATCH 144/262] add deltalake --- .../table-functions/deltalake.md | 181 +++--------------- 1 file changed, 24 insertions(+), 157 deletions(-) diff --git a/docs/en/sql-reference/table-functions/deltalake.md b/docs/en/sql-reference/table-functions/deltalake.md index 7e3fffe4d8b..af944d70426 100644 --- a/docs/en/sql-reference/table-functions/deltalake.md +++ b/docs/en/sql-reference/table-functions/deltalake.md @@ -3,182 +3,49 @@ slug: /en/sql-reference/table-functions/deltalake sidebar_label: DeltLake --- -# DeltaLake Table Function +# deltaLake Table Function -Provides a read-only table-like interface to [Delta Lake](https://github.com/delta-io/delta) tables in [Amazon S3](https://aws.amazon.com/s3/). +Provides a read-only table-like interface to [Delta Lake](https://github.com/delta-io/delta) tables in Amazon S3. -For example, to query an existing Delta Lake table named `deltalake` in S3: -```sql -CREATE TABLE dl_hits - ENGINE = DeltaLake('https://clickhouse-public-datasets.s3.amazonaws.com/delta_lake/hits/','',''); - -SHOW TABLES; - -DESCRIBE dl_hits; - -SELECT URL, Referer, UserAgent FROM dl_hits WHERE URL IS NOT NULL LIMIT 10; - -SELECT URL, Referer, UserAgent FROM deltaLake('https://clickhouse-public-datasets.s3.amazonaws.com/delta_lake/hits/') WHERE URL IS NOT NULL LIMIT 10; - -``` - -**Syntax** +## Syntax ``` sql -s3(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) +deltaLake(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) ``` -**Arguments** +## Arguments -- `path` — Bucket url with path to file. Supports following wildcards in readonly mode: `*`, `?`, `{abc,def}` and `{N..M}` where `N`, `M` — numbers, `'abc'`, `'def'` — strings. For more information see [here](../../engines/table-engines/integrations/s3.md#wildcards-in-path). -- `format` — The [format](../../interfaces/formats.md#formats) of the file. -- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. -- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, it will autodetect compression by file extension. +- `path` — Bucket url with path to existing Delta Lake table in S3. +- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. These parameters are optional. If credentials are not specified, they are used from the ClickHouse configuration. For more information see [Using S3 for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3). +- `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file. +- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. +- `compression` — Parameter is optional. 
Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, compression will be autodetected by the file extension. **Returned value** -A table with the specified structure for reading or writing data in the specified file. +A table with the specified structure for reading data in the specified Delta Lake table in S3. **Examples** -Selecting the first two rows from the table from S3 file `https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv`: +Selecting rows from the table in S3 `https://clickhouse-public-datasets.s3.amazonaws.com/delta_lake/hits/`: ``` sql -SELECT * -FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32') -LIMIT 2; +SELECT + URL, + UserAgent +FROM deltaLake('https://clickhouse-public-datasets.s3.amazonaws.com/delta_lake/hits/') +WHERE URL IS NOT NULL +LIMIT 2 ``` -``` text -┌─column1─┬─column2─┬─column3─┐ -│ 1 │ 2 │ 3 │ -│ 3 │ 2 │ 1 │ -└─────────┴─────────┴─────────┘ +``` response +┌─URL───────────────────────────────────────────────────────────────────┬─UserAgent─┐ +│ http://auto.ria.ua/search/index.kz/jobinmoscow/detail/55089/hasimages │ 1 │ +│ http://auto.ria.ua/search/index.kz/jobinmoscow.ru/gosushi │ 1 │ +└───────────────────────────────────────────────────────────────────────┴───────────┘ ``` -The similar but from file with `gzip` compression: - -``` sql -SELECT * -FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/data.csv.gz', 'CSV', 'column1 UInt32, column2 UInt32, column3 UInt32', 'gzip') -LIMIT 2; -``` - -``` text -┌─column1─┬─column2─┬─column3─┐ -│ 1 │ 2 │ 3 │ -│ 3 │ 2 │ 1 │ -└─────────┴─────────┴─────────┘ -``` - -## Usage - -Suppose that we have several files with following URIs on S3: - -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_1.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_2.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_3.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/some_prefix/some_file_4.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_1.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_2.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_3.csv' -- 'https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/another_prefix/some_file_4.csv' - -Count the amount of rows in files ending with numbers from 1 to 3: - -``` sql -SELECT count(*) -FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/{some,another}_prefix/some_file_{1..3}.csv', 'CSV', 'name String, value UInt32') -``` - -``` text -┌─count()─┐ -│ 18 │ -└─────────┘ -``` - -Count the total amount of rows in all files in these two directories: - -``` sql -SELECT count(*) -FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/{some,another}_prefix/*', 'CSV', 'name String, value UInt32') -``` - -``` text -┌─count()─┐ -│ 24 │ -└─────────┘ -``` - -:::warning -If your listing of files contains number ranges with leading zeros, use the construction with braces for each digit separately or use `?`. 
-::: - -Count the total amount of rows in files named `file-000.csv`, `file-001.csv`, … , `file-999.csv`: - -``` sql -SELECT count(*) -FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/big_prefix/file-{000..999}.csv', 'CSV', 'name String, value UInt32'); -``` - -``` text -┌─count()─┐ -│ 12 │ -└─────────┘ -``` - -Insert data into file `test-data.csv.gz`: - -``` sql -INSERT INTO FUNCTION s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') -VALUES ('test-data', 1), ('test-data-2', 2); -``` - -Insert data into file `test-data.csv.gz` from existing table: - -``` sql -INSERT INTO FUNCTION s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip') -SELECT name, value FROM existing_table; -``` - -Glob ** can be used for recursive directory traversal. Consider the below example, it will fetch all files from `my-test-bucket-768` directory recursively: - -``` sql -SELECT * FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**', 'CSV', 'name String, value UInt32', 'gzip'); -``` - -The below get data from all `test-data.csv.gz` files from any folder inside `my-test-bucket` directory recursively: - -``` sql -SELECT * FROM s3('https://clickhouse-public-datasets.s3.amazonaws.com/my-test-bucket-768/**/test-data.csv.gz', 'CSV', 'name String, value UInt32', 'gzip'); -``` - -## Partitioned Write - -If you specify `PARTITION BY` expression when inserting data into `S3` table, a separate file is created for each partition value. Splitting the data into separate files helps to improve reading operations efficiency. - -**Examples** - -1. Using partition ID in a key creates separate files: - -```sql -INSERT INTO TABLE FUNCTION - s3('http://bucket.amazonaws.com/my_bucket/file_{_partition_id}.csv', 'CSV', 'a String, b UInt32, c UInt32') - PARTITION BY a VALUES ('x', 2, 3), ('x', 4, 5), ('y', 11, 12), ('y', 13, 14), ('z', 21, 22), ('z', 23, 24); -``` -As a result, the data is written into three files: `file_x.csv`, `file_y.csv`, and `file_z.csv`. - -2. Using partition ID in a bucket name creates files in different buckets: - -```sql -INSERT INTO TABLE FUNCTION - s3('http://bucket.amazonaws.com/my_bucket_{_partition_id}/file.csv', 'CSV', 'a UInt32, b UInt32, c UInt32') - PARTITION BY a VALUES (1, 2, 3), (1, 4, 5), (10, 11, 12), (10, 13, 14), (20, 21, 22), (20, 23, 24); -``` -As a result, the data is written into three files in different buckets: `my_bucket_1/file.csv`, `my_bucket_10/file.csv`, and `my_bucket_20/file.csv`. 
- **See Also** -- [S3 engine](../../engines/table-engines/integrations/s3.md) +- [deltaLake engine](/docs/en/engines/table-engines/integrations/deltalake.md) -[Original article](https://clickhouse.com/docs/en/sql-reference/table-functions/s3/) From 7cb3e174191e04f288de69e7fc3e4bea16058335 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 10 Jan 2023 16:17:59 +0000 Subject: [PATCH 145/262] black --- tests/ci/sqlancer_check.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py index b286d1a63bc..5b268141484 100644 --- a/tests/ci/sqlancer_check.py +++ b/tests/ci/sqlancer_check.py @@ -30,9 +30,8 @@ IMAGE_NAME = "clickhouse/sqlancer-test" def get_pull_command(docker_image): - return ( - f"docker pull --network=host {docker_image}" - ) + return f"docker pull --network=host {docker_image}" + def get_run_command(download_url, workspace_path, image): return ( From 879ee05218905d1baa1f96e9de2ae4107883c417 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Tue, 10 Jan 2023 11:18:33 -0500 Subject: [PATCH 146/262] fix case of names --- docs/en/engines/table-engines/integrations/deltalake.md | 2 +- docs/en/sql-reference/table-functions/deltalake.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/deltalake.md b/docs/en/engines/table-engines/integrations/deltalake.md index 44407e34e38..5ce044680d4 100644 --- a/docs/en/engines/table-engines/integrations/deltalake.md +++ b/docs/en/engines/table-engines/integrations/deltalake.md @@ -29,5 +29,5 @@ CREATE TABLE deltalake ENGINE=DeltaLake('http://mars-doc-test.s3.amazonaws.com/c ## See also -- [DeltaLake table function](../../../sql-reference/table-functions/deltalake.md) +- [deltaLake table function](../../../sql-reference/table-functions/deltalake.md) diff --git a/docs/en/sql-reference/table-functions/deltalake.md b/docs/en/sql-reference/table-functions/deltalake.md index af944d70426..6468e51d757 100644 --- a/docs/en/sql-reference/table-functions/deltalake.md +++ b/docs/en/sql-reference/table-functions/deltalake.md @@ -47,5 +47,5 @@ LIMIT 2 **See Also** -- [deltaLake engine](/docs/en/engines/table-engines/integrations/deltalake.md) +- [DeltaLake engine](/docs/en/engines/table-engines/integrations/deltalake.md) From da4e9c94309620285afde47e290088fc24882692 Mon Sep 17 00:00:00 2001 From: serxa Date: Tue, 10 Jan 2023 16:35:46 +0000 Subject: [PATCH 147/262] fix SharedMutex build --- src/Common/SharedMutex.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/Common/SharedMutex.cpp b/src/Common/SharedMutex.cpp index 3a69c106800..31525dbd668 100644 --- a/src/Common/SharedMutex.cpp +++ b/src/Common/SharedMutex.cpp @@ -9,6 +9,11 @@ namespace DB { +SharedMutex::SharedMutex() + : state(0) + , waiters(0) +{} + void SharedMutex::lock() { UInt64 value = state.load(); From 71333afd5a9c3d5c62c19066a24c705e5019d382 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 10 Jan 2023 16:40:10 +0000 Subject: [PATCH 148/262] Fixing a test. 
--- src/Interpreters/MutationsInterpreter.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index f8627f1ff85..e95e53db41b 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -523,11 +523,8 @@ void MutationsInterpreter::prepare(bool dry_run) NamesAndTypesList all_columns = columns_desc.getAllPhysical(); /// Add _row_exists column if it is physically present in the part - if (auto part_storage = dynamic_pointer_cast(storage)) - { - if (part_storage->hasLightweightDeletedMask()) + if (source.hasLightweightDeleteMask()) all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN}); - } NameSet updated_columns; bool materialize_ttl_recalculate_only = source.materializeTTLRecalculateOnly(); From 7701dc571e84626c0bf81ee67e1c4daccd5efaf7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 10 Jan 2023 18:19:38 +0100 Subject: [PATCH 149/262] Update MutationsInterpreter.cpp --- src/Interpreters/MutationsInterpreter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Interpreters/MutationsInterpreter.cpp b/src/Interpreters/MutationsInterpreter.cpp index e95e53db41b..cec03863c69 100644 --- a/src/Interpreters/MutationsInterpreter.cpp +++ b/src/Interpreters/MutationsInterpreter.cpp @@ -524,7 +524,7 @@ void MutationsInterpreter::prepare(bool dry_run) /// Add _row_exists column if it is physically present in the part if (source.hasLightweightDeleteMask()) - all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN}); + all_columns.push_back({LightweightDeleteDescription::FILTER_COLUMN}); NameSet updated_columns; bool materialize_ttl_recalculate_only = source.materializeTTLRecalculateOnly(); From 9a81f27fb22a21be79d36be35b1e28e3be334ed7 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Tue, 10 Jan 2023 17:32:26 +0000 Subject: [PATCH 150/262] Fix additional_table_filters with minmax/count projection. --- src/Storages/MergeTree/MergeTreeData.cpp | 4 ++++ .../01710_projection_additional_filters.reference | 1 + .../0_stateless/01710_projection_additional_filters.sql | 6 ++++++ 3 files changed, 11 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 358d527ae28..6bcfe5a35bd 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -5960,6 +5960,10 @@ std::optional MergeTreeData::getQueryProcessingStageWithAgg if (settings.parallel_replicas_count > 1 || settings.max_parallel_replicas > 1) return std::nullopt; + /// Cannot use projections in case of additional filter. 
+ if (query_info.additional_filter_ast) + return std::nullopt; + auto query_ptr = query_info.original_query; auto * select_query = query_ptr->as(); if (!select_query) diff --git a/tests/queries/0_stateless/01710_projection_additional_filters.reference b/tests/queries/0_stateless/01710_projection_additional_filters.reference index 06b63ea6c2f..31b14cf6359 100644 --- a/tests/queries/0_stateless/01710_projection_additional_filters.reference +++ b/tests/queries/0_stateless/01710_projection_additional_filters.reference @@ -1 +1,2 @@ 0 0 0 +3 diff --git a/tests/queries/0_stateless/01710_projection_additional_filters.sql b/tests/queries/0_stateless/01710_projection_additional_filters.sql index 1633b48ba7e..f12d3e2766b 100644 --- a/tests/queries/0_stateless/01710_projection_additional_filters.sql +++ b/tests/queries/0_stateless/01710_projection_additional_filters.sql @@ -7,3 +7,9 @@ INSERT INTO t SELECT number % 10, number FROM numbers(10000); SELECT count(), min(a), max(a) FROM t SETTINGS additional_table_filters = {'t' : '0'}; DROP TABLE t; + +drop table if exists atf_p; +create table atf_p (x UInt64) engine = MergeTree order by tuple(); +insert into atf_p select number from numbers(10); +select count() from atf_p settings additional_table_filters = {'atf_p': 'x <= 2'}; +drop table atf_p; From 6d86b8dd478e71eeeed957bba8f5f4579e89c46e Mon Sep 17 00:00:00 2001 From: alesapin Date: Tue, 10 Jan 2023 19:05:02 +0100 Subject: [PATCH 151/262] Fix flaky azure test --- .../test.py | 83 ++++++++++--------- 1 file changed, 46 insertions(+), 37 deletions(-) diff --git a/tests/integration/test_merge_tree_azure_blob_storage/test.py b/tests/integration/test_merge_tree_azure_blob_storage/test.py index e41529eb385..6c1733fc72f 100644 --- a/tests/integration/test_merge_tree_azure_blob_storage/test.py +++ b/tests/integration/test_merge_tree_azure_blob_storage/test.py @@ -42,10 +42,10 @@ def cluster(): # For inserts there is no guarantee that retries will not result in duplicates. # But it is better to retry anyway because 'Connection was closed by the server' error # happens in fact only for inserts because reads already have build-in retries in code. 
-def azure_query(node, query, try_num=3): +def azure_query(node, query, try_num=3, settings={}): for i in range(try_num): try: - return node.query(query) + return node.query(query, settings=settings) except Exception as ex: retriable_errors = [ "DB::Exception: Azure::Core::Http::TransportException: Connection was closed by the server while trying to read a response" @@ -80,7 +80,7 @@ def create_table(node, table_name, **additional_settings): ORDER BY (dt, id) SETTINGS {",".join((k+"="+repr(v) for k, v in settings.items()))}""" - node.query(f"DROP TABLE IF EXISTS {table_name}") + azure_query(node, f"DROP TABLE IF EXISTS {table_name}") azure_query(node, create_table_statement) assert ( azure_query(node, f"SELECT COUNT(*) FROM {table_name} FORMAT Values") == "(0)" @@ -230,9 +230,9 @@ def test_alter_table_columns(cluster): f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096, -1)}", ) - node.query(f"ALTER TABLE {TABLE_NAME} ADD COLUMN col1 UInt64 DEFAULT 1") + azure_query(node, f"ALTER TABLE {TABLE_NAME} ADD COLUMN col1 UInt64 DEFAULT 1") # To ensure parts have been merged - node.query(f"OPTIMIZE TABLE {TABLE_NAME}") + azure_query(node, f"OPTIMIZE TABLE {TABLE_NAME}") assert ( azure_query(node, f"SELECT sum(col1) FROM {TABLE_NAME} FORMAT Values") @@ -245,7 +245,8 @@ def test_alter_table_columns(cluster): == "(4096)" ) - node.query( + azure_query( + node, f"ALTER TABLE {TABLE_NAME} MODIFY COLUMN col1 String", settings={"mutations_sync": 2}, ) @@ -271,26 +272,27 @@ def test_attach_detach_partition(cluster): == "(8192)" ) - node.query(f"ALTER TABLE {TABLE_NAME} DETACH PARTITION '2020-01-03'") + azure_query(node, f"ALTER TABLE {TABLE_NAME} DETACH PARTITION '2020-01-03'") assert ( azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(4096)" ) - node.query(f"ALTER TABLE {TABLE_NAME} ATTACH PARTITION '2020-01-03'") + azure_query(node, f"ALTER TABLE {TABLE_NAME} ATTACH PARTITION '2020-01-03'") assert ( azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(8192)" ) - node.query(f"ALTER TABLE {TABLE_NAME} DROP PARTITION '2020-01-03'") + azure_query(node, f"ALTER TABLE {TABLE_NAME} DROP PARTITION '2020-01-03'") assert ( azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(4096)" ) - node.query(f"ALTER TABLE {TABLE_NAME} DETACH PARTITION '2020-01-04'") - node.query( + azure_query(node, f"ALTER TABLE {TABLE_NAME} DETACH PARTITION '2020-01-04'") + azure_query( + node, f"ALTER TABLE {TABLE_NAME} DROP DETACHED PARTITION '2020-01-04'", settings={"allow_drop_detached": 1}, ) @@ -314,16 +316,18 @@ def test_move_partition_to_another_disk(cluster): == "(8192)" ) - node.query( - f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-04' TO DISK '{LOCAL_DISK}'" + azure_query( + node, + f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-04' TO DISK '{LOCAL_DISK}'", ) assert ( azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(8192)" ) - node.query( - f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-04' TO DISK '{AZURE_BLOB_STORAGE_DISK}'" + azure_query( + node, + f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-04' TO DISK '{AZURE_BLOB_STORAGE_DISK}'", ) assert ( azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") @@ -344,14 +348,14 @@ def test_table_manipulations(cluster): f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096)}" ) - node.query(f"RENAME TABLE {TABLE_NAME} TO {renamed_table}") + azure_query(node, f"RENAME TABLE {TABLE_NAME} TO {renamed_table}") assert ( 
azure_query(node, f"SELECT count(*) FROM {renamed_table} FORMAT Values") == "(8192)" ) - node.query(f"RENAME TABLE {renamed_table} TO {TABLE_NAME}") - assert node.query(f"CHECK TABLE {TABLE_NAME} FORMAT Values") == "(1)" + azure_query(node, f"RENAME TABLE {renamed_table} TO {TABLE_NAME}") + assert azure_query(node, f"CHECK TABLE {TABLE_NAME} FORMAT Values") == "(1)" node.query(f"DETACH TABLE {TABLE_NAME}") node.query(f"ATTACH TABLE {TABLE_NAME}") @@ -360,7 +364,7 @@ def test_table_manipulations(cluster): == "(8192)" ) - node.query(f"TRUNCATE TABLE {TABLE_NAME}") + azure_query(node, f"TRUNCATE TABLE {TABLE_NAME}") assert ( azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(0)" ) @@ -395,11 +399,13 @@ def test_move_replace_partition_to_another_table(cluster): create_table(node, table_clone_name) - node.query( - f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-03' TO TABLE {table_clone_name}" + azure_query( + node, + f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-03' TO TABLE {table_clone_name}", ) - node.query( - f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-05' TO TABLE {table_clone_name}" + azure_query( + node, + f"ALTER TABLE {TABLE_NAME} MOVE PARTITION '2020-01-05' TO TABLE {table_clone_name}", ) assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" assert ( @@ -428,11 +434,13 @@ def test_move_replace_partition_to_another_table(cluster): == "(1024)" ) - node.query( - f"ALTER TABLE {TABLE_NAME} REPLACE PARTITION '2020-01-03' FROM {table_clone_name}" + azure_query( + node, + f"ALTER TABLE {TABLE_NAME} REPLACE PARTITION '2020-01-03' FROM {table_clone_name}", ) - node.query( - f"ALTER TABLE {TABLE_NAME} REPLACE PARTITION '2020-01-05' FROM {table_clone_name}" + azure_query( + node, + f"ALTER TABLE {TABLE_NAME} REPLACE PARTITION '2020-01-05' FROM {table_clone_name}", ) assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" assert ( @@ -448,16 +456,16 @@ def test_move_replace_partition_to_another_table(cluster): == "(512)" ) - node.query(f"DROP TABLE {table_clone_name} NO DELAY") + azure_query(node, f"DROP TABLE {table_clone_name} NO DELAY") assert azure_query(node, f"SELECT sum(id) FROM {TABLE_NAME} FORMAT Values") == "(0)" assert ( azure_query(node, f"SELECT count(*) FROM {TABLE_NAME} FORMAT Values") == "(1024)" ) - node.query(f"ALTER TABLE {TABLE_NAME} FREEZE") + azure_query(node, f"ALTER TABLE {TABLE_NAME} FREEZE") - node.query(f"DROP TABLE {TABLE_NAME} NO DELAY") + azure_query(node, f"DROP TABLE {TABLE_NAME} NO DELAY") def test_freeze_unfreeze(cluster): @@ -470,20 +478,21 @@ def test_freeze_unfreeze(cluster): azure_query( node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-03', 4096)}" ) - node.query(f"ALTER TABLE {TABLE_NAME} FREEZE WITH NAME '{backup1}'") + azure_query(node, f"ALTER TABLE {TABLE_NAME} FREEZE WITH NAME '{backup1}'") azure_query( node, f"INSERT INTO {TABLE_NAME} VALUES {generate_values('2020-01-04', 4096)}" ) - node.query(f"ALTER TABLE {TABLE_NAME} FREEZE WITH NAME '{backup2}'") + azure_query(node, f"ALTER TABLE {TABLE_NAME} FREEZE WITH NAME '{backup2}'") azure_query(node, f"TRUNCATE TABLE {TABLE_NAME}") # Unfreeze single partition from backup1. - node.query( - f"ALTER TABLE {TABLE_NAME} UNFREEZE PARTITION '2020-01-03' WITH NAME '{backup1}'" + azure_query( + node, + f"ALTER TABLE {TABLE_NAME} UNFREEZE PARTITION '2020-01-03' WITH NAME '{backup1}'", ) # Unfreeze all partitions from backup2. 
- node.query(f"ALTER TABLE {TABLE_NAME} UNFREEZE WITH NAME '{backup2}'") + azure_query(node, f"ALTER TABLE {TABLE_NAME} UNFREEZE WITH NAME '{backup2}'") def test_apply_new_settings(cluster): @@ -524,8 +533,8 @@ def test_big_insert(cluster): node, f"INSERT INTO {TABLE_NAME} {check_query}", ) - assert azure_query(node, f"SELECT * FROM {TABLE_NAME} ORDER BY id") == node.query( - check_query + assert azure_query(node, f"SELECT * FROM {TABLE_NAME} ORDER BY id") == azure_query( + node, check_query ) blob_container_client = cluster.blob_service_client.get_container_client( From 8fa1b070c6a02021b7b3cb858cf7185526720ace Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 10 Jan 2023 19:55:06 +0100 Subject: [PATCH 152/262] minor cleanup in stress/run.sh --- docker/test/stress/run.sh | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/docker/test/stress/run.sh b/docker/test/stress/run.sh index f994e6c2269..7f3e551edbc 100644 --- a/docker/test/stress/run.sh +++ b/docker/test/stress/run.sh @@ -128,18 +128,12 @@ EOL function stop() { + local max_tries="${1:-90}" local pid # Preserve the pid, since the server can hung after the PID will be deleted. pid="$(cat /var/run/clickhouse-server/clickhouse-server.pid)" - clickhouse stop $max_tries --do-not-kill && return - - if [ -n "$1" ] - then - # temporarily disable it in BC check - clickhouse stop --force - return - fi + clickhouse stop --max-tries "$max_tries" --do-not-kill && return # We failed to stop the server with SIGTERM. Maybe it hang, let's collect stacktraces. kill -TERM "$(pidof gdb)" ||: @@ -465,7 +459,8 @@ if [ "$DISABLE_BC_CHECK" -ne "1" ]; then clickhouse stop --force ) - stop 1 + # Use bigger timeout for previous version + stop 300 mv /var/log/clickhouse-server/clickhouse-server.log /var/log/clickhouse-server/clickhouse-server.backward.stress.log # Start new server From 2af17ec1c0c1217ab5fda4cc95248b7e8852ed8b Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Tue, 10 Jan 2023 22:19:58 +0300 Subject: [PATCH 153/262] Update clickhouse-test --- tests/clickhouse-test | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/clickhouse-test b/tests/clickhouse-test index 2709ad1eecf..a5c6b3e0bb8 100755 --- a/tests/clickhouse-test +++ b/tests/clickhouse-test @@ -1544,8 +1544,11 @@ def check_server_started(args): print(" OK") sys.stdout.flush() return True - except (ConnectionError, http.client.ImproperConnectionState): - print(".", end="") + except (ConnectionError, http.client.ImproperConnectionState) as e: + if args.hung_check: + print("Connection error, will retry: ", str(e)) + else: + print(".", end="") sys.stdout.flush() retry_count -= 1 sleep(0.5) From 035dc33707f5663f1612130c078434946932b9c7 Mon Sep 17 00:00:00 2001 From: avogar Date: Tue, 10 Jan 2023 19:22:18 +0000 Subject: [PATCH 154/262] Fix builds --- src/Core/Settings.h | 2 +- src/Core/SettingsEnums.cpp | 2 +- src/Core/SettingsEnums.h | 2 +- src/IO/ReadBufferFromFileBase.cpp | 1 - src/IO/ReadBufferFromFileBase.h | 2 +- 5 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/Core/Settings.h b/src/Core/Settings.h index c9ade637340..b8d46244b6c 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -595,7 +595,7 @@ static constexpr UInt64 operator""_GiB(unsigned long long value) M(UInt64, function_range_max_elements_in_block, 500000000, "Maximum number of values generated by function 'range' per block of data (sum of array sizes for every row in a block, see also 'max_block_size' and 
'min_insert_block_size_rows'). It is a safety threshold.", 0) \ M(ShortCircuitFunctionEvaluation, short_circuit_function_evaluation, ShortCircuitFunctionEvaluation::ENABLE, "Setting for short-circuit function evaluation configuration. Possible values: 'enable' - use short-circuit function evaluation for functions that are suitable for it, 'disable' - disable short-circuit function evaluation, 'force_enable' - use short-circuit function evaluation for all functions.", 0) \ \ - M(StorageFileReadMethod, storage_file_read_method, LocalFSReadMethod::mmap, "Method of reading data from storage file, one of: read, pread, mmap.", 0) \ + M(LocalFSReadMethod, storage_file_read_method, LocalFSReadMethod::mmap, "Method of reading data from storage file, one of: read, pread, mmap.", 0) \ M(String, local_filesystem_read_method, "pread_threadpool", "Method of reading data from local filesystem, one of: read, pread, mmap, pread_threadpool.", 0) \ M(String, remote_filesystem_read_method, "threadpool", "Method of reading data from remote filesystem, one of: read, threadpool.", 0) \ M(Bool, local_filesystem_read_prefetch, false, "Should use prefetching when reading data from local filesystem.", 0) \ diff --git a/src/Core/SettingsEnums.cpp b/src/Core/SettingsEnums.cpp index 26fcff1d410..3d5326ec0d6 100644 --- a/src/Core/SettingsEnums.cpp +++ b/src/Core/SettingsEnums.cpp @@ -163,7 +163,7 @@ IMPLEMENT_SETTING_ENUM(Dialect, ErrorCodes::BAD_ARGUMENTS, {{"clickhouse", Dialect::clickhouse}, {"kusto", Dialect::kusto}}) -IMPLEMENT_SETTING_ENUM(StorageFileReadMethod, ErrorCodes::BAD_ARGUMENTS, +IMPLEMENT_SETTING_ENUM(LocalFSReadMethod, ErrorCodes::BAD_ARGUMENTS, {{"mmap", LocalFSReadMethod::mmap}, {"pread", LocalFSReadMethod::pread}, {"read", LocalFSReadMethod::read}}) diff --git a/src/Core/SettingsEnums.h b/src/Core/SettingsEnums.h index 236bc7e9b10..8c66c7926a2 100644 --- a/src/Core/SettingsEnums.h +++ b/src/Core/SettingsEnums.h @@ -193,5 +193,5 @@ enum class Dialect DECLARE_SETTING_ENUM(Dialect) -DECLARE_SETTING_ENUM_WITH_RENAME(StorageFileReadMethod, LocalFSReadMethod) +DECLARE_SETTING_ENUM(LocalFSReadMethod) } diff --git a/src/IO/ReadBufferFromFileBase.cpp b/src/IO/ReadBufferFromFileBase.cpp index 7ea16d679bc..d94cf12294b 100644 --- a/src/IO/ReadBufferFromFileBase.cpp +++ b/src/IO/ReadBufferFromFileBase.cpp @@ -1,4 +1,3 @@ -#include #include #include diff --git a/src/IO/ReadBufferFromFileBase.h b/src/IO/ReadBufferFromFileBase.h index cc4a131b10b..b77db29bc23 100644 --- a/src/IO/ReadBufferFromFileBase.h +++ b/src/IO/ReadBufferFromFileBase.h @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include #include From 5cf1b1f61df98fb13b84db39a2e159b80061e1a6 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Tue, 10 Jan 2023 16:09:59 -0500 Subject: [PATCH 155/262] feedback --- docs/en/engines/table-engines/integrations/deltalake.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/deltalake.md b/docs/en/engines/table-engines/integrations/deltalake.md index 5ce044680d4..eb4d8e934a7 100644 --- a/docs/en/engines/table-engines/integrations/deltalake.md +++ b/docs/en/engines/table-engines/integrations/deltalake.md @@ -5,7 +5,7 @@ sidebar_label: DeltaLake # DeltaLake Table Engine -This engine provides a read-only integration with existing Delta Lake tables in Amazon S3. +This engine provides a read-only integration with existing [Delta Lake](https://github.com/delta-io/delta) tables in Amazon S3. 
## Create Table From 75c04945bd5cd80328837a3ed3b9b28efd43f103 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Tue, 10 Jan 2023 16:18:50 -0500 Subject: [PATCH 156/262] spelling --- docs/en/sql-reference/table-functions/deltalake.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/deltalake.md b/docs/en/sql-reference/table-functions/deltalake.md index 6468e51d757..10e7c20e17a 100644 --- a/docs/en/sql-reference/table-functions/deltalake.md +++ b/docs/en/sql-reference/table-functions/deltalake.md @@ -1,6 +1,6 @@ --- slug: /en/sql-reference/table-functions/deltalake -sidebar_label: DeltLake +sidebar_label: DeltaLake --- # deltaLake Table Function From 563e0e76f929e1366971ab895db7230657eaf802 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Tue, 10 Jan 2023 16:59:34 -0500 Subject: [PATCH 157/262] init --- .../table-engines/integrations/hudi.md | 33 +++++++++++++++++++ docs/en/sql-reference/table-functions/hudi.md | 31 +++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 docs/en/engines/table-engines/integrations/hudi.md create mode 100644 docs/en/sql-reference/table-functions/hudi.md diff --git a/docs/en/engines/table-engines/integrations/hudi.md b/docs/en/engines/table-engines/integrations/hudi.md new file mode 100644 index 00000000000..6da1634ba5a --- /dev/null +++ b/docs/en/engines/table-engines/integrations/hudi.md @@ -0,0 +1,33 @@ +--- +slug: /en/engines/table-engines/integrations/hudi +sidebar_label: Hudi +--- + +# Hudi Table Engine + +This engine provides a read-only integration with existing Apache [Hudi](https://hudi.apache.org/) tables in Amazon S3. + +## Create Table + +Note that the Hudi table must already exist in S3, this command does not take DDL parameters to create a new table. + +``` sql +CREATE TABLE hudi_table + ENGINE = Hudi(path, [aws_access_key_id, aws_secret_access_key,]) +``` + +**Engine parameters** + +- `path` — Bucket url with the path to an existing Hudi table. +- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3). + +**Example** + +```sql +CREATE TABLE hudi_table ENGINE=Hudi('http://mars-doc-test.s3.amazonaws.com/clickhouse-bucket-3/test_table/', 'ABC123', 'Abc+123') +``` + +## See also + +- [hudi table function](/docs/en/sql-reference/table-functions/hudi.md) + diff --git a/docs/en/sql-reference/table-functions/hudi.md b/docs/en/sql-reference/table-functions/hudi.md new file mode 100644 index 00000000000..c1ccd0cda2f --- /dev/null +++ b/docs/en/sql-reference/table-functions/hudi.md @@ -0,0 +1,31 @@ +--- +slug: /en/sql-reference/table-functions/hudi +sidebar_label: Hudi +--- + +# hudi Table Function + +Provides a read-only table-like interface to Apache [Hudi](https://hudi.apache.org/) tables in Amazon S3. + +## Syntax + +``` sql +hudi(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) +``` + +## Arguments + +- `path` — Bucket url with the path to an existing Hudi table in S3. +- `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. These parameters are optional. 
If credentials are not specified, they are used from the ClickHouse configuration. For more information see [Using S3 for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3). +- `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file. +- `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. +- `compression` — Parameter is optional. Supported values: `none`, `gzip/gz`, `brotli/br`, `xz/LZMA`, `zstd/zst`. By default, compression will be autodetected by the file extension. + +**Returned value** + +A table with the specified structure for reading data in the specified Hudi table in S3. + +**See Also** + +- [Hudi engine](/docs/en/engines/table-engines/integrations/hudi.md) + From e75df5e76b9f4e5dcdd139e56e3cc2a282bc49a8 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 10 Jan 2023 22:14:54 +0000 Subject: [PATCH 158/262] Test + fix some review comments --- src/Storages/MergeTree/MergeTreeData.cpp | 48 ++++++++++--------- src/Storages/MergeTree/MergeTreeSettings.h | 1 + ...rrect_dealy_for_insert_bug_44902.reference | 6 +++ ...21_incorrect_dealy_for_insert_bug_44902.sh | 24 ++++++++++ 4 files changed, 57 insertions(+), 22 deletions(-) create mode 100644 tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.reference create mode 100755 tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.sh diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 89d90011398..996da3f4b7e 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3739,7 +3739,7 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const Contex toString(parts_count_in_total)); } - size_t outdated_parts_over_threshold = [&]() -> size_t + size_t outdated_parts_over_threshold = 0; { size_t outdated_parts_count_in_partition = 0; if (settings->inactive_parts_to_throw_insert > 0 || settings->inactive_parts_to_delay_insert > 0) @@ -3754,10 +3754,8 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const Contex outdated_parts_count_in_partition); } if (settings->inactive_parts_to_delay_insert > 0 && outdated_parts_count_in_partition >= settings->inactive_parts_to_delay_insert) - return outdated_parts_count_in_partition - settings->inactive_parts_to_delay_insert + 1; - - return 0; - }(); + outdated_parts_over_threshold = outdated_parts_count_in_partition - settings->inactive_parts_to_delay_insert + 1; + } auto [parts_count_in_partition, size_of_partition] = getMaxPartsCountAndSizeForPartition(); size_t average_part_size = parts_count_in_partition ? size_of_partition / parts_count_in_partition : 0; @@ -3765,50 +3763,56 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const Contex = query_settings.parts_to_delay_insert ? query_settings.parts_to_delay_insert : settings->parts_to_delay_insert; const auto active_parts_to_throw_insert = query_settings.parts_to_throw_insert ? 
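A usage sketch for the `hudi` table function documented above, in the same style as the deltaLake example: the bucket URL is the illustrative one from the engine page, and only the mandatory path argument is passed, so credentials are taken from the configuration.

``` sql
SELECT count(*)
FROM hudi('http://mars-doc-test.s3.amazonaws.com/clickhouse-bucket-3/test_table/')
```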
query_settings.parts_to_throw_insert : settings->parts_to_throw_insert; - size_t active_parts_over_threshold = [&](size_t parts_count) -> size_t + size_t active_parts_over_threshold = 0; { bool parts_are_large_enough_in_average = settings->max_avg_part_size_for_too_many_parts && average_part_size > settings->max_avg_part_size_for_too_many_parts; - if (parts_count >= active_parts_to_throw_insert && !parts_are_large_enough_in_average) + if (parts_count_in_partition >= active_parts_to_throw_insert && !parts_are_large_enough_in_average) { ProfileEvents::increment(ProfileEvents::RejectedInserts); throw Exception( ErrorCodes::TOO_MANY_PARTS, "Too many parts ({} with average size of {}). Merges are processing significantly slower than inserts", - parts_count, + parts_count_in_partition, ReadableSize(average_part_size)); } - if (active_parts_to_delay_insert > 0 && parts_count >= active_parts_to_delay_insert && !parts_are_large_enough_in_average) + if (active_parts_to_delay_insert > 0 && parts_count_in_partition >= active_parts_to_delay_insert + && !parts_are_large_enough_in_average) /// if parts_count == parts_to_delay_insert -> we're 1 part over threshold - return parts_count - active_parts_to_delay_insert + 1; - - return 0; - }(parts_count_in_partition); + active_parts_over_threshold = parts_count_in_partition - active_parts_to_delay_insert + 1; + } /// no need for delay if (!active_parts_over_threshold && !outdated_parts_over_threshold) return; - const UInt64 delay_milliseconds = [&]() -> UInt64 + UInt64 delay_milliseconds = 0; { - size_t parts_over_threshold = std::max(active_parts_over_threshold, outdated_parts_over_threshold); + size_t parts_over_threshold = 0; size_t allowed_parts_over_threshold = 1; if (active_parts_over_threshold >= outdated_parts_over_threshold) + { + parts_over_threshold = active_parts_over_threshold; allowed_parts_over_threshold = active_parts_to_throw_insert - active_parts_to_delay_insert; + } else - allowed_parts_over_threshold - = (settings->inactive_parts_to_throw_insert > 0 - ? settings->inactive_parts_to_throw_insert - settings->inactive_parts_to_delay_insert - : outdated_parts_over_threshold); + { + parts_over_threshold = outdated_parts_over_threshold; + allowed_parts_over_threshold = outdated_parts_over_threshold; + if (settings->inactive_parts_to_throw_insert > 0) + allowed_parts_over_threshold = settings->inactive_parts_to_throw_insert - settings->inactive_parts_to_delay_insert; + } - chassert(parts_over_threshold <= allowed_parts_over_threshold); + chassert(allowed_parts_over_threshold > 0 && parts_over_threshold <= allowed_parts_over_threshold); const UInt64 max_delay_milliseconds = (settings->max_delay_to_insert > 0 ? 
settings->max_delay_to_insert * 1000 : 1000); double delay_factor = static_cast(parts_over_threshold) / allowed_parts_over_threshold; + UInt64 min_delay_milliseconds = settings->min_delay_to_insert_ms; /// min() as a save guard here - return std::min(max_delay_milliseconds, static_cast(max_delay_milliseconds * delay_factor)); - }(); + delay_milliseconds = std::max( + min_delay_milliseconds, std::min(max_delay_milliseconds, static_cast(max_delay_milliseconds * delay_factor))); + } ProfileEvents::increment(ProfileEvents::DelayedInserts); ProfileEvents::increment(ProfileEvents::DelayedInsertsMilliseconds, delay_milliseconds); diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 37e9bf5779c..0b8188f67c7 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -74,6 +74,7 @@ struct Settings; M(UInt64, inactive_parts_to_throw_insert, 0, "If more than this number inactive parts in single partition, throw 'Too many inactive parts ...' exception.", 0) \ M(UInt64, max_avg_part_size_for_too_many_parts, 10ULL * 1024 * 1024 * 1024, "The 'too many parts' check according to 'parts_to_delay_insert' and 'parts_to_throw_insert' will be active only if the average part size (in the relevant partition) is not larger than the specified threshold. If it is larger than the specified threshold, the INSERTs will be neither delayed or rejected. This allows to have hundreds of terabytes in a single table on a single server if the parts are successfully merged to larger parts. This does not affect the thresholds on inactive parts or total parts.", 0) \ M(UInt64, max_delay_to_insert, 1, "Max delay of inserting data into MergeTree table in seconds, if there are a lot of unmerged parts in single partition.", 0) \ + M(UInt64, min_delay_to_insert_ms, 10, "Min delay of inserting data into MergeTree table in milliseconds, if there are a lot of unmerged parts in single partition.", 0) \ M(UInt64, max_parts_in_total, 100000, "If more than this number active parts in all partitions in total, throw 'Too many parts ...' exception.", 0) \ \ /* Part removal settings. */ \ diff --git a/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.reference b/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.reference new file mode 100644 index 00000000000..c104ff58aff --- /dev/null +++ b/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.reference @@ -0,0 +1,6 @@ +0 +300 +500 +750 +1000 +TOO_MANY_PARTS diff --git a/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.sh b/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.sh new file mode 100755 index 00000000000..6cbd77b262a --- /dev/null +++ b/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CUR_DIR"/../shell_config.sh + +$CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS test_02521_insert_delay" +# Create MergeTree with settings which allow to insert maximum 5 parts, on 6th it'll throw TOO_MANY_PARTS +$CLICKHOUSE_CLIENT -q "CREATE TABLE test_02521_insert_delay (key UInt32, value String) Engine=MergeTree() ORDER BY tuple() SETTINGS parts_to_delay_insert=1, parts_to_throw_insert=5, max_delay_to_insert=1, min_delay_to_insert_ms=300" +$CLICKHOUSE_CLIENT -q "SYSTEM STOP MERGES test_02521_insert_delay" + +# Every delay is increased by max_delay_to_insert*1000/(parts_to_throw_insert - parts_to_delay_insert + 1), here it's 250ms +# 0-indexed INSERT - no delay, 1-indexed INSERT - 300ms instead of 250ms due to min_delay_to_insert_ms +for i in {0..4} +do + query_id="${CLICKHOUSE_DATABASE}_02521_${i}_$RANDOM$RANDOM" + $CLICKHOUSE_CLIENT --query_id="$query_id" -q "INSERT INTO test_02521_insert_delay SELECT number, toString(number) FROM numbers(${i}, 1)" + $CLICKHOUSE_CLIENT -q "system flush logs" + $CLICKHOUSE_CLIENT --param_query_id="$query_id" -q "select ProfileEvents['DelayedInsertsMilliseconds'] as delay from system.query_log where event_date >= yesterday() and query_id = {query_id:String} order by delay desc limit 1" +done + +$CLICKHOUSE_CLIENT -q "INSERT INTO test_02521_insert_delay VALUES(0, 'This query throws error')" 2>&1 | grep -o 'TOO_MANY_PARTS' + +$CLICKHOUSE_CLIENT -q "DROP TABLE test_02521_insert_delay" From a704cf804a222da2c9c1b9a2219659e7fbe7cff7 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Tue, 10 Jan 2023 22:17:28 +0000 Subject: [PATCH 159/262] fix --- tests/ci/sqlancer_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ci/sqlancer_check.py b/tests/ci/sqlancer_check.py index 5b268141484..0e328122b9d 100644 --- a/tests/ci/sqlancer_check.py +++ b/tests/ci/sqlancer_check.py @@ -30,7 +30,7 @@ IMAGE_NAME = "clickhouse/sqlancer-test" def get_pull_command(docker_image): - return f"docker pull --network=host {docker_image}" + return f"docker pull {docker_image}" def get_run_command(download_url, workspace_path, image): From 6027b8ee4df1d2327a9b97d712fbd4e9e12fb2a8 Mon Sep 17 00:00:00 2001 From: Dmitry Novik Date: Tue, 10 Jan 2023 23:23:07 +0000 Subject: [PATCH 160/262] Remove redundant code --- .../Passes/AggregateFunctionsArithmericOperationsPass.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index c25cff117d2..01072e0b3fc 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -156,7 +156,6 @@ private: { argument->getResultType() }, function_aggregate_function->getParameters(), properties); - auto function_result_type = aggregate_function->getReturnType(); function_node.resolveAsAggregateFunction(std::move(aggregate_function)); } From 9b16b3f48f09d949881ef00ad28a6f466662e334 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 11 Jan 2023 01:03:01 +0100 Subject: [PATCH 161/262] Fix potential memory leak --- contrib/azure | 2 +- src/CMakeLists.txt | 5 ++++ src/Disks/tests/gtest_azure_xml_reader.cpp | 27 ++++++++++++++++++++++ 3 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 src/Disks/tests/gtest_azure_xml_reader.cpp diff --git a/contrib/azure b/contrib/azure index ef75afc075f..000f7ee8fd2 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ 
-Subproject commit ef75afc075fc71fbcd8fe28dcda3794ae265fd1c +Subproject commit 000f7ee8fd22fa69e5ddb8fd6fd36b12c7a1bc2f diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d8a7dba72ac..b20b4a860d3 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -606,5 +606,10 @@ if (ENABLE_TESTS) target_link_libraries(unit_tests_dbms PRIVATE ch_contrib::yaml_cpp) endif() + if (TARGET ch_contrib::azure_sdk) + target_link_libraries(unit_tests_dbms PRIVATE ch_contrib::azure_sdk) + endif() + + add_check(unit_tests_dbms) endif () diff --git a/src/Disks/tests/gtest_azure_xml_reader.cpp b/src/Disks/tests/gtest_azure_xml_reader.cpp new file mode 100644 index 00000000000..3caf34f938a --- /dev/null +++ b/src/Disks/tests/gtest_azure_xml_reader.cpp @@ -0,0 +1,27 @@ +#include +#include +#include + +#include "config.h" + +#if USE_AZURE_BLOB_STORAGE + +#include +#include + +#include + + +TEST(AzureXMLWrapper, TestLeak) +{ + std::string str = "world"; + + { + Azure::Storage::_internal::XmlReader reader(str.c_str(), str.length()); + reader.Read(); + Azure::Storage::_internal::XmlReader reader2(std::move(reader)); + Azure::Storage::_internal::XmlReader reader3 = std::move(reader2); + } +} + +#endif From be4d79e92426e7d0ac899698986d2e08d7d07c3a Mon Sep 17 00:00:00 2001 From: Zhiguo Zhou Date: Wed, 14 Dec 2022 13:25:05 +0800 Subject: [PATCH 162/262] Deallocate memory of profile events out of critical section To further shrink the critical section for releasing memory of the profile events (ProfileEventsCountersAndMemory), this commit puts the dealloaction out of the critical section while keeping the memory move under lock. This change could mitigate the contention for ThreadGroupStatus::mutex. --- src/Interpreters/ThreadStatusExt.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/ThreadStatusExt.cpp b/src/Interpreters/ThreadStatusExt.cpp index e96a8a4b188..4b757e0be7e 100644 --- a/src/Interpreters/ThreadStatusExt.cpp +++ b/src/Interpreters/ThreadStatusExt.cpp @@ -342,11 +342,14 @@ void ThreadStatus::detachQuery(bool exit_if_already_detached, bool thread_exits) query_id.clear(); query_context.reset(); + /// The memory of thread_group->finished_threads_counters_memory is temporarily moved to this vector, which is deallocated out of critical section. 
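The approach described in the commit message above (take the finished counters out of the shared container while holding the lock, but let the memory actually be released only after the lock is dropped) can be illustrated with a minimal standalone sketch; the class and member names here are illustrative stand-ins, not the real ClickHouse types:

```cpp
#include <mutex>
#include <vector>

struct Counters { /* per-thread profile counters and memory stats */ };

class ThreadGroupSketch
{
    std::mutex mutex;
    std::vector<Counters> finished; // guarded by mutex

public:
    void drainFinished()
    {
        std::vector<Counters> move_to_temp; // destroyed after the lock is released
        {
            std::lock_guard<std::mutex> guard(mutex);
            move_to_temp = std::move(finished); // only a cheap buffer handoff happens under the lock
        }
        // move_to_temp goes out of scope here, so the deallocation
        // no longer contends on the mutex
    }
};
```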
+ std::vector move_to_temp; + /// Avoid leaking of ThreadGroupStatus::finished_threads_counters_memory /// (this is in case someone uses system thread but did not call getProfileEventsCountersAndMemoryForThreads()) { std::lock_guard guard(thread_group->mutex); - auto stats = std::move(thread_group->finished_threads_counters_memory); + move_to_temp = std::move(thread_group->finished_threads_counters_memory); } thread_group.reset(); From 67943676630fb45e8c68b5dfbe199c80031a2cbb Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 11 Jan 2023 10:38:17 +0800 Subject: [PATCH 163/262] fix uts --- .../0_stateless/00189_time_zones_long.sql | 10 +- ...00921_datetime64_compatibility_long.python | 136 +++++++++--------- 2 files changed, 77 insertions(+), 69 deletions(-) diff --git a/tests/queries/0_stateless/00189_time_zones_long.sql b/tests/queries/0_stateless/00189_time_zones_long.sql index cf1b9e9ae1d..5760f6c0447 100644 --- a/tests/queries/0_stateless/00189_time_zones_long.sql +++ b/tests/queries/0_stateless/00189_time_zones_long.sql @@ -120,11 +120,11 @@ SELECT toDayOfMonth(toDateTime(1412106600), 'Pacific/Pitcairn'); /* toDayOfWeek */ SELECT 'toDayOfWeek'; -SELECT toDayOfWeek(toDateTime(1412106600), 'Asia/Istanbul'); -SELECT toDayOfWeek(toDateTime(1412106600), 'Europe/Paris'); -SELECT toDayOfWeek(toDateTime(1412106600), 'Europe/London'); -SELECT toDayOfWeek(toDateTime(1412106600), 'Asia/Tokyo'); -SELECT toDayOfWeek(toDateTime(1412106600), 'Pacific/Pitcairn'); +SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Asia/Istanbul'); +SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Europe/Paris'); +SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Europe/London'); +SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Asia/Tokyo'); +SELECT toDayOfWeek(toDateTime(1412106600), 0, 'Pacific/Pitcairn'); /* toHour */ diff --git a/tests/queries/0_stateless/00921_datetime64_compatibility_long.python b/tests/queries/0_stateless/00921_datetime64_compatibility_long.python index e3cd7ee6d36..2706c0f5b12 100644 --- a/tests/queries/0_stateless/00921_datetime64_compatibility_long.python +++ b/tests/queries/0_stateless/00921_datetime64_compatibility_long.python @@ -7,14 +7,14 @@ import sys import argparse # Create SQL statement to verify dateTime64 is accepted as argument to functions taking DateTime. -FUNCTIONS=""" +FUNCTIONS = """ toTimeZone(N, 'UTC') toYear(N, 'Asia/Istanbul') toQuarter(N, 'Asia/Istanbul') toMonth(N, 'Asia/Istanbul') toDayOfYear(N, 'Asia/Istanbul') toDayOfMonth(N, 'Asia/Istanbul') -toDayOfWeek(N, 'Asia/Istanbul') +toDayOfWeek(N, 0, 'Asia/Istanbul') toHour(N, 'Asia/Istanbul') toMinute(N, 'Asia/Istanbul') toSecond(N, 'Asia/Istanbul') @@ -90,68 +90,51 @@ formatDateTime(N, '%C %d %D %e %F %H %I %j %m %M %p %R %S %T %u %V %w %y %Y %%', extra_ops = [ # With same type: ( - ['N {op} N'], + ["N {op} N"], { - 'op': - [ - '- ', # does not work, but should it? - '+ ', # does not work, but should it? - '!=', '==', # equality and inequality supposed to take sub-second part in account - '< ', - '<=', - '> ', - '>=' + "op": [ + "- ", # does not work, but should it? + "+ ", # does not work, but should it? + "!=", + "==", # equality and inequality supposed to take sub-second part in account + "< ", + "<=", + "> ", + ">=", ] - } + }, ), # With other DateTime types: ( - [ - 'N {op} {arg}', - '{arg} {op} N' - ], + ["N {op} {arg}", "{arg} {op} N"], { - 'op': - [ - '-', # does not work, but should it? - '!=', '==', + "op": [ + "-", # does not work, but should it? 
+ "!=", + "==", # these are naturally expected to work, but they don't: - '< ', - '<=', - '> ', - '>=' + "< ", + "<=", + "> ", + ">=", ], - 'arg': ['DT', 'D', 'DT64'], - } + "arg": ["DT", "D", "DT64"], + }, ), # With arithmetic types ( - [ - 'N {op} {arg}', - '{arg} {op} N' - ], + ["N {op} {arg}", "{arg} {op} N"], { - 'op': - [ - '+ ', - '- ', - '==', - '!=', - '< ', - '<=', - '> ', - '>=' - ], - 'arg': - [ - 'toUInt8(1)', - 'toInt8(-1)', - 'toUInt16(1)', - 'toInt16(-1)', - 'toUInt32(1)', - 'toInt32(-1)', - 'toUInt64(1)', - 'toInt64(-1)' + "op": ["+ ", "- ", "==", "!=", "< ", "<=", "> ", ">="], + "arg": [ + "toUInt8(1)", + "toInt8(-1)", + "toUInt16(1)", + "toInt16(-1)", + "toUInt32(1)", + "toInt32(-1)", + "toUInt64(1)", + "toInt64(-1)", ], }, ), @@ -167,14 +150,17 @@ for funcs, args in extra_ops: # filter out empty lines and commented out lines COMMENTED_OUT_LINE_RE = re.compile(r"^\s*#") -FUNCTIONS = list([f for f in FUNCTIONS if len(f) != 0 and COMMENTED_OUT_LINE_RE.match(f) == None]) -TYPES = ['D', 'DT', 'DT64'] +FUNCTIONS = list( + [f for f in FUNCTIONS if len(f) != 0 and COMMENTED_OUT_LINE_RE.match(f) == None] +) +TYPES = ["D", "DT", "DT64"] + def escape_string(s): if sys.version_info[0] > 2: - return s.encode('unicode_escape').decode('utf-8').replace("'", "\\'") + return s.encode("unicode_escape").decode("utf-8").replace("'", "\\'") else: - return s.encode('string-escape').decode('utf-8') + return s.encode("string-escape").decode("utf-8") def execute_functions_for_types(functions, types): @@ -186,18 +172,39 @@ def execute_functions_for_types(functions, types): WITH \ toDateTime64('2019-09-16 19:20:11.234', 3, 'Europe/Minsk') as DT64, \ toDateTime('2019-09-16 19:20:11', 'Europe/Minsk') as DT, \ -toDate('2019-09-16') as D, {X} as N".format(X=dt) - print(("""{prologue} SELECT toTypeName(r), {func} as r FORMAT CSV;""".format(prologue=prologue, func=func))) +toDate('2019-09-16') as D, {X} as N".format( + X=dt + ) + print( + ( + """{prologue} SELECT toTypeName(r), {func} as r FORMAT CSV;""".format( + prologue=prologue, func=func + ) + ) + ) print("""SELECT '------------------------------------------';""") + def main(): def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('--functions_re', type=re.compile, help="RE to enable functions", default=None) - parser.add_argument('--types_re', - type=lambda s: re.compile('^(' + s + ')$'), - help="RE to enable types, supported types: " + ",".join(TYPES), default=None) - parser.add_argument('--list_functions', action='store_true', help="List all functions to be tested and exit") + parser.add_argument( + "--functions_re", + type=re.compile, + help="RE to enable functions", + default=None, + ) + parser.add_argument( + "--types_re", + type=lambda s: re.compile("^(" + s + ")$"), + help="RE to enable types, supported types: " + ",".join(TYPES), + default=None, + ) + parser.add_argument( + "--list_functions", + action="store_true", + help="List all functions to be tested and exit", + ) return parser.parse_args() args = parse_args() @@ -223,5 +230,6 @@ def main(): execute_functions_for_types(functions, types) -if __name__ == '__main__': + +if __name__ == "__main__": exit(main()) From 1167ae47e95fcc80da5b793c1dddf004b0e9ef0b Mon Sep 17 00:00:00 2001 From: MeenaRenganathan22 Date: Tue, 10 Jan 2023 20:56:16 -0800 Subject: [PATCH 164/262] Changes to support the CRC32 in PowerPC to address the WeakHash collision issue. 
Update the reference to support the hash values based on the specific platform --- .gitmodules | 3 + contrib/CMakeLists.txt | 1 + contrib/crc32-vpmsum | 1 + contrib/crc32-vpmsum-cmake/CMakeLists.txt | 12 + contrib/crc32-vpmsum-cmake/README.md | 8 + contrib/crc32-vpmsum-cmake/crc32_constants.h | 1206 +++++++++++++++++ contrib/crc32-vpmsum-cmake/vec_crc32.h | 29 + src/CMakeLists.txt | 4 + src/Common/HashTable/Hash.h | 8 + src/Functions/CMakeLists.txt | 4 + src/Functions/FunctionsStringHash.cpp | 12 + src/Functions/FunctionsStringSimilarity.cpp | 6 + .../01016_simhash_minhash.ppc64le.reference | 148 ++ .../0_stateless/01016_simhash_minhash.python | 394 ++++++ .../0_stateless/01016_simhash_minhash.sh | 8 + .../0_stateless/01016_simhash_minhash.sql | 115 -- ...=> 01016_simhash_minhash.x86_64.reference} | 0 17 files changed, 1844 insertions(+), 115 deletions(-) create mode 160000 contrib/crc32-vpmsum create mode 100644 contrib/crc32-vpmsum-cmake/CMakeLists.txt create mode 100644 contrib/crc32-vpmsum-cmake/README.md create mode 100644 contrib/crc32-vpmsum-cmake/crc32_constants.h create mode 100644 contrib/crc32-vpmsum-cmake/vec_crc32.h create mode 100644 tests/queries/0_stateless/01016_simhash_minhash.ppc64le.reference create mode 100644 tests/queries/0_stateless/01016_simhash_minhash.python create mode 100755 tests/queries/0_stateless/01016_simhash_minhash.sh delete mode 100644 tests/queries/0_stateless/01016_simhash_minhash.sql rename tests/queries/0_stateless/{01016_simhash_minhash.reference => 01016_simhash_minhash.x86_64.reference} (100%) diff --git a/.gitmodules b/.gitmodules index 26824cb57ff..b4673f113b7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -327,3 +327,6 @@ [submodule "contrib/aws-s2n-tls"] path = contrib/aws-s2n-tls url = https://github.com/ClickHouse/s2n-tls +[submodule "contrib/crc32-vpmsum"] + path = contrib/crc32-vpmsum + url = https://github.com/antonblanchard/crc32-vpmsum.git diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 27b4a7ddb5c..f5d1315cc02 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -55,6 +55,7 @@ else () endif () add_contrib (miniselect-cmake miniselect) add_contrib (pdqsort-cmake pdqsort) +add_contrib (crc32-vpmsum-cmake crc32-vpmsum) add_contrib (sparsehash-c11-cmake sparsehash-c11) add_contrib (abseil-cpp-cmake abseil-cpp) add_contrib (magic-enum-cmake magic_enum) diff --git a/contrib/crc32-vpmsum b/contrib/crc32-vpmsum new file mode 160000 index 00000000000..45215543938 --- /dev/null +++ b/contrib/crc32-vpmsum @@ -0,0 +1 @@ +Subproject commit 452155439389311fc7d143621eaf56a258e02476 diff --git a/contrib/crc32-vpmsum-cmake/CMakeLists.txt b/contrib/crc32-vpmsum-cmake/CMakeLists.txt new file mode 100644 index 00000000000..bb7d5618410 --- /dev/null +++ b/contrib/crc32-vpmsum-cmake/CMakeLists.txt @@ -0,0 +1,12 @@ +if (NOT ARCH_PPC64LE) + message(STATUS "crc32-vpmsum library is only supported on ppc64le") + return() +endif() + +SET(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/crc32-vpmsum") + +add_library(_crc32-vpmsum + "${LIBRARY_DIR}/vec_crc32.c" + ) +target_include_directories(_crc32-vpmsum SYSTEM BEFORE PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}") +add_library(ch_contrib::crc32-vpmsum ALIAS _crc32-vpmsum) diff --git a/contrib/crc32-vpmsum-cmake/README.md b/contrib/crc32-vpmsum-cmake/README.md new file mode 100644 index 00000000000..9ea8133e331 --- /dev/null +++ b/contrib/crc32-vpmsum-cmake/README.md @@ -0,0 +1,8 @@ +# To Generate crc32_constants.h + +- Run make file in `../crc32-vpmsum` diretory using folling options and CRC 
polynomial. These options should use the same polynomial and order used by intel intrinisic functions +```bash +make crc32_constants.h CRC="0x11EDC6F41" OPTIONS="-x -r -c" +``` +- move the generated `crc32_constants.h` into this directory +- To understand more about this go here: https://masterchef2209.wordpress.com/2020/06/17/guide-to-intel-sse4-2-crc-intrinisics-implementation-for-simde/ diff --git a/contrib/crc32-vpmsum-cmake/crc32_constants.h b/contrib/crc32-vpmsum-cmake/crc32_constants.h new file mode 100644 index 00000000000..aea525c9038 --- /dev/null +++ b/contrib/crc32-vpmsum-cmake/crc32_constants.h @@ -0,0 +1,1206 @@ +/* +* +* THIS FILE IS GENERATED WITH +./crc32_constants -x -r -c 0x11EDC6F41 + +* This is from https://github.com/antonblanchard/crc32-vpmsum/ +* DO NOT MODIFY IT MANUALLY! +* +*/ + +#define CRC 0x1edc6f41 +#define CRC_XOR +#define REFLECT +#define MAX_SIZE 32768 + +#ifndef __ASSEMBLER__ +#ifdef CRC_TABLE +static const unsigned int crc_table[] = { + 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, + 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, + 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, + 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, + 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, + 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, + 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, + 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, + 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, + 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, + 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, + 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, + 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, + 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a, + 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, + 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, + 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, + 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, + 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, + 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198, + 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, + 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, + 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, + 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, + 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, + 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, + 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, + 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, + 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, + 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, + 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, + 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829, + 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, + 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, + 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, + 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, + 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, + 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc, + 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, + 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, + 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, + 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, + 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, + 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982, + 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, + 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, + 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, + 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, + 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, + 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, + 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, + 0x8ecee914, 
0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, + 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, + 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, + 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, + 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, + 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, + 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, + 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, + 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, + 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, + 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e, + 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, + 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,}; + +#endif /* CRC_TABLE */ +#ifdef POWER8_INTRINSICS + +/* Constants */ + +/* Reduce 262144 kbits to 1024 bits */ +static const __vector unsigned long long vcrc_const[255] + __attribute__((aligned (16))) = { +#ifdef __LITTLE_ENDIAN__ + /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ + { 0x000000009c37c408, 0x00000000b6ca9e20 }, + /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */ + { 0x00000001b51df26c, 0x00000000350249a8 }, + /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ + { 0x000000000724b9d0, 0x00000001862dac54 }, + /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ + { 0x00000001c00532fe, 0x00000001d87fb48c }, + /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ + { 0x00000000f05a9362, 0x00000001f39b699e }, + /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ + { 0x00000001e1007970, 0x0000000101da11b4 }, + /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ + { 0x00000000a57366ee, 0x00000001cab571e0 }, + /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ + { 0x0000000192011284, 0x00000000c7020cfe }, + /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ + { 0x0000000162716d9a, 0x00000000cdaed1ae }, + /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ + { 0x00000000cd97ecde, 0x00000001e804effc }, + /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ + { 0x0000000058812bc0, 0x0000000077c3ea3a }, + /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ + { 0x0000000088b8c12e, 0x0000000068df31b4 }, + /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ + { 0x00000001230b234c, 0x00000000b059b6c2 }, + /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ + { 0x00000001120b416e, 0x0000000145fb8ed8 }, + /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ + { 0x00000001974aecb0, 0x00000000cbc09168 }, + /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ + { 0x000000008ee3f226, 0x000000005ceeedc2 }, + /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ + { 0x00000001089aba9a, 0x0000000047d74e86 }, + /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ + { 0x0000000065113872, 0x00000001407e9e22 }, + /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ + { 0x000000005c07ec10, 0x00000001da967bda }, + /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ + { 0x0000000187590924, 0x000000006c898368 }, + /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ + { 0x00000000e35da7c6, 0x00000000f2d14c98 }, + /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ + { 0x000000000415855a, 0x00000001993c6ad4 }, + /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ + { 0x0000000073617758, 0x000000014683d1ac }, + /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ + { 0x0000000176021d28, 0x00000001a7c93e6c }, + /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ + { 0x00000001c358fd0a, 0x000000010211e90a }, + /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ + { 0x00000001ff7a2c18, 0x000000001119403e }, + /* 
x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ + { 0x00000000f2d9f7e4, 0x000000001c3261aa }, + /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ + { 0x000000016cf1f9c8, 0x000000014e37a634 }, + /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ + { 0x000000010af9279a, 0x0000000073786c0c }, + /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ + { 0x0000000004f101e8, 0x000000011dc037f8 }, + /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ + { 0x0000000070bcf184, 0x0000000031433dfc }, + /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ + { 0x000000000a8de642, 0x000000009cde8348 }, + /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ + { 0x0000000062ea130c, 0x0000000038d3c2a6 }, + /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ + { 0x00000001eb31cbb2, 0x000000011b25f260 }, + /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ + { 0x0000000170783448, 0x000000001629e6f0 }, + /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ + { 0x00000001a684b4c6, 0x0000000160838b4c }, + /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ + { 0x00000000253ca5b4, 0x000000007a44011c }, + /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ + { 0x0000000057b4b1e2, 0x00000000226f417a }, + /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ + { 0x00000000b6bd084c, 0x0000000045eb2eb4 }, + /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ + { 0x0000000123c2d592, 0x000000014459d70c }, + /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ + { 0x00000000159dafce, 0x00000001d406ed82 }, + /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ + { 0x0000000127e1a64e, 0x0000000160c8e1a8 }, + /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ + { 0x0000000056860754, 0x0000000027ba8098 }, + /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ + { 0x00000001e661aae8, 0x000000006d92d018 }, + /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ + { 0x00000000f82c6166, 0x000000012ed7e3f2 }, + /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ + { 0x00000000c4f9c7ae, 0x000000002dc87788 }, + /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ + { 0x0000000074203d20, 0x0000000018240bb8 }, + /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ + { 0x0000000198173052, 0x000000001ad38158 }, + /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ + { 0x00000001ce8aba54, 0x00000001396b78f2 }, + /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ + { 0x00000001850d5d94, 0x000000011a681334 }, + /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ + { 0x00000001d609239c, 0x000000012104732e }, + /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ + { 0x000000001595f048, 0x00000000a140d90c }, + /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ + { 0x0000000042ccee08, 0x00000001b7215eda }, + /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ + { 0x000000010a389d74, 0x00000001aaf1df3c }, + /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ + { 0x000000012a840da6, 0x0000000029d15b8a }, + /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ + { 0x000000001d181c0c, 0x00000000f1a96922 }, + /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ + { 0x0000000068b7d1f6, 0x00000001ac80d03c }, + /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ + { 0x000000005b0f14fc, 0x000000000f11d56a }, + /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ + { 0x0000000179e9e730, 0x00000001f1c022a2 }, + /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ + { 0x00000001ce1368d6, 0x0000000173d00ae2 }, + /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ 
+ { 0x0000000112c3a84c, 0x00000001d4ffe4ac }, + /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ + { 0x00000000de940fee, 0x000000016edc5ae4 }, + /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ + { 0x00000000fe896b7e, 0x00000001f1a02140 }, + /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ + { 0x00000001f797431c, 0x00000000ca0b28a0 }, + /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ + { 0x0000000053e989ba, 0x00000001928e30a2 }, + /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ + { 0x000000003920cd16, 0x0000000097b1b002 }, + /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ + { 0x00000001e6f579b8, 0x00000000b15bf906 }, + /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ + { 0x000000007493cb0a, 0x00000000411c5d52 }, + /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ + { 0x00000001bdd376d8, 0x00000001c36f3300 }, + /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ + { 0x000000016badfee6, 0x00000001119227e0 }, + /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ + { 0x0000000071de5c58, 0x00000000114d4702 }, + /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ + { 0x00000000453f317c, 0x00000000458b5b98 }, + /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ + { 0x0000000121675cce, 0x000000012e31fb8e }, + /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ + { 0x00000001f409ee92, 0x000000005cf619d8 }, + /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ + { 0x00000000f36b9c88, 0x0000000063f4d8b2 }, + /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ + { 0x0000000036b398f4, 0x000000004138dc8a }, + /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ + { 0x00000001748f9adc, 0x00000001d29ee8e0 }, + /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ + { 0x00000001be94ec00, 0x000000006a08ace8 }, + /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ + { 0x00000000b74370d6, 0x0000000127d42010 }, + /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ + { 0x00000001174d0b98, 0x0000000019d76b62 }, + /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ + { 0x00000000befc06a4, 0x00000001b1471f6e }, + /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ + { 0x00000001ae125288, 0x00000001f64c19cc }, + /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ + { 0x0000000095c19b34, 0x00000000003c0ea0 }, + /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ + { 0x00000001a78496f2, 0x000000014d73abf6 }, + /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ + { 0x00000001ac5390a0, 0x00000001620eb844 }, + /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ + { 0x000000002a80ed6e, 0x0000000147655048 }, + /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ + { 0x00000001fa9b0128, 0x0000000067b5077e }, + /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ + { 0x00000001ea94929e, 0x0000000010ffe206 }, + /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ + { 0x0000000125f4305c, 0x000000000fee8f1e }, + /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ + { 0x00000001471e2002, 0x00000001da26fbae }, + /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ + { 0x0000000132d2253a, 0x00000001b3a8bd88 }, + /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ + { 0x00000000f26b3592, 0x00000000e8f3898e }, + /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ + { 0x00000000bc8b67b0, 0x00000000b0d0d28c }, + /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ + { 0x000000013a826ef2, 0x0000000030f2a798 }, + /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ + { 0x0000000081482c84, 0x000000000fba1002 }, + /* 
x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ + { 0x00000000e77307c2, 0x00000000bdb9bd72 }, + /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ + { 0x00000000d4a07ec8, 0x0000000075d3bf5a }, + /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ + { 0x0000000017102100, 0x00000000ef1f98a0 }, + /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ + { 0x00000000db406486, 0x00000000689c7602 }, + /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ + { 0x0000000192db7f88, 0x000000016d5fa5fe }, + /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ + { 0x000000018bf67b1e, 0x00000001d0d2b9ca }, + /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ + { 0x000000007c09163e, 0x0000000041e7b470 }, + /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ + { 0x000000000adac060, 0x00000001cbb6495e }, + /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ + { 0x00000000bd8316ae, 0x000000010052a0b0 }, + /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ + { 0x000000019f09ab54, 0x00000001d8effb5c }, + /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ + { 0x0000000125155542, 0x00000001d969853c }, + /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ + { 0x000000018fdb5882, 0x00000000523ccce2 }, + /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ + { 0x00000000e794b3f4, 0x000000001e2436bc }, + /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ + { 0x000000016f9bb022, 0x00000000ddd1c3a2 }, + /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ + { 0x00000000290c9978, 0x0000000019fcfe38 }, + /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ + { 0x0000000083c0f350, 0x00000001ce95db64 }, + /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ + { 0x0000000173ea6628, 0x00000000af582806 }, + /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ + { 0x00000001c8b4e00a, 0x00000001006388f6 }, + /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ + { 0x00000000de95d6aa, 0x0000000179eca00a }, + /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ + { 0x000000010b7f7248, 0x0000000122410a6a }, + /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ + { 0x00000001326e3a06, 0x000000004288e87c }, + /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ + { 0x00000000bb62c2e6, 0x000000016c5490da }, + /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ + { 0x0000000156a4b2c2, 0x00000000d1c71f6e }, + /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ + { 0x000000011dfe763a, 0x00000001b4ce08a6 }, + /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ + { 0x000000007bcca8e2, 0x00000001466ba60c }, + /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ + { 0x0000000186118faa, 0x00000001f6c488a4 }, + /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ + { 0x0000000111a65a88, 0x000000013bfb0682 }, + /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ + { 0x000000003565e1c4, 0x00000000690e9e54 }, + /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ + { 0x000000012ed02a82, 0x00000000281346b6 }, + /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ + { 0x00000000c486ecfc, 0x0000000156464024 }, + /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ + { 0x0000000001b951b2, 0x000000016063a8dc }, + /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ + { 0x0000000048143916, 0x0000000116a66362 }, + /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ + { 0x00000001dc2ae124, 0x000000017e8aa4d2 }, + /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ + { 0x00000001416c58d6, 0x00000001728eb10c }, + /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ 
+ { 0x00000000a479744a, 0x00000001b08fd7fa }, + /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ + { 0x0000000096ca3a26, 0x00000001092a16e8 }, + /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ + { 0x00000000ff223d4e, 0x00000000a505637c }, + /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ + { 0x000000010e84da42, 0x00000000d94869b2 }, + /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ + { 0x00000001b61ba3d0, 0x00000001c8b203ae }, + /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ + { 0x00000000680f2de8, 0x000000005704aea0 }, + /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ + { 0x000000008772a9a8, 0x000000012e295fa2 }, + /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ + { 0x0000000155f295bc, 0x000000011d0908bc }, + /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ + { 0x00000000595f9282, 0x0000000193ed97ea }, + /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ + { 0x0000000164b1c25a, 0x000000013a0f1c52 }, + /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ + { 0x00000000fbd67c50, 0x000000010c2c40c0 }, + /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ + { 0x0000000096076268, 0x00000000ff6fac3e }, + /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ + { 0x00000001d288e4cc, 0x000000017b3609c0 }, + /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ + { 0x00000001eaac1bdc, 0x0000000088c8c922 }, + /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ + { 0x00000001f1ea39e2, 0x00000001751baae6 }, + /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ + { 0x00000001eb6506fc, 0x0000000107952972 }, + /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ + { 0x000000010f806ffe, 0x0000000162b00abe }, + /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ + { 0x000000010408481e, 0x000000000d7b404c }, + /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ + { 0x0000000188260534, 0x00000000763b13d4 }, + /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ + { 0x0000000058fc73e0, 0x00000000f6dc22d8 }, + /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ + { 0x00000000391c59b8, 0x000000007daae060 }, + /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ + { 0x000000018b638400, 0x000000013359ab7c }, + /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ + { 0x000000011738f5c4, 0x000000008add438a }, + /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ + { 0x000000008cf7c6da, 0x00000001edbefdea }, + /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ + { 0x00000001ef97fb16, 0x000000004104e0f8 }, + /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ + { 0x0000000102130e20, 0x00000000b48a8222 }, + /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ + { 0x00000000db968898, 0x00000001bcb46844 }, + /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ + { 0x00000000b5047b5e, 0x000000013293ce0a }, + /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ + { 0x000000010b90fdb2, 0x00000001710d0844 }, + /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ + { 0x000000004834a32e, 0x0000000117907f6e }, + /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ + { 0x0000000059c8f2b0, 0x0000000087ddf93e }, + /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ + { 0x0000000122cec508, 0x000000005970e9b0 }, + /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ + { 0x000000000a330cda, 0x0000000185b2b7d0 }, + /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ + { 0x000000014a47148c, 0x00000001dcee0efc }, + /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ + { 0x0000000042c61cb8, 0x0000000030da2722 }, + /* x^93184 mod 
p(x)` << 1, x^93248 mod p(x)` << 1 */ + { 0x0000000012fe6960, 0x000000012f925a18 }, + /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ + { 0x00000000dbda2c20, 0x00000000dd2e357c }, + /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ + { 0x000000011122410c, 0x00000000071c80de }, + /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ + { 0x00000000977b2070, 0x000000011513140a }, + /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ + { 0x000000014050438e, 0x00000001df876e8e }, + /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ + { 0x0000000147c840e8, 0x000000015f81d6ce }, + /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ + { 0x00000001cc7c88ce, 0x000000019dd94dbe }, + /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ + { 0x00000001476b35a4, 0x00000001373d206e }, + /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ + { 0x000000013d52d508, 0x00000000668ccade }, + /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ + { 0x000000008e4be32e, 0x00000001b192d268 }, + /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ + { 0x00000000024120fe, 0x00000000e30f3a78 }, + /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ + { 0x00000000ddecddb4, 0x000000010ef1f7bc }, + /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ + { 0x00000000d4d403bc, 0x00000001f5ac7380 }, + /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ + { 0x00000001734b89aa, 0x000000011822ea70 }, + /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ + { 0x000000010e7a58d6, 0x00000000c3a33848 }, + /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ + { 0x00000001f9f04e9c, 0x00000001bd151c24 }, + /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ + { 0x00000000b692225e, 0x0000000056002d76 }, + /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ + { 0x000000019b8d3f3e, 0x000000014657c4f4 }, + /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ + { 0x00000001a874f11e, 0x0000000113742d7c }, + /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ + { 0x000000010d5a4254, 0x000000019c5920ba }, + /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ + { 0x00000000bbb2f5d6, 0x000000005216d2d6 }, + /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ + { 0x0000000179cc0e36, 0x0000000136f5ad8a }, + /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ + { 0x00000001dca1da4a, 0x000000018b07beb6 }, + /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ + { 0x00000000feb1a192, 0x00000000db1e93b0 }, + /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ + { 0x00000000d1eeedd6, 0x000000000b96fa3a }, + /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ + { 0x000000008fad9bb4, 0x00000001d9968af0 }, + /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ + { 0x00000001884938e4, 0x000000000e4a77a2 }, + /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ + { 0x00000001bc2e9bc0, 0x00000000508c2ac8 }, + /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ + { 0x00000001f9658a68, 0x0000000021572a80 }, + /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ + { 0x000000001b9224fc, 0x00000001b859daf2 }, + /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ + { 0x0000000055b2fb84, 0x000000016f788474 }, + /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ + { 0x000000018b090348, 0x00000001b438810e }, + /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ + { 0x000000011ccbd5ea, 0x0000000095ddc6f2 }, + /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ + { 0x0000000007ae47f8, 0x00000001d977c20c }, + /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ + { 0x0000000172acbec0, 0x00000000ebedb99a }, + /* x^57344 mod p(x)` << 1, x^57408 
mod p(x)` << 1 */ + { 0x00000001c6e3ff20, 0x00000001df9e9e92 }, + /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ + { 0x00000000e1b38744, 0x00000001a4a3f952 }, + /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ + { 0x00000000791585b2, 0x00000000e2f51220 }, + /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ + { 0x00000000ac53b894, 0x000000004aa01f3e }, + /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ + { 0x00000001ed5f2cf4, 0x00000000b3e90a58 }, + /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ + { 0x00000001df48b2e0, 0x000000000c9ca2aa }, + /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ + { 0x00000000049c1c62, 0x0000000151682316 }, + /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ + { 0x000000017c460c12, 0x0000000036fce78c }, + /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ + { 0x000000015be4da7e, 0x000000009037dc10 }, + /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ + { 0x000000010f38f668, 0x00000000d3298582 }, + /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ + { 0x0000000039f40a00, 0x00000001b42e8ad6 }, + /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ + { 0x00000000bd4c10c4, 0x00000000142a9838 }, + /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ + { 0x0000000042db1d98, 0x0000000109c7f190 }, + /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ + { 0x00000001c905bae6, 0x0000000056ff9310 }, + /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ + { 0x00000000069d40ea, 0x00000001594513aa }, + /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ + { 0x000000008e4fbad0, 0x00000001e3b5b1e8 }, + /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ + { 0x0000000047bedd46, 0x000000011dd5fc08 }, + /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ + { 0x0000000026396bf8, 0x00000001675f0cc2 }, + /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ + { 0x00000000379beb92, 0x00000000d1c8dd44 }, + /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ + { 0x000000000abae54a, 0x0000000115ebd3d8 }, + /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ + { 0x0000000007e6a128, 0x00000001ecbd0dac }, + /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ + { 0x000000000ade29d2, 0x00000000cdf67af2 }, + /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ + { 0x00000000f974c45c, 0x000000004c01ff4c }, + /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ + { 0x00000000e77ac60a, 0x00000000f2d8657e }, + /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ + { 0x0000000145895816, 0x000000006bae74c4 }, + /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ + { 0x0000000038e362be, 0x0000000152af8aa0 }, + /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ + { 0x000000007f991a64, 0x0000000004663802 }, + /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ + { 0x00000000fa366d3a, 0x00000001ab2f5afc }, + /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ + { 0x00000001a2bb34f0, 0x0000000074a4ebd4 }, + /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ + { 0x0000000028a9981e, 0x00000001d7ab3a4c }, + /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ + { 0x00000001dbc672be, 0x00000001a8da60c6 }, + /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ + { 0x00000000b04d77f6, 0x000000013cf63820 }, + /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ + { 0x0000000124400d96, 0x00000000bec12e1e }, + /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ + { 0x000000014ca4b414, 0x00000001c6368010 }, + /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ + { 0x000000012fe2c938, 0x00000001e6e78758 }, + /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ + 
{ 0x00000001faed01e6, 0x000000008d7f2b3c }, + /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ + { 0x000000007e80ecfe, 0x000000016b4a156e }, + /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ + { 0x0000000098daee94, 0x00000001c63cfeb6 }, + /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ + { 0x000000010a04edea, 0x000000015f902670 }, + /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ + { 0x00000001c00b4524, 0x00000001cd5de11e }, + /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ + { 0x0000000170296550, 0x000000001acaec54 }, + /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ + { 0x0000000181afaa48, 0x000000002bd0ca78 }, + /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ + { 0x0000000185a31ffa, 0x0000000032d63d5c }, + /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ + { 0x000000002469f608, 0x000000001c6d4e4c }, + /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ + { 0x000000006980102a, 0x0000000106a60b92 }, + /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ + { 0x0000000111ea9ca8, 0x00000000d3855e12 }, + /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ + { 0x00000001bd1d29ce, 0x00000000e3125636 }, + /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ + { 0x00000001b34b9580, 0x000000009e8f7ea4 }, + /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ + { 0x000000003076054e, 0x00000001c82e562c }, + /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ + { 0x000000012a608ea4, 0x00000000ca9f09ce }, + /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ + { 0x00000000784d05fe, 0x00000000c63764e6 }, + /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ + { 0x000000016ef0d82a, 0x0000000168d2e49e }, + /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ + { 0x0000000075bda454, 0x00000000e986c148 }, + /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ + { 0x000000003dc0a1c4, 0x00000000cfb65894 }, + /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ + { 0x00000000e9a5d8be, 0x0000000111cadee4 }, + /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ + { 0x00000001609bc4b4, 0x0000000171fb63ce } +#else /* __LITTLE_ENDIAN__ */ + /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ + { 0x00000000b6ca9e20, 0x000000009c37c408 }, + /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */ + { 0x00000000350249a8, 0x00000001b51df26c }, + /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ + { 0x00000001862dac54, 0x000000000724b9d0 }, + /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ + { 0x00000001d87fb48c, 0x00000001c00532fe }, + /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ + { 0x00000001f39b699e, 0x00000000f05a9362 }, + /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ + { 0x0000000101da11b4, 0x00000001e1007970 }, + /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ + { 0x00000001cab571e0, 0x00000000a57366ee }, + /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ + { 0x00000000c7020cfe, 0x0000000192011284 }, + /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ + { 0x00000000cdaed1ae, 0x0000000162716d9a }, + /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ + { 0x00000001e804effc, 0x00000000cd97ecde }, + /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ + { 0x0000000077c3ea3a, 0x0000000058812bc0 }, + /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ + { 0x0000000068df31b4, 0x0000000088b8c12e }, + /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ + { 0x00000000b059b6c2, 0x00000001230b234c }, + /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ + { 0x0000000145fb8ed8, 0x00000001120b416e }, + /* x^246784 mod p(x)` << 1, 
x^246848 mod p(x)` << 1 */ + { 0x00000000cbc09168, 0x00000001974aecb0 }, + /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ + { 0x000000005ceeedc2, 0x000000008ee3f226 }, + /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ + { 0x0000000047d74e86, 0x00000001089aba9a }, + /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ + { 0x00000001407e9e22, 0x0000000065113872 }, + /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ + { 0x00000001da967bda, 0x000000005c07ec10 }, + /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ + { 0x000000006c898368, 0x0000000187590924 }, + /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ + { 0x00000000f2d14c98, 0x00000000e35da7c6 }, + /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ + { 0x00000001993c6ad4, 0x000000000415855a }, + /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ + { 0x000000014683d1ac, 0x0000000073617758 }, + /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ + { 0x00000001a7c93e6c, 0x0000000176021d28 }, + /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ + { 0x000000010211e90a, 0x00000001c358fd0a }, + /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ + { 0x000000001119403e, 0x00000001ff7a2c18 }, + /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ + { 0x000000001c3261aa, 0x00000000f2d9f7e4 }, + /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ + { 0x000000014e37a634, 0x000000016cf1f9c8 }, + /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ + { 0x0000000073786c0c, 0x000000010af9279a }, + /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ + { 0x000000011dc037f8, 0x0000000004f101e8 }, + /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ + { 0x0000000031433dfc, 0x0000000070bcf184 }, + /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ + { 0x000000009cde8348, 0x000000000a8de642 }, + /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ + { 0x0000000038d3c2a6, 0x0000000062ea130c }, + /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ + { 0x000000011b25f260, 0x00000001eb31cbb2 }, + /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ + { 0x000000001629e6f0, 0x0000000170783448 }, + /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ + { 0x0000000160838b4c, 0x00000001a684b4c6 }, + /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ + { 0x000000007a44011c, 0x00000000253ca5b4 }, + /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ + { 0x00000000226f417a, 0x0000000057b4b1e2 }, + /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ + { 0x0000000045eb2eb4, 0x00000000b6bd084c }, + /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ + { 0x000000014459d70c, 0x0000000123c2d592 }, + /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ + { 0x00000001d406ed82, 0x00000000159dafce }, + /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ + { 0x0000000160c8e1a8, 0x0000000127e1a64e }, + /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ + { 0x0000000027ba8098, 0x0000000056860754 }, + /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ + { 0x000000006d92d018, 0x00000001e661aae8 }, + /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ + { 0x000000012ed7e3f2, 0x00000000f82c6166 }, + /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ + { 0x000000002dc87788, 0x00000000c4f9c7ae }, + /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ + { 0x0000000018240bb8, 0x0000000074203d20 }, + /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ + { 0x000000001ad38158, 0x0000000198173052 }, + /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ + { 0x00000001396b78f2, 
0x00000001ce8aba54 }, + /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ + { 0x000000011a681334, 0x00000001850d5d94 }, + /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ + { 0x000000012104732e, 0x00000001d609239c }, + /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ + { 0x00000000a140d90c, 0x000000001595f048 }, + /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ + { 0x00000001b7215eda, 0x0000000042ccee08 }, + /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ + { 0x00000001aaf1df3c, 0x000000010a389d74 }, + /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ + { 0x0000000029d15b8a, 0x000000012a840da6 }, + /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ + { 0x00000000f1a96922, 0x000000001d181c0c }, + /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ + { 0x00000001ac80d03c, 0x0000000068b7d1f6 }, + /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ + { 0x000000000f11d56a, 0x000000005b0f14fc }, + /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ + { 0x00000001f1c022a2, 0x0000000179e9e730 }, + /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ + { 0x0000000173d00ae2, 0x00000001ce1368d6 }, + /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ + { 0x00000001d4ffe4ac, 0x0000000112c3a84c }, + /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ + { 0x000000016edc5ae4, 0x00000000de940fee }, + /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ + { 0x00000001f1a02140, 0x00000000fe896b7e }, + /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ + { 0x00000000ca0b28a0, 0x00000001f797431c }, + /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ + { 0x00000001928e30a2, 0x0000000053e989ba }, + /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ + { 0x0000000097b1b002, 0x000000003920cd16 }, + /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ + { 0x00000000b15bf906, 0x00000001e6f579b8 }, + /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ + { 0x00000000411c5d52, 0x000000007493cb0a }, + /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ + { 0x00000001c36f3300, 0x00000001bdd376d8 }, + /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ + { 0x00000001119227e0, 0x000000016badfee6 }, + /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ + { 0x00000000114d4702, 0x0000000071de5c58 }, + /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ + { 0x00000000458b5b98, 0x00000000453f317c }, + /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ + { 0x000000012e31fb8e, 0x0000000121675cce }, + /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ + { 0x000000005cf619d8, 0x00000001f409ee92 }, + /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ + { 0x0000000063f4d8b2, 0x00000000f36b9c88 }, + /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ + { 0x000000004138dc8a, 0x0000000036b398f4 }, + /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ + { 0x00000001d29ee8e0, 0x00000001748f9adc }, + /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ + { 0x000000006a08ace8, 0x00000001be94ec00 }, + /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ + { 0x0000000127d42010, 0x00000000b74370d6 }, + /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ + { 0x0000000019d76b62, 0x00000001174d0b98 }, + /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ + { 0x00000001b1471f6e, 0x00000000befc06a4 }, + /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ + { 0x00000001f64c19cc, 0x00000001ae125288 }, + /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ + { 0x00000000003c0ea0, 0x0000000095c19b34 }, + /* x^176128 mod p(x)` << 1, 
x^176192 mod p(x)` << 1 */ + { 0x000000014d73abf6, 0x00000001a78496f2 }, + /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ + { 0x00000001620eb844, 0x00000001ac5390a0 }, + /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ + { 0x0000000147655048, 0x000000002a80ed6e }, + /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ + { 0x0000000067b5077e, 0x00000001fa9b0128 }, + /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ + { 0x0000000010ffe206, 0x00000001ea94929e }, + /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ + { 0x000000000fee8f1e, 0x0000000125f4305c }, + /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ + { 0x00000001da26fbae, 0x00000001471e2002 }, + /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ + { 0x00000001b3a8bd88, 0x0000000132d2253a }, + /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ + { 0x00000000e8f3898e, 0x00000000f26b3592 }, + /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ + { 0x00000000b0d0d28c, 0x00000000bc8b67b0 }, + /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ + { 0x0000000030f2a798, 0x000000013a826ef2 }, + /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ + { 0x000000000fba1002, 0x0000000081482c84 }, + /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ + { 0x00000000bdb9bd72, 0x00000000e77307c2 }, + /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ + { 0x0000000075d3bf5a, 0x00000000d4a07ec8 }, + /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ + { 0x00000000ef1f98a0, 0x0000000017102100 }, + /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ + { 0x00000000689c7602, 0x00000000db406486 }, + /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ + { 0x000000016d5fa5fe, 0x0000000192db7f88 }, + /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ + { 0x00000001d0d2b9ca, 0x000000018bf67b1e }, + /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ + { 0x0000000041e7b470, 0x000000007c09163e }, + /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ + { 0x00000001cbb6495e, 0x000000000adac060 }, + /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ + { 0x000000010052a0b0, 0x00000000bd8316ae }, + /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ + { 0x00000001d8effb5c, 0x000000019f09ab54 }, + /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ + { 0x00000001d969853c, 0x0000000125155542 }, + /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ + { 0x00000000523ccce2, 0x000000018fdb5882 }, + /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ + { 0x000000001e2436bc, 0x00000000e794b3f4 }, + /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ + { 0x00000000ddd1c3a2, 0x000000016f9bb022 }, + /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ + { 0x0000000019fcfe38, 0x00000000290c9978 }, + /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ + { 0x00000001ce95db64, 0x0000000083c0f350 }, + /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ + { 0x00000000af582806, 0x0000000173ea6628 }, + /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ + { 0x00000001006388f6, 0x00000001c8b4e00a }, + /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ + { 0x0000000179eca00a, 0x00000000de95d6aa }, + /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ + { 0x0000000122410a6a, 0x000000010b7f7248 }, + /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ + { 0x000000004288e87c, 0x00000001326e3a06 }, + /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ + { 0x000000016c5490da, 0x00000000bb62c2e6 }, + /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ + { 0x00000000d1c71f6e, 
0x0000000156a4b2c2 }, + /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ + { 0x00000001b4ce08a6, 0x000000011dfe763a }, + /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ + { 0x00000001466ba60c, 0x000000007bcca8e2 }, + /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ + { 0x00000001f6c488a4, 0x0000000186118faa }, + /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ + { 0x000000013bfb0682, 0x0000000111a65a88 }, + /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ + { 0x00000000690e9e54, 0x000000003565e1c4 }, + /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ + { 0x00000000281346b6, 0x000000012ed02a82 }, + /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ + { 0x0000000156464024, 0x00000000c486ecfc }, + /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ + { 0x000000016063a8dc, 0x0000000001b951b2 }, + /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ + { 0x0000000116a66362, 0x0000000048143916 }, + /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ + { 0x000000017e8aa4d2, 0x00000001dc2ae124 }, + /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ + { 0x00000001728eb10c, 0x00000001416c58d6 }, + /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ + { 0x00000001b08fd7fa, 0x00000000a479744a }, + /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ + { 0x00000001092a16e8, 0x0000000096ca3a26 }, + /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ + { 0x00000000a505637c, 0x00000000ff223d4e }, + /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ + { 0x00000000d94869b2, 0x000000010e84da42 }, + /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ + { 0x00000001c8b203ae, 0x00000001b61ba3d0 }, + /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ + { 0x000000005704aea0, 0x00000000680f2de8 }, + /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ + { 0x000000012e295fa2, 0x000000008772a9a8 }, + /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ + { 0x000000011d0908bc, 0x0000000155f295bc }, + /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ + { 0x0000000193ed97ea, 0x00000000595f9282 }, + /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ + { 0x000000013a0f1c52, 0x0000000164b1c25a }, + /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ + { 0x000000010c2c40c0, 0x00000000fbd67c50 }, + /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ + { 0x00000000ff6fac3e, 0x0000000096076268 }, + /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ + { 0x000000017b3609c0, 0x00000001d288e4cc }, + /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ + { 0x0000000088c8c922, 0x00000001eaac1bdc }, + /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ + { 0x00000001751baae6, 0x00000001f1ea39e2 }, + /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ + { 0x0000000107952972, 0x00000001eb6506fc }, + /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ + { 0x0000000162b00abe, 0x000000010f806ffe }, + /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ + { 0x000000000d7b404c, 0x000000010408481e }, + /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ + { 0x00000000763b13d4, 0x0000000188260534 }, + /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ + { 0x00000000f6dc22d8, 0x0000000058fc73e0 }, + /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ + { 0x000000007daae060, 0x00000000391c59b8 }, + /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ + { 0x000000013359ab7c, 0x000000018b638400 }, + /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ + { 0x000000008add438a, 0x000000011738f5c4 }, + /* x^105472 mod p(x)` << 1, 
x^105536 mod p(x)` << 1 */ + { 0x00000001edbefdea, 0x000000008cf7c6da }, + /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ + { 0x000000004104e0f8, 0x00000001ef97fb16 }, + /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ + { 0x00000000b48a8222, 0x0000000102130e20 }, + /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ + { 0x00000001bcb46844, 0x00000000db968898 }, + /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ + { 0x000000013293ce0a, 0x00000000b5047b5e }, + /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ + { 0x00000001710d0844, 0x000000010b90fdb2 }, + /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ + { 0x0000000117907f6e, 0x000000004834a32e }, + /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ + { 0x0000000087ddf93e, 0x0000000059c8f2b0 }, + /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ + { 0x000000005970e9b0, 0x0000000122cec508 }, + /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ + { 0x0000000185b2b7d0, 0x000000000a330cda }, + /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ + { 0x00000001dcee0efc, 0x000000014a47148c }, + /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ + { 0x0000000030da2722, 0x0000000042c61cb8 }, + /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ + { 0x000000012f925a18, 0x0000000012fe6960 }, + /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ + { 0x00000000dd2e357c, 0x00000000dbda2c20 }, + /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ + { 0x00000000071c80de, 0x000000011122410c }, + /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ + { 0x000000011513140a, 0x00000000977b2070 }, + /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ + { 0x00000001df876e8e, 0x000000014050438e }, + /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ + { 0x000000015f81d6ce, 0x0000000147c840e8 }, + /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ + { 0x000000019dd94dbe, 0x00000001cc7c88ce }, + /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ + { 0x00000001373d206e, 0x00000001476b35a4 }, + /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ + { 0x00000000668ccade, 0x000000013d52d508 }, + /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ + { 0x00000001b192d268, 0x000000008e4be32e }, + /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ + { 0x00000000e30f3a78, 0x00000000024120fe }, + /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ + { 0x000000010ef1f7bc, 0x00000000ddecddb4 }, + /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ + { 0x00000001f5ac7380, 0x00000000d4d403bc }, + /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ + { 0x000000011822ea70, 0x00000001734b89aa }, + /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ + { 0x00000000c3a33848, 0x000000010e7a58d6 }, + /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ + { 0x00000001bd151c24, 0x00000001f9f04e9c }, + /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ + { 0x0000000056002d76, 0x00000000b692225e }, + /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ + { 0x000000014657c4f4, 0x000000019b8d3f3e }, + /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ + { 0x0000000113742d7c, 0x00000001a874f11e }, + /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ + { 0x000000019c5920ba, 0x000000010d5a4254 }, + /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ + { 0x000000005216d2d6, 0x00000000bbb2f5d6 }, + /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ + { 0x0000000136f5ad8a, 0x0000000179cc0e36 }, + /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ + { 0x000000018b07beb6, 0x00000001dca1da4a }, + /* x^69632 mod p(x)` << 1, x^69696 
mod p(x)` << 1 */ + { 0x00000000db1e93b0, 0x00000000feb1a192 }, + /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ + { 0x000000000b96fa3a, 0x00000000d1eeedd6 }, + /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ + { 0x00000001d9968af0, 0x000000008fad9bb4 }, + /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ + { 0x000000000e4a77a2, 0x00000001884938e4 }, + /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ + { 0x00000000508c2ac8, 0x00000001bc2e9bc0 }, + /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ + { 0x0000000021572a80, 0x00000001f9658a68 }, + /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ + { 0x00000001b859daf2, 0x000000001b9224fc }, + /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ + { 0x000000016f788474, 0x0000000055b2fb84 }, + /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ + { 0x00000001b438810e, 0x000000018b090348 }, + /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ + { 0x0000000095ddc6f2, 0x000000011ccbd5ea }, + /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ + { 0x00000001d977c20c, 0x0000000007ae47f8 }, + /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ + { 0x00000000ebedb99a, 0x0000000172acbec0 }, + /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ + { 0x00000001df9e9e92, 0x00000001c6e3ff20 }, + /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ + { 0x00000001a4a3f952, 0x00000000e1b38744 }, + /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ + { 0x00000000e2f51220, 0x00000000791585b2 }, + /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ + { 0x000000004aa01f3e, 0x00000000ac53b894 }, + /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ + { 0x00000000b3e90a58, 0x00000001ed5f2cf4 }, + /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ + { 0x000000000c9ca2aa, 0x00000001df48b2e0 }, + /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ + { 0x0000000151682316, 0x00000000049c1c62 }, + /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ + { 0x0000000036fce78c, 0x000000017c460c12 }, + /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ + { 0x000000009037dc10, 0x000000015be4da7e }, + /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ + { 0x00000000d3298582, 0x000000010f38f668 }, + /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ + { 0x00000001b42e8ad6, 0x0000000039f40a00 }, + /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ + { 0x00000000142a9838, 0x00000000bd4c10c4 }, + /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ + { 0x0000000109c7f190, 0x0000000042db1d98 }, + /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ + { 0x0000000056ff9310, 0x00000001c905bae6 }, + /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ + { 0x00000001594513aa, 0x00000000069d40ea }, + /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ + { 0x00000001e3b5b1e8, 0x000000008e4fbad0 }, + /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ + { 0x000000011dd5fc08, 0x0000000047bedd46 }, + /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ + { 0x00000001675f0cc2, 0x0000000026396bf8 }, + /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ + { 0x00000000d1c8dd44, 0x00000000379beb92 }, + /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ + { 0x0000000115ebd3d8, 0x000000000abae54a }, + /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ + { 0x00000001ecbd0dac, 0x0000000007e6a128 }, + /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ + { 0x00000000cdf67af2, 0x000000000ade29d2 }, + /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ + { 0x000000004c01ff4c, 0x00000000f974c45c }, + /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ + 
{ 0x00000000f2d8657e, 0x00000000e77ac60a }, + /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ + { 0x000000006bae74c4, 0x0000000145895816 }, + /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ + { 0x0000000152af8aa0, 0x0000000038e362be }, + /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ + { 0x0000000004663802, 0x000000007f991a64 }, + /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ + { 0x00000001ab2f5afc, 0x00000000fa366d3a }, + /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ + { 0x0000000074a4ebd4, 0x00000001a2bb34f0 }, + /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ + { 0x00000001d7ab3a4c, 0x0000000028a9981e }, + /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ + { 0x00000001a8da60c6, 0x00000001dbc672be }, + /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ + { 0x000000013cf63820, 0x00000000b04d77f6 }, + /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ + { 0x00000000bec12e1e, 0x0000000124400d96 }, + /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ + { 0x00000001c6368010, 0x000000014ca4b414 }, + /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ + { 0x00000001e6e78758, 0x000000012fe2c938 }, + /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ + { 0x000000008d7f2b3c, 0x00000001faed01e6 }, + /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ + { 0x000000016b4a156e, 0x000000007e80ecfe }, + /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ + { 0x00000001c63cfeb6, 0x0000000098daee94 }, + /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ + { 0x000000015f902670, 0x000000010a04edea }, + /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ + { 0x00000001cd5de11e, 0x00000001c00b4524 }, + /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ + { 0x000000001acaec54, 0x0000000170296550 }, + /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ + { 0x000000002bd0ca78, 0x0000000181afaa48 }, + /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ + { 0x0000000032d63d5c, 0x0000000185a31ffa }, + /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ + { 0x000000001c6d4e4c, 0x000000002469f608 }, + /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ + { 0x0000000106a60b92, 0x000000006980102a }, + /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ + { 0x00000000d3855e12, 0x0000000111ea9ca8 }, + /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ + { 0x00000000e3125636, 0x00000001bd1d29ce }, + /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ + { 0x000000009e8f7ea4, 0x00000001b34b9580 }, + /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ + { 0x00000001c82e562c, 0x000000003076054e }, + /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ + { 0x00000000ca9f09ce, 0x000000012a608ea4 }, + /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ + { 0x00000000c63764e6, 0x00000000784d05fe }, + /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ + { 0x0000000168d2e49e, 0x000000016ef0d82a }, + /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ + { 0x00000000e986c148, 0x0000000075bda454 }, + /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ + { 0x00000000cfb65894, 0x000000003dc0a1c4 }, + /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ + { 0x0000000111cadee4, 0x00000000e9a5d8be }, + /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ + { 0x0000000171fb63ce, 0x00000001609bc4b4 } +#endif /* __LITTLE_ENDIAN__ */ + }; + +/* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */ + +static const __vector unsigned long long vcrc_short_const[16] + __attribute__((aligned (16))) = { +#ifdef __LITTLE_ENDIAN__ + /* x^1952 mod p(x) , 
x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x) */ + { 0x5cf015c388e56f72, 0x7fec2963e5bf8048 }, + /* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x) */ + { 0x963a18920246e2e6, 0x38e888d4844752a9 }, + /* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x) */ + { 0x419a441956993a31, 0x42316c00730206ad }, + /* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x) */ + { 0x924752ba2b830011, 0x543d5c543e65ddf9 }, + /* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x) */ + { 0x55bd7f9518e4a304, 0x78e87aaf56767c92 }, + /* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod p(x) */ + { 0x6d76739fe0553f1e, 0x8f68fcec1903da7f }, + /* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x) */ + { 0xc133722b1fe0b5c3, 0x3f4840246791d588 }, + /* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x) */ + { 0x64b67ee0e55ef1f3, 0x34c96751b04de25a }, + /* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x) */ + { 0x069db049b8fdb1e7, 0x156c8e180b4a395b }, + /* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , x^896 mod p(x) */ + { 0xa11bfaf3c9e90b9e, 0xe0b99ccbe661f7be }, + /* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x) */ + { 0x817cdc5119b29a35, 0x041d37768cd75659 }, + /* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x) */ + { 0x1ce9d94b36c41f1c, 0x3a0777818cfaa965 }, + /* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x) */ + { 0x4f256efcb82be955, 0x0e148e8252377a55 }, + /* x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x) */ + { 0xec1631edb2dea967, 0x9c25531d19e65dde }, + /* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x) */ + { 0x5d27e147510ac59a, 0x790606ff9957c0a6 }, + /* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x) */ + { 0xa66805eb18b8ea18, 0x82f63b786ea2d55c } +#else /* __LITTLE_ENDIAN__ */ + /* x^1952 mod p(x) , x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x) */ + { 0x7fec2963e5bf8048, 0x5cf015c388e56f72 }, + /* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x) */ + { 0x38e888d4844752a9, 0x963a18920246e2e6 }, + /* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x) */ + { 0x42316c00730206ad, 0x419a441956993a31 }, + /* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x) */ + { 0x543d5c543e65ddf9, 0x924752ba2b830011 }, + /* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x) */ + { 0x78e87aaf56767c92, 0x55bd7f9518e4a304 }, + /* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod p(x) */ + { 0x8f68fcec1903da7f, 0x6d76739fe0553f1e }, + /* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x) */ + { 0x3f4840246791d588, 0xc133722b1fe0b5c3 }, + /* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x) */ + { 0x34c96751b04de25a, 0x64b67ee0e55ef1f3 }, + /* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x) */ + { 0x156c8e180b4a395b, 0x069db049b8fdb1e7 }, + /* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , x^896 mod p(x) */ + { 0xe0b99ccbe661f7be, 0xa11bfaf3c9e90b9e }, + /* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x) */ + { 0x041d37768cd75659, 0x817cdc5119b29a35 }, + /* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x) */ + { 0x3a0777818cfaa965, 0x1ce9d94b36c41f1c }, + /* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x) */ + { 0x0e148e8252377a55, 0x4f256efcb82be955 }, + /* 
x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x) */
+	{ 0x9c25531d19e65dde, 0xec1631edb2dea967 },
+	/* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x) */
+	{ 0x790606ff9957c0a6, 0x5d27e147510ac59a },
+	/* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x) */
+	{ 0x82f63b786ea2d55c, 0xa66805eb18b8ea18 }
+#endif /* __LITTLE_ENDIAN__ */
+	};
+
+/* Barrett constants */
+/* 33 bit reflected Barrett constant m - (4^32)/n */
+
+static const __vector unsigned long long v_Barrett_const[2]
+	__attribute__((aligned (16))) = {
+	/* x^64 div p(x) */
+#ifdef __LITTLE_ENDIAN__
+	{ 0x00000000dea713f1, 0x0000000000000000 },
+	{ 0x0000000105ec76f1, 0x0000000000000000 }
+#else /* __LITTLE_ENDIAN__ */
+	{ 0x0000000000000000, 0x00000000dea713f1 },
+	{ 0x0000000000000000, 0x0000000105ec76f1 }
+#endif /* __LITTLE_ENDIAN__ */
+	};
+#endif /* POWER8_INTRINSICS */
+
+#endif /* __ASSEMBLER__ */
diff --git a/contrib/crc32-vpmsum-cmake/vec_crc32.h b/contrib/crc32-vpmsum-cmake/vec_crc32.h
new file mode 100644
index 00000000000..0ef13616b34
--- /dev/null
+++ b/contrib/crc32-vpmsum-cmake/vec_crc32.h
@@ -0,0 +1,29 @@
+#ifndef VEC_CRC32
+#define VEC_CRC32
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+unsigned int crc32_vpmsum(unsigned int crc, const unsigned char *p, unsigned long len);
+
+static inline uint32_t crc32_ppc(uint64_t crc, unsigned char const *buffer, size_t len)
+{
+    unsigned char *emptybuffer;
+    if (!buffer) {
+        emptybuffer = (unsigned char *)malloc(len);
+        bzero(emptybuffer, len);
+        crc = crc32_vpmsum(crc, emptybuffer, len);
+        free(emptybuffer);
+    } else {
+        crc = crc32_vpmsum(crc, buffer, (unsigned long)len);
+    }
+    return crc;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index d8a7dba72ac..1bc1151b90b 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -364,6 +364,10 @@ if (TARGET ch_contrib::crc32_s390x)
     target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::crc32_s390x)
 endif()
 
+if (TARGET ch_contrib::crc32-vpmsum)
+    target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::crc32-vpmsum)
+    endif()
+
 dbms_target_link_libraries(PUBLIC ch_contrib::abseil_swiss_tables)
 target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::abseil_swiss_tables)
 
diff --git a/src/Common/HashTable/Hash.h b/src/Common/HashTable/Hash.h
index 01758c1b9fb..c7342d061d8 100644
--- a/src/Common/HashTable/Hash.h
+++ b/src/Common/HashTable/Hash.h
@@ -48,6 +48,10 @@ inline DB::UInt64 intHash64(DB::UInt64 x)
 #include
 #endif
 
+#if (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#include "vec_crc32.h"
+#endif
+
 #if defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
 #include
 
@@ -89,6 +93,8 @@ inline DB::UInt64 intHashCRC32(DB::UInt64 x)
     return __crc32cd(-1U, x);
 #elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
     return s390x_crc32(-1U, x)
+#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+    return crc32_ppc(-1U, reinterpret_cast(&x), sizeof(x));
 #else
     /// On other platforms we do not have CRC32. NOTE This can be confusing.
    /// NOTE: consider using intHash32()
@@ -103,6 +109,8 @@ inline DB::UInt64 intHashCRC32(DB::UInt64 x, DB::UInt64 updated_value)
     return __crc32cd(static_cast(updated_value), x);
 #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
     return s390x_crc32(updated_value, x);
+#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+    return crc32_ppc(updated_value, reinterpret_cast(&x), sizeof(x));
 #else
     /// On other platforms we do not have CRC32. NOTE This can be confusing.
     return intHash64(x) ^ updated_value;
diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt
index e9810e918b4..45543f57b37 100644
--- a/src/Functions/CMakeLists.txt
+++ b/src/Functions/CMakeLists.txt
@@ -86,6 +86,10 @@ if (TARGET ch_contrib::rapidjson)
     list (APPEND PRIVATE_LIBS ch_contrib::rapidjson)
 endif()
 
+if (TARGET ch_contrib::crc32-vpmsum)
+    list (APPEND PUBLIC_LIBS ch_contrib::crc32-vpmsum)
+endif()
+
 add_subdirectory(GatherUtils)
 list (APPEND PRIVATE_LIBS clickhouse_functions_gatherutils)
 
diff --git a/src/Functions/FunctionsStringHash.cpp b/src/Functions/FunctionsStringHash.cpp
index 174acebe979..bf0b7463a5d 100644
--- a/src/Functions/FunctionsStringHash.cpp
+++ b/src/Functions/FunctionsStringHash.cpp
@@ -14,6 +14,10 @@
 #include
 
+#if (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#include "vec_crc32.h"
+#endif
+
 namespace DB
 {
 
@@ -38,6 +42,8 @@ struct Hash
         return __crc32cd(static_cast(crc), val);
 #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
         return s390x_crc32(crc, val);
+#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+        return crc32_ppc(crc, reinterpret_cast(&val), sizeof(val));
 #else
         throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED);
 #endif
@@ -51,6 +57,8 @@ struct Hash
         return __crc32cw(crc, val);
 #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
         return s390x_crc32_u32(crc, val);
+#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+        return crc32_ppc(crc, reinterpret_cast(&val), sizeof(val));
 #else
         throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED);
 #endif
@@ -64,6 +72,8 @@ struct Hash
         return __crc32ch(crc, val);
 #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
         return s390x_crc32_u16(crc, val);
+#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+        return crc32_ppc(crc, reinterpret_cast(&val), sizeof(val));
 #else
         throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED);
 #endif
@@ -77,6 +87,8 @@ struct Hash
         return __crc32cb(crc, val);
 #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
         return s390x_crc32_u8(crc, val);
+#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+        return crc32_ppc(crc, reinterpret_cast(&val), sizeof(val));
 #else
         throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED);
 #endif
diff --git a/src/Functions/FunctionsStringSimilarity.cpp b/src/Functions/FunctionsStringSimilarity.cpp
index 802aafc2042..87aa0f4b3f7 100644
--- a/src/Functions/FunctionsStringSimilarity.cpp
+++ b/src/Functions/FunctionsStringSimilarity.cpp
@@ -24,6 +24,10 @@
 # include
 #endif
 
+#if (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#include "vec_crc32.h" +#endif + namespace DB { /** Distance function implementation. @@ -72,6 +76,8 @@ struct NgramDistanceImpl return __crc32cd(code_points[2], combined) & 0xFFFFu; #elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ return s390x_crc32(code_points[2], combined) & 0xFFFFu; +#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return crc32_ppc(code_points[2], reinterpret_cast(&combined), sizeof(combined)) & 0xFFFFu; #else return (intHashCRC32(combined) ^ intHashCRC32(code_points[2])) & 0xFFFFu; #endif diff --git a/tests/queries/0_stateless/01016_simhash_minhash.ppc64le.reference b/tests/queries/0_stateless/01016_simhash_minhash.ppc64le.reference new file mode 100644 index 00000000000..2acad33320b --- /dev/null +++ b/tests/queries/0_stateless/01016_simhash_minhash.ppc64le.reference @@ -0,0 +1,148 @@ +18446744073709551615 +1737075136 +1737075136 +4018781633 +4018781633 +1846985414 +1846985414 +1846985414 +1846985414 +(10693559443859979498,10693559443859979498) +(12279482788274235946,6436413987527322272) +(12279482788274235946,6436413987527322272) +(13257488272755813409,6436413987527322272) +(13257488272755813409,6436413987527322272) +(13762864994695140861,13762864994695140861) +(13762864994695140861,13762864994695140861) +(13762864994695140861,13762864994695140861) +(13762864994695140861,13762864994695140861) +3023525975 +3040303199 +3023509591 +3023510623 +3040303191 +3040303191 +3023510615 +3023510615 +1999952988 +926211140 +1999699532 +1999683148 +1999952988 +926211140 +1999699532 +1999683148 +(16071125717475221203,9592059329600248798) +(16071125717475221203,1914899959549098907) +(16071125717475221203,7986182634218042944) +(16071125717475221203,7986182634218042944) +(16071125717475221203,9592059329600248798) +(16071125717475221203,1914899959549098907) +(16071125717475221203,7986182634218042944) +(16071125717475221203,7986182634218042944) +(10576877560263640956,4278250516018530743) +(16211512098526494023,11479872370566432466) +(13515070557027359649,17725505493832406849) +(12589381623326290380,575343713614534202) +(10576877560263640956,4278250516018530743) +(16211512098526494023,11479872370566432466) +(13515070557027359649,17725505493832406849) +(12589381623326290380,575343713614534202) +uniqExact 6 +ngramSimHash +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 1211135069 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1546679389 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 2293265501 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 3392173149 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3627054169 +ngramSimHashCaseInsensitive +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 2291168349 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 3358618717 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3425727581 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3627054429 +ngramSimHashUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 1211135069 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1546679389 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. 
SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 2284876893 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 3459282013 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3694163037 +ngramSimHashCaseInsensitiveUTF8 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 2291168349 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. 
Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 3358618717 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3425727581 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3627054429 +wordShingleSimHash +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 10637533 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 171136201 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. 
SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 209864029 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 413353165 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 413353677 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 418595033 +wordShingleSimHashCaseInsensitive +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. 
Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 218252892 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 1218592985 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1613919433 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2080524225 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2088912577 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2094163657 +wordShingleSimHashUTF8 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 10637533 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 171136201 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 209864029 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 413353165 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 413353677 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 418595033 +wordShingleSimHashCaseInsensitiveUTF8 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 218252892 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 1218592985 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1613919433 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2080524225 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2088912577 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2094163657 +ngramMinHash +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (2793448378579182412,5526633106516004292) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (8530889421347045182,5150364204263408031) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (8992738078100405992,5526633106516004292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (15193387305258759701,5526633106516004292) +ngramMinHashCaseInsensitive +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (2793448378579182412,5526633106516004292) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. 
SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (8530889421347045182,5150364204263408031) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (8992738078100405992,5526633106516004292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (15193387305258759701,5526633106516004292) +ngramMinHashUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (2793448378579182412,5526633106516004292) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (8530889421347045182,5150364204263408031) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 
1 (8992738078100405992,5526633106516004292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (15193387305258759701,5526633106516004292) +ngramMinHashCaseInsensitiveUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (2793448378579182412,5526633106516004292) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (8530889421347045182,5150364204263408031) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (8992738078100405992,5526633106516004292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (15193387305258759701,5526633106516004292) +wordShingleMinHash +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (3409292695558556998,3242671779450421938) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. 
It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (11981468198903037199,5500630346333489583) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 (12852656749419794093,678630951345180105) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (13105381013738345838,410122209669519134) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (13105381013738345838,3365040177160857031) +wordShingleMinHashCaseInsensitive +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (712181695272576370,125062659592971094) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 (3404326999173181417,12067981913120463876) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (13918035273694643957,5500630346333489583) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (14132553626943388792,12467125901844798869) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (14132553626943388792,17567683680214055861) +wordShingleMinHashUTF8 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (3409292695558556998,3242671779450421938) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (11981468198903037199,5500630346333489583) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 (12852656749419794093,678630951345180105) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (13105381013738345838,410122209669519134) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (13105381013738345838,3365040177160857031) +wordShingleMinHashCaseInsensitiveUTF8 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (712181695272576370,125062659592971094) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 (3404326999173181417,12067981913120463876) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (13918035273694643957,5500630346333489583) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (14132553626943388792,12467125901844798869) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (14132553626943388792,17567683680214055861) +code 69: Second argument (shingle size) of function wordShingleSimHash cannot be greater then 25: While processing wordShingleSimHash('foobar', 9223372036854775807)None +code 69: Second argument (shingle size) of function wordShingleSimHash cannot be greater then 25: While processing wordShingleSimHash('foobar', 1001)None +code 69: Second argument (shingle size) of function wordShingleSimHash cannot be zero: While processing wordShingleSimHash('foobar', 0)None diff --git a/tests/queries/0_stateless/01016_simhash_minhash.python b/tests/queries/0_stateless/01016_simhash_minhash.python new file mode 100644 index 00000000000..1d6eae456c1 --- /dev/null +++ b/tests/queries/0_stateless/01016_simhash_minhash.python @@ -0,0 +1,394 @@ +#!/usr/bin/env python3 +import os +import socket +import sys +from scipy import stats +import pandas as pd +import numpy as np +import shutil +import platform + +import uuid + +CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1') +CLICKHOUSE_PORT = int(os.environ.get('CLICKHOUSE_PORT_TCP', '9000')) +CLICKHOUSE_DATABASE = os.environ.get('CLICKHOUSE_DATABASE', 'default') + + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, 'helpers')) + +from pure_http_client import ClickHouseClient + +if platform.machine() == "ppc64le": + shutil.copyfile(CURDIR + "/01016_simhash_minhash.ppc64le.reference", CURDIR + "/01016_simhash_minhash.reference") +elif platform.machine() == "x86_64": + shutil.copyfile(CURDIR + "/01016_simhash_minhash.x86_64.reference", CURDIR + "/01016_simhash_minhash.reference") + +def writeVarUInt(x, ba): + for _ in range(0, 9): + + byte = x & 0x7F + if x > 0x7F: + byte |= 0x80 + + ba.append(byte) + + x >>= 7 + if x == 0: + return + +def writeStringBinary(s, ba): + b = bytes(s, 'utf-8') + writeVarUInt(len(s), ba) + ba.extend(b) + +def readStrict(s, size=1): + res = bytearray() + while size: + cur = s.recv(size) + # if not cur: + # raise RuntimeError("Socket is closed") + size -= len(cur) + res.extend(cur) + + return res + +def readUInt(s, size=1): + res = readStrict(s, size) + val = 0 + for i in range(len(res)): + val += res[i] << (i * 8) + return val + +def readUInt8(s): + return readUInt(s) + +def readUInt16(s): + return readUInt(s, 2) + +def readUInt32(s): + return readUInt(s, 4) + +def readUInt64(s): + return readUInt(s, 8) + +def readVarUInt(s): + x = 0 + for i in 
range(9): + byte = readStrict(s)[0] + x |= (byte & 0x7F) << (7 * i) + + if not byte & 0x80: + return x + + return x + +def readStringBinary(s): + size = readVarUInt(s) + s = readStrict(s, size) + return s.decode('utf-8') + +def sendHello(s): + ba = bytearray() + writeVarUInt(0, ba) # Hello + writeStringBinary('simple native protocol', ba) + writeVarUInt(21, ba) + writeVarUInt(9, ba) + writeVarUInt(54449, ba) + writeStringBinary('default', ba) # database + writeStringBinary('default', ba) # user + writeStringBinary('', ba) # pwd + s.sendall(ba) + + +def receiveHello(s): + p_type = readVarUInt(s) + assert (p_type == 0) # Hello + server_name = readStringBinary(s) + # print("Server name: ", server_name) + server_version_major = readVarUInt(s) + # print("Major: ", server_version_major) + server_version_minor = readVarUInt(s) + # print("Minor: ", server_version_minor) + server_revision = readVarUInt(s) + # print("Revision: ", server_revision) + server_timezone = readStringBinary(s) + # print("Timezone: ", server_timezone) + server_display_name = readStringBinary(s) + # print("Display name: ", server_display_name) + server_version_patch = readVarUInt(s) + # print("Version patch: ", server_version_patch) + +def serializeClientInfo(ba, query_id): + writeStringBinary('default', ba) # initial_user + writeStringBinary(query_id, ba) # initial_query_id + writeStringBinary('127.0.0.1:9000', ba) # initial_address + ba.extend([0] * 8) # initial_query_start_time_microseconds + ba.append(1) # TCP + writeStringBinary('os_user', ba) # os_user + writeStringBinary('client_hostname', ba) # client_hostname + writeStringBinary('client_name', ba) # client_name + writeVarUInt(21, ba) + writeVarUInt(9, ba) + writeVarUInt(54449, ba) + writeStringBinary('', ba) # quota_key + writeVarUInt(0, ba) # distributed_depth + writeVarUInt(1, ba) # client_version_patch + ba.append(0) # No telemetry + +def sendQuery(s, query): + ba = bytearray() + query_id = uuid.uuid4().hex + writeVarUInt(1, ba) # query + writeStringBinary(query_id, ba) + + ba.append(1) # INITIAL_QUERY + + # client info + serializeClientInfo(ba, query_id) + + writeStringBinary('', ba) # No settings + writeStringBinary('', ba) # No interserver secret + writeVarUInt(2, ba) # Stage - Complete + ba.append(0) # No compression + writeStringBinary(query, ba) # query, finally + s.sendall(ba) + +def serializeBlockInfo(ba): + writeVarUInt(1, ba) # 1 + ba.append(0) # is_overflows + writeVarUInt(2, ba) # 2 + writeVarUInt(0, ba) # 0 + ba.extend([0] * 4) # bucket_num + +def sendEmptyBlock(s): + ba = bytearray() + writeVarUInt(2, ba) # Data + writeStringBinary('', ba) + serializeBlockInfo(ba) + writeVarUInt(0, ba) # rows + writeVarUInt(0, ba) # columns + s.sendall(ba) + +def assertPacket(packet, expected): + assert(packet == expected), packet + +def readException(s): + code = readUInt32(s) + name = readStringBinary(s) + text = readStringBinary(s) + readStringBinary(s) # trace + assertPacket(readUInt8(s), 0) # has_nested + sys.stdout.write("code {}: {}".format(code, text.replace('DB::Exception:', ''))) + + +def test(): + client = ClickHouseClient() + + res = client.query("SELECT ngramSimHash('')") + sys.stdout.write(res) + res=client.query("SELECT ngramSimHash('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT ngramSimHashCaseInsensitive('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT ngramSimHashUTF8('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT ngramSimHashCaseInsensitiveUTF8('what a cute cat.')") + 
sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHash('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashCaseInsensitive('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashUTF8('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashCaseInsensitiveUTF8('what a cute cat.')") + sys.stdout.write(res) + + res = client.query("SELECT ngramMinHash('')") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHash('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashCaseInsensitive('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashUTF8('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashCaseInsensitiveUTF8('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHash('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashCaseInsensitive('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashUTF8('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashCaseInsensitiveUTF8('what a cute cat.')") + sys.stdout.write(res) + + client.query("DROP TABLE IF EXISTS defaults") + client.query("CREATE TABLE defaults(s String) ENGINE = Memory()") + client.query("INSERT INTO defaults values ('It is the latest occurrence of the Southeast European haze, the issue that occurs in constant intensity during every wet season. It has mainly been caused by forest fires resulting from illegal slash-and-burn clearing performed on behalf of the palm oil industry in Kazakhstan, principally on the islands, which then spread quickly in the dry season.') ('It is the latest occurrence of the Southeast Asian haze, the issue that occurs in constant intensity during every wet season. 
It has mainly been caused by forest fires resulting from illegal slash-and-burn clearing performed on behalf of the palm oil industry in Kazakhstan, principally on the islands, which then spread quickly in the dry season.')") + + res = client.query("SELECT ngramSimHash(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramSimHashCaseInsensitive(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramSimHashUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramSimHashCaseInsensitiveUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHash(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashCaseInsensitive(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashCaseInsensitiveUTF8(s) FROM defaults") + sys.stdout.write(res) + + res = client.query("SELECT ngramMinHash(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashCaseInsensitive(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashCaseInsensitiveUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHash(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashCaseInsensitive(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashCaseInsensitiveUTF8(s) FROM defaults") + sys.stdout.write(res) + + client.query("TRUNCATE TABLE defaults") + client.query("INSERT INTO defaults SELECT arrayJoin(splitByString('\n\n', 'ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems.\n\nClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system''s read and write availability.\nClickHouse is simple and works out of the box. 
It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system''s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. 
All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.'))") + + res = client.query("SELECT 'uniqExact', uniqExact(s) FROM defaults") + sys.stdout.write(res) + + res = client.query("SELECT 'ngramSimHash'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHash(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramSimHashCaseInsensitive'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramSimHashUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramSimHashCaseInsensitiveUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleSimHash'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHash(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleSimHashCaseInsensitive'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleSimHashUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleSimHashCaseInsensitiveUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + + res = client.query("SELECT 'ngramMinHash'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHash(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramMinHashCaseInsensitive'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramMinHashUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramMinHashCaseInsensitiveUTF8'") + 
sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleMinHash'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHash(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleMinHashCaseInsensitive'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleMinHashUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleMinHashCaseInsensitiveUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + + wordShingleSimHashInvalidArg1() + + wordShingleSimHashInvalidArg2() + + wordShingleSimHashInvalidArg3() + #client.query("DROP TABLE defaults") + +def wordShingleSimHashInvalidArg1(): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(30) + s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) + sendHello(s) + receiveHello(s) + sendQuery(s, "SELECT wordShingleSimHash('foobar', 9223372036854775807)") + + # Fin block + sendEmptyBlock(s) + + + assertPacket(readVarUInt(s), 2) + print(readException(s)) + s.close() + + +def wordShingleSimHashInvalidArg2(): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(30) + s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) + sendHello(s) + receiveHello(s) + sendQuery(s, "SELECT wordShingleSimHash('foobar', 1001)") + + # Fin block + sendEmptyBlock(s) + + assertPacket(readVarUInt(s), 2) + print(readException(s)) + s.close() + + +def wordShingleSimHashInvalidArg3(): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.settimeout(30) + s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT)) + sendHello(s) + receiveHello(s) + sendQuery(s, "SELECT wordShingleSimHash('foobar', 0)") + + # Fin block + sendEmptyBlock(s) + + assertPacket(readVarUInt(s), 2) + print(readException(s)) + s.close() + +if __name__ == "__main__": + test() + #wordShingleSimHashInvalidArg1() diff --git a/tests/queries/0_stateless/01016_simhash_minhash.sh b/tests/queries/0_stateless/01016_simhash_minhash.sh new file mode 100755 index 00000000000..94bac7efacb --- /dev/null +++ b/tests/queries/0_stateless/01016_simhash_minhash.sh @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. 
"$CURDIR"/../shell_config.sh + +python3 "$CURDIR"/01016_simhash_minhash.python + diff --git a/tests/queries/0_stateless/01016_simhash_minhash.sql b/tests/queries/0_stateless/01016_simhash_minhash.sql deleted file mode 100644 index 1e77b487851..00000000000 --- a/tests/queries/0_stateless/01016_simhash_minhash.sql +++ /dev/null @@ -1,115 +0,0 @@ -SELECT ngramSimHash(''); -SELECT ngramSimHash('what a cute cat.'); -SELECT ngramSimHashCaseInsensitive('what a cute cat.'); -SELECT ngramSimHashUTF8('what a cute cat.'); -SELECT ngramSimHashCaseInsensitiveUTF8('what a cute cat.'); -SELECT wordShingleSimHash('what a cute cat.'); -SELECT wordShingleSimHashCaseInsensitive('what a cute cat.'); -SELECT wordShingleSimHashUTF8('what a cute cat.'); -SELECT wordShingleSimHashCaseInsensitiveUTF8('what a cute cat.'); - -SELECT ngramMinHash(''); -SELECT ngramMinHash('what a cute cat.'); -SELECT ngramMinHashCaseInsensitive('what a cute cat.'); -SELECT ngramMinHashUTF8('what a cute cat.'); -SELECT ngramMinHashCaseInsensitiveUTF8('what a cute cat.'); -SELECT wordShingleMinHash('what a cute cat.'); -SELECT wordShingleMinHashCaseInsensitive('what a cute cat.'); -SELECT wordShingleMinHashUTF8('what a cute cat.'); -SELECT wordShingleMinHashCaseInsensitiveUTF8('what a cute cat.'); - -DROP TABLE IF EXISTS defaults; -CREATE TABLE defaults -( - s String -)ENGINE = Memory(); - -INSERT INTO defaults values ('It is the latest occurrence of the Southeast European haze, the issue that occurs in constant intensity during every wet season. It has mainly been caused by forest fires resulting from illegal slash-and-burn clearing performed on behalf of the palm oil industry in Kazakhstan, principally on the islands, which then spread quickly in the dry season.') ('It is the latest occurrence of the Southeast Asian haze, the issue that occurs in constant intensity during every wet season. It has mainly been caused by forest fires resulting from illegal slash-and-burn clearing performed on behalf of the palm oil industry in Kazakhstan, principally on the islands, which then spread quickly in the dry season.'); - -SELECT ngramSimHash(s) FROM defaults; -SELECT ngramSimHashCaseInsensitive(s) FROM defaults; -SELECT ngramSimHashUTF8(s) FROM defaults; -SELECT ngramSimHashCaseInsensitiveUTF8(s) FROM defaults; -SELECT wordShingleSimHash(s) FROM defaults; -SELECT wordShingleSimHashCaseInsensitive(s) FROM defaults; -SELECT wordShingleSimHashUTF8(s) FROM defaults; -SELECT wordShingleSimHashCaseInsensitiveUTF8(s) FROM defaults; - -SELECT ngramMinHash(s) FROM defaults; -SELECT ngramMinHashCaseInsensitive(s) FROM defaults; -SELECT ngramMinHashUTF8(s) FROM defaults; -SELECT ngramMinHashCaseInsensitiveUTF8(s) FROM defaults; -SELECT wordShingleMinHash(s) FROM defaults; -SELECT wordShingleMinHashCaseInsensitive(s) FROM defaults; -SELECT wordShingleMinHashUTF8(s) FROM defaults; -SELECT wordShingleMinHashCaseInsensitiveUTF8(s) FROM defaults; - -TRUNCATE TABLE defaults; -INSERT INTO defaults SELECT arrayJoin(splitByString('\n\n', -'ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency. -ClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. 
Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes. -ClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. - -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. -ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system''s read and write availability. -ClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. - -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. -ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system''s read / write availability. -ClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. - -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. -ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system. -ClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. - -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. -ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system. -ClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. - -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. -ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system. -ClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.' -)); - -SELECT 'uniqExact', uniqExact(s) FROM defaults; - - -SELECT 'ngramSimHash'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHash(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'ngramSimHashCaseInsensitive'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'ngramSimHashUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'ngramSimHashCaseInsensitiveUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleSimHash'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHash(s, 2) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleSimHashCaseInsensitive'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitive(s, 2) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleSimHashUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashUTF8(s, 2) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleSimHashCaseInsensitiveUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitiveUTF8(s, 2) as h FROM defaults GROUP BY h ORDER BY h; - -SELECT 'ngramMinHash'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHash(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'ngramMinHashCaseInsensitive'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'ngramMinHashUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashUTF8(s) as h 
FROM defaults GROUP BY h ORDER BY h; -SELECT 'ngramMinHashCaseInsensitiveUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleMinHash'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHash(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleMinHashCaseInsensitive'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitive(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleMinHashUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashUTF8(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleMinHashCaseInsensitiveUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitiveUTF8(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h; - -SELECT wordShingleSimHash('foobar', 9223372036854775807); -- { serverError 69 } -SELECT wordShingleSimHash('foobar', 1001); -- { serverError 69 } -SELECT wordShingleSimHash('foobar', 0); -- { serverError 69 } - -DROP TABLE defaults; diff --git a/tests/queries/0_stateless/01016_simhash_minhash.reference b/tests/queries/0_stateless/01016_simhash_minhash.x86_64.reference similarity index 100% rename from tests/queries/0_stateless/01016_simhash_minhash.reference rename to tests/queries/0_stateless/01016_simhash_minhash.x86_64.reference From 25c94dfa8300520555ef21b5aa8ca76f7a5a574d Mon Sep 17 00:00:00 2001 From: MeenaRenganathan22 Date: Tue, 10 Jan 2023 20:56:16 -0800 Subject: [PATCH 165/262] Changes to support the CRC32 in PowerPC to address the WeakHash collision issue. Update the reference to support the hash values based on the specific platform --- .gitmodules | 3 + contrib/CMakeLists.txt | 1 + contrib/crc32-vpmsum | 1 + contrib/crc32-vpmsum-cmake/CMakeLists.txt | 12 + contrib/crc32-vpmsum-cmake/README.md | 8 + contrib/crc32-vpmsum-cmake/crc32_constants.h | 1206 +++++++++++++++++ contrib/crc32-vpmsum-cmake/vec_crc32.h | 29 + src/CMakeLists.txt | 4 + src/Common/HashTable/Hash.h | 8 + src/Functions/CMakeLists.txt | 4 + src/Functions/FunctionsStringHash.cpp | 12 + src/Functions/FunctionsStringSimilarity.cpp | 6 + .../01016_simhash_minhash.ppc64le.reference | 148 ++ .../0_stateless/01016_simhash_minhash.python | 394 ++++++ .../0_stateless/01016_simhash_minhash.sh | 8 + .../0_stateless/01016_simhash_minhash.sql | 115 -- ...=> 01016_simhash_minhash.x86_64.reference} | 0 17 files changed, 1844 insertions(+), 115 deletions(-) create mode 160000 contrib/crc32-vpmsum create mode 100644 contrib/crc32-vpmsum-cmake/CMakeLists.txt create mode 100644 contrib/crc32-vpmsum-cmake/README.md create mode 100644 contrib/crc32-vpmsum-cmake/crc32_constants.h create mode 100644 contrib/crc32-vpmsum-cmake/vec_crc32.h create mode 100644 tests/queries/0_stateless/01016_simhash_minhash.ppc64le.reference create mode 100644 tests/queries/0_stateless/01016_simhash_minhash.python create mode 100755 tests/queries/0_stateless/01016_simhash_minhash.sh delete mode 100644 tests/queries/0_stateless/01016_simhash_minhash.sql rename tests/queries/0_stateless/{01016_simhash_minhash.reference => 01016_simhash_minhash.x86_64.reference} (100%) diff --git a/.gitmodules b/.gitmodules index 26824cb57ff..b4673f113b7 100644 --- a/.gitmodules +++ b/.gitmodules @@ -327,3 +327,6 @@ [submodule "contrib/aws-s2n-tls"] path = contrib/aws-s2n-tls url = https://github.com/ClickHouse/s2n-tls 
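The commit above adds a CRC32 implementation for PowerPC (ppc64le) based on the crc32-vpmsum contrib so that the WeakHash/CRC32-based hashes line up with the CRC32C produced by the SSE4.2 intrinsics on x86_64. A minimal sketch of that kind of per-platform dispatch, assuming a `crc32_vpmsum(crc, buf, len)` entry point exposed by the contrib's `vec_crc32.h` (the actual integration lives in src/Common/HashTable/Hash.h and is not reproduced here), could look like this:

```c
/* Illustrative sketch only: per-platform CRC32C dispatch.
 * The crc32_vpmsum name and signature are assumptions about contrib/crc32-vpmsum;
 * _mm_crc32_u8 is the standard SSE4.2 intrinsic. */
#include <stddef.h>
#include <stdint.h>

#if defined(__x86_64__)
#include <nmmintrin.h>   /* SSE4.2 CRC32 intrinsics */
#elif defined(__powerpc64__)
#include "vec_crc32.h"   /* assumed to declare crc32_vpmsum(crc, buf, len) */
#endif

static inline uint32_t crc32c_bytes(uint32_t crc, const uint8_t * data, size_t len)
{
#if defined(__x86_64__)
    /* Byte-at-a-time for brevity; real code processes wider words. */
    for (size_t i = 0; i < len; ++i)
        crc = _mm_crc32_u8(crc, data[i]);
    return crc;
#elif defined(__powerpc64__)
    /* Assumption: crc32_vpmsum uses the same CRC32C polynomial (0x11EDC6F41). */
    return crc32_vpmsum(crc, (unsigned char *) data, (unsigned long) len);
#else
    /* Portable fallback omitted in this sketch. */
    (void) data;
    (void) len;
    return crc;
#endif
}
```

This is also why the diffstat splits the test expectations into `01016_simhash_minhash.x86_64.reference` and `01016_simhash_minhash.ppc64le.reference`: the expected hash values are platform-specific.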
+[submodule "contrib/crc32-vpmsum"]
+ path = contrib/crc32-vpmsum
+ url = https://github.com/antonblanchard/crc32-vpmsum.git
diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt
index 27b4a7ddb5c..f5d1315cc02 100644
--- a/contrib/CMakeLists.txt
+++ b/contrib/CMakeLists.txt
@@ -55,6 +55,7 @@ else ()
 endif ()
 add_contrib (miniselect-cmake miniselect)
 add_contrib (pdqsort-cmake pdqsort)
+add_contrib (crc32-vpmsum-cmake crc32-vpmsum)
 add_contrib (sparsehash-c11-cmake sparsehash-c11)
 add_contrib (abseil-cpp-cmake abseil-cpp)
 add_contrib (magic-enum-cmake magic_enum)
diff --git a/contrib/crc32-vpmsum b/contrib/crc32-vpmsum
new file mode 160000
index 00000000000..45215543938
--- /dev/null
+++ b/contrib/crc32-vpmsum
@@ -0,0 +1 @@
+Subproject commit 452155439389311fc7d143621eaf56a258e02476
diff --git a/contrib/crc32-vpmsum-cmake/CMakeLists.txt b/contrib/crc32-vpmsum-cmake/CMakeLists.txt
new file mode 100644
index 00000000000..bb7d5618410
--- /dev/null
+++ b/contrib/crc32-vpmsum-cmake/CMakeLists.txt
@@ -0,0 +1,12 @@
+if (NOT ARCH_PPC64LE)
+    message(STATUS "crc32-vpmsum library is only supported on ppc64le")
+    return()
+endif()
+
+SET(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/crc32-vpmsum")
+
+add_library(_crc32-vpmsum
+    "${LIBRARY_DIR}/vec_crc32.c"
+    )
+target_include_directories(_crc32-vpmsum SYSTEM BEFORE PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}")
+add_library(ch_contrib::crc32-vpmsum ALIAS _crc32-vpmsum)
diff --git a/contrib/crc32-vpmsum-cmake/README.md b/contrib/crc32-vpmsum-cmake/README.md
new file mode 100644
index 00000000000..9ea8133e331
--- /dev/null
+++ b/contrib/crc32-vpmsum-cmake/README.md
@@ -0,0 +1,8 @@
+# To Generate crc32_constants.h
+
+- Run the make file in the `../crc32-vpmsum` directory using the following options and CRC polynomial. These options should use the same polynomial and order as the Intel intrinsic functions
+```bash
+make crc32_constants.h CRC="0x11EDC6F41" OPTIONS="-x -r -c"
+```
+- Move the generated `crc32_constants.h` into this directory
+- To understand more about this, go here: https://masterchef2209.wordpress.com/2020/06/17/guide-to-intel-sse4-2-crc-intrinisics-implementation-for-simde/
diff --git a/contrib/crc32-vpmsum-cmake/crc32_constants.h b/contrib/crc32-vpmsum-cmake/crc32_constants.h
new file mode 100644
index 00000000000..aea525c9038
--- /dev/null
+++ b/contrib/crc32-vpmsum-cmake/crc32_constants.h
@@ -0,0 +1,1206 @@
+/*
+*
+* THIS FILE IS GENERATED WITH ./crc32_constants -x -r -c 0x11EDC6F41
+* This is from https://github.com/antonblanchard/crc32-vpmsum/
+* DO NOT MODIFY IT MANUALLY!
+* +*/ + +#define CRC 0x1edc6f41 +#define CRC_XOR +#define REFLECT +#define MAX_SIZE 32768 + +#ifndef __ASSEMBLER__ +#ifdef CRC_TABLE +static const unsigned int crc_table[] = { + 0x00000000, 0xf26b8303, 0xe13b70f7, 0x1350f3f4, + 0xc79a971f, 0x35f1141c, 0x26a1e7e8, 0xd4ca64eb, + 0x8ad958cf, 0x78b2dbcc, 0x6be22838, 0x9989ab3b, + 0x4d43cfd0, 0xbf284cd3, 0xac78bf27, 0x5e133c24, + 0x105ec76f, 0xe235446c, 0xf165b798, 0x030e349b, + 0xd7c45070, 0x25afd373, 0x36ff2087, 0xc494a384, + 0x9a879fa0, 0x68ec1ca3, 0x7bbcef57, 0x89d76c54, + 0x5d1d08bf, 0xaf768bbc, 0xbc267848, 0x4e4dfb4b, + 0x20bd8ede, 0xd2d60ddd, 0xc186fe29, 0x33ed7d2a, + 0xe72719c1, 0x154c9ac2, 0x061c6936, 0xf477ea35, + 0xaa64d611, 0x580f5512, 0x4b5fa6e6, 0xb93425e5, + 0x6dfe410e, 0x9f95c20d, 0x8cc531f9, 0x7eaeb2fa, + 0x30e349b1, 0xc288cab2, 0xd1d83946, 0x23b3ba45, + 0xf779deae, 0x05125dad, 0x1642ae59, 0xe4292d5a, + 0xba3a117e, 0x4851927d, 0x5b016189, 0xa96ae28a, + 0x7da08661, 0x8fcb0562, 0x9c9bf696, 0x6ef07595, + 0x417b1dbc, 0xb3109ebf, 0xa0406d4b, 0x522bee48, + 0x86e18aa3, 0x748a09a0, 0x67dafa54, 0x95b17957, + 0xcba24573, 0x39c9c670, 0x2a993584, 0xd8f2b687, + 0x0c38d26c, 0xfe53516f, 0xed03a29b, 0x1f682198, + 0x5125dad3, 0xa34e59d0, 0xb01eaa24, 0x42752927, + 0x96bf4dcc, 0x64d4cecf, 0x77843d3b, 0x85efbe38, + 0xdbfc821c, 0x2997011f, 0x3ac7f2eb, 0xc8ac71e8, + 0x1c661503, 0xee0d9600, 0xfd5d65f4, 0x0f36e6f7, + 0x61c69362, 0x93ad1061, 0x80fde395, 0x72966096, + 0xa65c047d, 0x5437877e, 0x4767748a, 0xb50cf789, + 0xeb1fcbad, 0x197448ae, 0x0a24bb5a, 0xf84f3859, + 0x2c855cb2, 0xdeeedfb1, 0xcdbe2c45, 0x3fd5af46, + 0x7198540d, 0x83f3d70e, 0x90a324fa, 0x62c8a7f9, + 0xb602c312, 0x44694011, 0x5739b3e5, 0xa55230e6, + 0xfb410cc2, 0x092a8fc1, 0x1a7a7c35, 0xe811ff36, + 0x3cdb9bdd, 0xceb018de, 0xdde0eb2a, 0x2f8b6829, + 0x82f63b78, 0x709db87b, 0x63cd4b8f, 0x91a6c88c, + 0x456cac67, 0xb7072f64, 0xa457dc90, 0x563c5f93, + 0x082f63b7, 0xfa44e0b4, 0xe9141340, 0x1b7f9043, + 0xcfb5f4a8, 0x3dde77ab, 0x2e8e845f, 0xdce5075c, + 0x92a8fc17, 0x60c37f14, 0x73938ce0, 0x81f80fe3, + 0x55326b08, 0xa759e80b, 0xb4091bff, 0x466298fc, + 0x1871a4d8, 0xea1a27db, 0xf94ad42f, 0x0b21572c, + 0xdfeb33c7, 0x2d80b0c4, 0x3ed04330, 0xccbbc033, + 0xa24bb5a6, 0x502036a5, 0x4370c551, 0xb11b4652, + 0x65d122b9, 0x97baa1ba, 0x84ea524e, 0x7681d14d, + 0x2892ed69, 0xdaf96e6a, 0xc9a99d9e, 0x3bc21e9d, + 0xef087a76, 0x1d63f975, 0x0e330a81, 0xfc588982, + 0xb21572c9, 0x407ef1ca, 0x532e023e, 0xa145813d, + 0x758fe5d6, 0x87e466d5, 0x94b49521, 0x66df1622, + 0x38cc2a06, 0xcaa7a905, 0xd9f75af1, 0x2b9cd9f2, + 0xff56bd19, 0x0d3d3e1a, 0x1e6dcdee, 0xec064eed, + 0xc38d26c4, 0x31e6a5c7, 0x22b65633, 0xd0ddd530, + 0x0417b1db, 0xf67c32d8, 0xe52cc12c, 0x1747422f, + 0x49547e0b, 0xbb3ffd08, 0xa86f0efc, 0x5a048dff, + 0x8ecee914, 0x7ca56a17, 0x6ff599e3, 0x9d9e1ae0, + 0xd3d3e1ab, 0x21b862a8, 0x32e8915c, 0xc083125f, + 0x144976b4, 0xe622f5b7, 0xf5720643, 0x07198540, + 0x590ab964, 0xab613a67, 0xb831c993, 0x4a5a4a90, + 0x9e902e7b, 0x6cfbad78, 0x7fab5e8c, 0x8dc0dd8f, + 0xe330a81a, 0x115b2b19, 0x020bd8ed, 0xf0605bee, + 0x24aa3f05, 0xd6c1bc06, 0xc5914ff2, 0x37faccf1, + 0x69e9f0d5, 0x9b8273d6, 0x88d28022, 0x7ab90321, + 0xae7367ca, 0x5c18e4c9, 0x4f48173d, 0xbd23943e, + 0xf36e6f75, 0x0105ec76, 0x12551f82, 0xe03e9c81, + 0x34f4f86a, 0xc69f7b69, 0xd5cf889d, 0x27a40b9e, + 0x79b737ba, 0x8bdcb4b9, 0x988c474d, 0x6ae7c44e, + 0xbe2da0a5, 0x4c4623a6, 0x5f16d052, 0xad7d5351,}; + +#endif /* CRC_TABLE */ +#ifdef POWER8_INTRINSICS + +/* Constants */ + +/* Reduce 262144 kbits to 1024 bits */ +static const __vector unsigned long long vcrc_const[255] + 
__attribute__((aligned (16))) = { +#ifdef __LITTLE_ENDIAN__ + /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ + { 0x000000009c37c408, 0x00000000b6ca9e20 }, + /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */ + { 0x00000001b51df26c, 0x00000000350249a8 }, + /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ + { 0x000000000724b9d0, 0x00000001862dac54 }, + /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ + { 0x00000001c00532fe, 0x00000001d87fb48c }, + /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ + { 0x00000000f05a9362, 0x00000001f39b699e }, + /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ + { 0x00000001e1007970, 0x0000000101da11b4 }, + /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ + { 0x00000000a57366ee, 0x00000001cab571e0 }, + /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ + { 0x0000000192011284, 0x00000000c7020cfe }, + /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ + { 0x0000000162716d9a, 0x00000000cdaed1ae }, + /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ + { 0x00000000cd97ecde, 0x00000001e804effc }, + /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ + { 0x0000000058812bc0, 0x0000000077c3ea3a }, + /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ + { 0x0000000088b8c12e, 0x0000000068df31b4 }, + /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ + { 0x00000001230b234c, 0x00000000b059b6c2 }, + /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ + { 0x00000001120b416e, 0x0000000145fb8ed8 }, + /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ + { 0x00000001974aecb0, 0x00000000cbc09168 }, + /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ + { 0x000000008ee3f226, 0x000000005ceeedc2 }, + /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ + { 0x00000001089aba9a, 0x0000000047d74e86 }, + /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ + { 0x0000000065113872, 0x00000001407e9e22 }, + /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ + { 0x000000005c07ec10, 0x00000001da967bda }, + /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ + { 0x0000000187590924, 0x000000006c898368 }, + /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ + { 0x00000000e35da7c6, 0x00000000f2d14c98 }, + /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ + { 0x000000000415855a, 0x00000001993c6ad4 }, + /* x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ + { 0x0000000073617758, 0x000000014683d1ac }, + /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ + { 0x0000000176021d28, 0x00000001a7c93e6c }, + /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ + { 0x00000001c358fd0a, 0x000000010211e90a }, + /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ + { 0x00000001ff7a2c18, 0x000000001119403e }, + /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ + { 0x00000000f2d9f7e4, 0x000000001c3261aa }, + /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ + { 0x000000016cf1f9c8, 0x000000014e37a634 }, + /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ + { 0x000000010af9279a, 0x0000000073786c0c }, + /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ + { 0x0000000004f101e8, 0x000000011dc037f8 }, + /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ + { 0x0000000070bcf184, 0x0000000031433dfc }, + /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ + { 0x000000000a8de642, 0x000000009cde8348 }, + /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ + { 0x0000000062ea130c, 0x0000000038d3c2a6 }, + /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ + { 0x00000001eb31cbb2, 
0x000000011b25f260 }, + /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ + { 0x0000000170783448, 0x000000001629e6f0 }, + /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ + { 0x00000001a684b4c6, 0x0000000160838b4c }, + /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ + { 0x00000000253ca5b4, 0x000000007a44011c }, + /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ + { 0x0000000057b4b1e2, 0x00000000226f417a }, + /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ + { 0x00000000b6bd084c, 0x0000000045eb2eb4 }, + /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ + { 0x0000000123c2d592, 0x000000014459d70c }, + /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ + { 0x00000000159dafce, 0x00000001d406ed82 }, + /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ + { 0x0000000127e1a64e, 0x0000000160c8e1a8 }, + /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ + { 0x0000000056860754, 0x0000000027ba8098 }, + /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ + { 0x00000001e661aae8, 0x000000006d92d018 }, + /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ + { 0x00000000f82c6166, 0x000000012ed7e3f2 }, + /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ + { 0x00000000c4f9c7ae, 0x000000002dc87788 }, + /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ + { 0x0000000074203d20, 0x0000000018240bb8 }, + /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ + { 0x0000000198173052, 0x000000001ad38158 }, + /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ + { 0x00000001ce8aba54, 0x00000001396b78f2 }, + /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ + { 0x00000001850d5d94, 0x000000011a681334 }, + /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ + { 0x00000001d609239c, 0x000000012104732e }, + /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ + { 0x000000001595f048, 0x00000000a140d90c }, + /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ + { 0x0000000042ccee08, 0x00000001b7215eda }, + /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ + { 0x000000010a389d74, 0x00000001aaf1df3c }, + /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ + { 0x000000012a840da6, 0x0000000029d15b8a }, + /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ + { 0x000000001d181c0c, 0x00000000f1a96922 }, + /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ + { 0x0000000068b7d1f6, 0x00000001ac80d03c }, + /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ + { 0x000000005b0f14fc, 0x000000000f11d56a }, + /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ + { 0x0000000179e9e730, 0x00000001f1c022a2 }, + /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ + { 0x00000001ce1368d6, 0x0000000173d00ae2 }, + /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ + { 0x0000000112c3a84c, 0x00000001d4ffe4ac }, + /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ + { 0x00000000de940fee, 0x000000016edc5ae4 }, + /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ + { 0x00000000fe896b7e, 0x00000001f1a02140 }, + /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ + { 0x00000001f797431c, 0x00000000ca0b28a0 }, + /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ + { 0x0000000053e989ba, 0x00000001928e30a2 }, + /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ + { 0x000000003920cd16, 0x0000000097b1b002 }, + /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ + { 0x00000001e6f579b8, 0x00000000b15bf906 }, + /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ + { 0x000000007493cb0a, 0x00000000411c5d52 }, + /* x^191488 mod p(x)` << 1, 
x^191552 mod p(x)` << 1 */ + { 0x00000001bdd376d8, 0x00000001c36f3300 }, + /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ + { 0x000000016badfee6, 0x00000001119227e0 }, + /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ + { 0x0000000071de5c58, 0x00000000114d4702 }, + /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ + { 0x00000000453f317c, 0x00000000458b5b98 }, + /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ + { 0x0000000121675cce, 0x000000012e31fb8e }, + /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ + { 0x00000001f409ee92, 0x000000005cf619d8 }, + /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ + { 0x00000000f36b9c88, 0x0000000063f4d8b2 }, + /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ + { 0x0000000036b398f4, 0x000000004138dc8a }, + /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ + { 0x00000001748f9adc, 0x00000001d29ee8e0 }, + /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ + { 0x00000001be94ec00, 0x000000006a08ace8 }, + /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ + { 0x00000000b74370d6, 0x0000000127d42010 }, + /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ + { 0x00000001174d0b98, 0x0000000019d76b62 }, + /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ + { 0x00000000befc06a4, 0x00000001b1471f6e }, + /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ + { 0x00000001ae125288, 0x00000001f64c19cc }, + /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ + { 0x0000000095c19b34, 0x00000000003c0ea0 }, + /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ + { 0x00000001a78496f2, 0x000000014d73abf6 }, + /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ + { 0x00000001ac5390a0, 0x00000001620eb844 }, + /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ + { 0x000000002a80ed6e, 0x0000000147655048 }, + /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ + { 0x00000001fa9b0128, 0x0000000067b5077e }, + /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ + { 0x00000001ea94929e, 0x0000000010ffe206 }, + /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ + { 0x0000000125f4305c, 0x000000000fee8f1e }, + /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ + { 0x00000001471e2002, 0x00000001da26fbae }, + /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ + { 0x0000000132d2253a, 0x00000001b3a8bd88 }, + /* x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ + { 0x00000000f26b3592, 0x00000000e8f3898e }, + /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ + { 0x00000000bc8b67b0, 0x00000000b0d0d28c }, + /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ + { 0x000000013a826ef2, 0x0000000030f2a798 }, + /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ + { 0x0000000081482c84, 0x000000000fba1002 }, + /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ + { 0x00000000e77307c2, 0x00000000bdb9bd72 }, + /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ + { 0x00000000d4a07ec8, 0x0000000075d3bf5a }, + /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ + { 0x0000000017102100, 0x00000000ef1f98a0 }, + /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ + { 0x00000000db406486, 0x00000000689c7602 }, + /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ + { 0x0000000192db7f88, 0x000000016d5fa5fe }, + /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ + { 0x000000018bf67b1e, 0x00000001d0d2b9ca }, + /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ + { 0x000000007c09163e, 0x0000000041e7b470 }, + /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ + { 0x000000000adac060, 
0x00000001cbb6495e }, + /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ + { 0x00000000bd8316ae, 0x000000010052a0b0 }, + /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ + { 0x000000019f09ab54, 0x00000001d8effb5c }, + /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ + { 0x0000000125155542, 0x00000001d969853c }, + /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ + { 0x000000018fdb5882, 0x00000000523ccce2 }, + /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ + { 0x00000000e794b3f4, 0x000000001e2436bc }, + /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ + { 0x000000016f9bb022, 0x00000000ddd1c3a2 }, + /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ + { 0x00000000290c9978, 0x0000000019fcfe38 }, + /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ + { 0x0000000083c0f350, 0x00000001ce95db64 }, + /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ + { 0x0000000173ea6628, 0x00000000af582806 }, + /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ + { 0x00000001c8b4e00a, 0x00000001006388f6 }, + /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ + { 0x00000000de95d6aa, 0x0000000179eca00a }, + /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ + { 0x000000010b7f7248, 0x0000000122410a6a }, + /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ + { 0x00000001326e3a06, 0x000000004288e87c }, + /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ + { 0x00000000bb62c2e6, 0x000000016c5490da }, + /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ + { 0x0000000156a4b2c2, 0x00000000d1c71f6e }, + /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ + { 0x000000011dfe763a, 0x00000001b4ce08a6 }, + /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ + { 0x000000007bcca8e2, 0x00000001466ba60c }, + /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ + { 0x0000000186118faa, 0x00000001f6c488a4 }, + /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ + { 0x0000000111a65a88, 0x000000013bfb0682 }, + /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ + { 0x000000003565e1c4, 0x00000000690e9e54 }, + /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ + { 0x000000012ed02a82, 0x00000000281346b6 }, + /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ + { 0x00000000c486ecfc, 0x0000000156464024 }, + /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ + { 0x0000000001b951b2, 0x000000016063a8dc }, + /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ + { 0x0000000048143916, 0x0000000116a66362 }, + /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ + { 0x00000001dc2ae124, 0x000000017e8aa4d2 }, + /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ + { 0x00000001416c58d6, 0x00000001728eb10c }, + /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ + { 0x00000000a479744a, 0x00000001b08fd7fa }, + /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ + { 0x0000000096ca3a26, 0x00000001092a16e8 }, + /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ + { 0x00000000ff223d4e, 0x00000000a505637c }, + /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ + { 0x000000010e84da42, 0x00000000d94869b2 }, + /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ + { 0x00000001b61ba3d0, 0x00000001c8b203ae }, + /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ + { 0x00000000680f2de8, 0x000000005704aea0 }, + /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ + { 0x000000008772a9a8, 0x000000012e295fa2 }, + /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ + { 0x0000000155f295bc, 0x000000011d0908bc }, + /* x^120832 mod p(x)` << 1, 
x^120896 mod p(x)` << 1 */ + { 0x00000000595f9282, 0x0000000193ed97ea }, + /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ + { 0x0000000164b1c25a, 0x000000013a0f1c52 }, + /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ + { 0x00000000fbd67c50, 0x000000010c2c40c0 }, + /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ + { 0x0000000096076268, 0x00000000ff6fac3e }, + /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ + { 0x00000001d288e4cc, 0x000000017b3609c0 }, + /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ + { 0x00000001eaac1bdc, 0x0000000088c8c922 }, + /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ + { 0x00000001f1ea39e2, 0x00000001751baae6 }, + /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ + { 0x00000001eb6506fc, 0x0000000107952972 }, + /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ + { 0x000000010f806ffe, 0x0000000162b00abe }, + /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ + { 0x000000010408481e, 0x000000000d7b404c }, + /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ + { 0x0000000188260534, 0x00000000763b13d4 }, + /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ + { 0x0000000058fc73e0, 0x00000000f6dc22d8 }, + /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ + { 0x00000000391c59b8, 0x000000007daae060 }, + /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ + { 0x000000018b638400, 0x000000013359ab7c }, + /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ + { 0x000000011738f5c4, 0x000000008add438a }, + /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ + { 0x000000008cf7c6da, 0x00000001edbefdea }, + /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ + { 0x00000001ef97fb16, 0x000000004104e0f8 }, + /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ + { 0x0000000102130e20, 0x00000000b48a8222 }, + /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ + { 0x00000000db968898, 0x00000001bcb46844 }, + /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ + { 0x00000000b5047b5e, 0x000000013293ce0a }, + /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ + { 0x000000010b90fdb2, 0x00000001710d0844 }, + /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ + { 0x000000004834a32e, 0x0000000117907f6e }, + /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ + { 0x0000000059c8f2b0, 0x0000000087ddf93e }, + /* x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ + { 0x0000000122cec508, 0x000000005970e9b0 }, + /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ + { 0x000000000a330cda, 0x0000000185b2b7d0 }, + /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ + { 0x000000014a47148c, 0x00000001dcee0efc }, + /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ + { 0x0000000042c61cb8, 0x0000000030da2722 }, + /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ + { 0x0000000012fe6960, 0x000000012f925a18 }, + /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ + { 0x00000000dbda2c20, 0x00000000dd2e357c }, + /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ + { 0x000000011122410c, 0x00000000071c80de }, + /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ + { 0x00000000977b2070, 0x000000011513140a }, + /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ + { 0x000000014050438e, 0x00000001df876e8e }, + /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ + { 0x0000000147c840e8, 0x000000015f81d6ce }, + /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ + { 0x00000001cc7c88ce, 0x000000019dd94dbe }, + /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ + { 0x00000001476b35a4, 0x00000001373d206e }, + /* 
x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ + { 0x000000013d52d508, 0x00000000668ccade }, + /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ + { 0x000000008e4be32e, 0x00000001b192d268 }, + /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ + { 0x00000000024120fe, 0x00000000e30f3a78 }, + /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ + { 0x00000000ddecddb4, 0x000000010ef1f7bc }, + /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ + { 0x00000000d4d403bc, 0x00000001f5ac7380 }, + /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ + { 0x00000001734b89aa, 0x000000011822ea70 }, + /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ + { 0x000000010e7a58d6, 0x00000000c3a33848 }, + /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ + { 0x00000001f9f04e9c, 0x00000001bd151c24 }, + /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ + { 0x00000000b692225e, 0x0000000056002d76 }, + /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ + { 0x000000019b8d3f3e, 0x000000014657c4f4 }, + /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ + { 0x00000001a874f11e, 0x0000000113742d7c }, + /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ + { 0x000000010d5a4254, 0x000000019c5920ba }, + /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ + { 0x00000000bbb2f5d6, 0x000000005216d2d6 }, + /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ + { 0x0000000179cc0e36, 0x0000000136f5ad8a }, + /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ + { 0x00000001dca1da4a, 0x000000018b07beb6 }, + /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ + { 0x00000000feb1a192, 0x00000000db1e93b0 }, + /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ + { 0x00000000d1eeedd6, 0x000000000b96fa3a }, + /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ + { 0x000000008fad9bb4, 0x00000001d9968af0 }, + /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ + { 0x00000001884938e4, 0x000000000e4a77a2 }, + /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ + { 0x00000001bc2e9bc0, 0x00000000508c2ac8 }, + /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ + { 0x00000001f9658a68, 0x0000000021572a80 }, + /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ + { 0x000000001b9224fc, 0x00000001b859daf2 }, + /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ + { 0x0000000055b2fb84, 0x000000016f788474 }, + /* x^61440 mod p(x)` << 1, x^61504 mod p(x)` << 1 */ + { 0x000000018b090348, 0x00000001b438810e }, + /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ + { 0x000000011ccbd5ea, 0x0000000095ddc6f2 }, + /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ + { 0x0000000007ae47f8, 0x00000001d977c20c }, + /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ + { 0x0000000172acbec0, 0x00000000ebedb99a }, + /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ + { 0x00000001c6e3ff20, 0x00000001df9e9e92 }, + /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ + { 0x00000000e1b38744, 0x00000001a4a3f952 }, + /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ + { 0x00000000791585b2, 0x00000000e2f51220 }, + /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ + { 0x00000000ac53b894, 0x000000004aa01f3e }, + /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ + { 0x00000001ed5f2cf4, 0x00000000b3e90a58 }, + /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ + { 0x00000001df48b2e0, 0x000000000c9ca2aa }, + /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ + { 0x00000000049c1c62, 0x0000000151682316 }, + /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ + { 0x000000017c460c12, 0x0000000036fce78c }, + /* x^49152 mod p(x)` 
<< 1, x^49216 mod p(x)` << 1 */ + { 0x000000015be4da7e, 0x000000009037dc10 }, + /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ + { 0x000000010f38f668, 0x00000000d3298582 }, + /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ + { 0x0000000039f40a00, 0x00000001b42e8ad6 }, + /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ + { 0x00000000bd4c10c4, 0x00000000142a9838 }, + /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ + { 0x0000000042db1d98, 0x0000000109c7f190 }, + /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ + { 0x00000001c905bae6, 0x0000000056ff9310 }, + /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ + { 0x00000000069d40ea, 0x00000001594513aa }, + /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ + { 0x000000008e4fbad0, 0x00000001e3b5b1e8 }, + /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ + { 0x0000000047bedd46, 0x000000011dd5fc08 }, + /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ + { 0x0000000026396bf8, 0x00000001675f0cc2 }, + /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ + { 0x00000000379beb92, 0x00000000d1c8dd44 }, + /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ + { 0x000000000abae54a, 0x0000000115ebd3d8 }, + /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ + { 0x0000000007e6a128, 0x00000001ecbd0dac }, + /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ + { 0x000000000ade29d2, 0x00000000cdf67af2 }, + /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ + { 0x00000000f974c45c, 0x000000004c01ff4c }, + /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ + { 0x00000000e77ac60a, 0x00000000f2d8657e }, + /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ + { 0x0000000145895816, 0x000000006bae74c4 }, + /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ + { 0x0000000038e362be, 0x0000000152af8aa0 }, + /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ + { 0x000000007f991a64, 0x0000000004663802 }, + /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ + { 0x00000000fa366d3a, 0x00000001ab2f5afc }, + /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ + { 0x00000001a2bb34f0, 0x0000000074a4ebd4 }, + /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ + { 0x0000000028a9981e, 0x00000001d7ab3a4c }, + /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ + { 0x00000001dbc672be, 0x00000001a8da60c6 }, + /* x^25600 mod p(x)` << 1, x^25664 mod p(x)` << 1 */ + { 0x00000000b04d77f6, 0x000000013cf63820 }, + /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ + { 0x0000000124400d96, 0x00000000bec12e1e }, + /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ + { 0x000000014ca4b414, 0x00000001c6368010 }, + /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ + { 0x000000012fe2c938, 0x00000001e6e78758 }, + /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ + { 0x00000001faed01e6, 0x000000008d7f2b3c }, + /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ + { 0x000000007e80ecfe, 0x000000016b4a156e }, + /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ + { 0x0000000098daee94, 0x00000001c63cfeb6 }, + /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ + { 0x000000010a04edea, 0x000000015f902670 }, + /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ + { 0x00000001c00b4524, 0x00000001cd5de11e }, + /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ + { 0x0000000170296550, 0x000000001acaec54 }, + /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ + { 0x0000000181afaa48, 0x000000002bd0ca78 }, + /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ + { 0x0000000185a31ffa, 0x0000000032d63d5c }, + /* x^13312 mod p(x)` << 1, x^13376 mod 
p(x)` << 1 */ + { 0x000000002469f608, 0x000000001c6d4e4c }, + /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ + { 0x000000006980102a, 0x0000000106a60b92 }, + /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ + { 0x0000000111ea9ca8, 0x00000000d3855e12 }, + /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ + { 0x00000001bd1d29ce, 0x00000000e3125636 }, + /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ + { 0x00000001b34b9580, 0x000000009e8f7ea4 }, + /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ + { 0x000000003076054e, 0x00000001c82e562c }, + /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ + { 0x000000012a608ea4, 0x00000000ca9f09ce }, + /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ + { 0x00000000784d05fe, 0x00000000c63764e6 }, + /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ + { 0x000000016ef0d82a, 0x0000000168d2e49e }, + /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ + { 0x0000000075bda454, 0x00000000e986c148 }, + /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ + { 0x000000003dc0a1c4, 0x00000000cfb65894 }, + /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ + { 0x00000000e9a5d8be, 0x0000000111cadee4 }, + /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ + { 0x00000001609bc4b4, 0x0000000171fb63ce } +#else /* __LITTLE_ENDIAN__ */ + /* x^261120 mod p(x)` << 1, x^261184 mod p(x)` << 1 */ + { 0x00000000b6ca9e20, 0x000000009c37c408 }, + /* x^260096 mod p(x)` << 1, x^260160 mod p(x)` << 1 */ + { 0x00000000350249a8, 0x00000001b51df26c }, + /* x^259072 mod p(x)` << 1, x^259136 mod p(x)` << 1 */ + { 0x00000001862dac54, 0x000000000724b9d0 }, + /* x^258048 mod p(x)` << 1, x^258112 mod p(x)` << 1 */ + { 0x00000001d87fb48c, 0x00000001c00532fe }, + /* x^257024 mod p(x)` << 1, x^257088 mod p(x)` << 1 */ + { 0x00000001f39b699e, 0x00000000f05a9362 }, + /* x^256000 mod p(x)` << 1, x^256064 mod p(x)` << 1 */ + { 0x0000000101da11b4, 0x00000001e1007970 }, + /* x^254976 mod p(x)` << 1, x^255040 mod p(x)` << 1 */ + { 0x00000001cab571e0, 0x00000000a57366ee }, + /* x^253952 mod p(x)` << 1, x^254016 mod p(x)` << 1 */ + { 0x00000000c7020cfe, 0x0000000192011284 }, + /* x^252928 mod p(x)` << 1, x^252992 mod p(x)` << 1 */ + { 0x00000000cdaed1ae, 0x0000000162716d9a }, + /* x^251904 mod p(x)` << 1, x^251968 mod p(x)` << 1 */ + { 0x00000001e804effc, 0x00000000cd97ecde }, + /* x^250880 mod p(x)` << 1, x^250944 mod p(x)` << 1 */ + { 0x0000000077c3ea3a, 0x0000000058812bc0 }, + /* x^249856 mod p(x)` << 1, x^249920 mod p(x)` << 1 */ + { 0x0000000068df31b4, 0x0000000088b8c12e }, + /* x^248832 mod p(x)` << 1, x^248896 mod p(x)` << 1 */ + { 0x00000000b059b6c2, 0x00000001230b234c }, + /* x^247808 mod p(x)` << 1, x^247872 mod p(x)` << 1 */ + { 0x0000000145fb8ed8, 0x00000001120b416e }, + /* x^246784 mod p(x)` << 1, x^246848 mod p(x)` << 1 */ + { 0x00000000cbc09168, 0x00000001974aecb0 }, + /* x^245760 mod p(x)` << 1, x^245824 mod p(x)` << 1 */ + { 0x000000005ceeedc2, 0x000000008ee3f226 }, + /* x^244736 mod p(x)` << 1, x^244800 mod p(x)` << 1 */ + { 0x0000000047d74e86, 0x00000001089aba9a }, + /* x^243712 mod p(x)` << 1, x^243776 mod p(x)` << 1 */ + { 0x00000001407e9e22, 0x0000000065113872 }, + /* x^242688 mod p(x)` << 1, x^242752 mod p(x)` << 1 */ + { 0x00000001da967bda, 0x000000005c07ec10 }, + /* x^241664 mod p(x)` << 1, x^241728 mod p(x)` << 1 */ + { 0x000000006c898368, 0x0000000187590924 }, + /* x^240640 mod p(x)` << 1, x^240704 mod p(x)` << 1 */ + { 0x00000000f2d14c98, 0x00000000e35da7c6 }, + /* x^239616 mod p(x)` << 1, x^239680 mod p(x)` << 1 */ + { 0x00000001993c6ad4, 0x000000000415855a }, + /* 
x^238592 mod p(x)` << 1, x^238656 mod p(x)` << 1 */ + { 0x000000014683d1ac, 0x0000000073617758 }, + /* x^237568 mod p(x)` << 1, x^237632 mod p(x)` << 1 */ + { 0x00000001a7c93e6c, 0x0000000176021d28 }, + /* x^236544 mod p(x)` << 1, x^236608 mod p(x)` << 1 */ + { 0x000000010211e90a, 0x00000001c358fd0a }, + /* x^235520 mod p(x)` << 1, x^235584 mod p(x)` << 1 */ + { 0x000000001119403e, 0x00000001ff7a2c18 }, + /* x^234496 mod p(x)` << 1, x^234560 mod p(x)` << 1 */ + { 0x000000001c3261aa, 0x00000000f2d9f7e4 }, + /* x^233472 mod p(x)` << 1, x^233536 mod p(x)` << 1 */ + { 0x000000014e37a634, 0x000000016cf1f9c8 }, + /* x^232448 mod p(x)` << 1, x^232512 mod p(x)` << 1 */ + { 0x0000000073786c0c, 0x000000010af9279a }, + /* x^231424 mod p(x)` << 1, x^231488 mod p(x)` << 1 */ + { 0x000000011dc037f8, 0x0000000004f101e8 }, + /* x^230400 mod p(x)` << 1, x^230464 mod p(x)` << 1 */ + { 0x0000000031433dfc, 0x0000000070bcf184 }, + /* x^229376 mod p(x)` << 1, x^229440 mod p(x)` << 1 */ + { 0x000000009cde8348, 0x000000000a8de642 }, + /* x^228352 mod p(x)` << 1, x^228416 mod p(x)` << 1 */ + { 0x0000000038d3c2a6, 0x0000000062ea130c }, + /* x^227328 mod p(x)` << 1, x^227392 mod p(x)` << 1 */ + { 0x000000011b25f260, 0x00000001eb31cbb2 }, + /* x^226304 mod p(x)` << 1, x^226368 mod p(x)` << 1 */ + { 0x000000001629e6f0, 0x0000000170783448 }, + /* x^225280 mod p(x)` << 1, x^225344 mod p(x)` << 1 */ + { 0x0000000160838b4c, 0x00000001a684b4c6 }, + /* x^224256 mod p(x)` << 1, x^224320 mod p(x)` << 1 */ + { 0x000000007a44011c, 0x00000000253ca5b4 }, + /* x^223232 mod p(x)` << 1, x^223296 mod p(x)` << 1 */ + { 0x00000000226f417a, 0x0000000057b4b1e2 }, + /* x^222208 mod p(x)` << 1, x^222272 mod p(x)` << 1 */ + { 0x0000000045eb2eb4, 0x00000000b6bd084c }, + /* x^221184 mod p(x)` << 1, x^221248 mod p(x)` << 1 */ + { 0x000000014459d70c, 0x0000000123c2d592 }, + /* x^220160 mod p(x)` << 1, x^220224 mod p(x)` << 1 */ + { 0x00000001d406ed82, 0x00000000159dafce }, + /* x^219136 mod p(x)` << 1, x^219200 mod p(x)` << 1 */ + { 0x0000000160c8e1a8, 0x0000000127e1a64e }, + /* x^218112 mod p(x)` << 1, x^218176 mod p(x)` << 1 */ + { 0x0000000027ba8098, 0x0000000056860754 }, + /* x^217088 mod p(x)` << 1, x^217152 mod p(x)` << 1 */ + { 0x000000006d92d018, 0x00000001e661aae8 }, + /* x^216064 mod p(x)` << 1, x^216128 mod p(x)` << 1 */ + { 0x000000012ed7e3f2, 0x00000000f82c6166 }, + /* x^215040 mod p(x)` << 1, x^215104 mod p(x)` << 1 */ + { 0x000000002dc87788, 0x00000000c4f9c7ae }, + /* x^214016 mod p(x)` << 1, x^214080 mod p(x)` << 1 */ + { 0x0000000018240bb8, 0x0000000074203d20 }, + /* x^212992 mod p(x)` << 1, x^213056 mod p(x)` << 1 */ + { 0x000000001ad38158, 0x0000000198173052 }, + /* x^211968 mod p(x)` << 1, x^212032 mod p(x)` << 1 */ + { 0x00000001396b78f2, 0x00000001ce8aba54 }, + /* x^210944 mod p(x)` << 1, x^211008 mod p(x)` << 1 */ + { 0x000000011a681334, 0x00000001850d5d94 }, + /* x^209920 mod p(x)` << 1, x^209984 mod p(x)` << 1 */ + { 0x000000012104732e, 0x00000001d609239c }, + /* x^208896 mod p(x)` << 1, x^208960 mod p(x)` << 1 */ + { 0x00000000a140d90c, 0x000000001595f048 }, + /* x^207872 mod p(x)` << 1, x^207936 mod p(x)` << 1 */ + { 0x00000001b7215eda, 0x0000000042ccee08 }, + /* x^206848 mod p(x)` << 1, x^206912 mod p(x)` << 1 */ + { 0x00000001aaf1df3c, 0x000000010a389d74 }, + /* x^205824 mod p(x)` << 1, x^205888 mod p(x)` << 1 */ + { 0x0000000029d15b8a, 0x000000012a840da6 }, + /* x^204800 mod p(x)` << 1, x^204864 mod p(x)` << 1 */ + { 0x00000000f1a96922, 0x000000001d181c0c }, + /* x^203776 mod p(x)` << 1, x^203840 mod p(x)` << 1 */ 
+ { 0x00000001ac80d03c, 0x0000000068b7d1f6 }, + /* x^202752 mod p(x)` << 1, x^202816 mod p(x)` << 1 */ + { 0x000000000f11d56a, 0x000000005b0f14fc }, + /* x^201728 mod p(x)` << 1, x^201792 mod p(x)` << 1 */ + { 0x00000001f1c022a2, 0x0000000179e9e730 }, + /* x^200704 mod p(x)` << 1, x^200768 mod p(x)` << 1 */ + { 0x0000000173d00ae2, 0x00000001ce1368d6 }, + /* x^199680 mod p(x)` << 1, x^199744 mod p(x)` << 1 */ + { 0x00000001d4ffe4ac, 0x0000000112c3a84c }, + /* x^198656 mod p(x)` << 1, x^198720 mod p(x)` << 1 */ + { 0x000000016edc5ae4, 0x00000000de940fee }, + /* x^197632 mod p(x)` << 1, x^197696 mod p(x)` << 1 */ + { 0x00000001f1a02140, 0x00000000fe896b7e }, + /* x^196608 mod p(x)` << 1, x^196672 mod p(x)` << 1 */ + { 0x00000000ca0b28a0, 0x00000001f797431c }, + /* x^195584 mod p(x)` << 1, x^195648 mod p(x)` << 1 */ + { 0x00000001928e30a2, 0x0000000053e989ba }, + /* x^194560 mod p(x)` << 1, x^194624 mod p(x)` << 1 */ + { 0x0000000097b1b002, 0x000000003920cd16 }, + /* x^193536 mod p(x)` << 1, x^193600 mod p(x)` << 1 */ + { 0x00000000b15bf906, 0x00000001e6f579b8 }, + /* x^192512 mod p(x)` << 1, x^192576 mod p(x)` << 1 */ + { 0x00000000411c5d52, 0x000000007493cb0a }, + /* x^191488 mod p(x)` << 1, x^191552 mod p(x)` << 1 */ + { 0x00000001c36f3300, 0x00000001bdd376d8 }, + /* x^190464 mod p(x)` << 1, x^190528 mod p(x)` << 1 */ + { 0x00000001119227e0, 0x000000016badfee6 }, + /* x^189440 mod p(x)` << 1, x^189504 mod p(x)` << 1 */ + { 0x00000000114d4702, 0x0000000071de5c58 }, + /* x^188416 mod p(x)` << 1, x^188480 mod p(x)` << 1 */ + { 0x00000000458b5b98, 0x00000000453f317c }, + /* x^187392 mod p(x)` << 1, x^187456 mod p(x)` << 1 */ + { 0x000000012e31fb8e, 0x0000000121675cce }, + /* x^186368 mod p(x)` << 1, x^186432 mod p(x)` << 1 */ + { 0x000000005cf619d8, 0x00000001f409ee92 }, + /* x^185344 mod p(x)` << 1, x^185408 mod p(x)` << 1 */ + { 0x0000000063f4d8b2, 0x00000000f36b9c88 }, + /* x^184320 mod p(x)` << 1, x^184384 mod p(x)` << 1 */ + { 0x000000004138dc8a, 0x0000000036b398f4 }, + /* x^183296 mod p(x)` << 1, x^183360 mod p(x)` << 1 */ + { 0x00000001d29ee8e0, 0x00000001748f9adc }, + /* x^182272 mod p(x)` << 1, x^182336 mod p(x)` << 1 */ + { 0x000000006a08ace8, 0x00000001be94ec00 }, + /* x^181248 mod p(x)` << 1, x^181312 mod p(x)` << 1 */ + { 0x0000000127d42010, 0x00000000b74370d6 }, + /* x^180224 mod p(x)` << 1, x^180288 mod p(x)` << 1 */ + { 0x0000000019d76b62, 0x00000001174d0b98 }, + /* x^179200 mod p(x)` << 1, x^179264 mod p(x)` << 1 */ + { 0x00000001b1471f6e, 0x00000000befc06a4 }, + /* x^178176 mod p(x)` << 1, x^178240 mod p(x)` << 1 */ + { 0x00000001f64c19cc, 0x00000001ae125288 }, + /* x^177152 mod p(x)` << 1, x^177216 mod p(x)` << 1 */ + { 0x00000000003c0ea0, 0x0000000095c19b34 }, + /* x^176128 mod p(x)` << 1, x^176192 mod p(x)` << 1 */ + { 0x000000014d73abf6, 0x00000001a78496f2 }, + /* x^175104 mod p(x)` << 1, x^175168 mod p(x)` << 1 */ + { 0x00000001620eb844, 0x00000001ac5390a0 }, + /* x^174080 mod p(x)` << 1, x^174144 mod p(x)` << 1 */ + { 0x0000000147655048, 0x000000002a80ed6e }, + /* x^173056 mod p(x)` << 1, x^173120 mod p(x)` << 1 */ + { 0x0000000067b5077e, 0x00000001fa9b0128 }, + /* x^172032 mod p(x)` << 1, x^172096 mod p(x)` << 1 */ + { 0x0000000010ffe206, 0x00000001ea94929e }, + /* x^171008 mod p(x)` << 1, x^171072 mod p(x)` << 1 */ + { 0x000000000fee8f1e, 0x0000000125f4305c }, + /* x^169984 mod p(x)` << 1, x^170048 mod p(x)` << 1 */ + { 0x00000001da26fbae, 0x00000001471e2002 }, + /* x^168960 mod p(x)` << 1, x^169024 mod p(x)` << 1 */ + { 0x00000001b3a8bd88, 0x0000000132d2253a }, + /* 
x^167936 mod p(x)` << 1, x^168000 mod p(x)` << 1 */ + { 0x00000000e8f3898e, 0x00000000f26b3592 }, + /* x^166912 mod p(x)` << 1, x^166976 mod p(x)` << 1 */ + { 0x00000000b0d0d28c, 0x00000000bc8b67b0 }, + /* x^165888 mod p(x)` << 1, x^165952 mod p(x)` << 1 */ + { 0x0000000030f2a798, 0x000000013a826ef2 }, + /* x^164864 mod p(x)` << 1, x^164928 mod p(x)` << 1 */ + { 0x000000000fba1002, 0x0000000081482c84 }, + /* x^163840 mod p(x)` << 1, x^163904 mod p(x)` << 1 */ + { 0x00000000bdb9bd72, 0x00000000e77307c2 }, + /* x^162816 mod p(x)` << 1, x^162880 mod p(x)` << 1 */ + { 0x0000000075d3bf5a, 0x00000000d4a07ec8 }, + /* x^161792 mod p(x)` << 1, x^161856 mod p(x)` << 1 */ + { 0x00000000ef1f98a0, 0x0000000017102100 }, + /* x^160768 mod p(x)` << 1, x^160832 mod p(x)` << 1 */ + { 0x00000000689c7602, 0x00000000db406486 }, + /* x^159744 mod p(x)` << 1, x^159808 mod p(x)` << 1 */ + { 0x000000016d5fa5fe, 0x0000000192db7f88 }, + /* x^158720 mod p(x)` << 1, x^158784 mod p(x)` << 1 */ + { 0x00000001d0d2b9ca, 0x000000018bf67b1e }, + /* x^157696 mod p(x)` << 1, x^157760 mod p(x)` << 1 */ + { 0x0000000041e7b470, 0x000000007c09163e }, + /* x^156672 mod p(x)` << 1, x^156736 mod p(x)` << 1 */ + { 0x00000001cbb6495e, 0x000000000adac060 }, + /* x^155648 mod p(x)` << 1, x^155712 mod p(x)` << 1 */ + { 0x000000010052a0b0, 0x00000000bd8316ae }, + /* x^154624 mod p(x)` << 1, x^154688 mod p(x)` << 1 */ + { 0x00000001d8effb5c, 0x000000019f09ab54 }, + /* x^153600 mod p(x)` << 1, x^153664 mod p(x)` << 1 */ + { 0x00000001d969853c, 0x0000000125155542 }, + /* x^152576 mod p(x)` << 1, x^152640 mod p(x)` << 1 */ + { 0x00000000523ccce2, 0x000000018fdb5882 }, + /* x^151552 mod p(x)` << 1, x^151616 mod p(x)` << 1 */ + { 0x000000001e2436bc, 0x00000000e794b3f4 }, + /* x^150528 mod p(x)` << 1, x^150592 mod p(x)` << 1 */ + { 0x00000000ddd1c3a2, 0x000000016f9bb022 }, + /* x^149504 mod p(x)` << 1, x^149568 mod p(x)` << 1 */ + { 0x0000000019fcfe38, 0x00000000290c9978 }, + /* x^148480 mod p(x)` << 1, x^148544 mod p(x)` << 1 */ + { 0x00000001ce95db64, 0x0000000083c0f350 }, + /* x^147456 mod p(x)` << 1, x^147520 mod p(x)` << 1 */ + { 0x00000000af582806, 0x0000000173ea6628 }, + /* x^146432 mod p(x)` << 1, x^146496 mod p(x)` << 1 */ + { 0x00000001006388f6, 0x00000001c8b4e00a }, + /* x^145408 mod p(x)` << 1, x^145472 mod p(x)` << 1 */ + { 0x0000000179eca00a, 0x00000000de95d6aa }, + /* x^144384 mod p(x)` << 1, x^144448 mod p(x)` << 1 */ + { 0x0000000122410a6a, 0x000000010b7f7248 }, + /* x^143360 mod p(x)` << 1, x^143424 mod p(x)` << 1 */ + { 0x000000004288e87c, 0x00000001326e3a06 }, + /* x^142336 mod p(x)` << 1, x^142400 mod p(x)` << 1 */ + { 0x000000016c5490da, 0x00000000bb62c2e6 }, + /* x^141312 mod p(x)` << 1, x^141376 mod p(x)` << 1 */ + { 0x00000000d1c71f6e, 0x0000000156a4b2c2 }, + /* x^140288 mod p(x)` << 1, x^140352 mod p(x)` << 1 */ + { 0x00000001b4ce08a6, 0x000000011dfe763a }, + /* x^139264 mod p(x)` << 1, x^139328 mod p(x)` << 1 */ + { 0x00000001466ba60c, 0x000000007bcca8e2 }, + /* x^138240 mod p(x)` << 1, x^138304 mod p(x)` << 1 */ + { 0x00000001f6c488a4, 0x0000000186118faa }, + /* x^137216 mod p(x)` << 1, x^137280 mod p(x)` << 1 */ + { 0x000000013bfb0682, 0x0000000111a65a88 }, + /* x^136192 mod p(x)` << 1, x^136256 mod p(x)` << 1 */ + { 0x00000000690e9e54, 0x000000003565e1c4 }, + /* x^135168 mod p(x)` << 1, x^135232 mod p(x)` << 1 */ + { 0x00000000281346b6, 0x000000012ed02a82 }, + /* x^134144 mod p(x)` << 1, x^134208 mod p(x)` << 1 */ + { 0x0000000156464024, 0x00000000c486ecfc }, + /* x^133120 mod p(x)` << 1, x^133184 mod p(x)` << 1 */ 
+ { 0x000000016063a8dc, 0x0000000001b951b2 }, + /* x^132096 mod p(x)` << 1, x^132160 mod p(x)` << 1 */ + { 0x0000000116a66362, 0x0000000048143916 }, + /* x^131072 mod p(x)` << 1, x^131136 mod p(x)` << 1 */ + { 0x000000017e8aa4d2, 0x00000001dc2ae124 }, + /* x^130048 mod p(x)` << 1, x^130112 mod p(x)` << 1 */ + { 0x00000001728eb10c, 0x00000001416c58d6 }, + /* x^129024 mod p(x)` << 1, x^129088 mod p(x)` << 1 */ + { 0x00000001b08fd7fa, 0x00000000a479744a }, + /* x^128000 mod p(x)` << 1, x^128064 mod p(x)` << 1 */ + { 0x00000001092a16e8, 0x0000000096ca3a26 }, + /* x^126976 mod p(x)` << 1, x^127040 mod p(x)` << 1 */ + { 0x00000000a505637c, 0x00000000ff223d4e }, + /* x^125952 mod p(x)` << 1, x^126016 mod p(x)` << 1 */ + { 0x00000000d94869b2, 0x000000010e84da42 }, + /* x^124928 mod p(x)` << 1, x^124992 mod p(x)` << 1 */ + { 0x00000001c8b203ae, 0x00000001b61ba3d0 }, + /* x^123904 mod p(x)` << 1, x^123968 mod p(x)` << 1 */ + { 0x000000005704aea0, 0x00000000680f2de8 }, + /* x^122880 mod p(x)` << 1, x^122944 mod p(x)` << 1 */ + { 0x000000012e295fa2, 0x000000008772a9a8 }, + /* x^121856 mod p(x)` << 1, x^121920 mod p(x)` << 1 */ + { 0x000000011d0908bc, 0x0000000155f295bc }, + /* x^120832 mod p(x)` << 1, x^120896 mod p(x)` << 1 */ + { 0x0000000193ed97ea, 0x00000000595f9282 }, + /* x^119808 mod p(x)` << 1, x^119872 mod p(x)` << 1 */ + { 0x000000013a0f1c52, 0x0000000164b1c25a }, + /* x^118784 mod p(x)` << 1, x^118848 mod p(x)` << 1 */ + { 0x000000010c2c40c0, 0x00000000fbd67c50 }, + /* x^117760 mod p(x)` << 1, x^117824 mod p(x)` << 1 */ + { 0x00000000ff6fac3e, 0x0000000096076268 }, + /* x^116736 mod p(x)` << 1, x^116800 mod p(x)` << 1 */ + { 0x000000017b3609c0, 0x00000001d288e4cc }, + /* x^115712 mod p(x)` << 1, x^115776 mod p(x)` << 1 */ + { 0x0000000088c8c922, 0x00000001eaac1bdc }, + /* x^114688 mod p(x)` << 1, x^114752 mod p(x)` << 1 */ + { 0x00000001751baae6, 0x00000001f1ea39e2 }, + /* x^113664 mod p(x)` << 1, x^113728 mod p(x)` << 1 */ + { 0x0000000107952972, 0x00000001eb6506fc }, + /* x^112640 mod p(x)` << 1, x^112704 mod p(x)` << 1 */ + { 0x0000000162b00abe, 0x000000010f806ffe }, + /* x^111616 mod p(x)` << 1, x^111680 mod p(x)` << 1 */ + { 0x000000000d7b404c, 0x000000010408481e }, + /* x^110592 mod p(x)` << 1, x^110656 mod p(x)` << 1 */ + { 0x00000000763b13d4, 0x0000000188260534 }, + /* x^109568 mod p(x)` << 1, x^109632 mod p(x)` << 1 */ + { 0x00000000f6dc22d8, 0x0000000058fc73e0 }, + /* x^108544 mod p(x)` << 1, x^108608 mod p(x)` << 1 */ + { 0x000000007daae060, 0x00000000391c59b8 }, + /* x^107520 mod p(x)` << 1, x^107584 mod p(x)` << 1 */ + { 0x000000013359ab7c, 0x000000018b638400 }, + /* x^106496 mod p(x)` << 1, x^106560 mod p(x)` << 1 */ + { 0x000000008add438a, 0x000000011738f5c4 }, + /* x^105472 mod p(x)` << 1, x^105536 mod p(x)` << 1 */ + { 0x00000001edbefdea, 0x000000008cf7c6da }, + /* x^104448 mod p(x)` << 1, x^104512 mod p(x)` << 1 */ + { 0x000000004104e0f8, 0x00000001ef97fb16 }, + /* x^103424 mod p(x)` << 1, x^103488 mod p(x)` << 1 */ + { 0x00000000b48a8222, 0x0000000102130e20 }, + /* x^102400 mod p(x)` << 1, x^102464 mod p(x)` << 1 */ + { 0x00000001bcb46844, 0x00000000db968898 }, + /* x^101376 mod p(x)` << 1, x^101440 mod p(x)` << 1 */ + { 0x000000013293ce0a, 0x00000000b5047b5e }, + /* x^100352 mod p(x)` << 1, x^100416 mod p(x)` << 1 */ + { 0x00000001710d0844, 0x000000010b90fdb2 }, + /* x^99328 mod p(x)` << 1, x^99392 mod p(x)` << 1 */ + { 0x0000000117907f6e, 0x000000004834a32e }, + /* x^98304 mod p(x)` << 1, x^98368 mod p(x)` << 1 */ + { 0x0000000087ddf93e, 0x0000000059c8f2b0 }, + /* 
x^97280 mod p(x)` << 1, x^97344 mod p(x)` << 1 */ + { 0x000000005970e9b0, 0x0000000122cec508 }, + /* x^96256 mod p(x)` << 1, x^96320 mod p(x)` << 1 */ + { 0x0000000185b2b7d0, 0x000000000a330cda }, + /* x^95232 mod p(x)` << 1, x^95296 mod p(x)` << 1 */ + { 0x00000001dcee0efc, 0x000000014a47148c }, + /* x^94208 mod p(x)` << 1, x^94272 mod p(x)` << 1 */ + { 0x0000000030da2722, 0x0000000042c61cb8 }, + /* x^93184 mod p(x)` << 1, x^93248 mod p(x)` << 1 */ + { 0x000000012f925a18, 0x0000000012fe6960 }, + /* x^92160 mod p(x)` << 1, x^92224 mod p(x)` << 1 */ + { 0x00000000dd2e357c, 0x00000000dbda2c20 }, + /* x^91136 mod p(x)` << 1, x^91200 mod p(x)` << 1 */ + { 0x00000000071c80de, 0x000000011122410c }, + /* x^90112 mod p(x)` << 1, x^90176 mod p(x)` << 1 */ + { 0x000000011513140a, 0x00000000977b2070 }, + /* x^89088 mod p(x)` << 1, x^89152 mod p(x)` << 1 */ + { 0x00000001df876e8e, 0x000000014050438e }, + /* x^88064 mod p(x)` << 1, x^88128 mod p(x)` << 1 */ + { 0x000000015f81d6ce, 0x0000000147c840e8 }, + /* x^87040 mod p(x)` << 1, x^87104 mod p(x)` << 1 */ + { 0x000000019dd94dbe, 0x00000001cc7c88ce }, + /* x^86016 mod p(x)` << 1, x^86080 mod p(x)` << 1 */ + { 0x00000001373d206e, 0x00000001476b35a4 }, + /* x^84992 mod p(x)` << 1, x^85056 mod p(x)` << 1 */ + { 0x00000000668ccade, 0x000000013d52d508 }, + /* x^83968 mod p(x)` << 1, x^84032 mod p(x)` << 1 */ + { 0x00000001b192d268, 0x000000008e4be32e }, + /* x^82944 mod p(x)` << 1, x^83008 mod p(x)` << 1 */ + { 0x00000000e30f3a78, 0x00000000024120fe }, + /* x^81920 mod p(x)` << 1, x^81984 mod p(x)` << 1 */ + { 0x000000010ef1f7bc, 0x00000000ddecddb4 }, + /* x^80896 mod p(x)` << 1, x^80960 mod p(x)` << 1 */ + { 0x00000001f5ac7380, 0x00000000d4d403bc }, + /* x^79872 mod p(x)` << 1, x^79936 mod p(x)` << 1 */ + { 0x000000011822ea70, 0x00000001734b89aa }, + /* x^78848 mod p(x)` << 1, x^78912 mod p(x)` << 1 */ + { 0x00000000c3a33848, 0x000000010e7a58d6 }, + /* x^77824 mod p(x)` << 1, x^77888 mod p(x)` << 1 */ + { 0x00000001bd151c24, 0x00000001f9f04e9c }, + /* x^76800 mod p(x)` << 1, x^76864 mod p(x)` << 1 */ + { 0x0000000056002d76, 0x00000000b692225e }, + /* x^75776 mod p(x)` << 1, x^75840 mod p(x)` << 1 */ + { 0x000000014657c4f4, 0x000000019b8d3f3e }, + /* x^74752 mod p(x)` << 1, x^74816 mod p(x)` << 1 */ + { 0x0000000113742d7c, 0x00000001a874f11e }, + /* x^73728 mod p(x)` << 1, x^73792 mod p(x)` << 1 */ + { 0x000000019c5920ba, 0x000000010d5a4254 }, + /* x^72704 mod p(x)` << 1, x^72768 mod p(x)` << 1 */ + { 0x000000005216d2d6, 0x00000000bbb2f5d6 }, + /* x^71680 mod p(x)` << 1, x^71744 mod p(x)` << 1 */ + { 0x0000000136f5ad8a, 0x0000000179cc0e36 }, + /* x^70656 mod p(x)` << 1, x^70720 mod p(x)` << 1 */ + { 0x000000018b07beb6, 0x00000001dca1da4a }, + /* x^69632 mod p(x)` << 1, x^69696 mod p(x)` << 1 */ + { 0x00000000db1e93b0, 0x00000000feb1a192 }, + /* x^68608 mod p(x)` << 1, x^68672 mod p(x)` << 1 */ + { 0x000000000b96fa3a, 0x00000000d1eeedd6 }, + /* x^67584 mod p(x)` << 1, x^67648 mod p(x)` << 1 */ + { 0x00000001d9968af0, 0x000000008fad9bb4 }, + /* x^66560 mod p(x)` << 1, x^66624 mod p(x)` << 1 */ + { 0x000000000e4a77a2, 0x00000001884938e4 }, + /* x^65536 mod p(x)` << 1, x^65600 mod p(x)` << 1 */ + { 0x00000000508c2ac8, 0x00000001bc2e9bc0 }, + /* x^64512 mod p(x)` << 1, x^64576 mod p(x)` << 1 */ + { 0x0000000021572a80, 0x00000001f9658a68 }, + /* x^63488 mod p(x)` << 1, x^63552 mod p(x)` << 1 */ + { 0x00000001b859daf2, 0x000000001b9224fc }, + /* x^62464 mod p(x)` << 1, x^62528 mod p(x)` << 1 */ + { 0x000000016f788474, 0x0000000055b2fb84 }, + /* x^61440 mod p(x)` 
<< 1, x^61504 mod p(x)` << 1 */ + { 0x00000001b438810e, 0x000000018b090348 }, + /* x^60416 mod p(x)` << 1, x^60480 mod p(x)` << 1 */ + { 0x0000000095ddc6f2, 0x000000011ccbd5ea }, + /* x^59392 mod p(x)` << 1, x^59456 mod p(x)` << 1 */ + { 0x00000001d977c20c, 0x0000000007ae47f8 }, + /* x^58368 mod p(x)` << 1, x^58432 mod p(x)` << 1 */ + { 0x00000000ebedb99a, 0x0000000172acbec0 }, + /* x^57344 mod p(x)` << 1, x^57408 mod p(x)` << 1 */ + { 0x00000001df9e9e92, 0x00000001c6e3ff20 }, + /* x^56320 mod p(x)` << 1, x^56384 mod p(x)` << 1 */ + { 0x00000001a4a3f952, 0x00000000e1b38744 }, + /* x^55296 mod p(x)` << 1, x^55360 mod p(x)` << 1 */ + { 0x00000000e2f51220, 0x00000000791585b2 }, + /* x^54272 mod p(x)` << 1, x^54336 mod p(x)` << 1 */ + { 0x000000004aa01f3e, 0x00000000ac53b894 }, + /* x^53248 mod p(x)` << 1, x^53312 mod p(x)` << 1 */ + { 0x00000000b3e90a58, 0x00000001ed5f2cf4 }, + /* x^52224 mod p(x)` << 1, x^52288 mod p(x)` << 1 */ + { 0x000000000c9ca2aa, 0x00000001df48b2e0 }, + /* x^51200 mod p(x)` << 1, x^51264 mod p(x)` << 1 */ + { 0x0000000151682316, 0x00000000049c1c62 }, + /* x^50176 mod p(x)` << 1, x^50240 mod p(x)` << 1 */ + { 0x0000000036fce78c, 0x000000017c460c12 }, + /* x^49152 mod p(x)` << 1, x^49216 mod p(x)` << 1 */ + { 0x000000009037dc10, 0x000000015be4da7e }, + /* x^48128 mod p(x)` << 1, x^48192 mod p(x)` << 1 */ + { 0x00000000d3298582, 0x000000010f38f668 }, + /* x^47104 mod p(x)` << 1, x^47168 mod p(x)` << 1 */ + { 0x00000001b42e8ad6, 0x0000000039f40a00 }, + /* x^46080 mod p(x)` << 1, x^46144 mod p(x)` << 1 */ + { 0x00000000142a9838, 0x00000000bd4c10c4 }, + /* x^45056 mod p(x)` << 1, x^45120 mod p(x)` << 1 */ + { 0x0000000109c7f190, 0x0000000042db1d98 }, + /* x^44032 mod p(x)` << 1, x^44096 mod p(x)` << 1 */ + { 0x0000000056ff9310, 0x00000001c905bae6 }, + /* x^43008 mod p(x)` << 1, x^43072 mod p(x)` << 1 */ + { 0x00000001594513aa, 0x00000000069d40ea }, + /* x^41984 mod p(x)` << 1, x^42048 mod p(x)` << 1 */ + { 0x00000001e3b5b1e8, 0x000000008e4fbad0 }, + /* x^40960 mod p(x)` << 1, x^41024 mod p(x)` << 1 */ + { 0x000000011dd5fc08, 0x0000000047bedd46 }, + /* x^39936 mod p(x)` << 1, x^40000 mod p(x)` << 1 */ + { 0x00000001675f0cc2, 0x0000000026396bf8 }, + /* x^38912 mod p(x)` << 1, x^38976 mod p(x)` << 1 */ + { 0x00000000d1c8dd44, 0x00000000379beb92 }, + /* x^37888 mod p(x)` << 1, x^37952 mod p(x)` << 1 */ + { 0x0000000115ebd3d8, 0x000000000abae54a }, + /* x^36864 mod p(x)` << 1, x^36928 mod p(x)` << 1 */ + { 0x00000001ecbd0dac, 0x0000000007e6a128 }, + /* x^35840 mod p(x)` << 1, x^35904 mod p(x)` << 1 */ + { 0x00000000cdf67af2, 0x000000000ade29d2 }, + /* x^34816 mod p(x)` << 1, x^34880 mod p(x)` << 1 */ + { 0x000000004c01ff4c, 0x00000000f974c45c }, + /* x^33792 mod p(x)` << 1, x^33856 mod p(x)` << 1 */ + { 0x00000000f2d8657e, 0x00000000e77ac60a }, + /* x^32768 mod p(x)` << 1, x^32832 mod p(x)` << 1 */ + { 0x000000006bae74c4, 0x0000000145895816 }, + /* x^31744 mod p(x)` << 1, x^31808 mod p(x)` << 1 */ + { 0x0000000152af8aa0, 0x0000000038e362be }, + /* x^30720 mod p(x)` << 1, x^30784 mod p(x)` << 1 */ + { 0x0000000004663802, 0x000000007f991a64 }, + /* x^29696 mod p(x)` << 1, x^29760 mod p(x)` << 1 */ + { 0x00000001ab2f5afc, 0x00000000fa366d3a }, + /* x^28672 mod p(x)` << 1, x^28736 mod p(x)` << 1 */ + { 0x0000000074a4ebd4, 0x00000001a2bb34f0 }, + /* x^27648 mod p(x)` << 1, x^27712 mod p(x)` << 1 */ + { 0x00000001d7ab3a4c, 0x0000000028a9981e }, + /* x^26624 mod p(x)` << 1, x^26688 mod p(x)` << 1 */ + { 0x00000001a8da60c6, 0x00000001dbc672be }, + /* x^25600 mod p(x)` << 1, x^25664 mod 
p(x)` << 1 */ + { 0x000000013cf63820, 0x00000000b04d77f6 }, + /* x^24576 mod p(x)` << 1, x^24640 mod p(x)` << 1 */ + { 0x00000000bec12e1e, 0x0000000124400d96 }, + /* x^23552 mod p(x)` << 1, x^23616 mod p(x)` << 1 */ + { 0x00000001c6368010, 0x000000014ca4b414 }, + /* x^22528 mod p(x)` << 1, x^22592 mod p(x)` << 1 */ + { 0x00000001e6e78758, 0x000000012fe2c938 }, + /* x^21504 mod p(x)` << 1, x^21568 mod p(x)` << 1 */ + { 0x000000008d7f2b3c, 0x00000001faed01e6 }, + /* x^20480 mod p(x)` << 1, x^20544 mod p(x)` << 1 */ + { 0x000000016b4a156e, 0x000000007e80ecfe }, + /* x^19456 mod p(x)` << 1, x^19520 mod p(x)` << 1 */ + { 0x00000001c63cfeb6, 0x0000000098daee94 }, + /* x^18432 mod p(x)` << 1, x^18496 mod p(x)` << 1 */ + { 0x000000015f902670, 0x000000010a04edea }, + /* x^17408 mod p(x)` << 1, x^17472 mod p(x)` << 1 */ + { 0x00000001cd5de11e, 0x00000001c00b4524 }, + /* x^16384 mod p(x)` << 1, x^16448 mod p(x)` << 1 */ + { 0x000000001acaec54, 0x0000000170296550 }, + /* x^15360 mod p(x)` << 1, x^15424 mod p(x)` << 1 */ + { 0x000000002bd0ca78, 0x0000000181afaa48 }, + /* x^14336 mod p(x)` << 1, x^14400 mod p(x)` << 1 */ + { 0x0000000032d63d5c, 0x0000000185a31ffa }, + /* x^13312 mod p(x)` << 1, x^13376 mod p(x)` << 1 */ + { 0x000000001c6d4e4c, 0x000000002469f608 }, + /* x^12288 mod p(x)` << 1, x^12352 mod p(x)` << 1 */ + { 0x0000000106a60b92, 0x000000006980102a }, + /* x^11264 mod p(x)` << 1, x^11328 mod p(x)` << 1 */ + { 0x00000000d3855e12, 0x0000000111ea9ca8 }, + /* x^10240 mod p(x)` << 1, x^10304 mod p(x)` << 1 */ + { 0x00000000e3125636, 0x00000001bd1d29ce }, + /* x^9216 mod p(x)` << 1, x^9280 mod p(x)` << 1 */ + { 0x000000009e8f7ea4, 0x00000001b34b9580 }, + /* x^8192 mod p(x)` << 1, x^8256 mod p(x)` << 1 */ + { 0x00000001c82e562c, 0x000000003076054e }, + /* x^7168 mod p(x)` << 1, x^7232 mod p(x)` << 1 */ + { 0x00000000ca9f09ce, 0x000000012a608ea4 }, + /* x^6144 mod p(x)` << 1, x^6208 mod p(x)` << 1 */ + { 0x00000000c63764e6, 0x00000000784d05fe }, + /* x^5120 mod p(x)` << 1, x^5184 mod p(x)` << 1 */ + { 0x0000000168d2e49e, 0x000000016ef0d82a }, + /* x^4096 mod p(x)` << 1, x^4160 mod p(x)` << 1 */ + { 0x00000000e986c148, 0x0000000075bda454 }, + /* x^3072 mod p(x)` << 1, x^3136 mod p(x)` << 1 */ + { 0x00000000cfb65894, 0x000000003dc0a1c4 }, + /* x^2048 mod p(x)` << 1, x^2112 mod p(x)` << 1 */ + { 0x0000000111cadee4, 0x00000000e9a5d8be }, + /* x^1024 mod p(x)` << 1, x^1088 mod p(x)` << 1 */ + { 0x0000000171fb63ce, 0x00000001609bc4b4 } +#endif /* __LITTLE_ENDIAN__ */ + }; + +/* Reduce final 1024-2048 bits to 64 bits, shifting 32 bits to include the trailing 32 bits of zeros */ + +static const __vector unsigned long long vcrc_short_const[16] + __attribute__((aligned (16))) = { +#ifdef __LITTLE_ENDIAN__ + /* x^1952 mod p(x) , x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x) */ + { 0x5cf015c388e56f72, 0x7fec2963e5bf8048 }, + /* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x) */ + { 0x963a18920246e2e6, 0x38e888d4844752a9 }, + /* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x) */ + { 0x419a441956993a31, 0x42316c00730206ad }, + /* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x) */ + { 0x924752ba2b830011, 0x543d5c543e65ddf9 }, + /* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x) */ + { 0x55bd7f9518e4a304, 0x78e87aaf56767c92 }, + /* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod p(x) */ + { 0x6d76739fe0553f1e, 0x8f68fcec1903da7f }, + /* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x) 
*/ + { 0xc133722b1fe0b5c3, 0x3f4840246791d588 }, + /* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x) */ + { 0x64b67ee0e55ef1f3, 0x34c96751b04de25a }, + /* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x) */ + { 0x069db049b8fdb1e7, 0x156c8e180b4a395b }, + /* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , x^896 mod p(x) */ + { 0xa11bfaf3c9e90b9e, 0xe0b99ccbe661f7be }, + /* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x) */ + { 0x817cdc5119b29a35, 0x041d37768cd75659 }, + /* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x) */ + { 0x1ce9d94b36c41f1c, 0x3a0777818cfaa965 }, + /* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x) */ + { 0x4f256efcb82be955, 0x0e148e8252377a55 }, + /* x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x) */ + { 0xec1631edb2dea967, 0x9c25531d19e65dde }, + /* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x) */ + { 0x5d27e147510ac59a, 0x790606ff9957c0a6 }, + /* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x) */ + { 0xa66805eb18b8ea18, 0x82f63b786ea2d55c } +#else /* __LITTLE_ENDIAN__ */ + /* x^1952 mod p(x) , x^1984 mod p(x) , x^2016 mod p(x) , x^2048 mod p(x) */ + { 0x7fec2963e5bf8048, 0x5cf015c388e56f72 }, + /* x^1824 mod p(x) , x^1856 mod p(x) , x^1888 mod p(x) , x^1920 mod p(x) */ + { 0x38e888d4844752a9, 0x963a18920246e2e6 }, + /* x^1696 mod p(x) , x^1728 mod p(x) , x^1760 mod p(x) , x^1792 mod p(x) */ + { 0x42316c00730206ad, 0x419a441956993a31 }, + /* x^1568 mod p(x) , x^1600 mod p(x) , x^1632 mod p(x) , x^1664 mod p(x) */ + { 0x543d5c543e65ddf9, 0x924752ba2b830011 }, + /* x^1440 mod p(x) , x^1472 mod p(x) , x^1504 mod p(x) , x^1536 mod p(x) */ + { 0x78e87aaf56767c92, 0x55bd7f9518e4a304 }, + /* x^1312 mod p(x) , x^1344 mod p(x) , x^1376 mod p(x) , x^1408 mod p(x) */ + { 0x8f68fcec1903da7f, 0x6d76739fe0553f1e }, + /* x^1184 mod p(x) , x^1216 mod p(x) , x^1248 mod p(x) , x^1280 mod p(x) */ + { 0x3f4840246791d588, 0xc133722b1fe0b5c3 }, + /* x^1056 mod p(x) , x^1088 mod p(x) , x^1120 mod p(x) , x^1152 mod p(x) */ + { 0x34c96751b04de25a, 0x64b67ee0e55ef1f3 }, + /* x^928 mod p(x) , x^960 mod p(x) , x^992 mod p(x) , x^1024 mod p(x) */ + { 0x156c8e180b4a395b, 0x069db049b8fdb1e7 }, + /* x^800 mod p(x) , x^832 mod p(x) , x^864 mod p(x) , x^896 mod p(x) */ + { 0xe0b99ccbe661f7be, 0xa11bfaf3c9e90b9e }, + /* x^672 mod p(x) , x^704 mod p(x) , x^736 mod p(x) , x^768 mod p(x) */ + { 0x041d37768cd75659, 0x817cdc5119b29a35 }, + /* x^544 mod p(x) , x^576 mod p(x) , x^608 mod p(x) , x^640 mod p(x) */ + { 0x3a0777818cfaa965, 0x1ce9d94b36c41f1c }, + /* x^416 mod p(x) , x^448 mod p(x) , x^480 mod p(x) , x^512 mod p(x) */ + { 0x0e148e8252377a55, 0x4f256efcb82be955 }, + /* x^288 mod p(x) , x^320 mod p(x) , x^352 mod p(x) , x^384 mod p(x) */ + { 0x9c25531d19e65dde, 0xec1631edb2dea967 }, + /* x^160 mod p(x) , x^192 mod p(x) , x^224 mod p(x) , x^256 mod p(x) */ + { 0x790606ff9957c0a6, 0x5d27e147510ac59a }, + /* x^32 mod p(x) , x^64 mod p(x) , x^96 mod p(x) , x^128 mod p(x) */ + { 0x82f63b786ea2d55c, 0xa66805eb18b8ea18 } +#endif /* __LITTLE_ENDIAN__ */ + }; + +/* Barrett constants */ +/* 33 bit reflected Barrett constant m - (4^32)/n */ + +static const __vector unsigned long long v_Barrett_const[2] + __attribute__((aligned (16))) = { + /* x^64 div p(x) */ +#ifdef __LITTLE_ENDIAN__ + { 0x00000000dea713f1, 0x0000000000000000 }, + { 0x0000000105ec76f1, 0x0000000000000000 } +#else /* __LITTLE_ENDIAN__ */ + { 0x0000000000000000, 0x00000000dea713f1 }, + { 
0x0000000000000000, 0x0000000105ec76f1 } +#endif /* __LITTLE_ENDIAN__ */ + }; +#endif /* POWER8_INTRINSICS */ + +#endif /* __ASSEMBLER__ */ diff --git a/contrib/crc32-vpmsum-cmake/vec_crc32.h b/contrib/crc32-vpmsum-cmake/vec_crc32.h new file mode 100644 index 00000000000..0ef13616b34 --- /dev/null +++ b/contrib/crc32-vpmsum-cmake/vec_crc32.h @@ -0,0 +1,29 @@ +#ifndef VEC_CRC32 +#define VEC_CRC32 + + +#ifdef __cplusplus +extern "C" { +#endif + +unsigned int crc32_vpmsum(unsigned int crc, const unsigned char *p, unsigned long len); + +static inline uint32_t crc32_ppc(uint64_t crc, unsigned char const *buffer, size_t len) +{ + unsigned char *emptybuffer; + if (!buffer) { + emptybuffer = (unsigned char *)malloc(len); + bzero(emptybuffer, len); + crc = crc32_vpmsum(crc, emptybuffer, len); + free(emptybuffer); + } else { + crc = crc32_vpmsum(crc, buffer, (unsigned long)len); + } + return crc; +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d8a7dba72ac..1bc1151b90b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -364,6 +364,10 @@ if (TARGET ch_contrib::crc32_s390x) target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::crc32_s390x) endif() +if (TARGET ch_contrib::crc32-vpmsum) + target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::crc32-vpmsum) +endif() + dbms_target_link_libraries(PUBLIC ch_contrib::abseil_swiss_tables) target_link_libraries (clickhouse_common_io PUBLIC ch_contrib::abseil_swiss_tables) diff --git a/src/Common/HashTable/Hash.h b/src/Common/HashTable/Hash.h index 01758c1b9fb..c7342d061d8 100644 --- a/src/Common/HashTable/Hash.h +++ b/src/Common/HashTable/Hash.h @@ -48,6 +48,10 @@ inline DB::UInt64 intHash64(DB::UInt64 x) #include #endif +#if (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#include "vec_crc32.h" +#endif + #if defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ #include @@ -89,6 +93,8 @@ inline DB::UInt64 intHashCRC32(DB::UInt64 x) return __crc32cd(-1U, x); #elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ return s390x_crc32(-1U, x); +#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return crc32_ppc(-1U, reinterpret_cast<const unsigned char *>(&x), sizeof(x)); #else /// On other platforms we do not have CRC32. NOTE This can be confusing. /// NOTE: consider using intHash32() @@ -103,6 +109,8 @@ inline DB::UInt64 intHashCRC32(DB::UInt64 x, DB::UInt64 updated_value) return __crc32cd(static_cast<UInt32>(updated_value), x); #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ return s390x_crc32(updated_value, x); +#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return crc32_ppc(updated_value, reinterpret_cast<const unsigned char *>(&x), sizeof(x)); #else /// On other platforms we do not have CRC32. NOTE This can be confusing.
return intHash64(x) ^ updated_value; diff --git a/src/Functions/CMakeLists.txt b/src/Functions/CMakeLists.txt index e9810e918b4..45543f57b37 100644 --- a/src/Functions/CMakeLists.txt +++ b/src/Functions/CMakeLists.txt @@ -86,6 +86,10 @@ if (TARGET ch_contrib::rapidjson) list (APPEND PRIVATE_LIBS ch_contrib::rapidjson) endif() +if (TARGET ch_contrib::crc32-vpmsum) + list (APPEND PUBLIC_LIBS ch_contrib::crc32-vpmsum) +endif() + add_subdirectory(GatherUtils) list (APPEND PRIVATE_LIBS clickhouse_functions_gatherutils) diff --git a/src/Functions/FunctionsStringHash.cpp b/src/Functions/FunctionsStringHash.cpp index 174acebe979..bf0b7463a5d 100644 --- a/src/Functions/FunctionsStringHash.cpp +++ b/src/Functions/FunctionsStringHash.cpp @@ -14,6 +14,10 @@ #include +#if (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#include "vec_crc32.h" +#endif + namespace DB { @@ -38,6 +42,8 @@ struct Hash return __crc32cd(static_cast<UInt32>(crc), val); #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ return s390x_crc32(crc, val); +#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return crc32_ppc(crc, reinterpret_cast<const unsigned char *>(&val), sizeof(val)); #else throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED); #endif @@ -51,6 +57,8 @@ struct Hash return __crc32cw(crc, val); #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ return s390x_crc32_u32(crc, val); +#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return crc32_ppc(crc, reinterpret_cast<const unsigned char *>(&val), sizeof(val)); #else throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED); #endif @@ -64,6 +72,8 @@ struct Hash return __crc32ch(crc, val); #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ return s390x_crc32_u16(crc, val); +#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return crc32_ppc(crc, reinterpret_cast<const unsigned char *>(&val), sizeof(val)); #else throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED); #endif @@ -77,6 +87,8 @@ struct Hash return __crc32cb(crc, val); #elif defined(__s390x__) && __BYTE_ORDER__==__ORDER_BIG_ENDIAN__ return s390x_crc32_u8(crc, val); +#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return crc32_ppc(crc, reinterpret_cast<const unsigned char *>(&val), sizeof(val)); #else throw Exception("String hash is not implemented without sse4.2 support", ErrorCodes::NOT_IMPLEMENTED); #endif diff --git a/src/Functions/FunctionsStringSimilarity.cpp b/src/Functions/FunctionsStringSimilarity.cpp index 802aafc2042..87aa0f4b3f7 100644 --- a/src/Functions/FunctionsStringSimilarity.cpp +++ b/src/Functions/FunctionsStringSimilarity.cpp @@ -24,6 +24,10 @@ # include #endif +#if (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#include "vec_crc32.h" +#endif + namespace DB { /** Distance function implementation.
@@ -72,6 +76,8 @@ struct NgramDistanceImpl return __crc32cd(code_points[2], combined) & 0xFFFFu; #elif defined(__s390x__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ return s390x_crc32(code_points[2], combined) & 0xFFFFu; +#elif (defined(__PPC64__) || defined(__powerpc64__)) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return crc32_ppc(code_points[2], reinterpret_cast(&combined), sizeof(combined)) & 0xFFFFu; #else return (intHashCRC32(combined) ^ intHashCRC32(code_points[2])) & 0xFFFFu; #endif diff --git a/tests/queries/0_stateless/01016_simhash_minhash.ppc64le.reference b/tests/queries/0_stateless/01016_simhash_minhash.ppc64le.reference new file mode 100644 index 00000000000..2acad33320b --- /dev/null +++ b/tests/queries/0_stateless/01016_simhash_minhash.ppc64le.reference @@ -0,0 +1,148 @@ +18446744073709551615 +1737075136 +1737075136 +4018781633 +4018781633 +1846985414 +1846985414 +1846985414 +1846985414 +(10693559443859979498,10693559443859979498) +(12279482788274235946,6436413987527322272) +(12279482788274235946,6436413987527322272) +(13257488272755813409,6436413987527322272) +(13257488272755813409,6436413987527322272) +(13762864994695140861,13762864994695140861) +(13762864994695140861,13762864994695140861) +(13762864994695140861,13762864994695140861) +(13762864994695140861,13762864994695140861) +3023525975 +3040303199 +3023509591 +3023510623 +3040303191 +3040303191 +3023510615 +3023510615 +1999952988 +926211140 +1999699532 +1999683148 +1999952988 +926211140 +1999699532 +1999683148 +(16071125717475221203,9592059329600248798) +(16071125717475221203,1914899959549098907) +(16071125717475221203,7986182634218042944) +(16071125717475221203,7986182634218042944) +(16071125717475221203,9592059329600248798) +(16071125717475221203,1914899959549098907) +(16071125717475221203,7986182634218042944) +(16071125717475221203,7986182634218042944) +(10576877560263640956,4278250516018530743) +(16211512098526494023,11479872370566432466) +(13515070557027359649,17725505493832406849) +(12589381623326290380,575343713614534202) +(10576877560263640956,4278250516018530743) +(16211512098526494023,11479872370566432466) +(13515070557027359649,17725505493832406849) +(12589381623326290380,575343713614534202) +uniqExact 6 +ngramSimHash +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 1211135069 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1546679389 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 2293265501 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 3392173149 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3627054169 +ngramSimHashCaseInsensitive +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 2291168349 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 3358618717 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3425727581 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3627054429 +ngramSimHashUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 1211135069 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1546679389 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. 
SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 2284876893 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 3459282013 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3694163037 +ngramSimHashCaseInsensitiveUTF8 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 2291168349 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. 
Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 3358618717 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3425727581 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3627054429 +wordShingleSimHash +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 10637533 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 171136201 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. 
SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 209864029 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 413353165 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 413353677 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 418595033 +wordShingleSimHashCaseInsensitive +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. 
Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 218252892 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 1218592985 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1613919433 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2080524225 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2088912577 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2094163657 +wordShingleSimHashUTF8 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 10637533 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 171136201 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 209864029 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 413353165 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 413353677 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 418595033 +wordShingleSimHashCaseInsensitiveUTF8 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 218252892 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 1218592985 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1613919433 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2080524225 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2088912577 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2094163657 +ngramMinHash +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (2793448378579182412,5526633106516004292) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (8530889421347045182,5150364204263408031) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (8992738078100405992,5526633106516004292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (15193387305258759701,5526633106516004292) +ngramMinHashCaseInsensitive +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (2793448378579182412,5526633106516004292) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. 
SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (8530889421347045182,5150364204263408031) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (8992738078100405992,5526633106516004292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (15193387305258759701,5526633106516004292) +ngramMinHashUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (2793448378579182412,5526633106516004292) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (8530889421347045182,5150364204263408031) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 
1 (8992738078100405992,5526633106516004292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (15193387305258759701,5526633106516004292) +ngramMinHashCaseInsensitiveUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (2793448378579182412,5526633106516004292) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (8530889421347045182,5150364204263408031) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (8992738078100405992,5526633106516004292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (15193387305258759701,5526633106516004292) +wordShingleMinHash +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (3409292695558556998,3242671779450421938) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. 
It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (11981468198903037199,5500630346333489583) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 (12852656749419794093,678630951345180105) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (13105381013738345838,410122209669519134) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (13105381013738345838,3365040177160857031) +wordShingleMinHashCaseInsensitive +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (712181695272576370,125062659592971094) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 (3404326999173181417,12067981913120463876) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (13918035273694643957,5500630346333489583) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (14132553626943388792,12467125901844798869) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (14132553626943388792,17567683680214055861) +wordShingleMinHashUTF8 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (3409292695558556998,3242671779450421938) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (11981468198903037199,5500630346333489583) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 (12852656749419794093,678630951345180105) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (13105381013738345838,410122209669519134) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (13105381013738345838,3365040177160857031) +wordShingleMinHashCaseInsensitiveUTF8 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (712181695272576370,125062659592971094) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 (3404326999173181417,12067981913120463876) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (13918035273694643957,5500630346333489583) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (14132553626943388792,12467125901844798869) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (14132553626943388792,17567683680214055861) +code 69: Second argument (shingle size) of function wordShingleSimHash cannot be greater then 25: While processing wordShingleSimHash('foobar', 9223372036854775807)None +code 69: Second argument (shingle size) of function wordShingleSimHash cannot be greater then 25: While processing wordShingleSimHash('foobar', 1001)None +code 69: Second argument (shingle size) of function wordShingleSimHash cannot be zero: While processing wordShingleSimHash('foobar', 0)None diff --git a/tests/queries/0_stateless/01016_simhash_minhash.python b/tests/queries/0_stateless/01016_simhash_minhash.python new file mode 100644 index 00000000000..1d6eae456c1 --- /dev/null +++ b/tests/queries/0_stateless/01016_simhash_minhash.python @@ -0,0 +1,394 @@ +#!/usr/bin/env python3 +import os +import socket +import sys +from scipy import stats +import pandas as pd +import numpy as np +import shutil +import platform + +import uuid + +CLICKHOUSE_HOST = os.environ.get('CLICKHOUSE_HOST', '127.0.0.1') +CLICKHOUSE_PORT = int(os.environ.get('CLICKHOUSE_PORT_TCP', '900000')) +CLICKHOUSE_DATABASE = os.environ.get('CLICKHOUSE_DATABASE', 'default') + + +CURDIR = os.path.dirname(os.path.realpath(__file__)) +sys.path.insert(0, os.path.join(CURDIR, 'helpers')) + +from pure_http_client import ClickHouseClient + +if platform.machine() == "ppc64le": + shutil.copyfile(CURDIR + "/01016_simhash_minhash.ppc64le.reference", CURDIR + "/01016_simhash_minhash.reference") +elif platform.machine() == "x86_64" : + shutil.copyfile(CURDIR + "/01016_simhash_minhash.x86_64.reference", CURDIR + "/01016_simhash_minhash.reference") + +def writeVarUInt(x, ba): + for _ in range(0, 9): + + byte = x & 0x7F + if x > 0x7F: + byte |= 0x80 + + ba.append(byte) + + x >>= 7 + if x == 0: + return + +def writeStringBinary(s, ba): + b = bytes(s, 'utf-8') + writeVarUInt(len(s), ba) + ba.extend(b) + +def readStrict(s, size = 1): + res = bytearray() + while size: + cur = s.recv(size) + # if not res: + # raise "Socket is closed" + size -= len(cur) + res.extend(cur) + + return res + +def readUInt(s, size=1): + res = readStrict(s, size) + val = 0 + for i in range(len(res)): + val += res[i] << (i * 8) + return val + +def readUInt8(s): + return readUInt(s) + +def readUInt16(s): + return readUInt(s, 2) + +def readUInt32(s): + return readUInt(s, 4) + +def readUInt64(s): + return readUInt(s, 8) + +def readVarUInt(s): + x = 0 + for i in 
range(9):
+        byte = readStrict(s)[0]
+        x |= (byte & 0x7F) << (7 * i)
+
+        if not byte & 0x80:
+            return x
+
+    return x
+
+def readStringBinary(s):
+    size = readVarUInt(s)
+    s = readStrict(s, size)
+    return s.decode('utf-8')
+
+def sendHello(s):
+    ba = bytearray()
+    writeVarUInt(0, ba) # Hello
+    writeStringBinary('simple native protocol', ba)
+    writeVarUInt(21, ba)
+    writeVarUInt(9, ba)
+    writeVarUInt(54449, ba)
+    writeStringBinary('default', ba) # database
+    writeStringBinary('default', ba) # user
+    writeStringBinary('', ba) # pwd
+    s.sendall(ba)
+
+
+def receiveHello(s):
+    p_type = readVarUInt(s)
+    assert (p_type == 0) # Hello
+    server_name = readStringBinary(s)
+    # print("Server name: ", server_name)
+    server_version_major = readVarUInt(s)
+    # print("Major: ", server_version_major)
+    server_version_minor = readVarUInt(s)
+    # print("Minor: ", server_version_minor)
+    server_revision = readVarUInt(s)
+    # print("Revision: ", server_revision)
+    server_timezone = readStringBinary(s)
+    # print("Timezone: ", server_timezone)
+    server_display_name = readStringBinary(s)
+    # print("Display name: ", server_display_name)
+    server_version_patch = readVarUInt(s)
+    # print("Version patch: ", server_version_patch)
+
+def serializeClientInfo(ba, query_id):
+    writeStringBinary('default', ba) # initial_user
+    writeStringBinary(query_id, ba) # initial_query_id
+    writeStringBinary('127.0.0.1:9000', ba) # initial_address
+    ba.extend([0] * 8) # initial_query_start_time_microseconds
+    ba.append(1) # TCP
+    writeStringBinary('os_user', ba) # os_user
+    writeStringBinary('client_hostname', ba) # client_hostname
+    writeStringBinary('client_name', ba) # client_name
+    writeVarUInt(21, ba)
+    writeVarUInt(9, ba)
+    writeVarUInt(54449, ba)
+    writeStringBinary('', ba) # quota_key
+    writeVarUInt(0, ba) # distributed_depth
+    writeVarUInt(1, ba) # client_version_patch
+    ba.append(0) # No telemetry
+
+def sendQuery(s, query):
+    ba = bytearray()
+    query_id = uuid.uuid4().hex
+    writeVarUInt(1, ba) # query
+    writeStringBinary(query_id, ba)
+
+    ba.append(1) # INITIAL_QUERY
+
+    # client info
+    serializeClientInfo(ba, query_id)
+
+    writeStringBinary('', ba) # No settings
+    writeStringBinary('', ba) # No interserver secret
+    writeVarUInt(2, ba) # Stage - Complete
+    ba.append(0) # No compression
+    writeStringBinary(query, ba) # query, finally
+    s.sendall(ba)
+
+def serializeBlockInfo(ba):
+    writeVarUInt(1, ba) # 1
+    ba.append(0) # is_overflows
+    writeVarUInt(2, ba) # 2
+    writeVarUInt(0, ba) # 0
+    ba.extend([0] * 4) # bucket_num
+
+def sendEmptyBlock(s):
+    ba = bytearray()
+    writeVarUInt(2, ba) # Data
+    writeStringBinary('', ba)
+    serializeBlockInfo(ba)
+    writeVarUInt(0, ba) # rows
+    writeVarUInt(0, ba) # columns
+    s.sendall(ba)
+
+def assertPacket(packet, expected):
+    assert(packet == expected), packet
+
+def readException(s):
+    code = readUInt32(s)
+    name = readStringBinary(s)
+    text = readStringBinary(s)
+    readStringBinary(s) # trace
+    assertPacket(readUInt8(s), 0) # has_nested
+    sys.stdout.write("code {}: {}".format(code, text.replace('DB::Exception:', '')))
+
+
+def test():
+    client = ClickHouseClient()
+
+    res = client.query("SELECT ngramSimHash('')")
+    sys.stdout.write(res)
+    res = client.query("SELECT ngramSimHash('what a cute cat.')")
+    sys.stdout.write(res)
+    res = client.query("SELECT ngramSimHashCaseInsensitive('what a cute cat.')")
+    sys.stdout.write(res)
+    res = client.query("SELECT ngramSimHashUTF8('what a cute cat.')")
+    sys.stdout.write(res)
+    res = client.query("SELECT ngramSimHashCaseInsensitiveUTF8('what a cute cat.')")
+
sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHash('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashCaseInsensitive('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashUTF8('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashCaseInsensitiveUTF8('what a cute cat.')") + sys.stdout.write(res) + + res = client.query("SELECT ngramMinHash('')") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHash('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashCaseInsensitive('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashUTF8('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashCaseInsensitiveUTF8('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHash('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashCaseInsensitive('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashUTF8('what a cute cat.')") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashCaseInsensitiveUTF8('what a cute cat.')") + sys.stdout.write(res) + + client.query("DROP TABLE IF EXISTS defaults") + client.query("CREATE TABLE defaults(s String) ENGINE = Memory()") + client.query("INSERT INTO defaults values ('It is the latest occurrence of the Southeast European haze, the issue that occurs in constant intensity during every wet season. It has mainly been caused by forest fires resulting from illegal slash-and-burn clearing performed on behalf of the palm oil industry in Kazakhstan, principally on the islands, which then spread quickly in the dry season.') ('It is the latest occurrence of the Southeast Asian haze, the issue that occurs in constant intensity during every wet season. 
It has mainly been caused by forest fires resulting from illegal slash-and-burn clearing performed on behalf of the palm oil industry in Kazakhstan, principally on the islands, which then spread quickly in the dry season.')") + + res = client.query("SELECT ngramSimHash(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramSimHashCaseInsensitive(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramSimHashUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramSimHashCaseInsensitiveUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHash(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashCaseInsensitive(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleSimHashCaseInsensitiveUTF8(s) FROM defaults") + sys.stdout.write(res) + + res = client.query("SELECT ngramMinHash(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashCaseInsensitive(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT ngramMinHashCaseInsensitiveUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHash(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashCaseInsensitive(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashUTF8(s) FROM defaults") + sys.stdout.write(res) + res = client.query("SELECT wordShingleMinHashCaseInsensitiveUTF8(s) FROM defaults") + sys.stdout.write(res) + + client.query("TRUNCATE TABLE defaults") + client.query("INSERT INTO defaults SELECT arrayJoin(splitByString('\n\n', 'ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems.\n\nClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system''s read and write availability.\nClickHouse is simple and works out of the box. 
It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system''s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. 
All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.'))") + + res = client.query("SELECT 'uniqExact', uniqExact(s) FROM defaults") + sys.stdout.write(res) + + res = client.query("SELECT 'ngramSimHash'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHash(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramSimHashCaseInsensitive'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramSimHashUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramSimHashCaseInsensitiveUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleSimHash'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHash(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleSimHashCaseInsensitive'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleSimHashUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'wordShingleSimHashCaseInsensitiveUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + + res = client.query("SELECT 'ngramMinHash'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHash(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramMinHashCaseInsensitive'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramMinHashUTF8'") + sys.stdout.write(res) + res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h") + sys.stdout.write(res) + res = client.query("SELECT 'ngramMinHashCaseInsensitiveUTF8'") + 
sys.stdout.write(res)
+    res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h")
+    sys.stdout.write(res)
+    res = client.query("SELECT 'wordShingleMinHash'")
+    sys.stdout.write(res)
+    res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHash(s) as h FROM defaults GROUP BY h ORDER BY h")
+    sys.stdout.write(res)
+    res = client.query("SELECT 'wordShingleMinHashCaseInsensitive'")
+    sys.stdout.write(res)
+    res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h")
+    sys.stdout.write(res)
+    res = client.query("SELECT 'wordShingleMinHashUTF8'")
+    sys.stdout.write(res)
+    res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h")
+    sys.stdout.write(res)
+    res = client.query("SELECT 'wordShingleMinHashCaseInsensitiveUTF8'")
+    sys.stdout.write(res)
+    res = client.query("SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h")
+    sys.stdout.write(res)
+
+    wordShingleSimHashInvalidArg1()
+
+    wordShingleSimHashInvalidArg2()
+
+    wordShingleSimHashInvalidArg3()
+    #client.query("DROP TABLE defaults")
+
+def wordShingleSimHashInvalidArg1():
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        s.settimeout(30)
+        s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT))
+        sendHello(s)
+        receiveHello(s)
+        sendQuery(s, "SELECT wordShingleSimHash('foobar', 9223372036854775807)")
+
+        # Fin block
+        sendEmptyBlock(s)
+
+
+        assertPacket(readVarUInt(s), 2)
+        print(readException(s))
+        s.close()
+
+
+def wordShingleSimHashInvalidArg2():
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        s.settimeout(30)
+        s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT))
+        sendHello(s)
+        receiveHello(s)
+        sendQuery(s, "SELECT wordShingleSimHash('foobar', 1001)")
+
+        # Fin block
+        sendEmptyBlock(s)
+
+        assertPacket(readVarUInt(s), 2)
+        print(readException(s))
+        s.close()
+
+
+def wordShingleSimHashInvalidArg3():
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+        s.settimeout(30)
+        s.connect((CLICKHOUSE_HOST, CLICKHOUSE_PORT))
+        sendHello(s)
+        receiveHello(s)
+        sendQuery(s, "SELECT wordShingleSimHash('foobar', 0)")
+
+        # Fin block
+        sendEmptyBlock(s)
+
+        assertPacket(readVarUInt(s), 2)
+        print(readException(s))
+        s.close()
+
+if __name__ == "__main__":
+    test()
+    #wordShingleSimHashInvalidArg1()
diff --git a/tests/queries/0_stateless/01016_simhash_minhash.sh b/tests/queries/0_stateless/01016_simhash_minhash.sh
new file mode 100755
index 00000000000..94bac7efacb
--- /dev/null
+++ b/tests/queries/0_stateless/01016_simhash_minhash.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
+# shellcheck source=../shell_config.sh
+. 
"$CURDIR"/../shell_config.sh + +python3 "$CURDIR"/01016_simhash_minhash.python + diff --git a/tests/queries/0_stateless/01016_simhash_minhash.sql b/tests/queries/0_stateless/01016_simhash_minhash.sql deleted file mode 100644 index 1e77b487851..00000000000 --- a/tests/queries/0_stateless/01016_simhash_minhash.sql +++ /dev/null @@ -1,115 +0,0 @@ -SELECT ngramSimHash(''); -SELECT ngramSimHash('what a cute cat.'); -SELECT ngramSimHashCaseInsensitive('what a cute cat.'); -SELECT ngramSimHashUTF8('what a cute cat.'); -SELECT ngramSimHashCaseInsensitiveUTF8('what a cute cat.'); -SELECT wordShingleSimHash('what a cute cat.'); -SELECT wordShingleSimHashCaseInsensitive('what a cute cat.'); -SELECT wordShingleSimHashUTF8('what a cute cat.'); -SELECT wordShingleSimHashCaseInsensitiveUTF8('what a cute cat.'); - -SELECT ngramMinHash(''); -SELECT ngramMinHash('what a cute cat.'); -SELECT ngramMinHashCaseInsensitive('what a cute cat.'); -SELECT ngramMinHashUTF8('what a cute cat.'); -SELECT ngramMinHashCaseInsensitiveUTF8('what a cute cat.'); -SELECT wordShingleMinHash('what a cute cat.'); -SELECT wordShingleMinHashCaseInsensitive('what a cute cat.'); -SELECT wordShingleMinHashUTF8('what a cute cat.'); -SELECT wordShingleMinHashCaseInsensitiveUTF8('what a cute cat.'); - -DROP TABLE IF EXISTS defaults; -CREATE TABLE defaults -( - s String -)ENGINE = Memory(); - -INSERT INTO defaults values ('It is the latest occurrence of the Southeast European haze, the issue that occurs in constant intensity during every wet season. It has mainly been caused by forest fires resulting from illegal slash-and-burn clearing performed on behalf of the palm oil industry in Kazakhstan, principally on the islands, which then spread quickly in the dry season.') ('It is the latest occurrence of the Southeast Asian haze, the issue that occurs in constant intensity during every wet season. It has mainly been caused by forest fires resulting from illegal slash-and-burn clearing performed on behalf of the palm oil industry in Kazakhstan, principally on the islands, which then spread quickly in the dry season.'); - -SELECT ngramSimHash(s) FROM defaults; -SELECT ngramSimHashCaseInsensitive(s) FROM defaults; -SELECT ngramSimHashUTF8(s) FROM defaults; -SELECT ngramSimHashCaseInsensitiveUTF8(s) FROM defaults; -SELECT wordShingleSimHash(s) FROM defaults; -SELECT wordShingleSimHashCaseInsensitive(s) FROM defaults; -SELECT wordShingleSimHashUTF8(s) FROM defaults; -SELECT wordShingleSimHashCaseInsensitiveUTF8(s) FROM defaults; - -SELECT ngramMinHash(s) FROM defaults; -SELECT ngramMinHashCaseInsensitive(s) FROM defaults; -SELECT ngramMinHashUTF8(s) FROM defaults; -SELECT ngramMinHashCaseInsensitiveUTF8(s) FROM defaults; -SELECT wordShingleMinHash(s) FROM defaults; -SELECT wordShingleMinHashCaseInsensitive(s) FROM defaults; -SELECT wordShingleMinHashUTF8(s) FROM defaults; -SELECT wordShingleMinHashCaseInsensitiveUTF8(s) FROM defaults; - -TRUNCATE TABLE defaults; -INSERT INTO defaults SELECT arrayJoin(splitByString('\n\n', -'ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency. -ClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. 
Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes. -ClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. - -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. -ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system''s read and write availability. -ClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. - -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. -ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system''s read / write availability. -ClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. - -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. -ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system. -ClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. - -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. -ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system. -ClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. - -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency. -ClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system. -ClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.' -)); - -SELECT 'uniqExact', uniqExact(s) FROM defaults; - - -SELECT 'ngramSimHash'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHash(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'ngramSimHashCaseInsensitive'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'ngramSimHashUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashUTF8(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'ngramSimHashCaseInsensitiveUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramSimHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleSimHash'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHash(s, 2) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleSimHashCaseInsensitive'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitive(s, 2) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleSimHashUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashUTF8(s, 2) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'wordShingleSimHashCaseInsensitiveUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleSimHashCaseInsensitiveUTF8(s, 2) as h FROM defaults GROUP BY h ORDER BY h; - -SELECT 'ngramMinHash'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHash(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'ngramMinHashCaseInsensitive'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitive(s) as h FROM defaults GROUP BY h ORDER BY h; -SELECT 'ngramMinHashUTF8'; -SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashUTF8(s) as h 
FROM defaults GROUP BY h ORDER BY h;
-SELECT 'ngramMinHashCaseInsensitiveUTF8';
-SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), ngramMinHashCaseInsensitiveUTF8(s) as h FROM defaults GROUP BY h ORDER BY h;
-SELECT 'wordShingleMinHash';
-SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHash(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h;
-SELECT 'wordShingleMinHashCaseInsensitive';
-SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitive(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h;
-SELECT 'wordShingleMinHashUTF8';
-SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashUTF8(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h;
-SELECT 'wordShingleMinHashCaseInsensitiveUTF8';
-SELECT arrayStringConcat(groupArray(s), '\n:::::::\n'), count(), wordShingleMinHashCaseInsensitiveUTF8(s, 2, 3) as h FROM defaults GROUP BY h ORDER BY h;
-
-SELECT wordShingleSimHash('foobar', 9223372036854775807); -- { serverError 69 }
-SELECT wordShingleSimHash('foobar', 1001); -- { serverError 69 }
-SELECT wordShingleSimHash('foobar', 0); -- { serverError 69 }
-
-DROP TABLE defaults;
diff --git a/tests/queries/0_stateless/01016_simhash_minhash.reference b/tests/queries/0_stateless/01016_simhash_minhash.x86_64.reference
similarity index 100%
rename from tests/queries/0_stateless/01016_simhash_minhash.reference
rename to tests/queries/0_stateless/01016_simhash_minhash.x86_64.reference

From fe1df7aabe81cf64722a4087d8f3b6c598c7eeca Mon Sep 17 00:00:00 2001
From: MeenaRenganathan22
Date: Tue, 10 Jan 2023 22:00:53 -0800
Subject: [PATCH 166/262] Updated the reference files

---
 .../01016_simhash_minhash.reference        | 152 ++++++++++++++++++
 .../01016_simhash_minhash.x86_64.reference |  91 ++++++-----
 2 files changed, 203 insertions(+), 40 deletions(-)
 create mode 100644 tests/queries/0_stateless/01016_simhash_minhash.reference

diff --git a/tests/queries/0_stateless/01016_simhash_minhash.reference b/tests/queries/0_stateless/01016_simhash_minhash.reference
new file mode 100644
index 00000000000..9d3ff35efb0
--- /dev/null
+++ b/tests/queries/0_stateless/01016_simhash_minhash.reference
@@ -0,0 +1,152 @@
+18446744073709551615
+130877626
+130877626
+2414681787
+2414681787
+3795742796
+3795742796
+3795742796
+3795742796
+(10693559443859979498,10693559443859979498)
+(12862934800683464900,12912608544812513109)
+(12862934800683464900,12912608544812513109)
+(5701637312405877447,12912608544812513109)
+(5701637312405877447,12912608544812513109)
+(17357047205102710216,17357047205102710216)
+(17357047205102710216,17357047205102710216)
+(17357047205102710216,17357047205102710216)
+(17357047205102710216,17357047205102710216)
+3562273581
+3579050789
+3562257197
+3562258213
+3579050797
+3579050757
+3562258221
+3562258181
+3004171816
+2584740395
+437257770
+2651981610
+3004171816
+2584740395
+437257770
+2651981610
+(17614245890954671019,12771214424940442770)
+(17614245890954671019,12771214424940442770)
+(7128473921279637957,12771214424940442770)
+(7128473921279637957,12771214424940442770)
+(17614245890954671019,12771214424940442770)
+(17614245890954671019,12771214424940442770)
+(7128473921279637957,12771214424940442770)
+(7128473921279637957,12771214424940442770)
+(14260447771268573594,5578182242585518316)
+(14260447771268573594,16377939020851853906)
+(4363920713808688881,5013693163726625177)
+(14260447771268573594,3863279269132177973)
+(14260447771268573594,5578182242585518316)
+(14260447771268573594,16377939020851853906)
+(4363920713808688881,5013693163726625177) +(14260447771268573594,3863279269132177973) +uniqExact 6 +ngramSimHash +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 676648743 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1012193063 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. 
Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 2857686823 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3092567843 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3906262823 +ngramSimHashCaseInsensitive +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 2824132391 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2891240999 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3092567591 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3908359975 +ngramSimHashUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 2 676648743 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1012193063 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 2924795687 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3159676711 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3897874215 +ngramSimHashCaseInsensitiveUTF8 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 2824132391 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2891241255 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3092567591 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3906262823 +wordShingleSimHash +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 163730020 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. 
All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1863866568 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2066765888 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2131775692 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2132302028 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 2133610504 +wordShingleSimHashCaseInsensitive +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 769814628 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1851412545 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 1983533133 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2121947213 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2132430916 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2134530116 +wordShingleSimHashUTF8 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. 
Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 163730020 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1863866568 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2066765888 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2131775692 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2132302028 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 2133610504 +wordShingleSimHashCaseInsensitiveUTF8 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 769814628 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1851412545 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 1983533133 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2121947213 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2132430916 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2134530116 +ngramMinHash +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (4388091710993602029,17613327300639166679) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,17443426065825246292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (7962672159337006560,17443426065825246292) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,17443426065825246292) +ngramMinHashCaseInsensitive +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. 
It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (4388091710993602029,17613327300639166679) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,8535005350590298790) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (7962672159337006560,8535005350590298790) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,8535005350590298790) +ngramMinHashUTF8 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (4388091710993602029,17613327300639166679) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. 
It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,17443426065825246292) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (7962672159337006560,17443426065825246292) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,17443426065825246292) +ngramMinHashCaseInsensitiveUTF8 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (4388091710993602029,17613327300639166679) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. 
It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,8535005350590298790) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (7962672159337006560,8535005350590298790) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,8535005350590298790) +wordShingleMinHash +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (2737777099019241270,12203217272515755130) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,5291917846812693075) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,8290914314000593271) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,13404711269494939830) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (9049684948427678934,525844926417235186) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (13984163398937596233,5291917846812693075) +wordShingleMinHashCaseInsensitive +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5915334596853187377,304181940976393091) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5915334596853187377,2742255228205943790) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5915334596853187377,4737570281654602452) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (8339553084913780125,304181940976393091) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (16416045251850351268,9014309695588044244) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (18035669763176492916,17383752913124421136) +wordShingleMinHashUTF8 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (2737777099019241270,12203217272515755130) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,5291917846812693075) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,8290914314000593271) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,13404711269494939830) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (9049684948427678934,525844926417235186) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (13984163398937596233,5291917846812693075) +wordShingleMinHashCaseInsensitiveUTF8 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5915334596853187377,304181940976393091) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5915334596853187377,2742255228205943790) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5915334596853187377,4737570281654602452) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (8339553084913780125,304181940976393091) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (16416045251850351268,9014309695588044244) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 
1 (18035669763176492916,17383752913124421136)
+code 69: Second argument (shingle size) of function wordShingleSimHash cannot be greater then 25: While processing wordShingleSimHash('foobar', 9223372036854775807)None
+code 69: Second argument (shingle size) of function wordShingleSimHash cannot be greater then 25: While processing wordShingleSimHash('foobar', 1001)None
+code 69: Second argument (shingle size) of function wordShingleSimHash cannot be zero: While processing wordShingleSimHash('foobar', 0)None
diff --git a/tests/queries/0_stateless/01016_simhash_minhash.x86_64.reference b/tests/queries/0_stateless/01016_simhash_minhash.x86_64.reference
index d4fdcfea6a5..9d3ff35efb0 100644
--- a/tests/queries/0_stateless/01016_simhash_minhash.x86_64.reference
+++ b/tests/queries/0_stateless/01016_simhash_minhash.x86_64.reference
@@ -72,33 +72,33 @@ ClickHouse makes full use of all available hardware to process each request as q
 ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 3092567591
 ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 3906262823
 wordShingleSimHash
-ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. 
It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 404215014 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 404215270 -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 425963587 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 563598566 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 857724390 -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 991679910 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 163730020 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1863866568 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2066765888 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2131775692 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2132302028 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. 
Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 2133610504 wordShingleSimHashCaseInsensitive -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 420713958 -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 421737795 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 429118950 -ClickHouse makes full use of all available hardware to process every request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 959182215 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 964941252 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 965465540 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. 
It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 769814628 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1851412545 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 1983533133 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2121947213 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2132430916 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2134530116 wordShingleSimHashUTF8 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 404215014 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 404215270 -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 425963587 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 563598566 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 857724390 -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. 
Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 991679910 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 163730020 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1863866568 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2066765888 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2131775692 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2132302028 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 2133610504 wordShingleSimHashCaseInsensitiveUTF8 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. 
It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 420713958 -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 421737795 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 429118950 -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 959182215 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 964941252 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 965465540 +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 769814628 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 1851412545 +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 1983533133 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2121947213 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2132430916 +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 2134530116 ngramMinHash ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (4388091710993602029,17613327300639166679) ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (6021986790841777095,17443426065825246292) @@ -120,22 +120,33 @@ ClickHouse makes full use of all available hardware to process each request as q ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (7962672159337006560,8535005350590298790) ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (13225377334870249827,8535005350590298790) wordShingleMinHash -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. 
Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (5044918525503962090,12338022931991160906) -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (16224204290372720939,13975393268888698430) -ClickHouse makes full use of all available hardware to process every request as quickly as possible. 
Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (18148981179837829400,6048943706095721476) -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (18148981179837829400,14581416672396321264) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (2737777099019241270,12203217272515755130) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. 
It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,5291917846812693075) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,8290914314000593271) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,13404711269494939830) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (9049684948427678934,525844926417235186) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (13984163398937596233,5291917846812693075) wordShingleMinHashCaseInsensitive -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (5044918525503962090,3381836163833256482) -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (15504011608613565061,6048943706095721476) -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (15504011608613565061,14581416672396321264) -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (16224204290372720939,13975393268888698430) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5915334596853187377,304181940976393091) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5915334596853187377,2742255228205943790) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5915334596853187377,4737570281654602452) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (8339553084913780125,304181940976393091) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (16416045251850351268,9014309695588044244) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (18035669763176492916,17383752913124421136) wordShingleMinHashUTF8 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). 
In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (5044918525503962090,12338022931991160906) -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (16224204290372720939,13975393268888698430) -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. 
The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (18148981179837829400,6048943706095721476) -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (18148981179837829400,14581416672396321264) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (2737777099019241270,12203217272515755130) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,5291917846812693075) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. 
All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,8290914314000593271) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5061606110519186545,13404711269494939830) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (9049684948427678934,525844926417235186) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 
1 (13984163398937596233,5291917846812693075) wordShingleMinHashCaseInsensitiveUTF8 -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems.\n:::::::\nClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 3 (5044918525503962090,3381836163833256482) -ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. 
Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (15504011608613565061,6048943706095721476) -ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (15504011608613565061,14581416672396321264) -ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (16224204290372720939,13975393268888698430) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all your structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 
1 (5915334596853187377,304181940976393091) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (used columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the system\'s read / write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they are immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5915334596853187377,2742255228205943790) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns only after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all of your structured data into the system, and it is immediately available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (5915334596853187377,4737570281654602452) +ClickHouse makes full use of all available hardware to process each request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (using columns after decompression only). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid a single point of failure. Downtime for one site or the entire data center will not affect the read / write availability of the system.\nClickHouse is simple and works out of the box. It simplifies all processing of your data: it loads all structured data into the system and immediately becomes available for building reports. The SQL dialect allows you to express the desired result without resorting to any of the non-standard APIs found in some alternative systems. 1 (8339553084913780125,304181940976393091) +ClickHouse makes full use of all available hardware to process every request as quickly as possible. Peak performance for a single query is over 2 terabytes per second (only used columns after unpacking). In a distributed setup, reads are automatically balanced across healthy replicas to avoid increased latency.\nClickHouse supports asynchronous multi-master replication and can be deployed across multiple data centers. All nodes are equal to avoid single points of failure. 
Downtime for one site or the entire data center will not affect the system\'s read and write availability.\nClickHouse is simple and works out of the box. It simplifies all the processing of your data: it loads all your structured data into the system, and they immediately become available for building reports. The SQL dialect allows you to express the desired result without resorting to any non-standard APIs that can be found in some alternative systems. 1 (16416045251850351268,9014309695588044244) +ClickHouse uses all available hardware to its full potential to process each query as fast as possible. Peak processing performance for a single query stands at more than 2 terabytes per second (after decompression, only used columns). In distributed setup reads are automatically balanced among healthy replicas to avoid increasing latency.\nClickHouse supports multi-master asynchronous replication and can be deployed across multiple datacenters. All nodes are equal, which allows avoiding having single points of failure. Downtime of a single node or the whole datacenter wont affect the systems availability for both reads and writes.\nClickHouse is simple and works out-of-the-box. It streamlines all your data processing: ingest all your structured data into the system and it becomes instantly available for building reports. SQL dialect allows expressing the desired result without involving any custom non-standard API that could be found in some alternative systems. 1 (18035669763176492916,17383752913124421136) +code 69: Second argument (shingle size) of function wordShingleSimHash cannot be greater then 25: While processing wordShingleSimHash('foobar', 9223372036854775807)None +code 69: Second argument (shingle size) of function wordShingleSimHash cannot be greater then 25: While processing wordShingleSimHash('foobar', 1001)None +code 69: Second argument (shingle size) of function wordShingleSimHash cannot be zero: While processing wordShingleSimHash('foobar', 0)None From af0c3d751fb602a807dc51716c30ff9fbb0d1999 Mon Sep 17 00:00:00 2001 From: taiyang-li <654010905@qq.com> Date: Wed, 11 Jan 2023 14:32:28 +0800 Subject: [PATCH 167/262] fix uts --- .../0_stateless/00921_datetime64_compatibility_long.reference | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference b/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference index 8d28a69ff3d..8a168ed0e9e 100644 --- a/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference +++ b/tests/queries/0_stateless/00921_datetime64_compatibility_long.reference @@ -28,7 +28,7 @@ SELECT toDayOfMonth(N, \'Asia/Istanbul\') "UInt8",16 "UInt8",16 ------------------------------------------ -SELECT toDayOfWeek(N, \'Asia/Istanbul\') +SELECT toDayOfWeek(N, 0, \'Asia/Istanbul\') "UInt8",1 "UInt8",1 "UInt8",1 From f871949d8513e712234ee3358ccb176fb99432ae Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Tue, 10 Jan 2023 12:02:33 +0100 Subject: [PATCH 168/262] Try to fix flaky test_create_user_and_login/test.py::test_login_as_dropped_user_xml --- tests/integration/test_create_user_and_login/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_create_user_and_login/test.py b/tests/integration/test_create_user_and_login/test.py index 1b59089fa11..372fd549b3f 100644 --- a/tests/integration/test_create_user_and_login/test.py +++ b/tests/integration/test_create_user_and_login/test.py 
@@ -80,7 +80,7 @@ EOF""", ["bash", "-c", "rm /etc/clickhouse-server/users.d/user_c.xml"] ) - expected_errors = ["no user with such name", "not found in user directories"] + expected_errors = ["no user with such name", "not found in user directories", "User has been dropped"] while True: out, err = instance.query_and_get_answer_with_error("SELECT 1", user="C") found_error = [ From 6d6e803cfbfe61fb33b382bdd9f6e006199d6f63 Mon Sep 17 00:00:00 2001 From: robot-clickhouse Date: Tue, 10 Jan 2023 11:09:34 +0000 Subject: [PATCH 169/262] Automatic style fix --- tests/integration/test_create_user_and_login/test.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_create_user_and_login/test.py b/tests/integration/test_create_user_and_login/test.py index 372fd549b3f..b60ec65cb7b 100644 --- a/tests/integration/test_create_user_and_login/test.py +++ b/tests/integration/test_create_user_and_login/test.py @@ -80,7 +80,11 @@ EOF""", ["bash", "-c", "rm /etc/clickhouse-server/users.d/user_c.xml"] ) - expected_errors = ["no user with such name", "not found in user directories", "User has been dropped"] + expected_errors = [ + "no user with such name", + "not found in user directories", + "User has been dropped", + ] while True: out, err = instance.query_and_get_answer_with_error("SELECT 1", user="C") found_error = [ From 82271d6c4bff3b6bcdf09638227a012213d1cbba Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 7 Dec 2022 16:05:06 +0100 Subject: [PATCH 170/262] Analyzer SumIfToCountIfPass crash fix --- src/Analyzer/Passes/SumIfToCountIfPass.cpp | 6 +++--- .../02497_analyzer_sum_if_count_if_pass_crash_fix.reference | 1 + .../02497_analyzer_sum_if_count_if_pass_crash_fix.sql | 4 ++++ 3 files changed, 8 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/02497_analyzer_sum_if_count_if_pass_crash_fix.reference create mode 100644 tests/queries/0_stateless/02497_analyzer_sum_if_count_if_pass_crash_fix.sql diff --git a/src/Analyzer/Passes/SumIfToCountIfPass.cpp b/src/Analyzer/Passes/SumIfToCountIfPass.cpp index 879eb4d4a8d..27717fccd78 100644 --- a/src/Analyzer/Passes/SumIfToCountIfPass.cpp +++ b/src/Analyzer/Passes/SumIfToCountIfPass.cpp @@ -77,7 +77,7 @@ public: if (!nested_function || nested_function->getFunctionName() != "if") return; - auto & nested_if_function_arguments_nodes = nested_function->getArguments().getNodes(); + const auto & nested_if_function_arguments_nodes = nested_function->getArguments().getNodes(); if (nested_if_function_arguments_nodes.size() != 3) return; @@ -101,7 +101,7 @@ public: /// Rewrite `sum(if(cond, 1, 0))` into `countIf(cond)`. 
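/// E.g. `SELECT sum(if((number % 2) = 0, 1, 0)) FROM numbers(100)` is planned as `SELECT countIf((number % 2) = 0) FROM numbers(100)`; the condition argument is copied rather than moved out of the nested `if`, see the 02497 test added below.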
if (if_true_condition_value == 1 && if_false_condition_value == 0) { - function_node_arguments_nodes[0] = std::move(nested_if_function_arguments_nodes[0]); + function_node_arguments_nodes[0] = nested_if_function_arguments_nodes[0]; function_node_arguments_nodes.resize(1); resolveAsCountIfAggregateFunction(*function_node, function_node_arguments_nodes[0]->getResultType()); @@ -120,7 +120,7 @@ public: auto not_function = std::make_shared("not"); auto & not_function_arguments = not_function->getArguments().getNodes(); - not_function_arguments.push_back(std::move(nested_if_function_arguments_nodes[0])); + not_function_arguments.push_back(nested_if_function_arguments_nodes[0]); not_function->resolveAsFunction(FunctionFactory::instance().get("not", context)->build(not_function->getArgumentColumns())); diff --git a/tests/queries/0_stateless/02497_analyzer_sum_if_count_if_pass_crash_fix.reference b/tests/queries/0_stateless/02497_analyzer_sum_if_count_if_pass_crash_fix.reference new file mode 100644 index 00000000000..cf534567c6f --- /dev/null +++ b/tests/queries/0_stateless/02497_analyzer_sum_if_count_if_pass_crash_fix.reference @@ -0,0 +1 @@ +50 50 50 1 0 diff --git a/tests/queries/0_stateless/02497_analyzer_sum_if_count_if_pass_crash_fix.sql b/tests/queries/0_stateless/02497_analyzer_sum_if_count_if_pass_crash_fix.sql new file mode 100644 index 00000000000..51522565014 --- /dev/null +++ b/tests/queries/0_stateless/02497_analyzer_sum_if_count_if_pass_crash_fix.sql @@ -0,0 +1,4 @@ +SET allow_experimental_analyzer = 1; +SET optimize_rewrite_sum_if_to_count_if = 1; + +SELECT sum(if((number % 2) = 0 AS cond_expr, 1 AS one_expr, 0 AS zero_expr) AS if_expr), sum(cond_expr), sum(if_expr), one_expr, zero_expr FROM numbers(100); From eac7a07f3f8c0076a119c27ef2301326024e964a Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 7 Dec 2022 16:15:51 +0100 Subject: [PATCH 171/262] Analyzer AggregateFunctionsArithmeticOperationsPass fix --- ...egateFunctionsArithmericOperationsPass.cpp | 26 +++++++++++-------- ...s_arithmetic_operations_pass_fix.reference | 1 + ...nctions_arithmetic_operations_pass_fix.sql | 14 ++++++++++ 3 files changed, 30 insertions(+), 11 deletions(-) create mode 100644 tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.reference create mode 100644 tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.sql diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index 01072e0b3fc..f1566f9639b 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -73,7 +73,7 @@ public: if (!inner_function_node) return; - auto & inner_function_arguments_nodes = inner_function_node->getArguments().getNodes(); + const auto & inner_function_arguments_nodes = inner_function_node->getArguments().getNodes(); if (inner_function_arguments_nodes.size() != 2) return; @@ -121,11 +121,13 @@ public: } resolveAggregateFunctionNode(*aggregate_function_node, inner_function_arguments_nodes[1], lower_function_name); - auto inner_function = aggregate_function_arguments_nodes[0]; - auto inner_function_right_argument = std::move(inner_function_arguments_nodes[1]); - aggregate_function_arguments_nodes = {inner_function_right_argument}; - inner_function_arguments_nodes[1] = node; - node = std::move(inner_function); + auto inner_function_clone = 
inner_function_node->clone(); + auto & inner_function_clone_arguments = inner_function_clone->as().getArguments(); + auto & inner_function_clone_arguments_nodes = inner_function_clone_arguments.getNodes(); + auto inner_function_clone_right_argument = inner_function_clone_arguments_nodes[1]; + aggregate_function_arguments_nodes = {inner_function_clone_right_argument}; + inner_function_clone_arguments_nodes[1] = node; + node = std::move(inner_function_clone); } else if (right_argument_constant_node) { @@ -138,11 +140,13 @@ public: } resolveAggregateFunctionNode(*aggregate_function_node, inner_function_arguments_nodes[0], function_name_if_constant_is_negative); - auto inner_function = aggregate_function_arguments_nodes[0]; - auto inner_function_left_argument = std::move(inner_function_arguments_nodes[0]); - aggregate_function_arguments_nodes = {inner_function_left_argument}; - inner_function_arguments_nodes[0] = node; - node = std::move(inner_function); + auto inner_function_clone = inner_function_node->clone(); + auto & inner_function_clone_arguments = inner_function_clone->as().getArguments(); + auto & inner_function_clone_arguments_nodes = inner_function_clone_arguments.getNodes(); + auto inner_function_clone_left_argument = inner_function_clone_arguments_nodes[0]; + aggregate_function_arguments_nodes = {inner_function_clone_left_argument}; + inner_function_clone_arguments_nodes[0] = node; + node = std::move(inner_function_clone); } } diff --git a/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.reference b/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.reference new file mode 100644 index 00000000000..4f9430ef608 --- /dev/null +++ b/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.reference @@ -0,0 +1 @@ +4 2 diff --git a/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.sql b/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.sql new file mode 100644 index 00000000000..e3e508e17be --- /dev/null +++ b/tests/queries/0_stateless/02498_analyzer_aggregate_functions_arithmetic_operations_pass_fix.sql @@ -0,0 +1,14 @@ +SET allow_experimental_analyzer = 1; +SET optimize_arithmetic_operations_in_aggregate_functions = 1; + +DROP TABLE IF EXISTS test_table; +CREATE TABLE test_table +( + id UInt64, + value UInt64 +) ENGINE=MergeTree ORDER BY id; + +INSERT INTO test_table VALUES (1, 1); +INSERT INTO test_table VALUES (1, 1); + +SELECT sum((2 * id) as func), func FROM test_table GROUP BY id; From 1420c4b85278737afed41ee8d1cdb50a0b68755e Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 11 Jan 2023 10:44:59 +0100 Subject: [PATCH 172/262] Use logging instead of printing --- tests/ci/get_previous_release_tag.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/ci/get_previous_release_tag.py b/tests/ci/get_previous_release_tag.py index 6551ba80ecd..aa84169611c 100755 --- a/tests/ci/get_previous_release_tag.py +++ b/tests/ci/get_previous_release_tag.py @@ -9,6 +9,8 @@ CLICKHOUSE_TAGS_URL = "https://api.github.com/repos/ClickHouse/ClickHouse/tags" CLICKHOUSE_PACKAGE_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-common-static_{version}_amd64.deb" VERSION_PATTERN = r"(v(?:\d+\.)?(?:\d+\.)?(?:\d+\.)?\d+-[a-zA-Z]*)" +logger = logging.getLogger(__name__) + class Version: def __init__(self, version): @@ -61,10 +63,10 @@ def find_previous_release(server_version, releases): ): return True, release else: - print( - "The tag {version}-{type} exists but the package is not yet available on GitHub".format( - version=release.version, type=release.type - ) + logger.debug( + "The tag %s-%s exists but the package is not yet available on GitHub", + release.version, + release.type, ) return False, None From 1dc9fe6f5ae97e8d6be2ca2fab6aebba299143e6 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 11 Jan 2023 10:45:51 +0100 Subject: [PATCH 173/262] Fix the timeout, remove wrong parameters --- tests/ci/get_previous_release_tag.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/ci/get_previous_release_tag.py b/tests/ci/get_previous_release_tag.py index aa84169611c..579035bd943 100755 --- a/tests/ci/get_previous_release_tag.py +++ b/tests/ci/get_previous_release_tag.py @@ -54,10 +54,7 @@ def find_previous_release(server_version, releases): CLICKHOUSE_PACKAGE_URL.format( version=release.version, type=release.type ), - total=10, - read=10, - connect=10, - backoff_factor=0.3, + timeout=10, ).status_code != 404 ): From f1947b94def69a99960a13becf10a269f0798327 Mon Sep 17 00:00:00 2001 From: Maksim Kita Date: Wed, 11 Jan 2023 11:10:06 +0100 Subject: [PATCH 174/262] Fixed tests --- .../Passes/AggregateFunctionsArithmericOperationsPass.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp index f1566f9639b..33ecf549363 100644 --- a/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp +++ b/src/Analyzer/Passes/AggregateFunctionsArithmericOperationsPass.cpp @@ -119,13 +119,13 @@ public: { lower_function_name = function_name_if_constant_is_negative; } - resolveAggregateFunctionNode(*aggregate_function_node, inner_function_arguments_nodes[1], lower_function_name); auto inner_function_clone = inner_function_node->clone(); auto & inner_function_clone_arguments = inner_function_clone->as().getArguments(); auto & inner_function_clone_arguments_nodes = inner_function_clone_arguments.getNodes(); auto inner_function_clone_right_argument = inner_function_clone_arguments_nodes[1]; aggregate_function_arguments_nodes = {inner_function_clone_right_argument}; + resolveAggregateFunctionNode(*aggregate_function_node, inner_function_clone_right_argument, lower_function_name); inner_function_clone_arguments_nodes[1] = node; node = std::move(inner_function_clone); } @@ -138,20 +138,20 @@ public: { lower_function_name = function_name_if_constant_is_negative; } - resolveAggregateFunctionNode(*aggregate_function_node, 
inner_function_arguments_nodes[0], function_name_if_constant_is_negative); auto inner_function_clone = inner_function_node->clone(); auto & inner_function_clone_arguments = inner_function_clone->as().getArguments(); auto & inner_function_clone_arguments_nodes = inner_function_clone_arguments.getNodes(); auto inner_function_clone_left_argument = inner_function_clone_arguments_nodes[0]; aggregate_function_arguments_nodes = {inner_function_clone_left_argument}; + resolveAggregateFunctionNode(*aggregate_function_node, inner_function_clone_left_argument, function_name_if_constant_is_negative); inner_function_clone_arguments_nodes[0] = node; node = std::move(inner_function_clone); } } private: - static inline void resolveAggregateFunctionNode(FunctionNode & function_node, QueryTreeNodePtr & argument, const String & aggregate_function_name) + static inline void resolveAggregateFunctionNode(FunctionNode & function_node, const QueryTreeNodePtr & argument, const String & aggregate_function_name) { auto function_aggregate_function = function_node.getAggregateFunction(); From 77f0724629c16a2bf425ed7e82b5e2c432335ed3 Mon Sep 17 00:00:00 2001 From: Yatsishin Ilya <2159081+qoega@users.noreply.github.com> Date: Wed, 11 Jan 2023 10:28:30 +0000 Subject: [PATCH 175/262] Fix flaky test_tcp_handler_interserver_listen_host --- .../test_tcp_handler_interserver_listen_host/test_case.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py b/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py index 44df1c369cf..e792d0867f6 100644 --- a/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py +++ b/tests/integration/test_tcp_handler_interserver_listen_host/test_case.py @@ -29,6 +29,12 @@ node_without_interserver_listen_host = cluster.add_instance( def start_cluster(): try: cluster.start() + cluster.wait_for_url( + f"http://{INTERSERVER_LISTEN_HOST}:{INTERSERVER_HTTP_PORT}" + ) + cluster.wait_for_url( + f"http://{node_without_interserver_listen_host.ip_address}:8123" + ) yield cluster finally: From a881a61e748fe7f728817d26a88bbe146520125c Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 23 Dec 2022 11:30:22 +0000 Subject: [PATCH 176/262] Set pipeline type in join step description --- src/Planner/PlannerJoinTree.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 3584c9d4caa..a0e8b9c5f7a 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -586,6 +586,7 @@ QueryPlan buildQueryPlanForJoinNode(QueryTreeNodePtr join_tree_node, size_t max_block_size = query_context->getSettingsRef().max_block_size; size_t max_streams = query_context->getSettingsRef().max_threads; + JoinPipelineType join_pipeline_type = join_algorithm->pipelineType(); auto join_step = std::make_unique( left_plan.getCurrentDataStream(), right_plan.getCurrentDataStream(), @@ -594,7 +595,7 @@ QueryPlan buildQueryPlanForJoinNode(QueryTreeNodePtr join_tree_node, max_streams, false /*optimize_read_in_order*/); - join_step->setStepDescription(fmt::format("JOIN {}", JoinPipelineType::FillRightFirst)); + join_step->setStepDescription(fmt::format("JOIN {}", join_pipeline_type)); std::vector plans; plans.emplace_back(std::make_unique(std::move(left_plan))); From 0b86deb58527c347482892d91ead2a7041c60c92 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 23 Dec 2022 19:33:46 +0000 Subject: [PATCH 177/262] [wip] drop unused columns after 
join on/using --- src/Planner/Planner.cpp | 5 ++++- src/Planner/PlannerJoinTree.cpp | 21 ++++++++++++++++++--- src/Planner/PlannerJoinTree.h | 3 +++ 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index a0e8c4687c6..fc8dafd1b49 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -64,6 +64,7 @@ #include #include #include +#include namespace DB { @@ -374,7 +375,9 @@ void Planner::buildQueryPlanIfNeeded() collectSets(query_tree, *planner_context); - query_plan = buildQueryPlanForJoinTreeNode(query_node.getJoinTree(), select_query_info, select_query_options, planner_context); + auto top_level_identifiers = collectUsedIdentifiers(query_tree, planner_context); + + query_plan = buildQueryPlanForJoinTreeNode(query_node.getJoinTree(), select_query_info, select_query_options, top_level_identifiers, planner_context); auto expression_analysis_result = buildExpressionAnalysisResult(query_tree, query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(), planner_context); if (expression_analysis_result.hasWhere()) diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index a0e8b9c5f7a..5f1b27bb1cc 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -33,6 +33,7 @@ #include #include +#include #include #include #include @@ -207,19 +208,25 @@ QueryPlan buildQueryPlanForTableExpression(QueryTreeNodePtr table_expression, QueryPlan buildQueryPlanForJoinNode(QueryTreeNodePtr join_tree_node, SelectQueryInfo & select_query_info, const SelectQueryOptions & select_query_options, + const ColumnIdentifierSet & outer_scope_columns, PlannerContextPtr & planner_context) { auto & join_node = join_tree_node->as(); + ColumnIdentifierSet current_scope_columns = outer_scope_columns; + collectUsedIdentifiers(join_tree_node, planner_context, current_scope_columns); + auto left_plan = buildQueryPlanForJoinTreeNode(join_node.getLeftTableExpression(), select_query_info, select_query_options, + current_scope_columns, planner_context); auto left_plan_output_columns = left_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); auto right_plan = buildQueryPlanForJoinTreeNode(join_node.getRightTableExpression(), select_query_info, select_query_options, + current_scope_columns, planner_context); auto right_plan_output_columns = right_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); @@ -610,8 +617,13 @@ QueryPlan buildQueryPlanForJoinNode(QueryTreeNodePtr join_tree_node, for (auto & output : drop_unused_columns_after_join_actions_dag->getOutputs()) { - if (updated_outputs_names.contains(output->result_name) || !planner_context->getGlobalPlannerContext()->hasColumnIdentifier(output->result_name)) + const auto & global_planner_context = planner_context->getGlobalPlannerContext(); + if (updated_outputs_names.contains(output->result_name) + || !global_planner_context->hasColumnIdentifier(output->result_name) + || !outer_scope_columns.contains(output->result_name)) + { continue; + } updated_outputs.push_back(output); updated_outputs_names.insert(output->result_name); @@ -629,6 +641,7 @@ QueryPlan buildQueryPlanForJoinNode(QueryTreeNodePtr join_tree_node, QueryPlan buildQueryPlanForArrayJoinNode(QueryTreeNodePtr table_expression, SelectQueryInfo & select_query_info, const SelectQueryOptions & select_query_options, + const ColumnIdentifierSet & outer_scope_columns, PlannerContextPtr & planner_context) { auto & array_join_node = table_expression->as(); @@ -636,6 +649,7 @@ 
QueryPlan buildQueryPlanForArrayJoinNode(QueryTreeNodePtr table_expression, auto plan = buildQueryPlanForJoinTreeNode(array_join_node.getTableExpression(), select_query_info, select_query_options, + outer_scope_columns, planner_context); auto plan_output_columns = plan.getCurrentDataStream().header.getColumnsWithTypeAndName(); @@ -675,6 +689,7 @@ QueryPlan buildQueryPlanForArrayJoinNode(QueryTreeNodePtr table_expression, QueryPlan buildQueryPlanForJoinTreeNode(QueryTreeNodePtr join_tree_node, SelectQueryInfo & select_query_info, const SelectQueryOptions & select_query_options, + const ColumnIdentifierSet & outer_scope_columns, PlannerContextPtr & planner_context) { auto join_tree_node_type = join_tree_node->getNodeType(); @@ -693,11 +708,11 @@ QueryPlan buildQueryPlanForJoinTreeNode(QueryTreeNodePtr join_tree_node, } case QueryTreeNodeType::JOIN: { - return buildQueryPlanForJoinNode(join_tree_node, select_query_info, select_query_options, planner_context); + return buildQueryPlanForJoinNode(join_tree_node, select_query_info, select_query_options, outer_scope_columns, planner_context); } case QueryTreeNodeType::ARRAY_JOIN: { - return buildQueryPlanForArrayJoinNode(join_tree_node, select_query_info, select_query_options, planner_context); + return buildQueryPlanForArrayJoinNode(join_tree_node, select_query_info, select_query_options, outer_scope_columns, planner_context); } default: { diff --git a/src/Planner/PlannerJoinTree.h b/src/Planner/PlannerJoinTree.h index c93b71e0df1..9d83bf62fc1 100644 --- a/src/Planner/PlannerJoinTree.h +++ b/src/Planner/PlannerJoinTree.h @@ -11,10 +11,13 @@ namespace DB { +using ColumnIdentifierSet = std::unordered_set; + /// Build query plan for query JOIN TREE node QueryPlan buildQueryPlanForJoinTreeNode(QueryTreeNodePtr join_tree_node, SelectQueryInfo & select_query_info, const SelectQueryOptions & select_query_options, + const ColumnIdentifierSet & outer_scope_columns, PlannerContextPtr & planner_context); } From f3702e9279b82409af62be9df20ab8f62e638b73 Mon Sep 17 00:00:00 2001 From: vdimir Date: Fri, 23 Dec 2022 19:34:56 +0000 Subject: [PATCH 178/262] add missing src/Planner/CollectUsedIndetifiers --- src/Planner/CollectUsedIndetifiers.cpp | 66 ++++++++++++++++++++++++++ src/Planner/CollectUsedIndetifiers.h | 17 +++++++ 2 files changed, 83 insertions(+) create mode 100644 src/Planner/CollectUsedIndetifiers.cpp create mode 100644 src/Planner/CollectUsedIndetifiers.h diff --git a/src/Planner/CollectUsedIndetifiers.cpp b/src/Planner/CollectUsedIndetifiers.cpp new file mode 100644 index 00000000000..f475bc586f3 --- /dev/null +++ b/src/Planner/CollectUsedIndetifiers.cpp @@ -0,0 +1,66 @@ +#include + +#include +#include + +#include + +namespace DB +{ + +namespace +{ + +class CollectUsedIdentifiersVisitor : public InDepthQueryTreeVisitor +{ +public: + + explicit CollectUsedIdentifiersVisitor(const PlannerContextPtr & planner_context_, ColumnIdentifierSet & used_identifiers_) + : used_identifiers(used_identifiers_) + , planner_context(planner_context_) + {} + + bool needChildVisit(VisitQueryTreeNodeType & parent [[maybe_unused]], VisitQueryTreeNodeType & child [[maybe_unused]]) + { + const auto & node_type = child->getNodeType(); + return node_type != QueryTreeNodeType::TABLE + && node_type != QueryTreeNodeType::TABLE_FUNCTION + && node_type != QueryTreeNodeType::QUERY + && node_type != QueryTreeNodeType::UNION + && node_type != QueryTreeNodeType::JOIN + && node_type != QueryTreeNodeType::ARRAY_JOIN; + } + + void visitImpl(const QueryTreeNodePtr & node) + { + if 
(node->getNodeType() != QueryTreeNodeType::COLUMN) + return; + + const auto * column_ident = planner_context->getColumnNodeIdentifierOrNull(node); + if (!column_ident) + return; + + used_identifiers.insert(*column_ident); + } + + ColumnIdentifierSet & used_identifiers; + const PlannerContextPtr & planner_context; +}; + +} + +void collectUsedIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context, ColumnIdentifierSet & out) +{ + CollectUsedIdentifiersVisitor visitor(planner_context, out); + visitor.visit(node); +} + +ColumnIdentifierSet collectUsedIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context) +{ + ColumnIdentifierSet out; + collectUsedIdentifiers(node, planner_context, out); + return out; +} + +} + diff --git a/src/Planner/CollectUsedIndetifiers.h b/src/Planner/CollectUsedIndetifiers.h new file mode 100644 index 00000000000..06c50d41e59 --- /dev/null +++ b/src/Planner/CollectUsedIndetifiers.h @@ -0,0 +1,17 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +using ColumnIdentifierSet = std::unordered_set; + +ColumnIdentifierSet collectUsedIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context); +void collectUsedIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context, ColumnIdentifierSet & out); + + +} + From 6c5b4458cb000f0411cdf5ab617edc17b82f88a8 Mon Sep 17 00:00:00 2001 From: vdimir Date: Tue, 27 Dec 2022 11:08:56 +0000 Subject: [PATCH 179/262] Add test, comments, rename --- ...fiers.cpp => CollectColumnIndetifiers.cpp} | 14 +- src/Planner/CollectColumnIndetifiers.h | 22 +++ src/Planner/CollectUsedIndetifiers.h | 17 --- src/Planner/Planner.cpp | 4 +- src/Planner/PlannerJoinTree.cpp | 4 +- .../02514_analyzer_drop_join_on.reference | 141 ++++++++++++++++++ .../02514_analyzer_drop_join_on.sql | 46 ++++++ 7 files changed, 220 insertions(+), 28 deletions(-) rename src/Planner/{CollectUsedIndetifiers.cpp => CollectColumnIndetifiers.cpp} (64%) create mode 100644 src/Planner/CollectColumnIndetifiers.h delete mode 100644 src/Planner/CollectUsedIndetifiers.h create mode 100644 tests/queries/0_stateless/02514_analyzer_drop_join_on.reference create mode 100644 tests/queries/0_stateless/02514_analyzer_drop_join_on.sql diff --git a/src/Planner/CollectUsedIndetifiers.cpp b/src/Planner/CollectColumnIndetifiers.cpp similarity index 64% rename from src/Planner/CollectUsedIndetifiers.cpp rename to src/Planner/CollectColumnIndetifiers.cpp index f475bc586f3..13a53067a00 100644 --- a/src/Planner/CollectUsedIndetifiers.cpp +++ b/src/Planner/CollectColumnIndetifiers.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -11,11 +11,11 @@ namespace DB namespace { -class CollectUsedIdentifiersVisitor : public InDepthQueryTreeVisitor +class CollectTopLevelColumnIdentifiersVisitor : public InDepthQueryTreeVisitor { public: - explicit CollectUsedIdentifiersVisitor(const PlannerContextPtr & planner_context_, ColumnIdentifierSet & used_identifiers_) + explicit CollectTopLevelColumnIdentifiersVisitor(const PlannerContextPtr & planner_context_, ColumnIdentifierSet & used_identifiers_) : used_identifiers(used_identifiers_) , planner_context(planner_context_) {} @@ -49,16 +49,16 @@ public: } -void collectUsedIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context, ColumnIdentifierSet & out) +void collectTopLevelColumnIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context, ColumnIdentifierSet & out) { - 
CollectUsedIdentifiersVisitor visitor(planner_context, out); + CollectTopLevelColumnIdentifiersVisitor visitor(planner_context, out); visitor.visit(node); } -ColumnIdentifierSet collectUsedIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context) +ColumnIdentifierSet collectTopLevelColumnIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context) { ColumnIdentifierSet out; - collectUsedIdentifiers(node, planner_context, out); + collectTopLevelColumnIdentifiers(node, planner_context, out); return out; } diff --git a/src/Planner/CollectColumnIndetifiers.h b/src/Planner/CollectColumnIndetifiers.h new file mode 100644 index 00000000000..8c84908ee6b --- /dev/null +++ b/src/Planner/CollectColumnIndetifiers.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ + +using ColumnIdentifierSet = std::unordered_set; + +/// Collect all top level column identifiers from query tree node. +/// Top level column identifiers are in the SELECT list or GROUP BY/ORDER BY/WHERE/HAVING clause, but not in child nodes of join tree. +/// For example, in the following query: +/// SELECT sum(b) FROM (SELECT x AS a, y AS b FROM t) AS t1 JOIN t2 ON t1.a = t2.key GROUP BY t2.y +/// The top level column identifiers are: `t1.b`, `t2.y` +ColumnIdentifierSet collectTopLevelColumnIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context); + +void collectTopLevelColumnIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context, ColumnIdentifierSet & out); + +} + diff --git a/src/Planner/CollectUsedIndetifiers.h b/src/Planner/CollectUsedIndetifiers.h deleted file mode 100644 index 06c50d41e59..00000000000 --- a/src/Planner/CollectUsedIndetifiers.h +++ /dev/null @@ -1,17 +0,0 @@ -#pragma once - -#include -#include -#include - -namespace DB -{ - -using ColumnIdentifierSet = std::unordered_set; - -ColumnIdentifierSet collectUsedIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context); -void collectUsedIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context, ColumnIdentifierSet & out); - - -} - diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index fc8dafd1b49..ea14d29bd6f 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -64,7 +64,7 @@ #include #include #include -#include +#include namespace DB { @@ -375,7 +375,7 @@ void Planner::buildQueryPlanIfNeeded() collectSets(query_tree, *planner_context); - auto top_level_identifiers = collectUsedIdentifiers(query_tree, planner_context); + auto top_level_identifiers = collectTopLevelColumnIdentifiers(query_tree, planner_context); query_plan = buildQueryPlanForJoinTreeNode(query_node.getJoinTree(), select_query_info, select_query_options, top_level_identifiers, planner_context); auto expression_analysis_result = buildExpressionAnalysisResult(query_tree, query_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(), planner_context); diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 5f1b27bb1cc..37c542d0494 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include #include @@ -214,7 +214,7 @@ QueryPlan buildQueryPlanForJoinNode(QueryTreeNodePtr join_tree_node, auto & join_node = join_tree_node->as(); ColumnIdentifierSet current_scope_columns = outer_scope_columns; - collectUsedIdentifiers(join_tree_node, planner_context, 
current_scope_columns); + collectTopLevelColumnIdentifiers(join_tree_node, planner_context, current_scope_columns); auto left_plan = buildQueryPlanForJoinTreeNode(join_node.getLeftTableExpression(), select_query_info, diff --git a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference new file mode 100644 index 00000000000..100b2fc42bf --- /dev/null +++ b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference @@ -0,0 +1,141 @@ +Expression ((Project names + Projection)) +Header: count() UInt64 + Aggregating + Header: default.a.a2_4 String + count() UInt64 + Expression ((Before GROUP BY + DROP unused columns after JOIN)) + Header: default.a.a2_4 String + Join (JOIN FillRightFirst) + Header: default.a.a2_4 String + default.c.c1_2 UInt64 + default.d.d1_3 UInt64 + Expression ((JOIN actions + DROP unused columns after JOIN)) + Header: default.a.a2_4 String + default.c.c1_2 UInt64 + Join (JOIN FillRightFirst) + Header: default.a.a2_4 String + default.b.b1_0 UInt64 + default.c.c1_2 UInt64 + Expression ((JOIN actions + DROP unused columns after JOIN)) + Header: default.a.a2_4 String + default.b.b1_0 UInt64 + Join (JOIN FillRightFirst) + Header: default.a.a2_4 String + default.a.a1_1 UInt64 + default.b.b1_0 UInt64 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: default.a.a2_4 String + default.a.a1_1 UInt64 + ReadFromStorage (Memory) + Header: a2 String + a1 UInt64 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: default.b.b1_0 UInt64 + ReadFromStorage (Memory) + Header: b1 UInt64 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: default.c.c1_2 UInt64 + ReadFromStorage (Memory) + Header: c1 UInt64 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: default.d.d1_3 UInt64 + ReadFromStorage (Memory) + Header: d1 UInt64 +Expression ((Project names + (Projection + DROP unused columns after JOIN))) +Header: a2 String + d2 String + Join (JOIN FillRightFirst) + Header: default.a.k_2 UInt64 + default.a.a2_0 String + default.d.d2_1 String + default.d.k_5 UInt64 + Expression (DROP unused columns after JOIN) + Header: default.a.k_2 UInt64 + default.a.a2_0 String + Join (JOIN FillRightFirst) + Header: default.a.k_2 UInt64 + default.a.a2_0 String + default.c.k_4 UInt64 + Expression (DROP unused columns after JOIN) + Header: default.a.k_2 UInt64 + default.a.a2_0 String + Join (JOIN FillRightFirst) + Header: default.a.k_2 UInt64 + default.a.a2_0 String + default.b.k_3 UInt64 + Expression (Change column names to column identifiers) + Header: default.a.k_2 UInt64 + default.a.a2_0 String + ReadFromStorage (Memory) + Header: k UInt64 + a2 String + Expression (Change column names to column identifiers) + Header: default.b.k_3 UInt64 + ReadFromStorage (Memory) + Header: k UInt64 + Expression (Change column names to column identifiers) + Header: default.c.k_4 UInt64 + ReadFromStorage (Memory) + Header: k UInt64 + Expression (Change column names to column identifiers) + Header: default.d.k_5 UInt64 + default.d.d2_1 String + ReadFromStorage (Memory) + Header: k UInt64 + d2 String +Expression (Project names) +Header: bx String + Sorting (Sorting for ORDER BY) + Header: default.a.a2_6 String + b.bx_0 String + Expression ((Before ORDER BY + (Projection + ))) + Header: default.a.a2_6 String + b.bx_0 String + Join (JOIN FillRightFirst) + Header: default.a.a2_6 String + b.bx_0 String + default.c.c2_5 String + 
default.c.c1_3 UInt64 + d.d1_4 UInt64 + Filter (( + (JOIN actions + DROP unused columns after JOIN))) + Header: default.a.a2_6 String + b.bx_0 String + default.c.c2_5 String + default.c.c1_3 UInt64 + Join (JOIN FillRightFirst) + Header: default.a.a2_6 String + b.bx_0 String + b.b1_1 UInt64 + default.c.c2_5 String + default.c.c1_3 UInt64 + Expression ((JOIN actions + DROP unused columns after JOIN)) + Header: default.a.a2_6 String + b.bx_0 String + b.b1_1 UInt64 + Join (JOIN FillRightFirst) + Header: default.a.a2_6 String + default.a.a1_2 UInt64 + b.bx_0 String + b.b1_1 UInt64 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: default.a.a2_6 String + default.a.a1_2 UInt64 + ReadFromStorage (Memory) + Header: a2 String + a1 UInt64 + Expression ((JOIN actions + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))) + Header: b.b1_1 UInt64 + b.bx_0 String + ReadFromStorage (Memory) + Header: b2 String + b1 UInt64 + Expression ((JOIN actions + Change column names to column identifiers)) + Header: default.c.c2_5 String + default.c.c1_3 UInt64 + ReadFromStorage (Memory) + Header: c2 String + c1 UInt64 + Expression ((JOIN actions + (Change column names to column identifiers + (Project names + (Projection + Change column names to column identifiers))))) + Header: d.d1_4 UInt64 + ReadFromStorage (SystemNumbers) + Header: number UInt64 diff --git a/tests/queries/0_stateless/02514_analyzer_drop_join_on.sql b/tests/queries/0_stateless/02514_analyzer_drop_join_on.sql new file mode 100644 index 00000000000..576e68c2289 --- /dev/null +++ b/tests/queries/0_stateless/02514_analyzer_drop_join_on.sql @@ -0,0 +1,46 @@ +DROP TABLE IF EXISTS a; +DROP TABLE IF EXISTS b; +DROP TABLE IF EXISTS c; +DROP TABLE IF EXISTS d; + +CREATE TABLE a (k UInt64, a1 UInt64, a2 String) ENGINE = Memory; +INSERT INTO a VALUES (1, 1, 'a'), (2, 2, 'b'), (3, 3, 'c'); + +CREATE TABLE b (k UInt64, b1 UInt64, b2 String) ENGINE = Memory; +INSERT INTO b VALUES (1, 1, 'a'), (2, 2, 'b'), (3, 3, 'c'); + +CREATE TABLE c (k UInt64, c1 UInt64, c2 String) ENGINE = Memory; +INSERT INTO c VALUES (1, 1, 'a'), (2, 2, 'b'), (3, 3, 'c'); + +CREATE TABLE d (k UInt64, d1 UInt64, d2 String) ENGINE = Memory; +INSERT INTO d VALUES (1, 1, 'a'), (2, 2, 'b'), (3, 3, 'c'); + +SET allow_experimental_analyzer = 1; + +EXPLAIN PLAN header = 1 +SELECT count() +FROM a +JOIN b ON b.b1 = a.a1 +JOIN c ON c.c1 = b.b1 +JOIN d ON d.d1 = c.c1 +GROUP BY a.a2 +; + +EXPLAIN PLAN header = 1 +SELECT a.a2, d.d2 FROM a JOIN b USING (k) JOIN c USING (k) JOIN d USING (k) +; + +EXPLAIN PLAN header = 1 +SELECT b.bx +FROM a +JOIN (SELECT b1, b2 || 'x' AS bx FROM b ) AS b ON b.b1 = a.a1 +JOIN c ON c.c1 = b.b1 +JOIN (SELECT number AS d1 from numbers(10)) AS d ON d.d1 = c.c1 +WHERE c.c2 != '' +ORDER BY a.a2 +; + +DROP TABLE IF EXISTS a; +DROP TABLE IF EXISTS b; +DROP TABLE IF EXISTS c; +DROP TABLE IF EXISTS d; From eed2a295245b5b8229815a8f606609cbee322a81 Mon Sep 17 00:00:00 2001 From: Vladimir C Date: Thu, 29 Dec 2022 11:21:29 +0100 Subject: [PATCH 180/262] Fix style --- src/Planner/CollectColumnIndetifiers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Planner/CollectColumnIndetifiers.cpp b/src/Planner/CollectColumnIndetifiers.cpp index 13a53067a00..50e89658f9d 100644 --- a/src/Planner/CollectColumnIndetifiers.cpp +++ b/src/Planner/CollectColumnIndetifiers.cpp @@ -20,7 +20,7 @@ public: , planner_context(planner_context_) {} - bool 
needChildVisit(VisitQueryTreeNodeType & parent [[maybe_unused]], VisitQueryTreeNodeType & child [[maybe_unused]]) + static bool needChildVisit(VisitQueryTreeNodeType &, VisitQueryTreeNodeType & child) { const auto & node_type = child->getNodeType(); return node_type != QueryTreeNodeType::TABLE From 4dd628cd86577f7f0640fd5b50d0f9040fad1f88 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 11 Jan 2023 12:05:31 +0100 Subject: [PATCH 181/262] Clean trash from changelog for v22.3.16.1190-lts --- docs/changelogs/v22.3.16.1190-lts.md | 159 --------------------------- 1 file changed, 159 deletions(-) diff --git a/docs/changelogs/v22.3.16.1190-lts.md b/docs/changelogs/v22.3.16.1190-lts.md index 1b22d9a88be..a43d34551ca 100644 --- a/docs/changelogs/v22.3.16.1190-lts.md +++ b/docs/changelogs/v22.3.16.1190-lts.md @@ -7,186 +7,27 @@ sidebar_label: 2023 ### ClickHouse release v22.3.16.1190-lts (bb4e0934e5a) FIXME as compared to v22.10.1.1877-stable (98ab5a3c189) -#### Backward Incompatible Change -* JSONExtract family of functions will now attempt to coerce to the request type. [#41502](https://github.com/ClickHouse/ClickHouse/pull/41502) ([Márcio Martins](https://github.com/marcioapm)). -* Backported in [#43484](https://github.com/ClickHouse/ClickHouse/issues/43484): Fixed backward incompatibility in (de)serialization of states of `min`, `max`, `any*`, `argMin`, `argMax` aggregate functions with `String` argument. The incompatibility was introduced in https://github.com/ClickHouse/ClickHouse/pull/41431 and affects 22.9, 22.10 and 22.11 branches (fixed since 22.9.6, 22.10.4 and 22.11.2 correspondingly). Some minor releases of 22.3, 22.7 and 22.8 branches are also affected: 22.3.13...22.3.14 (fixed since 22.3.15), 22.8.6...22.8.9 (fixed since 22.8.10), 22.7.6 and newer (will not be fixed in 22.7, we recommend to upgrade from 22.7.* to 22.8.10 or newer). This release note does not concern users that have never used affected versions. Incompatible versions append extra `'\0'` to strings when reading states of the aggregate functions mentioned above. For example, if an older version saved state of `anyState('foobar')` to `state_column` then incompatible version will print `'foobar\0'` on `anyMerge(state_column)`. Also incompatible versions write states of the aggregate functions without trailing `'\0'`. Newer versions (that have the fix) can correctly read data written by all versions including incompatible versions, except one corner case. If an incompatible version saved a state with a string that actually ends with null character, then newer version will trim trailing `'\0'` when reading state of affected aggregate function. For example, if an incompatible version saved state of `anyState('abrac\0dabra\0')` to `state_column` then newer versions will print `'abrac\0dabra'` on `anyMerge(state_column)`. The issue also affects distributed queries when an incompatible version works in a cluster together with older or newer versions. [#43038](https://github.com/ClickHouse/ClickHouse/pull/43038) ([Raúl Marín](https://github.com/Algunenano)). - -#### New Feature -* - Add function `displayName`, closes [#36770](https://github.com/ClickHouse/ClickHouse/issues/36770). [#37681](https://github.com/ClickHouse/ClickHouse/pull/37681) ([hongbin](https://github.com/xlwh)). -* Add Hudi and DeltaLake table engines, read-only, only for tables on S3. [#41054](https://github.com/ClickHouse/ClickHouse/pull/41054) ([Daniil Rubin](https://github.com/rubin-do)). 
-* Add 4LW command `csnp` for manually creating snapshots. Additionally, `lgif` was added to get Raft information for a specific node (e.g. index of last created snapshot, last committed log index). [#41766](https://github.com/ClickHouse/ClickHouse/pull/41766) ([JackyWoo](https://github.com/JackyWoo)). -* Add function ascii like in spark: https://spark.apache.org/docs/latest/api/sql/#ascii. [#42670](https://github.com/ClickHouse/ClickHouse/pull/42670) ([李扬](https://github.com/taiyang-li)). -* Published function `formatReadableDecimalSize`. [#42774](https://github.com/ClickHouse/ClickHouse/pull/42774) ([Alejandro](https://github.com/alexon1234)). - -#### Performance Improvement -* Currently, the only saturable operators are And and Or, and their code paths are affected by this change. [#42214](https://github.com/ClickHouse/ClickHouse/pull/42214) ([Zhiguo Zhou](https://github.com/ZhiguoZh)). -* `match` function can use the index if it's a condition on string prefix. This closes [#37333](https://github.com/ClickHouse/ClickHouse/issues/37333). [#42458](https://github.com/ClickHouse/ClickHouse/pull/42458) ([clarkcaoliu](https://github.com/Clark0)). -* Support parallel parsing for LineAsString input format. This improves performance just slightly. This closes [#42502](https://github.com/ClickHouse/ClickHouse/issues/42502). [#42780](https://github.com/ClickHouse/ClickHouse/pull/42780) ([Kruglov Pavel](https://github.com/Avogar)). -* Keeper performance improvement: improve commit performance for cases when many different nodes have uncommitted states. This should help with cases when a follower node can't sync fast enough. [#42926](https://github.com/ClickHouse/ClickHouse/pull/42926) ([Antonio Andelic](https://github.com/antonio2368)). - #### Improvement -* Support type `Object` inside other types, e.g. `Array(JSON)`. [#36969](https://github.com/ClickHouse/ClickHouse/pull/36969) ([Anton Popov](https://github.com/CurtizJ)). * Backported in [#42527](https://github.com/ClickHouse/ClickHouse/issues/42527): Fix issue with passing MySQL timeouts for MySQL database engine and MySQL table function. Closes [#34168](https://github.com/ClickHouse/ClickHouse/issues/34168)?notification_referrer_id=NT_kwDOAzsV57MzMDMxNjAzNTY5OjU0MjAzODc5. [#40751](https://github.com/ClickHouse/ClickHouse/pull/40751) ([Kseniia Sumarokova](https://github.com/kssenii)). -* ClickHouse Client and ClickHouse Local will show progress by default even in non-interactive mode. If `/dev/tty` is available, the progress will be rendered directly to the terminal, without writing to stderr. It allows to get progress even if stderr is redirected to a file, and the file will not be polluted by terminal escape sequences. The progress can be disabled by `--progress false`. This closes [#32238](https://github.com/ClickHouse/ClickHouse/issues/32238). [#42003](https://github.com/ClickHouse/ClickHouse/pull/42003) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* 1. Add, subtract and negate operations are now available on Intervals. In case when the types of Intervals are different they will be transformed into the Tuple of those types. 2. A tuple of intervals can be added to or subtracted from a Date/DateTime field. 3. Added parsing of Intervals with different types, for example: `INTERVAL '1 HOUR 1 MINUTE 1 SECOND'`. [#42195](https://github.com/ClickHouse/ClickHouse/pull/42195) ([Nikolay Degterinsky](https://github.com/evillique)). -* - Add `notLike` to key condition atom map, so condition like `NOT LIKE 'prefix%'` can use primary index. 
[#42209](https://github.com/ClickHouse/ClickHouse/pull/42209) ([Duc Canh Le](https://github.com/canhld94)). -* Add support for FixedString input to base64 coding functions. [#42285](https://github.com/ClickHouse/ClickHouse/pull/42285) ([ltrk2](https://github.com/ltrk2)). -* Add columns `bytes_on_disk` and `path` to `system.detached_parts`. Closes [#42264](https://github.com/ClickHouse/ClickHouse/issues/42264). [#42303](https://github.com/ClickHouse/ClickHouse/pull/42303) ([chen](https://github.com/xiedeyantu)). -* Added ** glob support for recursive directory traversal to filesystem and S3. resolves [#36316](https://github.com/ClickHouse/ClickHouse/issues/36316). [#42376](https://github.com/ClickHouse/ClickHouse/pull/42376) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Mask passwords and secret keys both in `system.query_log` and `/var/log/clickhouse-server/*.log` and also in error messages. [#42484](https://github.com/ClickHouse/ClickHouse/pull/42484) ([Vitaly Baranov](https://github.com/vitlibar)). -* Add a new variable call `limit` in query_info, indicating whether this query is a limit-trivial query. If so, we will adjust the approximate total rows for later estimation. Closes [#7071](https://github.com/ClickHouse/ClickHouse/issues/7071). [#42580](https://github.com/ClickHouse/ClickHouse/pull/42580) ([Han Fei](https://github.com/hanfei1991)). -* Implement `ATTACH` of `MergeTree` table for `s3_plain` disk (plus some fixes for `s3_plain`). [#42628](https://github.com/ClickHouse/ClickHouse/pull/42628) ([Azat Khuzhin](https://github.com/azat)). -* Fix no progress indication on INSERT FROM INFILE. Closes [#42548](https://github.com/ClickHouse/ClickHouse/issues/42548). [#42634](https://github.com/ClickHouse/ClickHouse/pull/42634) ([chen](https://github.com/xiedeyantu)). -* Add `min_age_to_force_merge_on_partition_only` setting to optimize old parts for the entire partition only. [#42659](https://github.com/ClickHouse/ClickHouse/pull/42659) ([Antonio Andelic](https://github.com/antonio2368)). -* Throttling algorithm changed to token bucket. [#42665](https://github.com/ClickHouse/ClickHouse/pull/42665) ([Sergei Trifonov](https://github.com/serxa)). -* Added new field allow_readonly in system.table_functions to allow using table functions in readonly mode resolves [#42414](https://github.com/ClickHouse/ClickHouse/issues/42414) Implementation: * Added a new field allow_readonly to table system.table_functions. * Updated to use new field allow_readonly to allow using table functions in readonly mode. Testing: * Added a test for filesystem tests/queries/0_stateless/02473_functions_in_readonly_mode.sh Documentation: * Updated the english documentation for Table Functions. [#42708](https://github.com/ClickHouse/ClickHouse/pull/42708) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). -* Allow to use Date32 arguments for formatDateTime and FROM_UNIXTIME functions. [#42737](https://github.com/ClickHouse/ClickHouse/pull/42737) ([Roman Vasin](https://github.com/rvasin)). -* Backported in [#42839](https://github.com/ClickHouse/ClickHouse/issues/42839): Update tzdata to 2022f. Mexico will no longer observe DST except near the US border: https://www.timeanddate.com/news/time/mexico-abolishes-dst-2022.html. Chihuahua moves to year-round UTC-6 on 2022-10-30. Fiji no longer observes DST. See https://github.com/google/cctz/pull/235 and https://bugs.launchpad.net/ubuntu/+source/tzdata/+bug/1995209. 
[#42796](https://github.com/ClickHouse/ClickHouse/pull/42796) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Update tzdata to 2022f. Mexico will no longer observe DST except near the US border: https://www.timeanddate.com/news/time/mexico-abolishes-dst-2022.html. Chihuahua moves to year-round UTC-6 on 2022-10-30. Fiji no longer observes DST. See https://github.com/google/cctz/pull/235 and https://bugs.launchpad.net/ubuntu/+source/tzdata/+bug/1995209. [#42796](https://github.com/ClickHouse/ClickHouse/pull/42796) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Add `FailedAsyncInsertQuery` event metric for async inserts. [#42814](https://github.com/ClickHouse/ClickHouse/pull/42814) ([Krzysztof Góralski](https://github.com/kgoralski)). -* Increase the size of upload part exponentially for backup to S3. [#42833](https://github.com/ClickHouse/ClickHouse/pull/42833) ([Vitaly Baranov](https://github.com/vitlibar)). #### Bug Fix * Backported in [#43829](https://github.com/ClickHouse/ClickHouse/issues/43829): Updated normaliser to clone the alias ast. resolves [#42452](https://github.com/ClickHouse/ClickHouse/issues/42452) Implementation: * Updated QueryNormalizer to clone alias ast, when its replaced. Previously just assigning the same leads to exception in LogicalExpressinsOptimizer as it would be the same parent being inserted again. * This bug is not seen with new analyser (allow_experimental_analyzer), so no changes for it. I added a test for the same. [#42827](https://github.com/ClickHouse/ClickHouse/pull/42827) ([SmitaRKulkarni](https://github.com/SmitaRKulkarni)). #### Build/Testing/Packaging Improvement -* Run SQLancer for each pull request and commit to master. [SQLancer](https://github.com/sqlancer/sqlancer) is an OpenSource fuzzer that focuses on automatic detection of logical bugs. [#42397](https://github.com/ClickHouse/ClickHouse/pull/42397) ([Ilya Yatsishin](https://github.com/qoega)). -* Update to latest zlib-ng. [#42463](https://github.com/ClickHouse/ClickHouse/pull/42463) ([Boris Kuschel](https://github.com/bkuschel)). -* use llvm `l64.lld` in macOS suppress ld warnings, close [#42282](https://github.com/ClickHouse/ClickHouse/issues/42282). [#42470](https://github.com/ClickHouse/ClickHouse/pull/42470) ([Lloyd-Pottiger](https://github.com/Lloyd-Pottiger)). -* * Improve bugfix validation check: fix bug with skipping the check, port separate status in CI, run after check labels and style check. Close [#40349](https://github.com/ClickHouse/ClickHouse/issues/40349). [#42702](https://github.com/ClickHouse/ClickHouse/pull/42702) ([Vladimir C](https://github.com/vdimir)). -* Backported in [#43050](https://github.com/ClickHouse/ClickHouse/issues/43050): Wait for all files are in sync before archiving them in integration tests. [#42891](https://github.com/ClickHouse/ClickHouse/pull/42891) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Wait for all files are in sync before archiving them in integration tests. [#42891](https://github.com/ClickHouse/ClickHouse/pull/42891) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Use https://github.com/matus-chochlik/ctcache for clang-tidy results caching. [#42913](https://github.com/ClickHouse/ClickHouse/pull/42913) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Backported in [#42963](https://github.com/ClickHouse/ClickHouse/issues/42963): Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. The PR fixes it and won't replace the user's files from packages. 
[#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Before the fix, the user-defined config was preserved by RPM in `$file.rpmsave`. The PR fixes it and won't replace the user's files from packages. [#42936](https://github.com/ClickHouse/ClickHouse/pull/42936) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Backported in [#43039](https://github.com/ClickHouse/ClickHouse/issues/43039): Add a CI step to mark commits as ready for release; soft-forbid launching a release script from branches but master. [#43017](https://github.com/ClickHouse/ClickHouse/pull/43017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Add a CI step to mark commits as ready for release; soft-forbid launching a release script from branches but master. [#43017](https://github.com/ClickHouse/ClickHouse/pull/43017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Backported in [#44109](https://github.com/ClickHouse/ClickHouse/issues/44109): Bring sha512 sums back to the building step. [#44017](https://github.com/ClickHouse/ClickHouse/pull/44017) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Backported in [#44431](https://github.com/ClickHouse/ClickHouse/issues/44431): Kill stress tests after 2.5h in case of hanging process. [#44214](https://github.com/ClickHouse/ClickHouse/pull/44214) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). * Backported in [#44557](https://github.com/ClickHouse/ClickHouse/issues/44557): Retry the integration tests on compressing errors. [#44529](https://github.com/ClickHouse/ClickHouse/pull/44529) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). #### Bug Fix (user-visible misbehavior in official stable or prestable release) -* Fix schema inference in s3Cluster and improve in hdfsCluster. [#41979](https://github.com/ClickHouse/ClickHouse/pull/41979) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix retries while reading from http table engines / table function. (retrtiable errors could be retries more times than needed, non-retrialble errors resulted in failed assertion in code). [#42224](https://github.com/ClickHouse/ClickHouse/pull/42224) ([Kseniia Sumarokova](https://github.com/kssenii)). -* A segmentation fault related to DNS & c-ares has been reported. The below error ocurred in multiple threads: ``` 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008088 [ 356 ] {} BaseDaemon: ######################################## 2022-09-28 15:41:19.008,"2022.09.28 15:41:19.008147 [ 356 ] {} BaseDaemon: (version 22.8.5.29 (official build), build id: 92504ACA0B8E2267) (from thread 353) (no query) Received signal Segmentation fault (11)" 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008196 [ 356 ] {} BaseDaemon: Address: 0xf Access: write. Address not mapped to object. 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008216 [ 356 ] {} BaseDaemon: Stack trace: 0x188f8212 0x1626851b 0x1626a69e 0x16269b3f 0x16267eab 0x13cf8284 0x13d24afc 0x13c5217e 0x14ec2495 0x15ba440f 0x15b9d13b 0x15bb2699 0x1891ccb3 0x1891e00d 0x18ae0769 0x18ade022 0x7f76aa985609 0x7f76aa8aa133 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008274 [ 356 ] {} BaseDaemon: 2. Poco::Net::IPAddress::family() const @ 0x188f8212 in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008297 [ 356 ] {} BaseDaemon: 3. ? @ 0x1626851b in /usr/bin/clickhouse 2022-09-28 15:41:19.008,2022.09.28 15:41:19.008309 [ 356 ] {} BaseDaemon: 4. ? @ 0x1626a69e in /usr/bin/clickhouse ```. 
[#42234](https://github.com/ClickHouse/ClickHouse/pull/42234) ([Arthur Passos](https://github.com/arthurpassos)). -* Fix `LOGICAL_ERROR` `Arguments of 'plus' have incorrect data types` which may happen in PK analysis (monotonicity check). Fix invalid PK analysis for monotonic binary functions with first constant argument. [#42410](https://github.com/ClickHouse/ClickHouse/pull/42410) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix incorrect key analysis when key types cannot be inside Nullable. This fixes [#42456](https://github.com/ClickHouse/ClickHouse/issues/42456). [#42469](https://github.com/ClickHouse/ClickHouse/pull/42469) ([Amos Bird](https://github.com/amosbird)). -* Fix typo in setting name that led to bad usage of schema inference cache while using setting `input_format_csv_use_best_effort_in_schema_inference`. Closes [#41735](https://github.com/ClickHouse/ClickHouse/issues/41735). [#42536](https://github.com/ClickHouse/ClickHouse/pull/42536) ([Kruglov Pavel](https://github.com/Avogar)). -* Fix create Set with wrong header when data type is LowCardinality. Closes [#42460](https://github.com/ClickHouse/ClickHouse/issues/42460). [#42579](https://github.com/ClickHouse/ClickHouse/pull/42579) ([flynn](https://github.com/ucasfl)). -* `(U)Int128` and `(U)Int256` values are correctly checked in `PREWHERE`. [#42605](https://github.com/ClickHouse/ClickHouse/pull/42605) ([Antonio Andelic](https://github.com/antonio2368)). -* Fix a bug in ParserFunction that could have led to a segmentation fault. [#42724](https://github.com/ClickHouse/ClickHouse/pull/42724) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix truncate table does not hold lock correctly. [#42728](https://github.com/ClickHouse/ClickHouse/pull/42728) ([flynn](https://github.com/ucasfl)). -* Fix possible SIGSEGV for web disks when file does not exists (or `OPTIMIZE TABLE FINAL`, that also can got the same error eventually). [#42767](https://github.com/ClickHouse/ClickHouse/pull/42767) ([Azat Khuzhin](https://github.com/azat)). -* Fix `auth_type` mapping in `system.session_log`, by including `SSL_CERTIFICATE` for the enum values. [#42782](https://github.com/ClickHouse/ClickHouse/pull/42782) ([Miel Donkers](https://github.com/mdonkers)). -* Fix stack-use-after-return under ASAN build in ParserCreateUserQuery. [#42804](https://github.com/ClickHouse/ClickHouse/pull/42804) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix lowerUTF8()/upperUTF8() in case of symbol was in between 16-byte boundary (very frequent case of you have strings > 16 bytes long). [#42812](https://github.com/ClickHouse/ClickHouse/pull/42812) ([Azat Khuzhin](https://github.com/azat)). -* Additional bound check was added to lz4 decompression routine to fix misbehaviour in case of malformed input. [#42868](https://github.com/ClickHouse/ClickHouse/pull/42868) ([Nikita Taranov](https://github.com/nickitat)). -* Fix rare possible hung on query cancellation. [#42874](https://github.com/ClickHouse/ClickHouse/pull/42874) ([Azat Khuzhin](https://github.com/azat)). -* * Fix incorrect saved_block_sample with multiple disjuncts in hash join, close [#42832](https://github.com/ClickHouse/ClickHouse/issues/42832). [#42876](https://github.com/ClickHouse/ClickHouse/pull/42876) ([Vladimir C](https://github.com/vdimir)). -* A null pointer will be generated when select if as from ‘three table join’ , For example, the SQL:. [#42883](https://github.com/ClickHouse/ClickHouse/pull/42883) ([zzsmdfj](https://github.com/zzsmdfj)). 
-* Fix memory sanitizer report in ClusterDiscovery, close [#42763](https://github.com/ClickHouse/ClickHouse/issues/42763). [#42905](https://github.com/ClickHouse/ClickHouse/pull/42905) ([Vladimir C](https://github.com/vdimir)). -* Fix rare NOT_FOUND_COLUMN_IN_BLOCK error when projection is possible to use but there is no projection available. This fixes [#42771](https://github.com/ClickHouse/ClickHouse/issues/42771) . The bug was introduced in https://github.com/ClickHouse/ClickHouse/pull/25563. [#42938](https://github.com/ClickHouse/ClickHouse/pull/42938) ([Amos Bird](https://github.com/amosbird)). -* Fix ATTACH TABLE in PostgreSQL database engine if the table contains DATETIME data type. Closes [#42817](https://github.com/ClickHouse/ClickHouse/issues/42817). [#42960](https://github.com/ClickHouse/ClickHouse/pull/42960) ([Kseniia Sumarokova](https://github.com/kssenii)). -* Fix lambda parsing. Closes [#41848](https://github.com/ClickHouse/ClickHouse/issues/41848). [#42979](https://github.com/ClickHouse/ClickHouse/pull/42979) ([Nikolay Degterinsky](https://github.com/evillique)). * Backported in [#43512](https://github.com/ClickHouse/ClickHouse/issues/43512): - Fix several buffer over-reads. [#43159](https://github.com/ClickHouse/ClickHouse/pull/43159) ([Raúl Marín](https://github.com/Algunenano)). * Backported in [#43750](https://github.com/ClickHouse/ClickHouse/issues/43750): An issue with the following exception has been reported while trying to read a Parquet file from S3 into ClickHouse:. [#43297](https://github.com/ClickHouse/ClickHouse/pull/43297) ([Arthur Passos](https://github.com/arthurpassos)). -* Backported in [#43427](https://github.com/ClickHouse/ClickHouse/issues/43427): Fixed queries with `SAMPLE BY` with prewhere optimization on tables using `Merge` engine. [#43315](https://github.com/ClickHouse/ClickHouse/pull/43315) ([Antonio Andelic](https://github.com/antonio2368)). * Backported in [#43616](https://github.com/ClickHouse/ClickHouse/issues/43616): Fix sumMap() for Nullable(Decimal()). [#43414](https://github.com/ClickHouse/ClickHouse/pull/43414) ([Azat Khuzhin](https://github.com/azat)). -* Backported in [#43720](https://github.com/ClickHouse/ClickHouse/issues/43720): Fixed primary key analysis with conditions involving `toString(enum)`. [#43596](https://github.com/ClickHouse/ClickHouse/pull/43596) ([Nikita Taranov](https://github.com/nickitat)). * Backported in [#43885](https://github.com/ClickHouse/ClickHouse/issues/43885): Fixed `ALTER ... RESET SETTING` with `ON CLUSTER`. It could be applied to one replica only. Fixes [#43843](https://github.com/ClickHouse/ClickHouse/issues/43843). [#43848](https://github.com/ClickHouse/ClickHouse/pull/43848) ([Elena Torró](https://github.com/elenatorro)). * Backported in [#44179](https://github.com/ClickHouse/ClickHouse/issues/44179): Fix undefined behavior in the `quantiles` function, which might lead to uninitialized memory. Found by fuzzer. This closes [#44066](https://github.com/ClickHouse/ClickHouse/issues/44066). [#44067](https://github.com/ClickHouse/ClickHouse/pull/44067) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * Backported in [#44283](https://github.com/ClickHouse/ClickHouse/issues/44283): Prevent `ReadonlyReplica` metric from having negative values. [#44220](https://github.com/ClickHouse/ClickHouse/pull/44220) ([Antonio Andelic](https://github.com/antonio2368)). -#### Build Improvement - -* ... Add support for format ipv6 on s390x. 
[#42412](https://github.com/ClickHouse/ClickHouse/pull/42412) ([Suzy Wang](https://github.com/SuzyWangIBMer)). - #### NO CL ENTRY -* NO CL ENTRY: 'Revert "Sonar Cloud Workflow"'. [#42725](https://github.com/ClickHouse/ClickHouse/pull/42725) ([Alexey Milovidov](https://github.com/alexey-milovidov)). * NO CL ENTRY: 'Fix multipart upload for large S3 object, backport to 22.3'. [#44217](https://github.com/ClickHouse/ClickHouse/pull/44217) ([ianton-ru](https://github.com/ianton-ru)). -#### NOT FOR CHANGELOG / INSIGNIFICANT - -* Build with libcxx(abi) 15 [#42513](https://github.com/ClickHouse/ClickHouse/pull/42513) ([Robert Schulze](https://github.com/rschu1ze)). -* Sonar Cloud Workflow [#42534](https://github.com/ClickHouse/ClickHouse/pull/42534) ([Julio Jimenez](https://github.com/juliojimenez)). -* Invalid type in where for Merge table (logical error) [#42576](https://github.com/ClickHouse/ClickHouse/pull/42576) ([Nikolai Kochetov](https://github.com/KochetovNicolai)). -* Fix frequent memory drift message and clarify things in comments [#42582](https://github.com/ClickHouse/ClickHouse/pull/42582) ([Azat Khuzhin](https://github.com/azat)). -* Try to save `IDataPartStorage` interface [#42618](https://github.com/ClickHouse/ClickHouse/pull/42618) ([Anton Popov](https://github.com/CurtizJ)). -* Analyzer change setting into allow_experimental_analyzer [#42649](https://github.com/ClickHouse/ClickHouse/pull/42649) ([Maksim Kita](https://github.com/kitaisreal)). -* Analyzer IQueryTreeNode remove getName method [#42651](https://github.com/ClickHouse/ClickHouse/pull/42651) ([Maksim Kita](https://github.com/kitaisreal)). -* Minor fix iotest_nonblock build [#42658](https://github.com/ClickHouse/ClickHouse/pull/42658) ([Jordi Villar](https://github.com/jrdi)). -* Add tests and doc for some url-related functions [#42664](https://github.com/ClickHouse/ClickHouse/pull/42664) ([Vladimir C](https://github.com/vdimir)). -* Update version_date.tsv and changelogs after v22.10.1.1875-stable [#42676](https://github.com/ClickHouse/ClickHouse/pull/42676) ([robot-clickhouse](https://github.com/robot-clickhouse)). -* Fix error handling in clickhouse_helper.py [#42678](https://github.com/ClickHouse/ClickHouse/pull/42678) ([Ilya Yatsishin](https://github.com/qoega)). -* Fix execution of version_helper.py to use git tweaks [#42679](https://github.com/ClickHouse/ClickHouse/pull/42679) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* MergeTree indexes use RPNBuilderTree [#42681](https://github.com/ClickHouse/ClickHouse/pull/42681) ([Maksim Kita](https://github.com/kitaisreal)). -* Always run `BuilderReport` and `BuilderSpecialReport` in all CI types [#42684](https://github.com/ClickHouse/ClickHouse/pull/42684) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Always run `BuilderReport` and `BuilderSpecialReport` in all CI types [#42684](https://github.com/ClickHouse/ClickHouse/pull/42684) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Update version after release [#42699](https://github.com/ClickHouse/ClickHouse/pull/42699) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Update version_date.tsv and changelogs after v22.10.1.1877-stable [#42700](https://github.com/ClickHouse/ClickHouse/pull/42700) ([robot-clickhouse](https://github.com/robot-clickhouse)). -* OrderByLimitByDuplicateEliminationPass improve performance [#42704](https://github.com/ClickHouse/ClickHouse/pull/42704) ([Maksim Kita](https://github.com/kitaisreal)). 
-* Analyzer improve subqueries representation [#42705](https://github.com/ClickHouse/ClickHouse/pull/42705) ([Maksim Kita](https://github.com/kitaisreal)). -* Update version_date.tsv and changelogs after v22.9.4.32-stable [#42712](https://github.com/ClickHouse/ClickHouse/pull/42712) ([robot-clickhouse](https://github.com/robot-clickhouse)). -* Update version_date.tsv and changelogs after v22.8.7.34-lts [#42713](https://github.com/ClickHouse/ClickHouse/pull/42713) ([robot-clickhouse](https://github.com/robot-clickhouse)). -* Update version_date.tsv and changelogs after v22.7.7.24-stable [#42714](https://github.com/ClickHouse/ClickHouse/pull/42714) ([robot-clickhouse](https://github.com/robot-clickhouse)). -* Move SonarCloud Job to nightly [#42718](https://github.com/ClickHouse/ClickHouse/pull/42718) ([Julio Jimenez](https://github.com/juliojimenez)). -* Update version_date.tsv and changelogs after v22.8.8.3-lts [#42738](https://github.com/ClickHouse/ClickHouse/pull/42738) ([robot-clickhouse](https://github.com/robot-clickhouse)). -* Minor fix implicit cast CaresPTRResolver [#42747](https://github.com/ClickHouse/ClickHouse/pull/42747) ([Jordi Villar](https://github.com/jrdi)). -* Fix build on master [#42752](https://github.com/ClickHouse/ClickHouse/pull/42752) ([Igor Nikonov](https://github.com/devcrafter)). -* Update version_date.tsv and changelogs after v22.3.14.18-lts [#42759](https://github.com/ClickHouse/ClickHouse/pull/42759) ([robot-clickhouse](https://github.com/robot-clickhouse)). -* Fix anchor links [#42760](https://github.com/ClickHouse/ClickHouse/pull/42760) ([Sergei Trifonov](https://github.com/serxa)). -* Update version_date.tsv and changelogs after v22.3.14.23-lts [#42764](https://github.com/ClickHouse/ClickHouse/pull/42764) ([robot-clickhouse](https://github.com/robot-clickhouse)). -* Update README.md [#42783](https://github.com/ClickHouse/ClickHouse/pull/42783) ([Yuko Takagi](https://github.com/yukotakagi)). -* Slightly better code with projections [#42794](https://github.com/ClickHouse/ClickHouse/pull/42794) ([Anton Popov](https://github.com/CurtizJ)). -* Fix some races in MergeTree [#42805](https://github.com/ClickHouse/ClickHouse/pull/42805) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix typo in comments [#42809](https://github.com/ClickHouse/ClickHouse/pull/42809) ([Gabriel](https://github.com/Gabriel39)). -* Fix compilation of LLVM with cmake cache [#42816](https://github.com/ClickHouse/ClickHouse/pull/42816) ([Azat Khuzhin](https://github.com/azat)). -* Fix link in docs [#42821](https://github.com/ClickHouse/ClickHouse/pull/42821) ([Sergei Trifonov](https://github.com/serxa)). -* Link to proper place in docs [#42822](https://github.com/ClickHouse/ClickHouse/pull/42822) ([Sergei Trifonov](https://github.com/serxa)). -* Fix argument type check in AggregateFunctionAnalysisOfVariance [#42823](https://github.com/ClickHouse/ClickHouse/pull/42823) ([Vladimir C](https://github.com/vdimir)). -* Tests/lambda analyzer [#42824](https://github.com/ClickHouse/ClickHouse/pull/42824) ([Denny Crane](https://github.com/den-crane)). -* Fix Missing Quotes - Sonar Nightly [#42831](https://github.com/ClickHouse/ClickHouse/pull/42831) ([Julio Jimenez](https://github.com/juliojimenez)). -* Add exclusions from the Snyk scan [#42834](https://github.com/ClickHouse/ClickHouse/pull/42834) ([Alexey Milovidov](https://github.com/alexey-milovidov)). 
-* Fix Missing Env Vars - Sonar Nightly [#42843](https://github.com/ClickHouse/ClickHouse/pull/42843) ([Julio Jimenez](https://github.com/juliojimenez)). -* Fix typo [#42855](https://github.com/ClickHouse/ClickHouse/pull/42855) ([GoGoWen](https://github.com/GoGoWen)). -* Add timezone to 02458_datediff_date32 [#42857](https://github.com/ClickHouse/ClickHouse/pull/42857) ([Vladimir C](https://github.com/vdimir)). -* Adjust cancel and rerun workflow names to the actual [#42862](https://github.com/ClickHouse/ClickHouse/pull/42862) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Analyzer subquery in JOIN TREE with aggregation [#42865](https://github.com/ClickHouse/ClickHouse/pull/42865) ([Maksim Kita](https://github.com/kitaisreal)). -* Fix getauxval for sanitizer builds [#42866](https://github.com/ClickHouse/ClickHouse/pull/42866) ([Amos Bird](https://github.com/amosbird)). -* Update version_date.tsv and changelogs after v22.10.2.11-stable [#42871](https://github.com/ClickHouse/ClickHouse/pull/42871) ([robot-clickhouse](https://github.com/robot-clickhouse)). -* Validate Query Tree in debug [#42879](https://github.com/ClickHouse/ClickHouse/pull/42879) ([Dmitry Novik](https://github.com/novikd)). -* changed type name for s3 plain storage [#42890](https://github.com/ClickHouse/ClickHouse/pull/42890) ([Aleksandr](https://github.com/AVMusorin)). -* Cleanup implementation of regexpReplace(All|One) [#42907](https://github.com/ClickHouse/ClickHouse/pull/42907) ([Robert Schulze](https://github.com/rschu1ze)). -* Do not show status for Bugfix validate check in non bugfix PRs [#42932](https://github.com/ClickHouse/ClickHouse/pull/42932) ([Vladimir C](https://github.com/vdimir)). -* fix(typo): Passible -> Possible [#42933](https://github.com/ClickHouse/ClickHouse/pull/42933) ([Yakko Majuri](https://github.com/yakkomajuri)). -* Pin the cryptography version to not break lambdas [#42934](https://github.com/ClickHouse/ClickHouse/pull/42934) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Fix: bad cast from type DB::ColumnLowCardinality to DB::ColumnString [#42937](https://github.com/ClickHouse/ClickHouse/pull/42937) ([Igor Nikonov](https://github.com/devcrafter)). -* Attach thread pool for loading parts to the query [#42947](https://github.com/ClickHouse/ClickHouse/pull/42947) ([Azat Khuzhin](https://github.com/azat)). -* Fix macOS M1 builds due to sprintf deprecation [#42962](https://github.com/ClickHouse/ClickHouse/pull/42962) ([Jordi Villar](https://github.com/jrdi)). -* Less use of CH-specific bit_cast() [#42968](https://github.com/ClickHouse/ClickHouse/pull/42968) ([Robert Schulze](https://github.com/rschu1ze)). -* Remove some utils [#42972](https://github.com/ClickHouse/ClickHouse/pull/42972) ([Alexey Milovidov](https://github.com/alexey-milovidov)). -* Fix a bug in CAST function parser [#42980](https://github.com/ClickHouse/ClickHouse/pull/42980) ([Nikolay Degterinsky](https://github.com/evillique)). -* Fix old bug to remove `refs/head` from ref name [#42981](https://github.com/ClickHouse/ClickHouse/pull/42981) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Add debug information to nightly builds [#42997](https://github.com/ClickHouse/ClickHouse/pull/42997) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Add `on: workflow_call` to debug CI [#43000](https://github.com/ClickHouse/ClickHouse/pull/43000) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
-* Simple fixes for restart replica description [#43004](https://github.com/ClickHouse/ClickHouse/pull/43004) ([Igor Nikonov](https://github.com/devcrafter)). -* Cleanup match code [#43006](https://github.com/ClickHouse/ClickHouse/pull/43006) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix TSan errors (correctly ignore _exit interception) [#43009](https://github.com/ClickHouse/ClickHouse/pull/43009) ([Azat Khuzhin](https://github.com/azat)). -* fix bandwidth throttlers initialization order [#43015](https://github.com/ClickHouse/ClickHouse/pull/43015) ([Sergei Trifonov](https://github.com/serxa)). -* Add test for issue [#42520](https://github.com/ClickHouse/ClickHouse/issues/42520) [#43027](https://github.com/ClickHouse/ClickHouse/pull/43027) ([Robert Schulze](https://github.com/rschu1ze)). -* Fix msan warning [#43065](https://github.com/ClickHouse/ClickHouse/pull/43065) ([Raúl Marín](https://github.com/Algunenano)). -* Update SECURITY.md on new stable tags [#43365](https://github.com/ClickHouse/ClickHouse/pull/43365) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Use all parameters with prefixes from ssm [#43467](https://github.com/ClickHouse/ClickHouse/pull/43467) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Temporarily disable `test_hive_query` [#43542](https://github.com/ClickHouse/ClickHouse/pull/43542) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Do not checkout submodules recursively [#43637](https://github.com/ClickHouse/ClickHouse/pull/43637) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Use docker images cache from merged PRs in master and release branches [#43664](https://github.com/ClickHouse/ClickHouse/pull/43664) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Yet another fix for AggregateFunctionMinMaxAny [#43778](https://github.com/ClickHouse/ClickHouse/pull/43778) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Fix tags workflow [#43942](https://github.com/ClickHouse/ClickHouse/pull/43942) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Assign release PRs [#44055](https://github.com/ClickHouse/ClickHouse/pull/44055) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Fix another bug in AggregateFunctionMinMaxAny [#44091](https://github.com/ClickHouse/ClickHouse/pull/44091) ([Alexander Tokmakov](https://github.com/tavplubix)). -* Bump libdivide (to gain some new optimizations) [#44132](https://github.com/ClickHouse/ClickHouse/pull/44132) ([Azat Khuzhin](https://github.com/azat)). -* Add check for submodules sanity [#44386](https://github.com/ClickHouse/ClickHouse/pull/44386) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). -* Implement a custom central checkout action [#44399](https://github.com/ClickHouse/ClickHouse/pull/44399) ([Mikhail f. Shiryaev](https://github.com/Felixoid)). 
- From b156209ed6b84b874a31621699e6a61159da2ade Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 11 Jan 2023 14:17:44 +0300 Subject: [PATCH 182/262] Update test.py --- tests/integration/test_storage_rabbitmq/test.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_storage_rabbitmq/test.py b/tests/integration/test_storage_rabbitmq/test.py index 63b8d1215aa..43c964d9d93 100644 --- a/tests/integration/test_storage_rabbitmq/test.py +++ b/tests/integration/test_storage_rabbitmq/test.py @@ -1,3 +1,10 @@ +import pytest + +# FIXME This test is too flaky +# https://github.com/ClickHouse/ClickHouse/issues/45160 + +pytestmark = pytest.mark.skip + import json import os.path as p import random @@ -9,7 +16,6 @@ from random import randrange import math import pika -import pytest from google.protobuf.internal.encoder import _VarintBytes from helpers.client import QueryRuntimeException from helpers.cluster import ClickHouseCluster, check_rabbitmq_is_available From 07a3967d6bc8b69778a9610a2e145514a9e25316 Mon Sep 17 00:00:00 2001 From: Nikolai Kochetov Date: Wed, 11 Jan 2023 11:20:02 +0000 Subject: [PATCH 183/262] Disable test_ttl_move_memory_usage as too flaky. --- .../test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py b/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py index a1e10cde031..ebdecb2f16c 100644 --- a/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py +++ b/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py @@ -22,6 +22,9 @@ def started_single_node_cluster(): def test_move_and_s3_memory_usage(started_single_node_cluster): + + pytest.skip("Test is too flaky. Disable it for now.") + if small_node.is_built_with_sanitizer() or small_node.is_debug_build(): pytest.skip("Disabled for debug and sanitizers. 
Too slow.") From 1b6e036d46d3f245d75f4db65c6a1f30c4ccf733 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 11 Jan 2023 11:54:28 +0000 Subject: [PATCH 184/262] Review fixes --- ...fiers.cpp => CollectColumnIdentifiers.cpp} | 20 +++++++++---------- ...detifiers.h => CollectColumnIdentifiers.h} | 15 +++++++------- src/Planner/Planner.cpp | 2 +- src/Planner/PlannerJoinTree.cpp | 2 +- src/Planner/PlannerJoinTree.h | 2 -- src/Planner/TableExpressionData.h | 1 + .../02514_analyzer_drop_join_on.reference | 15 ++++++++++++++ .../02514_analyzer_drop_join_on.sql | 17 +++++++--------- 8 files changed, 43 insertions(+), 31 deletions(-) rename src/Planner/{CollectColumnIndetifiers.cpp => CollectColumnIdentifiers.cpp} (71%) rename src/Planner/{CollectColumnIndetifiers.h => CollectColumnIdentifiers.h} (57%) diff --git a/src/Planner/CollectColumnIndetifiers.cpp b/src/Planner/CollectColumnIdentifiers.cpp similarity index 71% rename from src/Planner/CollectColumnIndetifiers.cpp rename to src/Planner/CollectColumnIdentifiers.cpp index 50e89658f9d..f7cdf196ad1 100644 --- a/src/Planner/CollectColumnIndetifiers.cpp +++ b/src/Planner/CollectColumnIdentifiers.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -23,12 +23,12 @@ public: static bool needChildVisit(VisitQueryTreeNodeType &, VisitQueryTreeNodeType & child) { const auto & node_type = child->getNodeType(); - return node_type != QueryTreeNodeType::TABLE - && node_type != QueryTreeNodeType::TABLE_FUNCTION - && node_type != QueryTreeNodeType::QUERY - && node_type != QueryTreeNodeType::UNION - && node_type != QueryTreeNodeType::JOIN - && node_type != QueryTreeNodeType::ARRAY_JOIN; + return node_type != QueryTreeNodeType::TABLE + && node_type != QueryTreeNodeType::TABLE_FUNCTION + && node_type != QueryTreeNodeType::QUERY + && node_type != QueryTreeNodeType::UNION + && node_type != QueryTreeNodeType::JOIN + && node_type != QueryTreeNodeType::ARRAY_JOIN; } void visitImpl(const QueryTreeNodePtr & node) @@ -36,11 +36,11 @@ public: if (node->getNodeType() != QueryTreeNodeType::COLUMN) return; - const auto * column_ident = planner_context->getColumnNodeIdentifierOrNull(node); - if (!column_ident) + const auto * column_identifier = planner_context->getColumnNodeIdentifierOrNull(node); + if (!column_identifier) return; - used_identifiers.insert(*column_ident); + used_identifiers.insert(*column_identifier); } ColumnIdentifierSet & used_identifiers; diff --git a/src/Planner/CollectColumnIndetifiers.h b/src/Planner/CollectColumnIdentifiers.h similarity index 57% rename from src/Planner/CollectColumnIndetifiers.h rename to src/Planner/CollectColumnIdentifiers.h index 8c84908ee6b..b0cad10ba4f 100644 --- a/src/Planner/CollectColumnIndetifiers.h +++ b/src/Planner/CollectColumnIdentifiers.h @@ -7,13 +7,14 @@ namespace DB { -using ColumnIdentifierSet = std::unordered_set; - -/// Collect all top level column identifiers from query tree node. -/// Top level column identifiers are in the SELECT list or GROUP BY/ORDER BY/WHERE/HAVING clause, but not in child nodes of join tree. -/// For example, in the following query: -/// SELECT sum(b) FROM (SELECT x AS a, y AS b FROM t) AS t1 JOIN t2 ON t1.a = t2.key GROUP BY t2.y -/// The top level column identifiers are: `t1.b`, `t2.y` +/** Collect all top level column identifiers from query tree node. + * Top level column identifiers are in the SELECT list or GROUP BY/ORDER BY/WHERE/HAVING clause, but not in child nodes of join tree. 
+ * For example, in the following query: + * SELECT sum(b) FROM (SELECT x AS a, y AS b FROM t) AS t1 JOIN t2 ON t1.a = t2.key GROUP BY t2.y + * The top level column identifiers are: `t1.b`, `t2.y` + * + * There is precondition that table expression data is collected in planner context. + */ ColumnIdentifierSet collectTopLevelColumnIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context); void collectTopLevelColumnIdentifiers(const QueryTreeNodePtr & node, const PlannerContextPtr & planner_context, ColumnIdentifierSet & out); diff --git a/src/Planner/Planner.cpp b/src/Planner/Planner.cpp index ea14d29bd6f..b865e137038 100644 --- a/src/Planner/Planner.cpp +++ b/src/Planner/Planner.cpp @@ -64,7 +64,7 @@ #include #include #include -#include +#include namespace DB { diff --git a/src/Planner/PlannerJoinTree.cpp b/src/Planner/PlannerJoinTree.cpp index 37c542d0494..69bf7dd79bb 100644 --- a/src/Planner/PlannerJoinTree.cpp +++ b/src/Planner/PlannerJoinTree.cpp @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/Planner/PlannerJoinTree.h b/src/Planner/PlannerJoinTree.h index 9d83bf62fc1..742d6853267 100644 --- a/src/Planner/PlannerJoinTree.h +++ b/src/Planner/PlannerJoinTree.h @@ -11,8 +11,6 @@ namespace DB { -using ColumnIdentifierSet = std::unordered_set; - /// Build query plan for query JOIN TREE node QueryPlan buildQueryPlanForJoinTreeNode(QueryTreeNodePtr join_tree_node, SelectQueryInfo & select_query_info, diff --git a/src/Planner/TableExpressionData.h b/src/Planner/TableExpressionData.h index e737788cebf..6b4a9b4748d 100644 --- a/src/Planner/TableExpressionData.h +++ b/src/Planner/TableExpressionData.h @@ -13,6 +13,7 @@ namespace ErrorCodes using ColumnIdentifier = std::string; using ColumnIdentifiers = std::vector; +using ColumnIdentifierSet = std::unordered_set; /** Table expression data is created for each table expression that take part in query. 
* Table expression data has information about columns that participate in query, their name to identifier mapping, diff --git a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference index 100b2fc42bf..abd49790ced 100644 --- a/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference +++ b/tests/queries/0_stateless/02514_analyzer_drop_join_on.reference @@ -1,3 +1,8 @@ +-- { echoOn } + +EXPLAIN PLAN header = 1 +SELECT count() FROM a JOIN b ON b.b1 = a.a1 JOIN c ON c.c1 = b.b1 JOIN d ON d.d1 = c.c1 GROUP BY a.a2 +; Expression ((Project names + Projection)) Header: count() UInt64 Aggregating @@ -41,6 +46,9 @@ Header: count() UInt64 Header: default.d.d1_3 UInt64 ReadFromStorage (Memory) Header: d1 UInt64 +EXPLAIN PLAN header = 1 +SELECT a.a2, d.d2 FROM a JOIN b USING (k) JOIN c USING (k) JOIN d USING (k) +; Expression ((Project names + (Projection + DROP unused columns after JOIN))) Header: a2 String d2 String @@ -83,6 +91,13 @@ Header: a2 String ReadFromStorage (Memory) Header: k UInt64 d2 String +EXPLAIN PLAN header = 1 +SELECT b.bx FROM a +JOIN (SELECT b1, b2 || 'x' AS bx FROM b ) AS b ON b.b1 = a.a1 +JOIN c ON c.c1 = b.b1 +JOIN (SELECT number AS d1 from numbers(10)) AS d ON d.d1 = c.c1 +WHERE c.c2 != '' ORDER BY a.a2 +; Expression (Project names) Header: bx String Sorting (Sorting for ORDER BY) diff --git a/tests/queries/0_stateless/02514_analyzer_drop_join_on.sql b/tests/queries/0_stateless/02514_analyzer_drop_join_on.sql index 576e68c2289..2406be13aa8 100644 --- a/tests/queries/0_stateless/02514_analyzer_drop_join_on.sql +++ b/tests/queries/0_stateless/02514_analyzer_drop_join_on.sql @@ -17,13 +17,10 @@ INSERT INTO d VALUES (1, 1, 'a'), (2, 2, 'b'), (3, 3, 'c'); SET allow_experimental_analyzer = 1; +-- { echoOn } + EXPLAIN PLAN header = 1 -SELECT count() -FROM a -JOIN b ON b.b1 = a.a1 -JOIN c ON c.c1 = b.b1 -JOIN d ON d.d1 = c.c1 -GROUP BY a.a2 +SELECT count() FROM a JOIN b ON b.b1 = a.a1 JOIN c ON c.c1 = b.b1 JOIN d ON d.d1 = c.c1 GROUP BY a.a2 ; EXPLAIN PLAN header = 1 @@ -31,15 +28,15 @@ SELECT a.a2, d.d2 FROM a JOIN b USING (k) JOIN c USING (k) JOIN d USING (k) ; EXPLAIN PLAN header = 1 -SELECT b.bx -FROM a +SELECT b.bx FROM a JOIN (SELECT b1, b2 || 'x' AS bx FROM b ) AS b ON b.b1 = a.a1 JOIN c ON c.c1 = b.b1 JOIN (SELECT number AS d1 from numbers(10)) AS d ON d.d1 = c.c1 -WHERE c.c2 != '' -ORDER BY a.a2 +WHERE c.c2 != '' ORDER BY a.a2 ; +-- { echoOff } + DROP TABLE IF EXISTS a; DROP TABLE IF EXISTS b; DROP TABLE IF EXISTS c; From 659fa963655a61f33bef64b944aa70e3f236de75 Mon Sep 17 00:00:00 2001 From: Alexander Gololobov <440544+davenger@users.noreply.github.com> Date: Wed, 11 Jan 2023 13:06:38 +0100 Subject: [PATCH 185/262] More logging to facilitate debugging --- src/Processors/Transforms/TTLTransform.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Processors/Transforms/TTLTransform.cpp b/src/Processors/Transforms/TTLTransform.cpp index e79dcb34c41..3250d012d5c 100644 --- a/src/Processors/Transforms/TTLTransform.cpp +++ b/src/Processors/Transforms/TTLTransform.cpp @@ -144,6 +144,8 @@ void TTLTransform::finalize() else LOG_DEBUG(log, "Removed {} rows with expired TTL from part {}", delete_algorithm->getNumberOfRemovedRows(), data_part->name); } + else + LOG_DEBUG(log, "No delete algorithm was applied for part {}", data_part->name); } IProcessor::Status TTLTransform::prepare() From c0f529600d2d17fa111ce875e10bb1557fc1bd74 Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 11 Jan 2023 12:23:33 
+0000 Subject: [PATCH 186/262] fix darwin build --- src/Common/CancelableSharedMutex.h | 3 ++- src/Common/SharedMutex.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Common/CancelableSharedMutex.h b/src/Common/CancelableSharedMutex.h index dfd9631c564..af87b213479 100644 --- a/src/Common/CancelableSharedMutex.h +++ b/src/Common/CancelableSharedMutex.h @@ -1,12 +1,13 @@ #pragma once +#include + #ifdef OS_LINUX /// Because of futex #include #include #include #include -#include // for std::unique_lock and std::shared_lock namespace DB { diff --git a/src/Common/SharedMutex.h b/src/Common/SharedMutex.h index 26c649c6fa8..e0143d4042d 100644 --- a/src/Common/SharedMutex.h +++ b/src/Common/SharedMutex.h @@ -1,11 +1,12 @@ #pragma once +#include + #ifdef OS_LINUX /// Because of futex #include #include #include -#include // for std::unique_lock and std::shared_lock namespace DB { From 22c30ca38c373d21f6d95dc0323dc34a6bfead5d Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 11 Jan 2023 10:59:48 +0100 Subject: [PATCH 187/262] Add typing to get_previous_release_tag.py --- tests/ci/get_previous_release_tag.py | 40 ++++++++++++++++++---------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/tests/ci/get_previous_release_tag.py b/tests/ci/get_previous_release_tag.py index 579035bd943..c6fe6cd5fb5 100755 --- a/tests/ci/get_previous_release_tag.py +++ b/tests/ci/get_previous_release_tag.py @@ -2,21 +2,25 @@ import re import logging +from typing import List, Optional, Tuple import requests # type: ignore CLICKHOUSE_TAGS_URL = "https://api.github.com/repos/ClickHouse/ClickHouse/tags" -CLICKHOUSE_PACKAGE_URL = "https://github.com/ClickHouse/ClickHouse/releases/download/v{version}-{type}/clickhouse-common-static_{version}_amd64.deb" +CLICKHOUSE_PACKAGE_URL = ( + "https://github.com/ClickHouse/ClickHouse/releases/download/" + "v{version}-{type}/clickhouse-common-static_{version}_amd64.deb" +) VERSION_PATTERN = r"(v(?:\d+\.)?(?:\d+\.)?(?:\d+\.)?\d+-[a-zA-Z]*)" logger = logging.getLogger(__name__) class Version: - def __init__(self, version): + def __init__(self, version: str): self.version = version - def __lt__(self, other): + def __lt__(self, other: "Version") -> bool: return list(map(int, self.version.split("."))) < list( map(int, other.version.split(".")) ) @@ -26,7 +30,7 @@ class Version: class ReleaseInfo: - def __init__(self, release_tag): + def __init__(self, release_tag: str): self.version = Version(release_tag[1:].split("-")[0]) self.type = release_tag[1:].split("-")[1] @@ -37,7 +41,9 @@ class ReleaseInfo: return f"ReleaseInfo: {self.version}-{self.type}" -def find_previous_release(server_version, releases): +def find_previous_release( + server_version: Optional[Version], releases: List[ReleaseInfo] +) -> Tuple[bool, Optional[ReleaseInfo]]: releases.sort(key=lambda x: x.version, reverse=True) if server_version is None: @@ -59,21 +65,23 @@ def find_previous_release(server_version, releases): != 404 ): return True, release - else: - logger.debug( - "The tag %s-%s exists but the package is not yet available on GitHub", - release.version, - release.type, - ) + + logger.debug( + "The tag v%s-%s exists but the package is not yet available on GitHub", + release.version, + release.type, + ) return False, None -def get_previous_release(server_version): +def get_previous_release(server_version: Optional[Version]) -> Optional[ReleaseInfo]: page = 1 found = False while not found: - response = requests.get(CLICKHOUSE_TAGS_URL, {"page": page, "per_page": 100}) 
+ response = requests.get( + CLICKHOUSE_TAGS_URL, {"page": page, "per_page": 100}, timeout=10 + ) if not response.ok: raise Exception( "Cannot load the list of tags from github: " + response.reason @@ -94,7 +102,11 @@ def get_previous_release(server_version): return previous_release -if __name__ == "__main__": +def main(): logging.basicConfig(level=logging.INFO) server_version = Version(input()) print(get_previous_release(server_version)) + + +if __name__ == "__main__": + main() From 21573028ea02b3c0a9cfe6dc2420a0360c2d3731 Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Wed, 11 Jan 2023 07:38:41 -0500 Subject: [PATCH 188/262] Update docs/en/engines/table-engines/integrations/deltalake.md --- docs/en/engines/table-engines/integrations/deltalake.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/deltalake.md b/docs/en/engines/table-engines/integrations/deltalake.md index eb4d8e934a7..251d2fef52e 100644 --- a/docs/en/engines/table-engines/integrations/deltalake.md +++ b/docs/en/engines/table-engines/integrations/deltalake.md @@ -18,7 +18,7 @@ CREATE TABLE deltalake **Engine parameters** -- `path` — Bucket url with path to the existing Delta Lake table. +- `url` — Bucket url with path to the existing Delta Lake table. - `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3). **Example** From 73ef2657dd906974e5a4178e55099faec553ee77 Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Wed, 11 Jan 2023 07:40:10 -0500 Subject: [PATCH 189/262] Update docs/en/engines/table-engines/integrations/hudi.md --- docs/en/engines/table-engines/integrations/hudi.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/hudi.md b/docs/en/engines/table-engines/integrations/hudi.md index 6da1634ba5a..75b1969101b 100644 --- a/docs/en/engines/table-engines/integrations/hudi.md +++ b/docs/en/engines/table-engines/integrations/hudi.md @@ -18,7 +18,7 @@ CREATE TABLE hudi_table **Engine parameters** -- `path` — Bucket url with the path to an existing Hudi table. +- `url` — Bucket url with the path to an existing Hudi table. - `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. Parameter is optional. If credentials are not specified, they are used from the configuration file. For more information see [Using S3 for Data Storage](../mergetree-family/mergetree.md#table_engine-mergetree-s3). 
**Example** From 367d4fc4bf0cfc992f48ab5297f38da5017efb73 Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Wed, 11 Jan 2023 07:40:52 -0500 Subject: [PATCH 190/262] Update docs/en/sql-reference/table-functions/hudi.md --- docs/en/sql-reference/table-functions/hudi.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/hudi.md b/docs/en/sql-reference/table-functions/hudi.md index c1ccd0cda2f..b8d0724a7b9 100644 --- a/docs/en/sql-reference/table-functions/hudi.md +++ b/docs/en/sql-reference/table-functions/hudi.md @@ -10,7 +10,7 @@ Provides a read-only table-like interface to Apache [Hudi](https://hudi.apache.o ## Syntax ``` sql -hudi(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) +hudi(url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) ``` ## Arguments From d4c4f84161014f9434fc4535cd545b950ba2bd5d Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Wed, 11 Jan 2023 07:41:36 -0500 Subject: [PATCH 191/262] Update docs/en/sql-reference/table-functions/hudi.md --- docs/en/sql-reference/table-functions/hudi.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/table-functions/hudi.md b/docs/en/sql-reference/table-functions/hudi.md index b8d0724a7b9..5a97b2401b4 100644 --- a/docs/en/sql-reference/table-functions/hudi.md +++ b/docs/en/sql-reference/table-functions/hudi.md @@ -15,7 +15,7 @@ hudi(url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,co ## Arguments -- `path` — Bucket url with the path to an existing Hudi table in S3. +- `url` — Bucket url with the path to an existing Hudi table in S3. - `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. These parameters are optional. If credentials are not specified, they are used from the ClickHouse configuration. For more information see [Using S3 for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3). - `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file. - `structure` — Structure of the table. Format `'column1_name column1_type, column2_name column2_type, ...'`. 
From 02261b0e2a83bab18dcff08f48246ef122ab1449 Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Wed, 11 Jan 2023 07:42:15 -0500 Subject: [PATCH 192/262] Update docs/en/engines/table-engines/integrations/hudi.md --- docs/en/engines/table-engines/integrations/hudi.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/engines/table-engines/integrations/hudi.md b/docs/en/engines/table-engines/integrations/hudi.md index 75b1969101b..4e335e6c075 100644 --- a/docs/en/engines/table-engines/integrations/hudi.md +++ b/docs/en/engines/table-engines/integrations/hudi.md @@ -13,7 +13,7 @@ Note that the Hudi table must already exist in S3, this command does not take DD ``` sql CREATE TABLE hudi_table - ENGINE = Hudi(path, [aws_access_key_id, aws_secret_access_key,]) + ENGINE = Hudi(url, [aws_access_key_id, aws_secret_access_key,]) ``` **Engine parameters** From 1ddc9c3bb60195db72dac5fb966d7870e9b0dbc8 Mon Sep 17 00:00:00 2001 From: rfraposa Date: Wed, 11 Jan 2023 05:44:51 -0700 Subject: [PATCH 193/262] Update merge-tree-settings.md --- .../settings/merge-tree-settings.md | 103 ++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/docs/en/operations/settings/merge-tree-settings.md b/docs/en/operations/settings/merge-tree-settings.md index ec492605930..ed4ee37fc37 100644 --- a/docs/en/operations/settings/merge-tree-settings.md +++ b/docs/en/operations/settings/merge-tree-settings.md @@ -643,3 +643,106 @@ Default value: `0` (limit never applied). ``` xml 10 ``` + +## ratio_of_defaults_for_sparse_serialization {#ratio_of_defaults_for_sparse_serialization} + +Minimal ratio of the number of _default_ values to the number of _all_ values in a column. Setting this value causes the column to be stored using sparse serializations. + +If a column is sparse (contains mostly zeros), ClickHouse can encode it in a sparse format and automatically optimize calculations - the data does not require full decompression during queries. To enable this sparse serialization, define the `ratio_of_defaults_for_sparse_serialization` setting to be less than 1.0. If the value is greater than or equal to 1.0 (the default), then the columns will be always written using the normal full serialization. + +Possible values: + +- Float between 0 and 1 to enable sparse serialization +- 1.0 (or greater) if you do not want to use sparse serialization + +Default value: `1.0` (sparse serialization is disabled) + +**Example** + +Notice the `s` column in the following table is an empty string for 95% of the rows. In `my_regular_table` we do not use sparse serialization, and in `my_sparse_table` we set `ratio_of_defaults_for_sparse_serialization` to 0.95: + +```sql +CREATE TABLE my_regular_table +( + `id` UInt64, + `s` String +) +ENGINE = MergeTree +ORDER BY id; + +INSERT INTO my_regular_table +SELECT + number AS id, + number % 20 = 0 ? toString(number): '' AS s +FROM + numbers(10000000); + + +CREATE TABLE my_sparse_table +( + `id` UInt64, + `s` String +) +ENGINE = MergeTree +ORDER BY id +SETTINGS ratio_of_defaults_for_sparse_serialization = 0.95; + +INSERT INTO my_sparse_table +SELECT + number, + number % 20 = 0 ? 
toString(number): '' +FROM + numbers(10000000); +``` + +Notice the `s` column in `my_sparse_table` uses less storage space on disk: + +```sql +SELECT table, name, data_compressed_bytes, data_uncompressed_bytes FROM system.columns +WHERE table LIKE 'my_%_table'; +``` + +```response +┌─table────────────┬─name─┬─data_compressed_bytes─┬─data_uncompressed_bytes─┐ +│ my_regular_table │ id │ 37790741 │ 75488328 │ +│ my_regular_table │ s │ 2451377 │ 12683106 │ +│ my_sparse_table │ id │ 37790741 │ 75488328 │ +│ my_sparse_table │ s │ 2283454 │ 9855751 │ +└──────────────────┴──────┴───────────────────────┴─────────────────────────┘ +``` + +You can verify if a column is using the sparse encoding by viewing the `serialization_kind` column of the `system.parts_columns` table: + +```sql +SELECT column, serialization_kind FROM system.parts_columns +WHERE table LIKE 'my_sparse_table'; +``` + +You can see which parts of `s` were stored using the sparse serialization: + +```response +┌─column─┬─serialization_kind─┐ +│ id │ Default │ +│ s │ Default │ +│ id │ Default │ +│ s │ Default │ +│ id │ Default │ +│ s │ Sparse │ +│ id │ Default │ +│ s │ Sparse │ +│ id │ Default │ +│ s │ Sparse │ +│ id │ Default │ +│ s │ Sparse │ +│ id │ Default │ +│ s │ Sparse │ +│ id │ Default │ +│ s │ Sparse │ +│ id │ Default │ +│ s │ Sparse │ +│ id │ Default │ +│ s │ Sparse │ +│ id │ Default │ +│ s │ Sparse │ +└────────┴────────────────────┘ +``` \ No newline at end of file From 1d002e45a904dc945d0696798fd293a3d65b4625 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 11 Jan 2023 12:49:01 +0000 Subject: [PATCH 194/262] Fix test & review comments --- src/Storages/MergeTree/MergeTreeData.cpp | 21 ++++++++++++------- src/Storages/MergeTree/MergeTreeSettings.h | 2 +- ...21_incorrect_dealy_for_insert_bug_44902.sh | 4 ++-- 3 files changed, 16 insertions(+), 11 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 47fc02165b6..5830e0145bc 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -3870,27 +3870,32 @@ void MergeTreeData::delayInsertOrThrowIfNeeded(Poco::Event * until, const Contex { size_t parts_over_threshold = 0; size_t allowed_parts_over_threshold = 1; - if (active_parts_over_threshold >= outdated_parts_over_threshold) + const bool use_active_parts_threshold = (active_parts_over_threshold >= outdated_parts_over_threshold); + if (use_active_parts_threshold) { - parts_over_threshold = active_parts_over_threshold; + parts_over_threshold = active_parts_over_threshold; allowed_parts_over_threshold = active_parts_to_throw_insert - active_parts_to_delay_insert; } else { parts_over_threshold = outdated_parts_over_threshold; - allowed_parts_over_threshold = outdated_parts_over_threshold; + allowed_parts_over_threshold = outdated_parts_over_threshold; /// if throw threshold is not set, will use max delay if (settings->inactive_parts_to_throw_insert > 0) allowed_parts_over_threshold = settings->inactive_parts_to_throw_insert - settings->inactive_parts_to_delay_insert; } - chassert(allowed_parts_over_threshold > 0 && parts_over_threshold <= allowed_parts_over_threshold); + if (allowed_parts_over_threshold == 0 || parts_over_threshold > allowed_parts_over_threshold) [[unlikely]] + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Incorrect calculation of {} parts over threshold: allowed_parts_over_threshold={}, parts_over_threshold={}", + (use_active_parts_threshold ? 
"active" : "inactive"), + allowed_parts_over_threshold, + parts_over_threshold); const UInt64 max_delay_milliseconds = (settings->max_delay_to_insert > 0 ? settings->max_delay_to_insert * 1000 : 1000); double delay_factor = static_cast(parts_over_threshold) / allowed_parts_over_threshold; - UInt64 min_delay_milliseconds = settings->min_delay_to_insert_ms; - /// min() as a save guard here - delay_milliseconds = std::max( - min_delay_milliseconds, std::min(max_delay_milliseconds, static_cast(max_delay_milliseconds * delay_factor))); + const UInt64 min_delay_milliseconds = settings->min_delay_to_insert_ms; + delay_milliseconds = std::max(min_delay_milliseconds, static_cast(max_delay_milliseconds * delay_factor)); } ProfileEvents::increment(ProfileEvents::DelayedInserts); diff --git a/src/Storages/MergeTree/MergeTreeSettings.h b/src/Storages/MergeTree/MergeTreeSettings.h index 0b8188f67c7..d1f957740e2 100644 --- a/src/Storages/MergeTree/MergeTreeSettings.h +++ b/src/Storages/MergeTree/MergeTreeSettings.h @@ -68,7 +68,7 @@ struct Settings; M(Bool, remove_rolled_back_parts_immediately, 1, "Setting for an incomplete experimental feature.", 0) \ \ /** Inserts settings. */ \ - M(UInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table.", 0) \ + M(UInt64, parts_to_delay_insert, 150, "If table contains at least that many active parts in single partition, artificially slow down insert into table. Disabled if set to 0", 0) \ M(UInt64, inactive_parts_to_delay_insert, 0, "If table contains at least that many inactive parts in single partition, artificially slow down insert into table.", 0) \ M(UInt64, parts_to_throw_insert, 300, "If more than this number active parts in single partition, throw 'Too many parts ...' exception.", 0) \ M(UInt64, inactive_parts_to_throw_insert, 0, "If more than this number inactive parts in single partition, throw 'Too many inactive parts ...' 
exception.", 0) \ diff --git a/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.sh b/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.sh index 6cbd77b262a..5f91ef19a5a 100755 --- a/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.sh +++ b/tests/queries/0_stateless/02521_incorrect_dealy_for_insert_bug_44902.sh @@ -15,10 +15,10 @@ for i in {0..4} do query_id="${CLICKHOUSE_DATABASE}_02521_${i}_$RANDOM$RANDOM" $CLICKHOUSE_CLIENT --query_id="$query_id" -q "INSERT INTO test_02521_insert_delay SELECT number, toString(number) FROM numbers(${i}, 1)" - $CLICKHOUSE_CLIENT -q "system flush logs" + $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" $CLICKHOUSE_CLIENT --param_query_id="$query_id" -q "select ProfileEvents['DelayedInsertsMilliseconds'] as delay from system.query_log where event_date >= yesterday() and query_id = {query_id:String} order by delay desc limit 1" done -$CLICKHOUSE_CLIENT -q "INSERT INTO test_02521_insert_delay VALUES(0, 'This query throws error')" 2>&1 | grep -o 'TOO_MANY_PARTS' +$CLICKHOUSE_CLIENT -q "INSERT INTO test_02521_insert_delay VALUES(0, 'This query throws error')" 2>&1 | grep -o 'TOO_MANY_PARTS' | head -n 1 $CLICKHOUSE_CLIENT -q "DROP TABLE test_02521_insert_delay" From 6e9669cfaebf4a0a8c8c5a15f9ead146833627e1 Mon Sep 17 00:00:00 2001 From: Dan Roscigno Date: Wed, 11 Jan 2023 07:53:37 -0500 Subject: [PATCH 195/262] Apply suggestions from code review --- docs/en/engines/table-engines/integrations/deltalake.md | 2 +- docs/en/sql-reference/table-functions/deltalake.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/en/engines/table-engines/integrations/deltalake.md b/docs/en/engines/table-engines/integrations/deltalake.md index 251d2fef52e..83526ac944d 100644 --- a/docs/en/engines/table-engines/integrations/deltalake.md +++ b/docs/en/engines/table-engines/integrations/deltalake.md @@ -13,7 +13,7 @@ Note that the Delta Lake table must already exist in S3, this command does not t ``` sql CREATE TABLE deltalake - ENGINE = DeltaLake(path, [aws_access_key_id, aws_secret_access_key,]) + ENGINE = DeltaLake(url, [aws_access_key_id, aws_secret_access_key,]) ``` **Engine parameters** diff --git a/docs/en/sql-reference/table-functions/deltalake.md b/docs/en/sql-reference/table-functions/deltalake.md index 10e7c20e17a..f1cc4659a2a 100644 --- a/docs/en/sql-reference/table-functions/deltalake.md +++ b/docs/en/sql-reference/table-functions/deltalake.md @@ -10,12 +10,12 @@ Provides a read-only table-like interface to [Delta Lake](https://github.com/del ## Syntax ``` sql -deltaLake(path [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) +deltaLake(url [,aws_access_key_id, aws_secret_access_key] [,format] [,structure] [,compression]) ``` ## Arguments -- `path` — Bucket url with path to existing Delta Lake table in S3. +- `url` — Bucket url with path to existing Delta Lake table in S3. - `aws_access_key_id`, `aws_secret_access_key` - Long-term credentials for the [AWS](https://aws.amazon.com/) account user. You can use these to authenticate your requests. These parameters are optional. If credentials are not specified, they are used from the ClickHouse configuration. For more information see [Using S3 for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree.md/#table_engine-mergetree-s3). - `format` — The [format](/docs/en/interfaces/formats.md/#formats) of the file. - `structure` — Structure of the table. 
Format `'column1_name column1_type, column2_name column2_type, ...'`. From 764abb641089bb220038704673377b55531fc1d7 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Wed, 11 Jan 2023 15:55:18 +0300 Subject: [PATCH 196/262] try to fix flaky test_ttl_move_memory_usage --- .../test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py b/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py index a1e10cde031..9d53b7c048b 100644 --- a/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py +++ b/tests/integration/test_s3_zero_copy_ttl/test_ttl_move_memory_usage.py @@ -48,7 +48,9 @@ def test_move_and_s3_memory_usage(started_single_node_cluster): ) small_node.query("system flush logs") max_usage = small_node.query( - "select max(CurrentMetric_MemoryTracking) from system.metric_log" + """select max(m.val - am.val * 4096) from + (select toStartOfMinute(event_time) as time, max(CurrentMetric_MemoryTracking) as val from system.metric_log group by time) as m join + (select toStartOfMinute(event_time) as time, min(value) as val from system.asynchronous_metric_log where metric='jemalloc.arenas.all.pdirty' group by time) as am using time""" ) # 3G limit is a big one. However, we can hit it anyway with parallel s3 writes enabled. # Also actual value can be bigger because of memory drift. From a79f6d19fa0d8cad4a38255f7ae547e1bfee02bf Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 11 Jan 2023 13:04:05 +0000 Subject: [PATCH 197/262] add docs for `system.moves` table --- docs/en/operations/system-tables/moves.md | 42 +++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 docs/en/operations/system-tables/moves.md diff --git a/docs/en/operations/system-tables/moves.md b/docs/en/operations/system-tables/moves.md new file mode 100644 index 00000000000..e790946a15f --- /dev/null +++ b/docs/en/operations/system-tables/moves.md @@ -0,0 +1,42 @@ +--- +slug: /en/operations/system-tables/moves +--- +# moves + +The table contains information about in-progress [data part moves](/docs/en/sql-reference/statements/alter/partition#move-partitionpart) of [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) tables. Each data part movement is represented by a single row. + +Columns: + +- `database` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the database. + +- `table` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the table containing moving data part. + +- `elapsed` ([Float64](../../sql-reference/data-types/float.md)) — Time elapsed (in seconds) since data part movement started. + +- `target_disk_name` ([String](disks.md)) — Name of [disk](/docs/en/operations/system-tables/disks/) to which the data part is moving. + +- `target_disk_path` ([String](disks.md)) — Path to the mount point of the [disk](/docs/en/operations/system-tables/disks/) in the file system. + +- `part_name` ([String](/docs/en/sql-reference/data-types/string.md)) — Name of the data part being moved. + +- `part_size` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Data part size. + +- `thread_id` ([UInt64](../../sql-reference/data-types/int-uint.md)) — Identifier of a thread performing the movement. 
+ +**Example** + +```sql +SELECT * FROM system.moves +``` + +```text +┌─database─┬─table─┬─────elapsed─┬─target_disk_name─┬─target_disk_path─┬─part_name─┬─part_size─┬─thread_id─┐ +│ default │ test2 │ 1.668056039 │ s3 │ ./disks/s3/ │ all_3_3_0 │ 136 │ 296146 │ +└──────────┴───────┴─────────────┴──────────────────┴──────────────────┴───────────┴───────────┴───────────┘ +``` + +**See Also** + +- [MergeTree](/docs/en/engines/table-engines/mergetree-family/mergetree.md) table engine +- [Using Multiple Block Devices for Data Storage](/docs/en/engines/table-engines/mergetree-family/mergetree#table_engine-mergetree-multiple-volumes) +- [ALTER TABLE ... MOVE PART](/docs/en/sql-reference/statements/alter/partition#move-partitionpart) command From 4767147745787180647243cafd76b939ed09dc25 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 11 Jan 2023 08:23:44 -0500 Subject: [PATCH 198/262] format query response --- docs/en/operations/system-tables/moves.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/operations/system-tables/moves.md b/docs/en/operations/system-tables/moves.md index e790946a15f..54f07540507 100644 --- a/docs/en/operations/system-tables/moves.md +++ b/docs/en/operations/system-tables/moves.md @@ -29,7 +29,7 @@ Columns: SELECT * FROM system.moves ``` -```text +```response ┌─database─┬─table─┬─────elapsed─┬─target_disk_name─┬─target_disk_path─┬─part_name─┬─part_size─┬─thread_id─┐ │ default │ test2 │ 1.668056039 │ s3 │ ./disks/s3/ │ all_3_3_0 │ 136 │ 296146 │ └──────────┴───────┴─────────────┴──────────────────┴──────────────────┴───────────┴───────────┴───────────┘ From 8d099a44172837e8c54c149d3d811322c757132c Mon Sep 17 00:00:00 2001 From: serxa Date: Wed, 11 Jan 2023 13:43:51 +0000 Subject: [PATCH 199/262] make more SQL queries copyable from docs in one click --- docs/en/interfaces/formats.md | 19 ++--- docs/en/operations/system-tables/disks.md | 2 +- .../system-tables/merge_tree_settings.md | 2 +- docs/en/operations/system-tables/numbers.md | 2 +- .../en/operations/system-tables/numbers_mt.md | 2 +- docs/en/operations/system-tables/one.md | 2 +- docs/en/operations/system-tables/processes.md | 2 +- .../sql-reference/table-functions/format.md | 5 +- .../sql-reference/table-functions/format.md | 5 +- .../mergetree-family/summingmergetree.md | 4 +- docs/zh/operations/system-tables/disks.md | 2 +- .../system-tables/merge_tree_settings.md | 2 +- docs/zh/operations/system-tables/numbers.md | 2 +- docs/zh/operations/system-tables/one.md | 2 +- docs/zh/sql-reference/data-types/array.md | 69 +++++++-------- docs/zh/sql-reference/data-types/enum.md | 85 ++++++++++++------- .../data-types/special-data-types/nothing.md | 8 +- docs/zh/sql-reference/data-types/tuple.md | 36 ++++---- .../functions/functions-for-nulls.md | 85 ++++++++++--------- .../functions/other-functions.md | 75 ++++++++-------- .../sql-reference/functions/uuid-functions.md | 22 +++-- docs/zh/sql-reference/operators/index.md | 24 ++---- .../sql-reference/table-functions/format.md | 5 +- 23 files changed, 235 insertions(+), 227 deletions(-) diff --git a/docs/en/interfaces/formats.md b/docs/en/interfaces/formats.md index c78b34e0b0d..75ef0ac3cc0 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -1265,7 +1265,7 @@ For input it uses the following correspondence between BSON types and ClickHouse | `\x10` int32 | [Int32/UInt32](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal32](/docs/en/sql-reference/data-types/decimal.md) | | `\x12` int64 | 
[Int64/UInt64](/docs/en/sql-reference/data-types/int-uint.md)/[Decimal64](/docs/en/sql-reference/data-types/decimal.md)/[DateTime64](/docs/en/sql-reference/data-types/datetime64.md) | -Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert BSON int32 value into ClickHouse UInt8). +Other BSON types are not supported. Also, it performs conversion between different integer types (for example, you can insert BSON int32 value into ClickHouse UInt8). Big integers and decimals (Int128/UInt128/Int256/UInt256/Decimal128/Decimal256) can be parsed from BSON Binary value with `\x00` binary subtype. In this case this format will validate that the size of binary data equals the size of expected value. Note: this format don't work properly on Big-Endian platforms. @@ -2319,25 +2319,22 @@ INSERT INTO `test2` VALUES (1),(2),(3); Queries: ```sql -:) desc file(dump.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2' - -DESCRIBE TABLE file(dump.sql, MySQLDump) -SETTINGS input_format_mysql_dump_table_name = 'test2' - -Query id: 25e66c89-e10a-42a8-9b42-1ee8bbbde5ef +DESCRIBE TABLE file(dump.sql, MySQLDump) SETTINGS input_format_mysql_dump_table_name = 'test2' +``` +```text ┌─name─┬─type────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ │ x │ Nullable(Int32) │ │ │ │ │ │ └──────┴─────────────────┴──────────────┴────────────────────┴─────────┴──────────────────┴────────────────┘ +``` -:) select * from file(dump.sql, MySQLDump) settings input_format_mysql_dump_table_name='test2' - +```sql SELECT * FROM file(dump.sql, MySQLDump) SETTINGS input_format_mysql_dump_table_name = 'test2' +``` -Query id: 17d59664-ebce-4053-bb79-d46a516fb590 - +```text ┌─x─┐ │ 1 │ │ 2 │ diff --git a/docs/en/operations/system-tables/disks.md b/docs/en/operations/system-tables/disks.md index 4096a8c765c..d492e42c2ec 100644 --- a/docs/en/operations/system-tables/disks.md +++ b/docs/en/operations/system-tables/disks.md @@ -17,7 +17,7 @@ Columns: **Example** ```sql -:) SELECT * FROM system.disks; +SELECT * FROM system.disks; ``` ```text diff --git a/docs/en/operations/system-tables/merge_tree_settings.md b/docs/en/operations/system-tables/merge_tree_settings.md index 9f8006d77a7..a05d4abccda 100644 --- a/docs/en/operations/system-tables/merge_tree_settings.md +++ b/docs/en/operations/system-tables/merge_tree_settings.md @@ -15,7 +15,7 @@ Columns: **Example** ```sql -:) SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical; +SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical; ``` ```text diff --git a/docs/en/operations/system-tables/numbers.md b/docs/en/operations/system-tables/numbers.md index f2204dbf0ba..0dc001ebb6f 100644 --- a/docs/en/operations/system-tables/numbers.md +++ b/docs/en/operations/system-tables/numbers.md @@ -12,7 +12,7 @@ Reads from this table are not parallelized. **Example** ```sql -:) SELECT * FROM system.numbers LIMIT 10; +SELECT * FROM system.numbers LIMIT 10; ``` ```text diff --git a/docs/en/operations/system-tables/numbers_mt.md b/docs/en/operations/system-tables/numbers_mt.md index deb7be7dc68..cc461b29ad0 100644 --- a/docs/en/operations/system-tables/numbers_mt.md +++ b/docs/en/operations/system-tables/numbers_mt.md @@ -10,7 +10,7 @@ Used for tests. 
**Example** ```sql -:) SELECT * FROM system.numbers_mt LIMIT 10; +SELECT * FROM system.numbers_mt LIMIT 10; ``` ```text diff --git a/docs/en/operations/system-tables/one.md b/docs/en/operations/system-tables/one.md index d71c82f5e94..ee2907a6d6d 100644 --- a/docs/en/operations/system-tables/one.md +++ b/docs/en/operations/system-tables/one.md @@ -12,7 +12,7 @@ This is similar to the `DUAL` table found in other DBMSs. **Example** ```sql -:) SELECT * FROM system.one LIMIT 10; +SELECT * FROM system.one LIMIT 10; ``` ```text diff --git a/docs/en/operations/system-tables/processes.md b/docs/en/operations/system-tables/processes.md index 2cf15a9bb2b..95c46f551ef 100644 --- a/docs/en/operations/system-tables/processes.md +++ b/docs/en/operations/system-tables/processes.md @@ -20,7 +20,7 @@ Columns: - `is_all_data_sent` (Int8) – Was all data sent to the client (in other words query had been finished on the server). ```sql -:) SELECT * FROM system.processes LIMIT 10 FORMAT Vertical; +SELECT * FROM system.processes LIMIT 10 FORMAT Vertical; ``` ```text diff --git a/docs/en/sql-reference/table-functions/format.md b/docs/en/sql-reference/table-functions/format.md index 78b67a47d4e..4a0ee58d758 100644 --- a/docs/en/sql-reference/table-functions/format.md +++ b/docs/en/sql-reference/table-functions/format.md @@ -27,7 +27,7 @@ A table with data parsed from `data` argument according specified format and ext **Query:** ``` sql -:) select * from format(JSONEachRow, +SELECT * FROM format(JSONEachRow, $$ {"a": "Hello", "b": 111} {"a": "World", "b": 123} @@ -49,8 +49,7 @@ $$) **Query:** ```sql - -:) desc format(JSONEachRow, +DESC format(JSONEachRow, $$ {"a": "Hello", "b": 111} {"a": "World", "b": 123} diff --git a/docs/ru/sql-reference/table-functions/format.md b/docs/ru/sql-reference/table-functions/format.md index 5dc463e5b27..a91b4ca2b1e 100644 --- a/docs/ru/sql-reference/table-functions/format.md +++ b/docs/ru/sql-reference/table-functions/format.md @@ -27,7 +27,7 @@ A table with data parsed from `data` argument according specified format and ext **Query:** ``` sql -:) select * from format(JSONEachRow, +SELECT * FROM format(JSONEachRow, $$ {"a": "Hello", "b": 111} {"a": "World", "b": 123} @@ -49,8 +49,7 @@ $$) **Query:** ```sql - -:) desc format(JSONEachRow, +DESC format(JSONEachRow, $$ {"a": "Hello", "b": 111} {"a": "World", "b": 123} diff --git a/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md b/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md index 620a56006db..f59d327b4ae 100644 --- a/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md +++ b/docs/zh/engines/table-engines/mergetree-family/summingmergetree.md @@ -69,7 +69,9 @@ ORDER BY key 向其中插入数据: - :) INSERT INTO summtt Values(1,1),(1,2),(2,1) +``` sql +INSERT INTO summtt Values(1,1),(1,2),(2,1) +``` ClickHouse可能不会完整的汇总所有行([见下文](#data-processing)),因此我们在查询中使用了聚合函数 `sum` 和 `GROUP BY` 子句。 diff --git a/docs/zh/operations/system-tables/disks.md b/docs/zh/operations/system-tables/disks.md index 36f7e8de4f1..0e774632074 100644 --- a/docs/zh/operations/system-tables/disks.md +++ b/docs/zh/operations/system-tables/disks.md @@ -16,7 +16,7 @@ slug: /zh/operations/system-tables/disks **示例** ```sql -:) SELECT * FROM system.disks; +SELECT * FROM system.disks; ``` ```text diff --git a/docs/zh/operations/system-tables/merge_tree_settings.md b/docs/zh/operations/system-tables/merge_tree_settings.md index c3c424c01fe..c2bdcd14d24 100644 --- a/docs/zh/operations/system-tables/merge_tree_settings.md +++ 
b/docs/zh/operations/system-tables/merge_tree_settings.md @@ -16,7 +16,7 @@ slug: /zh/operations/system-tables/merge_tree_settings **示例** ```sql -:) SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical; +SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical; ``` ```text diff --git a/docs/zh/operations/system-tables/numbers.md b/docs/zh/operations/system-tables/numbers.md index f3db66f365b..8cb92351ae7 100644 --- a/docs/zh/operations/system-tables/numbers.md +++ b/docs/zh/operations/system-tables/numbers.md @@ -12,7 +12,7 @@ slug: /zh/operations/system-tables/numbers **示例** ```sql -:) SELECT * FROM system.numbers LIMIT 10; +SELECT * FROM system.numbers LIMIT 10; ``` ```text diff --git a/docs/zh/operations/system-tables/one.md b/docs/zh/operations/system-tables/one.md index 6929b1b4245..2e44a9bd89a 100644 --- a/docs/zh/operations/system-tables/one.md +++ b/docs/zh/operations/system-tables/one.md @@ -12,7 +12,7 @@ slug: /zh/operations/system-tables/one **示例** ```sql -:) SELECT * FROM system.one LIMIT 10; +SELECT * FROM system.one LIMIT 10; ``` ```text diff --git a/docs/zh/sql-reference/data-types/array.md b/docs/zh/sql-reference/data-types/array.md index e2f18a42de8..0c38eb86004 100644 --- a/docs/zh/sql-reference/data-types/array.md +++ b/docs/zh/sql-reference/data-types/array.md @@ -19,29 +19,25 @@ slug: /zh/sql-reference/data-types/array 创建数组示例: - :) SELECT array(1, 2) AS x, toTypeName(x) +```sql +SELECT array(1, 2) AS x, toTypeName(x) +``` - SELECT - [1, 2] AS x, - toTypeName(x) +```text +┌─x─────┬─toTypeName(array(1, 2))─┐ +│ [1,2] │ Array(UInt8) │ +└───────┴─────────────────────────┘ +``` - ┌─x─────┬─toTypeName(array(1, 2))─┐ - │ [1,2] │ Array(UInt8) │ - └───────┴─────────────────────────┘ +``` sql +SELECT [1, 2] AS x, toTypeName(x) +``` - 1 rows in set. Elapsed: 0.002 sec. - - :) SELECT [1, 2] AS x, toTypeName(x) - - SELECT - [1, 2] AS x, - toTypeName(x) - - ┌─x─────┬─toTypeName([1, 2])─┐ - │ [1,2] │ Array(UInt8) │ - └───────┴────────────────────┘ - - 1 rows in set. Elapsed: 0.002 sec. +``` text +┌─x─────┬─toTypeName([1, 2])─┐ +│ [1,2] │ Array(UInt8) │ +└───────┴────────────────────┘ +``` ## 使用数据类型 {#shi-yong-shu-ju-lei-xing} @@ -50,26 +46,23 @@ ClickHouse会自动检测数组元素,并根据元素计算出存储这些元素 如果 ClickHouse 无法确定数据类型,它将产生异常。当尝试同时创建一个包含字符串和数字的数组时会发生这种情况 (`SELECT array(1, 'a')`)。 自动数据类型检测示例: +```sql +SELECT array(1, 2, NULL) AS x, toTypeName(x) +``` - :) SELECT array(1, 2, NULL) AS x, toTypeName(x) - - SELECT - [1, 2, NULL] AS x, - toTypeName(x) - - ┌─x──────────┬─toTypeName(array(1, 2, NULL))─┐ - │ [1,2,NULL] │ Array(Nullable(UInt8)) │ - └────────────┴───────────────────────────────┘ - - 1 rows in set. Elapsed: 0.002 sec. +```text +┌─x──────────┬─toTypeName(array(1, 2, NULL))─┐ +│ [1,2,NULL] │ Array(Nullable(UInt8)) │ +└────────────┴───────────────────────────────┘ +``` 如果您尝试创建不兼容的数据类型数组,ClickHouse 将引发异常: - :) SELECT array(1, 'a') +```sql +SELECT array(1, 'a') +``` - SELECT [1, 'a'] - - Received exception from server (version 1.1.54388): - Code: 386. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: There is no supertype for types UInt8, String because some of them are String/FixedString and some of them are not. - - 0 rows in set. Elapsed: 0.246 sec. +```text +Received exception from server (version 1.1.54388): +Code: 386. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: There is no supertype for types UInt8, String because some of them are String/FixedString and some of them are not. 
+``` diff --git a/docs/zh/sql-reference/data-types/enum.md b/docs/zh/sql-reference/data-types/enum.md index 0cf8a02d76b..9832df3da02 100644 --- a/docs/zh/sql-reference/data-types/enum.md +++ b/docs/zh/sql-reference/data-types/enum.md @@ -20,49 +20,64 @@ slug: /zh/sql-reference/data-types/enum 这个 `x` 列只能存储类型定义中列出的值:`'hello'`或`'world'`。如果您尝试保存任何其他值,ClickHouse 抛出异常。 - :) INSERT INTO t_enum VALUES ('hello'), ('world'), ('hello') +```sql +INSERT INTO t_enum VALUES ('hello'), ('world'), ('hello') +``` - INSERT INTO t_enum VALUES +```text +Ok. - Ok. +3 rows in set. Elapsed: 0.002 sec. +``` - 3 rows in set. Elapsed: 0.002 sec. +```sql +INSERT INTO t_enum VALUES('a') +``` - :) insert into t_enum values('a') - - INSERT INTO t_enum VALUES - - - Exception on client: - Code: 49. DB::Exception: Unknown element 'a' for type Enum8('hello' = 1, 'world' = 2) +```text +Exception on client: +Code: 49. DB::Exception: Unknown element 'a' for type Enum8('hello' = 1, 'world' = 2) +``` 当您从表中查询数据时,ClickHouse 从 `Enum` 中输出字符串值。 - SELECT * FROM t_enum +```sql +SELECT * FROM t_enum +``` - ┌─x─────┐ - │ hello │ - │ world │ - │ hello │ - └───────┘ +```text +┌─x─────┐ +│ hello │ +│ world │ +│ hello │ +└───────┘ +``` 如果需要看到对应行的数值,则必须将 `Enum` 值转换为整数类型。 - SELECT CAST(x, 'Int8') FROM t_enum +```sql +SELECT CAST(x, 'Int8') FROM t_enum +``` - ┌─CAST(x, 'Int8')─┐ - │ 1 │ - │ 2 │ - │ 1 │ - └─────────────────┘ +```text +┌─CAST(x, 'Int8')─┐ +│ 1 │ +│ 2 │ +│ 1 │ +└─────────────────┘ +``` 在查询中创建枚举值,您还需要使用 `CAST`。 - SELECT toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)')) +```sql +SELECT toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)')) +``` - ┌─toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)'))─┐ - │ Enum8('a' = 1, 'b' = 2) │ - └──────────────────────────────────────────────────────┘ +```text +┌─toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)'))─┐ +│ Enum8('a' = 1, 'b' = 2) │ +└──────────────────────────────────────────────────────┘ +``` ## 规则及用法 {#gui-ze-ji-yong-fa} @@ -72,15 +87,19 @@ slug: /zh/sql-reference/data-types/enum `Enum` 包含在 [可为空](nullable.md) 类型中。因此,如果您使用此查询创建一个表 - CREATE TABLE t_enum_nullable - ( - x Nullable( Enum8('hello' = 1, 'world' = 2) ) - ) - ENGINE = TinyLog +```sql +CREATE TABLE t_enum_nullable +( + x Nullable( Enum8('hello' = 1, 'world' = 2) ) +) +ENGINE = TinyLog +``` 不仅可以存储 `'hello'` 和 `'world'` ,还可以存储 `NULL`。 - INSERT INTO t_enum_nullable Values('hello'),('world'),(NULL) +```sql +INSERT INTO t_enum_nullable Values('hello'),('world'),(NULL) +``` 在内存中,`Enum` 列的存储方式与相应数值的 `Int8` 或 `Int16` 相同。 diff --git a/docs/zh/sql-reference/data-types/special-data-types/nothing.md b/docs/zh/sql-reference/data-types/special-data-types/nothing.md index 2b10934f566..e123622edf6 100644 --- a/docs/zh/sql-reference/data-types/special-data-types/nothing.md +++ b/docs/zh/sql-reference/data-types/special-data-types/nothing.md @@ -9,11 +9,11 @@ slug: /zh/sql-reference/data-types/special-data-types/nothing `Nothing` 类型也可以用来表示空数组: -``` bash -:) SELECT toTypeName(array()) - -SELECT toTypeName([]) +```sql +SELECT toTypeName(array()) +``` +```text ┌─toTypeName(array())─┐ │ Array(Nothing) │ └─────────────────────┘ diff --git a/docs/zh/sql-reference/data-types/tuple.md b/docs/zh/sql-reference/data-types/tuple.md index e991fa7145a..905a872da24 100644 --- a/docs/zh/sql-reference/data-types/tuple.md +++ b/docs/zh/sql-reference/data-types/tuple.md @@ -17,17 +17,15 @@ slug: /zh/sql-reference/data-types/tuple 创建元组的示例: - :) SELECT tuple(1,'a') AS x, toTypeName(x) +```sql +SELECT tuple(1,'a') AS x, toTypeName(x) +``` - SELECT - (1, 'a') 
AS x, - toTypeName(x) - - ┌─x───────┬─toTypeName(tuple(1, 'a'))─┐ - │ (1,'a') │ Tuple(UInt8, String) │ - └─────────┴───────────────────────────┘ - - 1 rows in set. Elapsed: 0.021 sec. +```text +┌─x───────┬─toTypeName(tuple(1, 'a'))─┐ +│ (1,'a') │ Tuple(UInt8, String) │ +└─────────┴───────────────────────────┘ +``` ## 元组中的数据类型 {#yuan-zu-zhong-de-shu-ju-lei-xing} @@ -35,14 +33,12 @@ slug: /zh/sql-reference/data-types/tuple 自动数据类型检测示例: - SELECT tuple(1, NULL) AS x, toTypeName(x) +```sql +SELECT tuple(1, NULL) AS x, toTypeName(x) +``` - SELECT - (1, NULL) AS x, - toTypeName(x) - - ┌─x────────┬─toTypeName(tuple(1, NULL))──────┐ - │ (1,NULL) │ Tuple(UInt8, Nullable(Nothing)) │ - └──────────┴─────────────────────────────────┘ - - 1 rows in set. Elapsed: 0.002 sec. +```text +┌─x────────┬─toTypeName(tuple(1, NULL))──────┐ +│ (1,NULL) │ Tuple(UInt8, Nullable(Nothing)) │ +└──────────┴─────────────────────────────────┘ +``` diff --git a/docs/zh/sql-reference/functions/functions-for-nulls.md b/docs/zh/sql-reference/functions/functions-for-nulls.md index 1ae53f5ddc1..9ecf39e56c5 100644 --- a/docs/zh/sql-reference/functions/functions-for-nulls.md +++ b/docs/zh/sql-reference/functions/functions-for-nulls.md @@ -22,24 +22,24 @@ slug: /zh/sql-reference/functions/functions-for-nulls 存在以下内容的表 - ┌─x─┬────y─┐ - │ 1 │ ᴺᵁᴸᴸ │ - │ 2 │ 3 │ - └───┴──────┘ +```text +┌─x─┬────y─┐ +│ 1 │ ᴺᵁᴸᴸ │ +│ 2 │ 3 │ +└───┴──────┘ +``` 对其进行查询 - :) SELECT x FROM t_null WHERE isNull(y) +```sql +SELECT x FROM t_null WHERE isNull(y) +``` - SELECT x - FROM t_null - WHERE isNull(y) - - ┌─x─┐ - │ 1 │ - └───┘ - - 1 rows in set. Elapsed: 0.010 sec. +```text +┌─x─┐ +│ 1 │ +└───┘ +``` ## isNotNull {#isnotnull} @@ -60,24 +60,24 @@ slug: /zh/sql-reference/functions/functions-for-nulls 存在以下内容的表 - ┌─x─┬────y─┐ - │ 1 │ ᴺᵁᴸᴸ │ - │ 2 │ 3 │ - └───┴──────┘ +```text +┌─x─┬────y─┐ +│ 1 │ ᴺᵁᴸᴸ │ +│ 2 │ 3 │ +└───┴──────┘ +``` 对其进行查询 - :) SELECT x FROM t_null WHERE isNotNull(y) +```sql +SELECT x FROM t_null WHERE isNotNull(y) +``` - SELECT x - FROM t_null - WHERE isNotNull(y) - - ┌─x─┐ - │ 2 │ - └───┘ - - 1 rows in set. Elapsed: 0.010 sec. +```text +┌─x─┐ +│ 2 │ +└───┘ +``` ## 合并 {#coalesce} @@ -98,26 +98,27 @@ slug: /zh/sql-reference/functions/functions-for-nulls 考虑可以指定多种联系客户的方式的联系人列表。 - ┌─name─────┬─mail─┬─phone─────┬──icq─┐ - │ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │ 123 │ - │ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ - └──────────┴──────┴───────────┴──────┘ +```text +┌─name─────┬─mail─┬─phone─────┬──icq─┐ +│ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │ 123 │ +│ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ +└──────────┴──────┴───────────┴──────┘ +``` `mail`和`phone`字段是String类型,但`icq`字段是`UInt32`,所以它需要转换为`String`。 从联系人列表中获取客户的第一个可用联系方式: - :) SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook +```sql +SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook +``` - SELECT coalesce(mail, phone, CAST(icq, 'Nullable(String)')) - FROM aBook - - ┌─name─────┬─coalesce(mail, phone, CAST(icq, 'Nullable(String)'))─┐ - │ client 1 │ 123-45-67 │ - │ client 2 │ ᴺᵁᴸᴸ │ - └──────────┴──────────────────────────────────────────────────────┘ - - 2 rows in set. Elapsed: 0.006 sec. 
+```text +┌─name─────┬─coalesce(mail, phone, CAST(icq, 'Nullable(String)'))─┐ +│ client 1 │ 123-45-67 │ +│ client 2 │ ᴺᵁᴸᴸ │ +└──────────┴──────────────────────────────────────────────────────┘ +``` ## ifNull {#ifnull} diff --git a/docs/zh/sql-reference/functions/other-functions.md b/docs/zh/sql-reference/functions/other-functions.md index 07acf8fdfe0..a5c67e94921 100644 --- a/docs/zh/sql-reference/functions/other-functions.md +++ b/docs/zh/sql-reference/functions/other-functions.md @@ -398,23 +398,25 @@ FROM **`toTypeName ' 与 ' toColumnTypeName`的区别示例** - :) select toTypeName(cast('2018-01-01 01:02:03' AS DateTime)) +```sql +SELECT toTypeName(CAST('2018-01-01 01:02:03', 'DateTime')) +``` - SELECT toTypeName(CAST('2018-01-01 01:02:03', 'DateTime')) +```text +┌─toTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐ +│ DateTime │ +└─────────────────────────────────────────────────────┘ +``` - ┌─toTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐ - │ DateTime │ - └─────────────────────────────────────────────────────┘ +```sql +SELECT toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime')) +``` - 1 rows in set. Elapsed: 0.008 sec. - - :) select toColumnTypeName(cast('2018-01-01 01:02:03' AS DateTime)) - - SELECT toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime')) - - ┌─toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐ - │ Const(UInt32) │ - └───────────────────────────────────────────────────────────┘ +```text +┌─toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐ +│ Const(UInt32) │ +└───────────────────────────────────────────────────────────┘ +``` 该示例显示`DateTime`数据类型作为`Const(UInt32)`存储在内存中。 @@ -460,26 +462,25 @@ FROM **示例** - :) SELECT defaultValueOfArgumentType( CAST(1 AS Int8) ) +```sql +SELECT defaultValueOfArgumentType(CAST(1, 'Int8')) +``` - SELECT defaultValueOfArgumentType(CAST(1, 'Int8')) +```text +┌─defaultValueOfArgumentType(CAST(1, 'Int8'))─┐ +│ 0 │ +└─────────────────────────────────────────────┘ +``` - ┌─defaultValueOfArgumentType(CAST(1, 'Int8'))─┐ - │ 0 │ - └─────────────────────────────────────────────┘ - - 1 rows in set. Elapsed: 0.002 sec. - - :) SELECT defaultValueOfArgumentType( CAST(1 AS Nullable(Int8) ) ) - - SELECT defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)')) - - ┌─defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)'))─┐ - │ ᴺᵁᴸᴸ │ - └───────────────────────────────────────────────────────┘ - - 1 rows in set. Elapsed: 0.002 sec. 
+```sql +SELECT defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)')) +``` +```text +┌─defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)'))─┐ +│ ᴺᵁᴸᴸ │ +└───────────────────────────────────────────────────────┘ +``` ## indexHint {#indexhint} 输出符合索引选择范围内的所有数据,同时不实用参数中的表达式进行过滤。 @@ -506,9 +507,11 @@ SELECT count() FROM ontime 对该表进行如下的查询: +```sql +SELECT FlightDate AS k, count() FROM ontime GROUP BY k ORDER BY k ``` -:) SELECT FlightDate AS k, count() FROM ontime GROUP BY k ORDER BY k +```text SELECT FlightDate AS k, count() @@ -530,9 +533,11 @@ ORDER BY k ASC 在这个查询中,由于没有使用索引,所以ClickHouse将处理整个表的所有数据(`Processed 4.28 million rows`)。使用下面的查询尝试使用索引进行查询: +```sql +SELECT FlightDate AS k, count() FROM ontime WHERE k = '2017-09-15' GROUP BY k ORDER BY k ``` -:) SELECT FlightDate AS k, count() FROM ontime WHERE k = '2017-09-15' GROUP BY k ORDER BY k +```text SELECT FlightDate AS k, count() @@ -552,9 +557,11 @@ ORDER BY k ASC 现在将表达式`k = '2017-09-15'`传递给`indexHint`函数: +```sql +SELECT FlightDate AS k, count() FROM ontime WHERE indexHint(k = '2017-09-15') GROUP BY k ORDER BY k ``` -:) SELECT FlightDate AS k, count() FROM ontime WHERE indexHint(k = '2017-09-15') GROUP BY k ORDER BY k +```text SELECT FlightDate AS k, count() diff --git a/docs/zh/sql-reference/functions/uuid-functions.md b/docs/zh/sql-reference/functions/uuid-functions.md index 8ee65dd52d0..e635fd4fba8 100644 --- a/docs/zh/sql-reference/functions/uuid-functions.md +++ b/docs/zh/sql-reference/functions/uuid-functions.md @@ -21,13 +21,13 @@ UUID类型的值。 此示例演示如何在表中创建UUID类型的列,并对其写入数据。 -``` sql -:) CREATE TABLE t_uuid (x UUID) ENGINE=TinyLog - -:) INSERT INTO t_uuid SELECT generateUUIDv4() - -:) SELECT * FROM t_uuid +```sql +CREATE TABLE t_uuid (x UUID) ENGINE=TinyLog +INSERT INTO t_uuid SELECT generateUUIDv4() +SELECT * FROM t_uuid +``` +```text ┌────────────────────────────────────x─┐ │ f4bf890f-f9dc-4332-ad5c-0c18e73f28e9 │ └──────────────────────────────────────┘ @@ -47,9 +47,11 @@ UUID类型的值 **使用示例** -``` sql -:) SELECT toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0') AS uuid +```sql +SELECT toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0') AS uuid +``` +```text ┌─────────────────────────────────uuid─┐ │ 61f0c404-5cb3-11e7-907b-a6006ad3dba0 │ └──────────────────────────────────────┘ @@ -70,10 +72,12 @@ UUIDStringToNum(String) **使用示例** ``` sql -:) SELECT +SELECT '612f3c40-5d3b-217e-707b-6a546a3d7b29' AS uuid, UUIDStringToNum(uuid) AS bytes +``` +```text ┌─uuid─────────────────────────────────┬─bytes────────────┐ │ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │ a/<@];!~p{jTj={) │ └──────────────────────────────────────┴──────────────────┘ diff --git a/docs/zh/sql-reference/operators/index.md b/docs/zh/sql-reference/operators/index.md index 7e0bd9a9cfb..8544f9f5a91 100644 --- a/docs/zh/sql-reference/operators/index.md +++ b/docs/zh/sql-reference/operators/index.md @@ -226,18 +226,14 @@ ClickHouse 支持 `IS NULL` 和 `IS NOT NULL` 。 -``` bash -:) SELECT x+100 FROM t_null WHERE y IS NULL - -SELECT x + 100 -FROM t_null -WHERE isNull(y) +``` sql +SELECT x+100 FROM t_null WHERE y IS NULL +``` +``` text ┌─plus(x, 100)─┐ │ 101 │ └──────────────┘ - -1 rows in set. Elapsed: 0.002 sec. ``` ### IS NOT NULL {#is-not-null} @@ -249,16 +245,12 @@ WHERE isNull(y) -``` bash -:) SELECT * FROM t_null WHERE y IS NOT NULL - -SELECT * -FROM t_null -WHERE isNotNull(y) +``` sql +SELECT * FROM t_null WHERE y IS NOT NULL +``` +``` text ┌─x─┬─y─┐ │ 2 │ 3 │ └───┴───┘ - -1 rows in set. Elapsed: 0.002 sec. 
``` diff --git a/docs/zh/sql-reference/table-functions/format.md b/docs/zh/sql-reference/table-functions/format.md index ea2087fde5e..bc017ccc3c7 100644 --- a/docs/zh/sql-reference/table-functions/format.md +++ b/docs/zh/sql-reference/table-functions/format.md @@ -27,7 +27,7 @@ A table with data parsed from `data` argument according specified format and ext **Query:** ``` sql -:) select * from format(JSONEachRow, +SELECT * FROM format(JSONEachRow, $$ {"a": "Hello", "b": 111} {"a": "World", "b": 123} @@ -49,8 +49,7 @@ $$) **Query:** ```sql - -:) desc format(JSONEachRow, +DESC format(JSONEachRow, $$ {"a": "Hello", "b": 111} {"a": "World", "b": 123} From ed63f88cd12b7824147634b9dbb4f58c394e35f3 Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 11 Jan 2023 14:54:26 +0100 Subject: [PATCH 200/262] Improve README.md for clickhouse-com-content --- docs/tools/release.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/tools/release.sh b/docs/tools/release.sh index 67499631baa..c198f488822 100755 --- a/docs/tools/release.sh +++ b/docs/tools/release.sh @@ -25,7 +25,10 @@ then # Add files. cp -R "${BUILD_DIR}"/* . echo -n "${BASE_DOMAIN}" > CNAME - echo -n "" > README.md + cat > README.md << 'EOF' +## This repo is the source for https://content.clickhouse.com +It's built in [the action](https://github.com/ClickHouse/ClickHouse/blob/master/.github/workflows/docs_release.yml) in the DocsRelease job. +EOF echo -n "" > ".nojekyll" cp "${BASE_DIR}/../../LICENSE" . git add ./* From fe8f373aa6af3c710eca067192834e522e9e71d8 Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 11 Jan 2023 15:20:18 +0100 Subject: [PATCH 201/262] Less stupid tests --- contrib/azure | 2 +- src/Disks/tests/gtest_azure_xml_reader.cpp | 10 +++---- src/Disks/tests/gtest_disk.cpp | 34 +++++----------------- src/Disks/tests/gtest_disk.h | 8 ----- src/Storages/tests/gtest_storage_log.cpp | 10 ++----- 5 files changed, 15 insertions(+), 49 deletions(-) diff --git a/contrib/azure b/contrib/azure index 000f7ee8fd2..0d2a6b84021 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ -Subproject commit 000f7ee8fd22fa69e5ddb8fd6fd36b12c7a1bc2f +Subproject commit 0d2a6b840215fdfb1733287f3dc236d46ee0f268 diff --git a/src/Disks/tests/gtest_azure_xml_reader.cpp b/src/Disks/tests/gtest_azure_xml_reader.cpp index 3caf34f938a..b3c14e7b8bd 100644 --- a/src/Disks/tests/gtest_azure_xml_reader.cpp +++ b/src/Disks/tests/gtest_azure_xml_reader.cpp @@ -16,12 +16,10 @@ TEST(AzureXMLWrapper, TestLeak) { std::string str = "world"; - { - Azure::Storage::_internal::XmlReader reader(str.c_str(), str.length()); - reader.Read(); - Azure::Storage::_internal::XmlReader reader2(std::move(reader)); - Azure::Storage::_internal::XmlReader reader3 = std::move(reader2); - } + Azure::Storage::_internal::XmlReader reader(str.c_str(), str.length()); + reader.Read(); + Azure::Storage::_internal::XmlReader reader2(std::move(reader)); + Azure::Storage::_internal::XmlReader reader3 = std::move(reader2); } #endif diff --git a/src/Disks/tests/gtest_disk.cpp b/src/Disks/tests/gtest_disk.cpp index 8a24873c5ed..2b9db7e5ea2 100644 --- a/src/Disks/tests/gtest_disk.cpp +++ b/src/Disks/tests/gtest_disk.cpp @@ -7,49 +7,29 @@ namespace fs = std::filesystem; -template -DB::DiskPtr createDisk(); - - -template <> -DB::DiskPtr createDisk() +DB::DiskPtr createDisk() { fs::create_directory("tmp/"); return std::make_shared("local_disk", "tmp/", 0); } - -template void destroyDisk(DB::DiskPtr & disk) -{ - disk.reset(); -} - - -template <> -void 
destroyDisk(DB::DiskPtr & disk) { disk.reset(); fs::remove_all("tmp/"); } - -template class DiskTest : public testing::Test { public: - void SetUp() override { disk = createDisk(); } - void TearDown() override { destroyDisk(disk); } + void SetUp() override { disk = createDisk(); } + void TearDown() override { destroyDisk(disk); } DB::DiskPtr disk; }; -using DiskImplementations = testing::Types; -TYPED_TEST_SUITE(DiskTest, DiskImplementations); - - -TYPED_TEST(DiskTest, createDirectories) +TEST_F(DiskTest, createDirectories) { this->disk->createDirectories("test_dir1/"); EXPECT_TRUE(this->disk->isDirectory("test_dir1/")); @@ -59,7 +39,7 @@ TYPED_TEST(DiskTest, createDirectories) } -TYPED_TEST(DiskTest, writeFile) +TEST_F(DiskTest, writeFile) { { std::unique_ptr out = this->disk->writeFile("test_file"); @@ -77,7 +57,7 @@ TYPED_TEST(DiskTest, writeFile) } -TYPED_TEST(DiskTest, readFile) +TEST_F(DiskTest, readFile) { { std::unique_ptr out = this->disk->writeFile("test_file"); @@ -112,7 +92,7 @@ TYPED_TEST(DiskTest, readFile) } -TYPED_TEST(DiskTest, iterateDirectory) +TEST_F(DiskTest, iterateDirectory) { this->disk->createDirectories("test_dir/nested_dir/"); diff --git a/src/Disks/tests/gtest_disk.h b/src/Disks/tests/gtest_disk.h index 07a1269bb2e..3f0e84f3961 100644 --- a/src/Disks/tests/gtest_disk.h +++ b/src/Disks/tests/gtest_disk.h @@ -3,14 +3,6 @@ #include #include -template DB::DiskPtr createDisk(); -template <> -DB::DiskPtr createDisk(); - -template void destroyDisk(DB::DiskPtr & disk); - -template <> -void destroyDisk(DB::DiskPtr & disk); diff --git a/src/Storages/tests/gtest_storage_log.cpp b/src/Storages/tests/gtest_storage_log.cpp index c9613f1512d..b63de6a66ef 100644 --- a/src/Storages/tests/gtest_storage_log.cpp +++ b/src/Storages/tests/gtest_storage_log.cpp @@ -39,21 +39,20 @@ DB::StoragePtr createStorage(DB::DiskPtr & disk) return table; } -template class StorageLogTest : public testing::Test { public: void SetUp() override { - disk = createDisk(); + disk = createDisk(); table = createStorage(disk); } void TearDown() override { table->flushAndShutdown(); - destroyDisk(disk); + destroyDisk(disk); } const DB::DiskPtr & getDisk() { return disk; } @@ -65,9 +64,6 @@ private: }; -using DiskImplementations = testing::Types; -TYPED_TEST_SUITE(StorageLogTest, DiskImplementations); - // Returns data written to table in Values format. 
std::string writeData(int rows, DB::StoragePtr & table, const DB::ContextPtr context) { @@ -153,7 +149,7 @@ std::string readData(DB::StoragePtr & table, const DB::ContextPtr context) return out_buf.str(); } -TYPED_TEST(StorageLogTest, testReadWrite) +TEST_F(StorageLogTest, testReadWrite) { using namespace DB; const auto & context_holder = getContext(); From e581a56ed0d8620fc1266e7e2487a2150435f47d Mon Sep 17 00:00:00 2001 From: alesapin Date: Wed, 11 Jan 2023 15:30:48 +0100 Subject: [PATCH 202/262] Better --- contrib/azure | 2 +- src/Disks/tests/gtest_azure_xml_reader.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/contrib/azure b/contrib/azure index 0d2a6b84021..3b857189b40 160000 --- a/contrib/azure +++ b/contrib/azure @@ -1 +1 @@ -Subproject commit 0d2a6b840215fdfb1733287f3dc236d46ee0f268 +Subproject commit 3b857189b401e68f34c3cd164f5b270887c76b86 diff --git a/src/Disks/tests/gtest_azure_xml_reader.cpp b/src/Disks/tests/gtest_azure_xml_reader.cpp index b3c14e7b8bd..8cb352ad2f7 100644 --- a/src/Disks/tests/gtest_azure_xml_reader.cpp +++ b/src/Disks/tests/gtest_azure_xml_reader.cpp @@ -17,9 +17,9 @@ TEST(AzureXMLWrapper, TestLeak) std::string str = "world"; Azure::Storage::_internal::XmlReader reader(str.c_str(), str.length()); - reader.Read(); Azure::Storage::_internal::XmlReader reader2(std::move(reader)); Azure::Storage::_internal::XmlReader reader3 = std::move(reader2); + reader3.Read(); } #endif From b3fc6a970625f26d60b1199caf45a4ff25ceab9a Mon Sep 17 00:00:00 2001 From: "Mikhail f. Shiryaev" Date: Wed, 11 Jan 2023 15:43:57 +0100 Subject: [PATCH 203/262] Clean out unused code for old site/docs building system --- docs/tools/build.py | 120 ++++++++++++++++++----------- docs/tools/make_links.sh | 22 ------ docs/tools/mdx_clickhouse.py | 142 ----------------------------------- docs/tools/redirects.py | 53 ------------- docs/tools/requirements.txt | 29 ------- docs/tools/util.py | 136 --------------------------------- docs/tools/website.py | 63 ---------------- 7 files changed, 77 insertions(+), 488 deletions(-) delete mode 100755 docs/tools/make_links.sh delete mode 100755 docs/tools/mdx_clickhouse.py delete mode 100644 docs/tools/redirects.py delete mode 100644 docs/tools/util.py delete mode 100644 docs/tools/website.py diff --git a/docs/tools/build.py b/docs/tools/build.py index 7f78af5e203..5653a9b949d 100755 --- a/docs/tools/build.py +++ b/docs/tools/build.py @@ -1,45 +1,96 @@ #!/usr/bin/env python3 +from pathlib import Path import argparse import logging -import os import shutil -import subprocess import sys import livereload -import redirects -import website + +def write_redirect_html(output_path: Path, to_url: str) -> None: + output_dir = output_path.parent + output_dir.mkdir(parents=True, exist_ok=True) + output_path.write_text( + f""" + + + + + + + Page Redirection + + + If you are not redirected automatically, follow this link. 
+ +""" + ) -def build(args): - if os.path.exists(args.output_dir): +def build_static_redirects(output_dir: Path): + for static_redirect in [ + ("benchmark.html", "/benchmark/dbms/"), + ("benchmark_hardware.html", "/benchmark/hardware/"), + ( + "tutorial.html", + "/docs/en/getting_started/tutorial/", + ), + ( + "reference_en.html", + "/docs/en/single/", + ), + ( + "reference_ru.html", + "/docs/ru/single/", + ), + ( + "docs/index.html", + "/docs/en/", + ), + ]: + write_redirect_html(output_dir / static_redirect[0], static_redirect[1]) + + +def build(root_dir: Path, output_dir: Path): + if output_dir.exists(): shutil.rmtree(args.output_dir) - if not args.skip_website: - website.build_website(args) - redirects.build_static_redirects(args) + (output_dir / "data").mkdir(parents=True) + + logging.info("Building website") + + # This file can be requested to check for available ClickHouse releases. + shutil.copy2( + root_dir / "utils" / "list-versions" / "version_date.tsv", + output_dir / "data" / "version_date.tsv", + ) + + # This file can be requested to install ClickHouse. + shutil.copy2( + root_dir / "docs" / "_includes" / "install" / "universal.sh", + output_dir / "data" / "install.sh", + ) + + build_static_redirects(output_dir) if __name__ == "__main__": - os.chdir(os.path.join(os.path.dirname(__file__), "..")) + root_dir = Path(__file__).parent.parent.parent + docs_dir = root_dir / "docs" - # A root path to ClickHouse source code. - src_dir = ".." - - website_dir = os.path.join(src_dir, "website") - - arg_parser = argparse.ArgumentParser() - arg_parser.add_argument("--lang", default="en,ru,zh,ja") - arg_parser.add_argument("--theme-dir", default=website_dir) - arg_parser.add_argument("--website-dir", default=website_dir) - arg_parser.add_argument("--src-dir", default=src_dir) - arg_parser.add_argument("--output-dir", default="build") - arg_parser.add_argument("--nav-limit", type=int, default="0") - arg_parser.add_argument("--skip-multi-page", action="store_true") - arg_parser.add_argument("--skip-website", action="store_true") - arg_parser.add_argument("--htmlproofer", action="store_true") + arg_parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + ) + arg_parser.add_argument( + "--output-dir", + type=Path, + default=docs_dir / "build", + help="path to the output dir", + ) arg_parser.add_argument("--livereload", type=int, default="0") arg_parser.add_argument("--verbose", action="store_true") @@ -49,26 +100,9 @@ if __name__ == "__main__": level=logging.DEBUG if args.verbose else logging.INFO, stream=sys.stderr ) - logging.getLogger("MARKDOWN").setLevel(logging.INFO) - - args.rev = ( - subprocess.check_output("git rev-parse HEAD", shell=True) - .decode("utf-8") - .strip() - ) - args.rev_short = ( - subprocess.check_output("git rev-parse --short HEAD", shell=True) - .decode("utf-8") - .strip() - ) - args.rev_url = f"https://github.com/ClickHouse/ClickHouse/commit/{args.rev}" - - build(args) + build(root_dir, args.output_dir) if args.livereload: - new_args = [arg for arg in sys.argv if not arg.startswith("--livereload")] - new_args = sys.executable + " " + " ".join(new_args) - server = livereload.Server() server.serve(root=args.output_dir, host="0.0.0.0", port=args.livereload) sys.exit(0) diff --git a/docs/tools/make_links.sh b/docs/tools/make_links.sh deleted file mode 100755 index 801086178bf..00000000000 --- a/docs/tools/make_links.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/bash - -# Fixes missing documentation in other languages -# by putting relative 
symbolic links to the original doc file. - -BASE_DIR=$(dirname $(readlink -f $0)) - -function do_make_links() -{ - set -x - langs=(en zh ru ja) - src_file="$1" - for lang in "${langs[@]}" - do - dst_file="${src_file/\/en\///${lang}/}" - mkdir -p $(dirname "${dst_file}") - ln -sr "${src_file}" "${dst_file}" 2>/dev/null - done -} - -export -f do_make_links -find "${BASE_DIR}/../en" -iname '*.md' -exec /bin/bash -c 'do_make_links "{}"' \; diff --git a/docs/tools/mdx_clickhouse.py b/docs/tools/mdx_clickhouse.py deleted file mode 100755 index bce9f215759..00000000000 --- a/docs/tools/mdx_clickhouse.py +++ /dev/null @@ -1,142 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - - -import datetime -import os -import subprocess - -import jinja2 -import markdown.inlinepatterns -import markdown.extensions -import markdown.util -import macros.plugin - -import slugify as slugify_impl - - -def slugify(value, separator): - return slugify_impl.slugify( - value, separator=separator, word_boundary=True, save_order=True - ) - - -MARKDOWN_EXTENSIONS = [ - "mdx_clickhouse", - "admonition", - "attr_list", - "def_list", - "codehilite", - "nl2br", - "sane_lists", - "pymdownx.details", - "pymdownx.magiclink", - "pymdownx.superfences", - "extra", - {"toc": {"permalink": True, "slugify": slugify}}, -] - - -class ClickHouseLinkMixin(object): - def handleMatch(self, m, data): - try: - el, start, end = super(ClickHouseLinkMixin, self).handleMatch(m, data) - except IndexError: - return - - if el is not None: - href = el.get("href") or "" - is_external = href.startswith("http:") or href.startswith("https:") - if is_external: - if not href.startswith("https://clickhouse.com"): - el.set("rel", "external nofollow noreferrer") - return el, start, end - - -class ClickHouseAutolinkPattern( - ClickHouseLinkMixin, markdown.inlinepatterns.AutolinkInlineProcessor -): - pass - - -class ClickHouseLinkPattern( - ClickHouseLinkMixin, markdown.inlinepatterns.LinkInlineProcessor -): - pass - - -class ClickHousePreprocessor(markdown.util.Processor): - def run(self, lines): - for line in lines: - if "" not in line: - yield line - - -class ClickHouseMarkdown(markdown.extensions.Extension): - def extendMarkdown(self, md, md_globals): - md.preprocessors["clickhouse"] = ClickHousePreprocessor() - md.inlinePatterns["link"] = ClickHouseLinkPattern( - markdown.inlinepatterns.LINK_RE, md - ) - md.inlinePatterns["autolink"] = ClickHouseAutolinkPattern( - markdown.inlinepatterns.AUTOLINK_RE, md - ) - - -def makeExtension(**kwargs): - return ClickHouseMarkdown(**kwargs) - - -def get_translations(dirname, lang): - import babel.support - - return babel.support.Translations.load(dirname=dirname, locales=[lang, "en"]) - - -class PatchedMacrosPlugin(macros.plugin.MacrosPlugin): - disabled = False - - def on_config(self, config): - super(PatchedMacrosPlugin, self).on_config(config) - self.env.comment_start_string = "{##" - self.env.comment_end_string = "##}" - self.env.loader = jinja2.FileSystemLoader( - [ - os.path.join(config.data["site_dir"]), - os.path.join(config.data["extra"]["includes_dir"]), - ] - ) - - def on_env(self, env, config, files): - import util - - env.add_extension("jinja2.ext.i18n") - dirname = os.path.join(config.data["theme"].dirs[0], "locale") - lang = config.data["theme"]["language"] - env.install_gettext_translations(get_translations(dirname, lang), newstyle=True) - util.init_jinja2_filters(env) - return env - - def render(self, markdown): - if not self.disabled: - return self.render_impl(markdown) - else: - return markdown 
- - def on_page_markdown(self, markdown, page, config, files): - markdown = super(PatchedMacrosPlugin, self).on_page_markdown( - markdown, page, config, files - ) - - if os.path.islink(page.file.abs_src_path): - lang = config.data["theme"]["language"] - page.canonical_url = page.canonical_url.replace(f"/{lang}/", "/en/", 1) - - return markdown - - def render_impl(self, markdown): - md_template = self.env.from_string(markdown) - return md_template.render(**self.variables) - - -macros.plugin.MacrosPlugin = PatchedMacrosPlugin diff --git a/docs/tools/redirects.py b/docs/tools/redirects.py deleted file mode 100644 index 1b5490a040f..00000000000 --- a/docs/tools/redirects.py +++ /dev/null @@ -1,53 +0,0 @@ -import os - - -def write_redirect_html(out_path, to_url): - out_dir = os.path.dirname(out_path) - try: - os.makedirs(out_dir) - except OSError: - pass - with open(out_path, "w") as f: - f.write( - f""" - - - - - - - Page Redirection - - - If you are not redirected automatically, follow this link. - -""" - ) - - -def build_static_redirects(args): - for static_redirect in [ - ("benchmark.html", "/benchmark/dbms/"), - ("benchmark_hardware.html", "/benchmark/hardware/"), - ( - "tutorial.html", - "/docs/en/getting_started/tutorial/", - ), - ( - "reference_en.html", - "/docs/en/single/", - ), - ( - "reference_ru.html", - "/docs/ru/single/", - ), - ( - "docs/index.html", - "/docs/en/", - ), - ]: - write_redirect_html( - os.path.join(args.output_dir, static_redirect[0]), static_redirect[1] - ) diff --git a/docs/tools/requirements.txt b/docs/tools/requirements.txt index afd6b1a889d..0e0f7c6d044 100644 --- a/docs/tools/requirements.txt +++ b/docs/tools/requirements.txt @@ -1,30 +1 @@ -Babel==2.9.1 -Jinja2==3.0.3 -Markdown==3.3.2 -MarkupSafe==2.1.1 -PyYAML==6.0 -Pygments>=2.12.0 -beautifulsoup4==4.9.1 -click==7.1.2 -ghp_import==2.1.0 -importlib_metadata==4.11.4 -jinja2-highlight==0.6.1 livereload==2.6.3 -mergedeep==1.3.4 -mkdocs-macros-plugin==0.4.20 -mkdocs-macros-test==0.1.0 -mkdocs-material==8.2.15 -mkdocs==1.3.0 -mkdocs_material_extensions==1.0.3 -packaging==21.3 -pymdown_extensions==9.4 -pyparsing==3.0.9 -python-slugify==4.0.1 -python_dateutil==2.8.2 -pytz==2022.1 -six==1.15.0 -soupsieve==2.3.2 -termcolor==1.1.0 -text_unidecode==1.3 -tornado==6.1 -zipp==3.8.0 diff --git a/docs/tools/util.py b/docs/tools/util.py deleted file mode 100644 index dc9fb640b47..00000000000 --- a/docs/tools/util.py +++ /dev/null @@ -1,136 +0,0 @@ -import collections -import contextlib -import datetime -import multiprocessing -import os -import shutil -import sys -import socket -import tempfile -import threading - -import jinja2 -import yaml - - -@contextlib.contextmanager -def temp_dir(): - path = tempfile.mkdtemp(dir=os.environ.get("TEMP")) - try: - yield path - finally: - shutil.rmtree(path) - - -@contextlib.contextmanager -def cd(new_cwd): - old_cwd = os.getcwd() - os.chdir(new_cwd) - try: - yield - finally: - os.chdir(old_cwd) - - -def get_free_port(): - with contextlib.closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: - s.bind(("", 0)) - s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - return s.getsockname()[1] - - -def run_function_in_parallel(func, args_list, threads=False): - processes = [] - exit_code = 0 - for task in args_list: - cls = threading.Thread if threads else multiprocessing.Process - processes.append(cls(target=func, args=task)) - processes[-1].start() - for process in processes: - process.join() - if not threads: - if process.exitcode and not exit_code: - exit_code = 
process.exitcode - if exit_code: - sys.exit(exit_code) - - -def read_md_file(path): - in_meta = False - meta = {} - meta_text = [] - content = [] - if os.path.exists(path): - with open(path, "r") as f: - for line in f: - if line.startswith("---"): - if in_meta: - in_meta = False - meta = yaml.full_load("".join(meta_text)) - else: - in_meta = True - else: - if in_meta: - meta_text.append(line) - else: - content.append(line) - return meta, "".join(content) - - -def write_md_file(path, meta, content): - dirname = os.path.dirname(path) - if not os.path.exists(dirname): - os.makedirs(dirname) - - with open(path, "w") as f: - if meta: - print("---", file=f) - yaml.dump(meta, f) - print("---", file=f) - if not content.startswith("\n"): - print("", file=f) - f.write(content) - - -def represent_ordereddict(dumper, data): - value = [] - for item_key, item_value in data.items(): - node_key = dumper.represent_data(item_key) - node_value = dumper.represent_data(item_value) - - value.append((node_key, node_value)) - - return yaml.nodes.MappingNode("tag:yaml.org,2002:map", value) - - -yaml.add_representer(collections.OrderedDict, represent_ordereddict) - - -def init_jinja2_filters(env): - import website - - chunk_size = 10240 - env.filters["chunks"] = lambda line: [ - line[i : i + chunk_size] for i in range(0, len(line), chunk_size) - ] - env.filters["to_rfc882"] = lambda d: datetime.datetime.strptime( - d, "%Y-%m-%d" - ).strftime("%a, %d %b %Y %H:%M:%S GMT") - - -def init_jinja2_env(args): - import mdx_clickhouse - - env = jinja2.Environment( - loader=jinja2.FileSystemLoader( - [args.website_dir, os.path.join(args.src_dir, "docs", "_includes")] - ), - extensions=["jinja2.ext.i18n", "jinja2_highlight.HighlightExtension"], - ) - env.extend(jinja2_highlight_cssclass="syntax p-3 my-3") - translations_dir = os.path.join(args.website_dir, "locale") - env.install_gettext_translations( - mdx_clickhouse.get_translations(translations_dir, "en"), newstyle=True - ) - init_jinja2_filters(env) - return env diff --git a/docs/tools/website.py b/docs/tools/website.py deleted file mode 100644 index 2a34458fd29..00000000000 --- a/docs/tools/website.py +++ /dev/null @@ -1,63 +0,0 @@ -import hashlib -import json -import logging -import os -import shutil -import subprocess - -import util - - -def build_website(args): - logging.info("Building website") - env = util.init_jinja2_env(args) - - shutil.copytree( - args.website_dir, - args.output_dir, - ignore=shutil.ignore_patterns( - "*.md", - "*.sh", - "*.css", - "*.json", - "js/*.js", - "build", - "docs", - "public", - "node_modules", - "src", - "templates", - "locale", - ".gitkeep", - ), - ) - - # This file can be requested to check for available ClickHouse releases. - shutil.copy2( - os.path.join(args.src_dir, "utils", "list-versions", "version_date.tsv"), - os.path.join(args.output_dir, "data", "version_date.tsv"), - ) - - # This file can be requested to install ClickHouse. 
- shutil.copy2( - os.path.join(args.src_dir, "docs", "_includes", "install", "universal.sh"), - os.path.join(args.output_dir, "data", "install.sh"), - ) - - for root, _, filenames in os.walk(args.output_dir): - for filename in filenames: - if filename == "main.html": - continue - - path = os.path.join(root, filename) - if not filename.endswith(".html"): - continue - logging.info("Processing %s", path) - with open(path, "rb") as f: - content = f.read().decode("utf-8") - - template = env.from_string(content) - content = template.render(args.__dict__) - - with open(path, "wb") as f: - f.write(content.encode("utf-8")) From d34a755cfb14fed23c00eb86ad22c4102682bec0 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 11 Jan 2023 09:59:23 -0500 Subject: [PATCH 204/262] switch text to response for query blocks --- docs/zh/operations/system-tables/numbers.md | 2 +- docs/zh/operations/system-tables/one.md | 2 +- docs/zh/sql-reference/data-types/array.md | 8 +++---- docs/zh/sql-reference/data-types/enum.md | 10 ++++---- .../data-types/special-data-types/nothing.md | 2 +- docs/zh/sql-reference/data-types/tuple.md | 4 ++-- .../functions/functions-for-nulls.md | 12 +++++----- .../functions/other-functions.md | 23 ++++++++++--------- .../sql-reference/functions/uuid-functions.md | 9 ++++---- docs/zh/sql-reference/operators/index.md | 8 +++---- .../sql-reference/table-functions/format.md | 4 ++-- 11 files changed, 43 insertions(+), 41 deletions(-) diff --git a/docs/zh/operations/system-tables/numbers.md b/docs/zh/operations/system-tables/numbers.md index 8cb92351ae7..801c43f8e91 100644 --- a/docs/zh/operations/system-tables/numbers.md +++ b/docs/zh/operations/system-tables/numbers.md @@ -15,7 +15,7 @@ slug: /zh/operations/system-tables/numbers SELECT * FROM system.numbers LIMIT 10; ``` -```text +```response ┌─number─┐ │ 0 │ │ 1 │ diff --git a/docs/zh/operations/system-tables/one.md b/docs/zh/operations/system-tables/one.md index 2e44a9bd89a..29dd25c5282 100644 --- a/docs/zh/operations/system-tables/one.md +++ b/docs/zh/operations/system-tables/one.md @@ -15,7 +15,7 @@ slug: /zh/operations/system-tables/one SELECT * FROM system.one LIMIT 10; ``` -```text +```response ┌─dummy─┐ │ 0 │ └───────┘ diff --git a/docs/zh/sql-reference/data-types/array.md b/docs/zh/sql-reference/data-types/array.md index 0c38eb86004..46c40b889ad 100644 --- a/docs/zh/sql-reference/data-types/array.md +++ b/docs/zh/sql-reference/data-types/array.md @@ -23,7 +23,7 @@ slug: /zh/sql-reference/data-types/array SELECT array(1, 2) AS x, toTypeName(x) ``` -```text +```response ┌─x─────┬─toTypeName(array(1, 2))─┐ │ [1,2] │ Array(UInt8) │ └───────┴─────────────────────────┘ @@ -33,7 +33,7 @@ SELECT array(1, 2) AS x, toTypeName(x) SELECT [1, 2] AS x, toTypeName(x) ``` -``` text +```response ┌─x─────┬─toTypeName([1, 2])─┐ │ [1,2] │ Array(UInt8) │ └───────┴────────────────────┘ @@ -50,7 +50,7 @@ ClickHouse会自动检测数组元素,并根据元素计算出存储这些元素 SELECT array(1, 2, NULL) AS x, toTypeName(x) ``` -```text +```response ┌─x──────────┬─toTypeName(array(1, 2, NULL))─┐ │ [1,2,NULL] │ Array(Nullable(UInt8)) │ └────────────┴───────────────────────────────┘ @@ -62,7 +62,7 @@ SELECT array(1, 2, NULL) AS x, toTypeName(x) SELECT array(1, 'a') ``` -```text +```response Received exception from server (version 1.1.54388): Code: 386. DB::Exception: Received from localhost:9000, 127.0.0.1. DB::Exception: There is no supertype for types UInt8, String because some of them are String/FixedString and some of them are not. 
``` diff --git a/docs/zh/sql-reference/data-types/enum.md b/docs/zh/sql-reference/data-types/enum.md index 9832df3da02..496a4c5a78c 100644 --- a/docs/zh/sql-reference/data-types/enum.md +++ b/docs/zh/sql-reference/data-types/enum.md @@ -24,7 +24,7 @@ slug: /zh/sql-reference/data-types/enum INSERT INTO t_enum VALUES ('hello'), ('world'), ('hello') ``` -```text +```response Ok. 3 rows in set. Elapsed: 0.002 sec. @@ -34,7 +34,7 @@ Ok. INSERT INTO t_enum VALUES('a') ``` -```text +```response Exception on client: Code: 49. DB::Exception: Unknown element 'a' for type Enum8('hello' = 1, 'world' = 2) ``` @@ -45,7 +45,7 @@ Code: 49. DB::Exception: Unknown element 'a' for type Enum8('hello' = 1, 'world' SELECT * FROM t_enum ``` -```text +```response ┌─x─────┐ │ hello │ │ world │ @@ -59,7 +59,7 @@ SELECT * FROM t_enum SELECT CAST(x, 'Int8') FROM t_enum ``` -```text +```response ┌─CAST(x, 'Int8')─┐ │ 1 │ │ 2 │ @@ -73,7 +73,7 @@ SELECT CAST(x, 'Int8') FROM t_enum SELECT toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)')) ``` -```text +```response ┌─toTypeName(CAST('a', 'Enum8(\'a\' = 1, \'b\' = 2)'))─┐ │ Enum8('a' = 1, 'b' = 2) │ └──────────────────────────────────────────────────────┘ diff --git a/docs/zh/sql-reference/data-types/special-data-types/nothing.md b/docs/zh/sql-reference/data-types/special-data-types/nothing.md index e123622edf6..19a78cb540e 100644 --- a/docs/zh/sql-reference/data-types/special-data-types/nothing.md +++ b/docs/zh/sql-reference/data-types/special-data-types/nothing.md @@ -13,7 +13,7 @@ slug: /zh/sql-reference/data-types/special-data-types/nothing SELECT toTypeName(array()) ``` -```text +```response ┌─toTypeName(array())─┐ │ Array(Nothing) │ └─────────────────────┘ diff --git a/docs/zh/sql-reference/data-types/tuple.md b/docs/zh/sql-reference/data-types/tuple.md index 905a872da24..004c80ff916 100644 --- a/docs/zh/sql-reference/data-types/tuple.md +++ b/docs/zh/sql-reference/data-types/tuple.md @@ -21,7 +21,7 @@ slug: /zh/sql-reference/data-types/tuple SELECT tuple(1,'a') AS x, toTypeName(x) ``` -```text +```response ┌─x───────┬─toTypeName(tuple(1, 'a'))─┐ │ (1,'a') │ Tuple(UInt8, String) │ └─────────┴───────────────────────────┘ @@ -37,7 +37,7 @@ SELECT tuple(1,'a') AS x, toTypeName(x) SELECT tuple(1, NULL) AS x, toTypeName(x) ``` -```text +```response ┌─x────────┬─toTypeName(tuple(1, NULL))──────┐ │ (1,NULL) │ Tuple(UInt8, Nullable(Nothing)) │ └──────────┴─────────────────────────────────┘ diff --git a/docs/zh/sql-reference/functions/functions-for-nulls.md b/docs/zh/sql-reference/functions/functions-for-nulls.md index 9ecf39e56c5..4dd30970923 100644 --- a/docs/zh/sql-reference/functions/functions-for-nulls.md +++ b/docs/zh/sql-reference/functions/functions-for-nulls.md @@ -22,7 +22,7 @@ slug: /zh/sql-reference/functions/functions-for-nulls 存在以下内容的表 -```text +```response ┌─x─┬────y─┐ │ 1 │ ᴺᵁᴸᴸ │ │ 2 │ 3 │ @@ -35,7 +35,7 @@ slug: /zh/sql-reference/functions/functions-for-nulls SELECT x FROM t_null WHERE isNull(y) ``` -```text +```response ┌─x─┐ │ 1 │ └───┘ @@ -60,7 +60,7 @@ SELECT x FROM t_null WHERE isNull(y) 存在以下内容的表 -```text +```response ┌─x─┬────y─┐ │ 1 │ ᴺᵁᴸᴸ │ │ 2 │ 3 │ @@ -73,7 +73,7 @@ SELECT x FROM t_null WHERE isNull(y) SELECT x FROM t_null WHERE isNotNull(y) ``` -```text +```response ┌─x─┐ │ 2 │ └───┘ @@ -98,7 +98,7 @@ SELECT x FROM t_null WHERE isNotNull(y) 考虑可以指定多种联系客户的方式的联系人列表。 -```text +```response ┌─name─────┬─mail─┬─phone─────┬──icq─┐ │ client 1 │ ᴺᵁᴸᴸ │ 123-45-67 │ 123 │ │ client 2 │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ ᴺᵁᴸᴸ │ @@ -113,7 +113,7 @@ SELECT x FROM t_null WHERE 
isNotNull(y) SELECT coalesce(mail, phone, CAST(icq,'Nullable(String)')) FROM aBook ``` -```text +```response ┌─name─────┬─coalesce(mail, phone, CAST(icq, 'Nullable(String)'))─┐ │ client 1 │ 123-45-67 │ │ client 2 │ ᴺᵁᴸᴸ │ diff --git a/docs/zh/sql-reference/functions/other-functions.md b/docs/zh/sql-reference/functions/other-functions.md index a5c67e94921..2eeaad63694 100644 --- a/docs/zh/sql-reference/functions/other-functions.md +++ b/docs/zh/sql-reference/functions/other-functions.md @@ -33,7 +33,7 @@ slug: /zh/sql-reference/functions/other-functions SELECT 'some/long/path/to/file' AS a, basename(a) ``` -``` text +```response ┌─a──────────────────────┬─basename('some\\long\\path\\to\\file')─┐ │ some\long\path\to\file │ file │ └────────────────────────┴────────────────────────────────────────┘ @@ -43,7 +43,7 @@ SELECT 'some/long/path/to/file' AS a, basename(a) SELECT 'some\\long\\path\\to\\file' AS a, basename(a) ``` -``` text +```response ┌─a──────────────────────┬─basename('some\\long\\path\\to\\file')─┐ │ some\long\path\to\file │ file │ └────────────────────────┴────────────────────────────────────────┘ @@ -53,7 +53,7 @@ SELECT 'some\\long\\path\\to\\file' AS a, basename(a) SELECT 'some-file-name' AS a, basename(a) ``` -``` text +```response ┌─a──────────────┬─basename('some-file-name')─┐ │ some-file-name │ some-file-name │ └────────────────┴────────────────────────────┘ @@ -402,7 +402,7 @@ FROM SELECT toTypeName(CAST('2018-01-01 01:02:03', 'DateTime')) ``` -```text +```response ┌─toTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐ │ DateTime │ └─────────────────────────────────────────────────────┘ @@ -412,7 +412,7 @@ SELECT toTypeName(CAST('2018-01-01 01:02:03', 'DateTime')) SELECT toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime')) ``` -```text +```response ┌─toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime'))─┐ │ Const(UInt32) │ └───────────────────────────────────────────────────────────┘ @@ -466,7 +466,7 @@ SELECT toColumnTypeName(CAST('2018-01-01 01:02:03', 'DateTime')) SELECT defaultValueOfArgumentType(CAST(1, 'Int8')) ``` -```text +```response ┌─defaultValueOfArgumentType(CAST(1, 'Int8'))─┐ │ 0 │ └─────────────────────────────────────────────┘ @@ -476,7 +476,7 @@ SELECT defaultValueOfArgumentType(CAST(1, 'Int8')) SELECT defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)')) ``` -```text +```response ┌─defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)'))─┐ │ ᴺᵁᴸᴸ │ └───────────────────────────────────────────────────────┘ @@ -497,7 +497,8 @@ SELECT defaultValueOfArgumentType(CAST(1, 'Nullable(Int8)')) ``` SELECT count() FROM ontime - +``` +```response ┌─count()─┐ │ 4276457 │ └─────────┘ @@ -511,7 +512,7 @@ SELECT count() FROM ontime SELECT FlightDate AS k, count() FROM ontime GROUP BY k ORDER BY k ``` -```text +```response SELECT FlightDate AS k, count() @@ -537,7 +538,7 @@ ORDER BY k ASC SELECT FlightDate AS k, count() FROM ontime WHERE k = '2017-09-15' GROUP BY k ORDER BY k ``` -```text +```response SELECT FlightDate AS k, count() @@ -561,7 +562,7 @@ ORDER BY k ASC SELECT FlightDate AS k, count() FROM ontime WHERE indexHint(k = '2017-09-15') GROUP BY k ORDER BY k ``` -```text +```response SELECT FlightDate AS k, count() diff --git a/docs/zh/sql-reference/functions/uuid-functions.md b/docs/zh/sql-reference/functions/uuid-functions.md index e635fd4fba8..57b75a6c889 100644 --- a/docs/zh/sql-reference/functions/uuid-functions.md +++ b/docs/zh/sql-reference/functions/uuid-functions.md @@ -27,7 +27,7 @@ INSERT INTO t_uuid SELECT generateUUIDv4() SELECT * FROM t_uuid 
``` -```text +```response ┌────────────────────────────────────x─┐ │ f4bf890f-f9dc-4332-ad5c-0c18e73f28e9 │ └──────────────────────────────────────┘ @@ -51,7 +51,7 @@ UUID类型的值 SELECT toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0') AS uuid ``` -```text +```response ┌─────────────────────────────────uuid─┐ │ 61f0c404-5cb3-11e7-907b-a6006ad3dba0 │ └──────────────────────────────────────┘ @@ -77,7 +77,7 @@ SELECT UUIDStringToNum(uuid) AS bytes ``` -```text +```response ┌─uuid─────────────────────────────────┬─bytes────────────┐ │ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │ a/<@];!~p{jTj={) │ └──────────────────────────────────────┴──────────────────┘ @@ -101,7 +101,8 @@ UUIDNumToString(FixedString(16)) SELECT 'a/<@];!~p{jTj={)' AS bytes, UUIDNumToString(toFixedString(bytes, 16)) AS uuid - +``` +```response ┌─bytes────────────┬─uuid─────────────────────────────────┐ │ a/<@];!~p{jTj={) │ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │ └──────────────────┴──────────────────────────────────────┘ diff --git a/docs/zh/sql-reference/operators/index.md b/docs/zh/sql-reference/operators/index.md index 8544f9f5a91..353386903c4 100644 --- a/docs/zh/sql-reference/operators/index.md +++ b/docs/zh/sql-reference/operators/index.md @@ -143,7 +143,7 @@ SELECT FROM test.Orders; ``` -``` text +``` response ┌─OrderYear─┬─OrderMonth─┬─OrderDay─┬─OrderHour─┬─OrderMinute─┬─OrderSecond─┐ │ 2008 │ 10 │ 11 │ 13 │ 23 │ 44 │ └───────────┴────────────┴──────────┴───────────┴─────────────┴─────────────┘ @@ -161,7 +161,7 @@ FROM test.Orders; SELECT now() AS current_date_time, current_date_time + INTERVAL 4 DAY + INTERVAL 3 HOUR ``` -``` text +``` response ┌───current_date_time─┬─plus(plus(now(), toIntervalDay(4)), toIntervalHour(3))─┐ │ 2019-10-23 11:16:28 │ 2019-10-27 14:16:28 │ └─────────────────────┴────────────────────────────────────────────────────────┘ @@ -230,7 +230,7 @@ ClickHouse 支持 `IS NULL` 和 `IS NOT NULL` 。 SELECT x+100 FROM t_null WHERE y IS NULL ``` -``` text +``` response ┌─plus(x, 100)─┐ │ 101 │ └──────────────┘ @@ -249,7 +249,7 @@ SELECT x+100 FROM t_null WHERE y IS NULL SELECT * FROM t_null WHERE y IS NOT NULL ``` -``` text +``` response ┌─x─┬─y─┐ │ 2 │ 3 │ └───┴───┘ diff --git a/docs/zh/sql-reference/table-functions/format.md b/docs/zh/sql-reference/table-functions/format.md index bc017ccc3c7..f84d047e599 100644 --- a/docs/zh/sql-reference/table-functions/format.md +++ b/docs/zh/sql-reference/table-functions/format.md @@ -38,7 +38,7 @@ $$) **Result:** -```text +```response ┌───b─┬─a─────┐ │ 111 │ Hello │ │ 123 │ World │ @@ -60,7 +60,7 @@ $$) **Result:** -```text +```response ┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ │ b │ Nullable(Float64) │ │ │ │ │ │ │ a │ Nullable(String) │ │ │ │ │ │ From 7168c217b0bd26ff47fd13a8f0e8f8fcc01b2839 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 11 Jan 2023 10:08:11 -0500 Subject: [PATCH 205/262] switch text to response for query blocks --- docs/en/interfaces/formats.md | 28 +++++++++---------- docs/en/operations/system-tables/disks.md | 2 +- .../system-tables/merge_tree_settings.md | 2 +- docs/en/operations/system-tables/numbers.md | 2 +- .../en/operations/system-tables/numbers_mt.md | 2 +- docs/en/operations/system-tables/one.md | 2 +- docs/en/operations/system-tables/processes.md | 2 +- .../sql-reference/table-functions/format.md | 4 +-- .../sql-reference/table-functions/format.md | 4 +-- 9 files changed, 24 insertions(+), 24 deletions(-) diff --git a/docs/en/interfaces/formats.md 
b/docs/en/interfaces/formats.md index 75ef0ac3cc0..e28c486afca 100644 --- a/docs/en/interfaces/formats.md +++ b/docs/en/interfaces/formats.md @@ -101,7 +101,7 @@ The `TabSeparated` format supports outputting total values (when using WITH TOTA SELECT EventDate, count() AS c FROM test.hits GROUP BY EventDate WITH TOTALS ORDER BY EventDate FORMAT TabSeparated ``` -``` text +``` response 2014-03-17 1406958 2014-03-18 1383658 2014-03-19 1405797 @@ -177,7 +177,7 @@ INSERT INTO nestedt Values ( 1, [1], ['a']) SELECT * FROM nestedt FORMAT TSV ``` -``` text +``` response 1 [1] ['a'] ``` @@ -761,7 +761,7 @@ SELECT * FROM json_as_string; Result: -``` text +``` response ┌─json──────────────────────────────┐ │ {"foo":{"bar":{"x":"y"},"baz":1}} │ │ {} │ @@ -782,7 +782,7 @@ SELECT * FROM json_square_brackets; Result: -```text +```response ┌─field──────────────────────┐ │ {"id": 1, "name": "name1"} │ │ {"id": 2, "name": "name2"} │ @@ -1118,7 +1118,7 @@ When inserting data with `input_format_defaults_for_omitted_fields = 1`, ClickHo Consider the `UserActivity` table as an example: -``` text +``` response ┌──────────────UserID─┬─PageViews─┬─Duration─┬─Sign─┐ │ 4324182021466249494 │ 5 │ 146 │ -1 │ │ 4324182021466249494 │ 6 │ 185 │ 1 │ @@ -1127,7 +1127,7 @@ Consider the `UserActivity` table as an example: The query `SELECT * FROM UserActivity FORMAT JSONEachRow` returns: -``` text +``` response {"UserID":"4324182021466249494","PageViews":5,"Duration":146,"Sign":-1} {"UserID":"4324182021466249494","PageViews":6,"Duration":185,"Sign":1} ``` @@ -1171,7 +1171,7 @@ Without this setting, ClickHouse throws an exception. SELECT name, value FROM system.settings WHERE name = 'input_format_import_nested_json' ``` -``` text +``` response ┌─name────────────────────────────┬─value─┐ │ input_format_import_nested_json │ 0 │ └─────────────────────────────────┴───────┘ @@ -1181,7 +1181,7 @@ SELECT name, value FROM system.settings WHERE name = 'input_format_import_nested INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n": {"s": ["abc", "def"], "i": [1, 23]}} ``` -``` text +``` response Code: 117. DB::Exception: Unknown field found while parsing JSONEachRow format: n: (at row 1) ``` @@ -1191,7 +1191,7 @@ INSERT INTO json_each_row_nested FORMAT JSONEachRow {"n": {"s": ["abc", "def"], SELECT * FROM json_each_row_nested ``` -``` text +``` response ┌─n.s───────────┬─n.i────┐ │ ['abc','def'] │ [1,23] │ └───────────────┴────────┘ @@ -1300,7 +1300,7 @@ Example (shown for the [PrettyCompact](#prettycompact) format): SELECT * FROM t_null ``` -``` text +``` response ┌─x─┬────y─┐ │ 1 │ ᴺᵁᴸᴸ │ └───┴──────┘ @@ -1312,7 +1312,7 @@ Rows are not escaped in Pretty\* formats. 
Example is shown for the [PrettyCompac SELECT 'String with \'quotes\' and \t character' AS Escaping_test ``` -``` text +``` response ┌─Escaping_test────────────────────────┐ │ String with 'quotes' and character │ └──────────────────────────────────────┘ @@ -1327,7 +1327,7 @@ The Pretty format supports outputting total values (when using WITH TOTALS) and SELECT EventDate, count() AS c FROM test.hits GROUP BY EventDate WITH TOTALS ORDER BY EventDate FORMAT PrettyCompact ``` -``` text +``` response ┌──EventDate─┬───────c─┐ │ 2014-03-17 │ 1406958 │ │ 2014-03-18 │ 1383658 │ @@ -1488,7 +1488,7 @@ Example: SELECT * FROM t_null FORMAT Vertical ``` -``` text +``` response Row 1: ────── x: 1 @@ -1501,7 +1501,7 @@ Rows are not escaped in Vertical format: SELECT 'string with \'quotes\' and \t with some special \n characters' AS test FORMAT Vertical ``` -``` text +``` response Row 1: ────── test: string with 'quotes' and with some special diff --git a/docs/en/operations/system-tables/disks.md b/docs/en/operations/system-tables/disks.md index d492e42c2ec..a079f3338d2 100644 --- a/docs/en/operations/system-tables/disks.md +++ b/docs/en/operations/system-tables/disks.md @@ -20,7 +20,7 @@ Columns: SELECT * FROM system.disks; ``` -```text +```response ┌─name────┬─path─────────────────┬───free_space─┬──total_space─┬─keep_free_space─┐ │ default │ /var/lib/clickhouse/ │ 276392587264 │ 490652508160 │ 0 │ └─────────┴──────────────────────┴──────────────┴──────────────┴─────────────────┘ diff --git a/docs/en/operations/system-tables/merge_tree_settings.md b/docs/en/operations/system-tables/merge_tree_settings.md index a05d4abccda..672c79e335b 100644 --- a/docs/en/operations/system-tables/merge_tree_settings.md +++ b/docs/en/operations/system-tables/merge_tree_settings.md @@ -18,7 +18,7 @@ Columns: SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical; ``` -```text +```response Row 1: ────── name: index_granularity diff --git a/docs/en/operations/system-tables/numbers.md b/docs/en/operations/system-tables/numbers.md index 0dc001ebb6f..68efeb2ee38 100644 --- a/docs/en/operations/system-tables/numbers.md +++ b/docs/en/operations/system-tables/numbers.md @@ -15,7 +15,7 @@ Reads from this table are not parallelized. SELECT * FROM system.numbers LIMIT 10; ``` -```text +```response ┌─number─┐ │ 0 │ │ 1 │ diff --git a/docs/en/operations/system-tables/numbers_mt.md b/docs/en/operations/system-tables/numbers_mt.md index cc461b29ad0..653a8d43cc9 100644 --- a/docs/en/operations/system-tables/numbers_mt.md +++ b/docs/en/operations/system-tables/numbers_mt.md @@ -13,7 +13,7 @@ Used for tests. SELECT * FROM system.numbers_mt LIMIT 10; ``` -```text +```response ┌─number─┐ │ 0 │ │ 1 │ diff --git a/docs/en/operations/system-tables/one.md b/docs/en/operations/system-tables/one.md index ee2907a6d6d..6d3519f0069 100644 --- a/docs/en/operations/system-tables/one.md +++ b/docs/en/operations/system-tables/one.md @@ -15,7 +15,7 @@ This is similar to the `DUAL` table found in other DBMSs. 
SELECT * FROM system.one LIMIT 10; ``` -```text +```response ┌─dummy─┐ │ 0 │ └───────┘ diff --git a/docs/en/operations/system-tables/processes.md b/docs/en/operations/system-tables/processes.md index 95c46f551ef..76219813ad7 100644 --- a/docs/en/operations/system-tables/processes.md +++ b/docs/en/operations/system-tables/processes.md @@ -23,7 +23,7 @@ Columns: SELECT * FROM system.processes LIMIT 10 FORMAT Vertical; ``` -```text +```response Row 1: ────── is_initial_query: 1 diff --git a/docs/en/sql-reference/table-functions/format.md b/docs/en/sql-reference/table-functions/format.md index 4a0ee58d758..3af48249e3c 100644 --- a/docs/en/sql-reference/table-functions/format.md +++ b/docs/en/sql-reference/table-functions/format.md @@ -38,7 +38,7 @@ $$) **Result:** -```text +```response ┌───b─┬─a─────┐ │ 111 │ Hello │ │ 123 │ World │ @@ -60,7 +60,7 @@ $$) **Result:** -```text +```response ┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ │ b │ Nullable(Float64) │ │ │ │ │ │ │ a │ Nullable(String) │ │ │ │ │ │ diff --git a/docs/ru/sql-reference/table-functions/format.md b/docs/ru/sql-reference/table-functions/format.md index a91b4ca2b1e..204658914e0 100644 --- a/docs/ru/sql-reference/table-functions/format.md +++ b/docs/ru/sql-reference/table-functions/format.md @@ -38,7 +38,7 @@ $$) **Result:** -```text +```response ┌───b─┬─a─────┐ │ 111 │ Hello │ │ 123 │ World │ @@ -60,7 +60,7 @@ $$) **Result:** -```text +```response ┌─name─┬─type──────────────┬─default_type─┬─default_expression─┬─comment─┬─codec_expression─┬─ttl_expression─┐ │ b │ Nullable(Float64) │ │ │ │ │ │ │ a │ Nullable(String) │ │ │ │ │ │ From 806dd1357ca44bc3995a4af80039567e70aefca2 Mon Sep 17 00:00:00 2001 From: DanRoscigno Date: Wed, 11 Jan 2023 10:13:31 -0500 Subject: [PATCH 206/262] switch text to response for query blocks --- docs/zh/operations/system-tables/merge_tree_settings.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/zh/operations/system-tables/merge_tree_settings.md b/docs/zh/operations/system-tables/merge_tree_settings.md index c2bdcd14d24..3118d6b7530 100644 --- a/docs/zh/operations/system-tables/merge_tree_settings.md +++ b/docs/zh/operations/system-tables/merge_tree_settings.md @@ -19,7 +19,7 @@ slug: /zh/operations/system-tables/merge_tree_settings SELECT * FROM system.merge_tree_settings LIMIT 4 FORMAT Vertical; ``` -```text +```response Row 1: ────── name: index_granularity From 807e84da98fea573d90041555d9dcefc182bd454 Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 11 Jan 2023 15:44:21 +0100 Subject: [PATCH 207/262] Delete unused website directory --- utils/check-style/check-style | 10 +++++----- website/README.md | 1 - website/data/.gitkeep | 1 - 3 files changed, 5 insertions(+), 7 deletions(-) delete mode 100644 website/README.md delete mode 100644 website/data/.gitkeep diff --git a/utils/check-style/check-style b/utils/check-style/check-style index b5e1a4748a5..5c36d85fc74 100755 --- a/utils/check-style/check-style +++ b/utils/check-style/check-style @@ -252,12 +252,12 @@ find $ROOT_PATH/{src,programs,utils} -name '*.h' | while read file; do [[ $(head -n1 $file) != '#pragma once' ]] && echo "File $file must have '#pragma once' in first line"; done # Check for executable bit on non-executable files -find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} '(' -name '*.cpp' -or -name '*.h' -or -name '*.sql' -or -name '*.j2' -or -name '*.xml' -or -name '*.reference' -or -name '*.txt' -or -name '*.md' ')' -and -executable | grep -P '.' && echo "These files should not be executable." +find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} '(' -name '*.cpp' -or -name '*.h' -or -name '*.sql' -or -name '*.j2' -or -name '*.xml' -or -name '*.reference' -or -name '*.txt' -or -name '*.md' ')' -and -executable | grep -P '.' && echo "These files should not be executable." # Check for BOM -find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xEF\xBB\xBF' | grep -P '.' && echo "Files should not have UTF-8 BOM" -find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFF\xFE' | grep -P '.' && echo "Files should not have UTF-16LE BOM" -find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFE\xFF' | grep -P '.' && echo "Files should not have UTF-16BE BOM" +find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xEF\xBB\xBF' | grep -P '.' && echo "Files should not have UTF-8 BOM" +find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFF\xFE' | grep -P '.' && echo "Files should not have UTF-16LE BOM" +find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -l -F $'\xFE\xFF' | grep -P '.' && echo "Files should not have UTF-16BE BOM" # Too many exclamation marks find $ROOT_PATH/{src,base,programs,utils} -name '*.h' -or -name '*.cpp' | @@ -336,7 +336,7 @@ for test_case in "${expect_tests[@]}"; do done # Conflict markers -find $ROOT_PATH/{src,base,programs,utils,tests,docs,website,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | +find $ROOT_PATH/{src,base,programs,utils,tests,docs,cmake} -name '*.md' -or -name '*.cpp' -or -name '*.h' | xargs grep -P '^(<<<<<<<|=======|>>>>>>>)$' | grep -P '.' && echo "Conflict markers are found in files" # Forbid subprocess.check_call(...) 
in integration tests because it does not provide enough information on errors diff --git a/website/README.md b/website/README.md deleted file mode 100644 index 67937044ba0..00000000000 --- a/website/README.md +++ /dev/null @@ -1 +0,0 @@ -# This is not a website diff --git a/website/data/.gitkeep b/website/data/.gitkeep deleted file mode 100644 index 0d540696911..00000000000 --- a/website/data/.gitkeep +++ /dev/null @@ -1 +0,0 @@ -# This directory will contain miscellaneous data files on ClickHouse website \ No newline at end of file From 6499e8e687182f2bebb3f74c7321400edd9964d8 Mon Sep 17 00:00:00 2001 From: kssenii Date: Wed, 11 Jan 2023 16:14:32 +0100 Subject: [PATCH 208/262] Calculate only required column in system.detached_parts --- .../System/StorageSystemDetachedParts.cpp | 56 ++++++++++++++----- 1 file changed, 41 insertions(+), 15 deletions(-) diff --git a/src/Storages/System/StorageSystemDetachedParts.cpp b/src/Storages/System/StorageSystemDetachedParts.cpp index 1828c5932ad..01c7b7d69e4 100644 --- a/src/Storages/System/StorageSystemDetachedParts.cpp +++ b/src/Storages/System/StorageSystemDetachedParts.cpp @@ -60,7 +60,7 @@ static UInt64 calculateTotalSizeOnDisk(const DiskPtr & disk, const String & from } Pipe StorageSystemDetachedParts::read( - const Names & /* column_names */, + const Names & column_names, const StorageSnapshotPtr & storage_snapshot, SelectQueryInfo & query_info, ContextPtr context, @@ -68,37 +68,63 @@ Pipe StorageSystemDetachedParts::read( const size_t /*max_block_size*/, const size_t /*num_streams*/) { + storage_snapshot->check(column_names); + StoragesInfoStream stream(query_info, context); /// Create the result. Block block = storage_snapshot->metadata->getSampleBlock(); - MutableColumns new_columns = block.cloneEmptyColumns(); + NameSet names_set(column_names.begin(), column_names.end()); + std::vector columns_mask(block.columns()); + Block header; + + for (size_t i = 0; i < block.columns(); ++i) + { + if (names_set.contains(block.getByPosition(i).name)) + { + columns_mask[i] = 1; + header.insert(block.getByPosition(i)); + } + } + + MutableColumns new_columns = header.cloneEmptyColumns(); while (StoragesInfo info = stream.next()) { const auto parts = info.data->getDetachedParts(); for (const auto & p : parts) { - size_t i = 0; + size_t src_index = 0, res_index = 0; String detached_part_path = fs::path(MergeTreeData::DETACHED_DIR_NAME) / p.dir_name; - new_columns[i++]->insert(info.database); - new_columns[i++]->insert(info.table); - new_columns[i++]->insert(p.valid_name ? p.partition_id : Field()); - new_columns[i++]->insert(p.dir_name); - new_columns[i++]->insert(calculateTotalSizeOnDisk(p.disk, fs::path(info.data->getRelativeDataPath()) / detached_part_path)); - new_columns[i++]->insert(p.disk->getName()); - new_columns[i++]->insert((fs::path(info.data->getFullPathOnDisk(p.disk)) / detached_part_path).string()); - new_columns[i++]->insert(p.valid_name ? p.prefix : Field()); - new_columns[i++]->insert(p.valid_name ? p.min_block : Field()); - new_columns[i++]->insert(p.valid_name ? p.max_block : Field()); - new_columns[i++]->insert(p.valid_name ? p.level : Field()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(info.database); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(info.table); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? 
p.partition_id : Field()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.dir_name); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(calculateTotalSizeOnDisk(p.disk, fs::path(info.data->getRelativeDataPath()) / detached_part_path)); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.disk->getName()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert((fs::path(info.data->getFullPathOnDisk(p.disk)) / detached_part_path).string()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? p.prefix : Field()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? p.min_block : Field()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? p.max_block : Field()); + if (columns_mask[src_index++]) + new_columns[res_index++]->insert(p.valid_name ? p.level : Field()); } } UInt64 num_rows = new_columns.at(0)->size(); Chunk chunk(std::move(new_columns), num_rows); - return Pipe(std::make_shared(std::move(block), std::move(chunk))); + return Pipe(std::make_shared(std::move(header), std::move(chunk))); } } From 3e9d142066c6591c83a4c73be36f8c3958d70c99 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 11 Jan 2023 16:17:21 +0000 Subject: [PATCH 209/262] Fix wrong column nullability in StoreageJoin --- src/Interpreters/HashJoin.cpp | 12 ++++++------ src/Interpreters/TableJoin.cpp | 10 ---------- src/Storages/StorageJoin.cpp | 16 ++++++++++++++-- src/Storages/StorageJoin.h | 6 +++--- 4 files changed, 23 insertions(+), 21 deletions(-) diff --git a/src/Interpreters/HashJoin.cpp b/src/Interpreters/HashJoin.cpp index dc041094381..5ff4f9beb05 100644 --- a/src/Interpreters/HashJoin.cpp +++ b/src/Interpreters/HashJoin.cpp @@ -886,20 +886,20 @@ public: const auto & lhs = lhs_block.getByPosition(i); const auto & rhs = rhs_block.getByPosition(i); if (lhs.name != rhs.name) - throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}]", - lhs_block.dumpStructure(), rhs_block.dumpStructure()); + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}] ({} != {})", + lhs_block.dumpStructure(), rhs_block.dumpStructure(), lhs.name, rhs.name); const auto & ltype = recursiveRemoveLowCardinality(lhs.type); const auto & rtype = recursiveRemoveLowCardinality(rhs.type); if (!ltype->equals(*rtype)) - throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}]", - lhs_block.dumpStructure(), rhs_block.dumpStructure()); + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}] ({} != {})", + lhs_block.dumpStructure(), rhs_block.dumpStructure(), ltype->getName(), rtype->getName()); const auto & lcol = recursiveRemoveLowCardinality(lhs.column); const auto & rcol = recursiveRemoveLowCardinality(rhs.column); if (lcol->getDataType() != rcol->getDataType()) - throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}]", - lhs_block.dumpStructure(), rhs_block.dumpStructure()); + throw DB::Exception(ErrorCodes::LOGICAL_ERROR, "Block structure mismatch: [{}] != [{}] ({} != {})", + lhs_block.dumpStructure(), rhs_block.dumpStructure(), lcol->getDataType(), rcol->getDataType()); } } diff --git a/src/Interpreters/TableJoin.cpp b/src/Interpreters/TableJoin.cpp index aa4f821657f..78218ac59a5 100644 --- a/src/Interpreters/TableJoin.cpp +++ b/src/Interpreters/TableJoin.cpp @@ -458,16 +458,6 @@ TableJoin::createConvertingActions( 
LOG_DEBUG(&Poco::Logger::get("TableJoin"), "{} JOIN converting actions: empty", side); return; } - auto format_cols = [](const auto & cols) -> std::string - { - std::vector str_cols; - str_cols.reserve(cols.size()); - for (const auto & col : cols) - str_cols.push_back(fmt::format("'{}': {}", col.name, col.type->getName())); - return fmt::format("[{}]", fmt::join(str_cols, ", ")); - }; - LOG_DEBUG(&Poco::Logger::get("TableJoin"), "{} JOIN converting actions: {} -> {}", - side, format_cols(dag->getRequiredColumns()), format_cols(dag->getResultColumns())); }; log_actions("Left", left_converting_actions); log_actions("Right", right_converting_actions); diff --git a/src/Storages/StorageJoin.cpp b/src/Storages/StorageJoin.cpp index 55f3b889f22..320f05e038f 100644 --- a/src/Storages/StorageJoin.cpp +++ b/src/Storages/StorageJoin.cpp @@ -229,11 +229,13 @@ HashJoinPtr StorageJoin::getJoinLocked(std::shared_ptr analyzed_join, return join_clone; } - void StorageJoin::insertBlock(const Block & block, ContextPtr context) { + Block block_to_insert = block; + convertRightBlock(block_to_insert); + TableLockHolder holder = tryLockTimedWithContext(rwlock, RWLockImpl::Write, context); - join->addJoinedBlock(block, true); + join->addJoinedBlock(block_to_insert, true); } size_t StorageJoin::getSize(ContextPtr context) const @@ -265,6 +267,16 @@ ColumnWithTypeAndName StorageJoin::joinGet(const Block & block, const Block & bl return join->joinGet(block, block_with_columns_to_add); } +void StorageJoin::convertRightBlock(Block & block) const +{ + bool need_covert = use_nulls && isLeftOrFull(kind); + if (!need_covert) + return; + + for (auto & col : block) + JoinCommon::convertColumnToNullable(col); +} + void registerStorageJoin(StorageFactory & factory) { auto creator_fn = [](const StorageFactory::Arguments & args) diff --git a/src/Storages/StorageJoin.h b/src/Storages/StorageJoin.h index 3d7a9d9b5ec..96afd442c72 100644 --- a/src/Storages/StorageJoin.h +++ b/src/Storages/StorageJoin.h @@ -77,9 +77,7 @@ public: { auto metadata_snapshot = getInMemoryMetadataPtr(); Block block = metadata_snapshot->getSampleBlock(); - if (use_nulls && isLeftOrFull(kind)) - for (auto & col : block) - JoinCommon::convertColumnToNullable(col); + convertRightBlock(block); return block; } @@ -108,6 +106,8 @@ private: void finishInsert() override {} size_t getSize(ContextPtr context) const override; RWLockImpl::LockHolder tryLockTimedWithContext(const RWLock & lock, RWLockImpl::Type type, ContextPtr context) const; + + void convertRightBlock(Block & block) const; }; } From 1f8535ca8357e85a05faf55a7ba1553f5a0f78aa Mon Sep 17 00:00:00 2001 From: "Mikhail f. 
Shiryaev" Date: Wed, 11 Jan 2023 18:49:01 +0100 Subject: [PATCH 210/262] Restart NightlyBuilds if the runner died --- tests/ci/workflow_approve_rerun_lambda/app.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/ci/workflow_approve_rerun_lambda/app.py b/tests/ci/workflow_approve_rerun_lambda/app.py index 9fc4266d9d4..0be93e26c13 100644 --- a/tests/ci/workflow_approve_rerun_lambda/app.py +++ b/tests/ci/workflow_approve_rerun_lambda/app.py @@ -64,6 +64,7 @@ NEED_RERUN_WORKFLOWS = { "DocsCheck", "DocsReleaseChecks", "MasterCI", + "NightlyBuilds", "PullRequestCI", "ReleaseBranchCI", } From f9240a8f903f0e8e9df399520f5c0dc4a7957586 Mon Sep 17 00:00:00 2001 From: vdimir Date: Wed, 11 Jan 2023 18:47:04 +0000 Subject: [PATCH 211/262] Add 02531_storage_join_null_44940 --- .../02531_storage_join_null_44940.reference | 3 +++ .../02531_storage_join_null_44940.sql | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) create mode 100644 tests/queries/0_stateless/02531_storage_join_null_44940.reference create mode 100644 tests/queries/0_stateless/02531_storage_join_null_44940.sql diff --git a/tests/queries/0_stateless/02531_storage_join_null_44940.reference b/tests/queries/0_stateless/02531_storage_join_null_44940.reference new file mode 100644 index 00000000000..b7e40c360c0 --- /dev/null +++ b/tests/queries/0_stateless/02531_storage_join_null_44940.reference @@ -0,0 +1,3 @@ +3 \N 3 +2 2 2 +1 1 1 diff --git a/tests/queries/0_stateless/02531_storage_join_null_44940.sql b/tests/queries/0_stateless/02531_storage_join_null_44940.sql new file mode 100644 index 00000000000..136fc8bbef1 --- /dev/null +++ b/tests/queries/0_stateless/02531_storage_join_null_44940.sql @@ -0,0 +1,18 @@ + +SET allow_suspicious_low_cardinality_types = 1; + +DROP TABLE IF EXISTS t1__fuzz_8; +DROP TABLE IF EXISTS full_join__fuzz_4; + +CREATE TABLE t1__fuzz_8 (`x` LowCardinality(UInt32), `str` Nullable(Int16)) ENGINE = Memory; +INSERT INTO t1__fuzz_8 VALUES (1, 1), (2, 2); + +CREATE TABLE full_join__fuzz_4 (`x` LowCardinality(UInt32), `s` LowCardinality(String)) ENGINE = Join(`ALL`, FULL, x) SETTINGS join_use_nulls = 1; +INSERT INTO full_join__fuzz_4 VALUES (1, '1'), (2, '2'), (3, '3'); + +SET join_use_nulls = 1; + +SELECT * FROM t1__fuzz_8 FULL OUTER JOIN full_join__fuzz_4 USING (x) ORDER BY x DESC, str ASC, s ASC NULLS LAST; + +DROP TABLE IF EXISTS t1__fuzz_8; +DROP TABLE IF EXISTS full_join__fuzz_4; From 9c4ea5a16ba8e844e65e175a4dd76b8618241254 Mon Sep 17 00:00:00 2001 From: Yakov Olkhovskiy Date: Wed, 11 Jan 2023 18:51:37 +0000 Subject: [PATCH 212/262] fix part ID generation for IP types for backward compatibility --- src/Storages/MergeTree/MergeTreePartition.cpp | 9 +++++---- .../queries/0_stateless/02530_ip_part_id.reference | 2 ++ tests/queries/0_stateless/02530_ip_part_id.sql | 14 ++++++++++++++ 3 files changed, 21 insertions(+), 4 deletions(-) create mode 100644 tests/queries/0_stateless/02530_ip_part_id.reference create mode 100644 tests/queries/0_stateless/02530_ip_part_id.sql diff --git a/src/Storages/MergeTree/MergeTreePartition.cpp b/src/Storages/MergeTree/MergeTreePartition.cpp index 0fd081a8425..5d4b4853812 100644 --- a/src/Storages/MergeTree/MergeTreePartition.cpp +++ b/src/Storages/MergeTree/MergeTreePartition.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -93,9 +94,7 @@ namespace } void operator() (const IPv6 & x) const { - UInt8 type = Field::Types::IPv6; - hash.update(type); - hash.update(x); + return operator()(String(reinterpret_cast(&x), 16)); } void operator() 
(const Float64 & x) const { @@ -213,7 +212,7 @@ String MergeTreePartition::getID(const Block & partition_key_sample) const bool are_all_integral = true; for (const Field & field : value) { - if (field.getType() != Field::Types::UInt64 && field.getType() != Field::Types::Int64) + if (field.getType() != Field::Types::UInt64 && field.getType() != Field::Types::Int64 && field.getType() != Field::Types::IPv4) { are_all_integral = false; break; @@ -232,6 +231,8 @@ String MergeTreePartition::getID(const Block & partition_key_sample) const if (typeid_cast(partition_key_sample.getByPosition(i).type.get())) result += toString(DateLUT::instance().toNumYYYYMMDD(DayNum(value[i].safeGet()))); + else if (typeid_cast(partition_key_sample.getByPosition(i).type.get())) + result += toString(value[i].get().toUnderType()); else result += applyVisitor(to_string_visitor, value[i]); diff --git a/tests/queries/0_stateless/02530_ip_part_id.reference b/tests/queries/0_stateless/02530_ip_part_id.reference new file mode 100644 index 00000000000..a13e1bafdaa --- /dev/null +++ b/tests/queries/0_stateless/02530_ip_part_id.reference @@ -0,0 +1,2 @@ +1.2.3.4 ::ffff:1.2.3.4 16909060_1_1_0 +1.2.3.4 ::ffff:1.2.3.4 1334d7cc23ffb5a5c0262304b3313426_1_1_0 diff --git a/tests/queries/0_stateless/02530_ip_part_id.sql b/tests/queries/0_stateless/02530_ip_part_id.sql new file mode 100644 index 00000000000..bf704eaa1c2 --- /dev/null +++ b/tests/queries/0_stateless/02530_ip_part_id.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS ip_part_test; + +CREATE TABLE ip_part_test ( ipv4 IPv4, ipv6 IPv6 ) ENGINE = MergeTree PARTITION BY ipv4 ORDER BY ipv4 AS SELECT '1.2.3.4', '::ffff:1.2.3.4'; + +SELECT *, _part FROM ip_part_test; + +DROP TABLE IF EXISTS ip_part_test; + +CREATE TABLE ip_part_test ( ipv4 IPv4, ipv6 IPv6 ) ENGINE = MergeTree PARTITION BY ipv6 ORDER BY ipv6 AS SELECT '1.2.3.4', '::ffff:1.2.3.4'; + +SELECT *, _part FROM ip_part_test; + +DROP TABLE IF EXISTS ip_part_test; + From 8b9d99e2e26b285f0c4545b44b09eecbae62afd8 Mon Sep 17 00:00:00 2001 From: rfraposa Date: Wed, 11 Jan 2023 11:51:53 -0700 Subject: [PATCH 213/262] Update syntax.md --- docs/en/sql-reference/syntax.md | 44 +++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index 362a2cf47bb..368b7d357d0 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -128,6 +128,50 @@ Result: └────────────────────────────┘ ``` +## Defining and Using Query Parameters + +Query parameters can be defined using the syntax `param_name=value`, where `name` is the name of the parameter. Parameters can by defined using the `SET` command, or from the command-line using `--param`. + +To retrieve a query parameter, you specify the name of the parameter along with its data type surrounded by curly braces: + +```sql +{name:datatype} +``` + +For example, the following SQL defines parameters named `a`, `b`, `c` and `d` - each of a different data type: + +```sql +SET param_a = 13, param_b = 'str'; +SET param_c = '2022-08-04 18:30:53'; +SET param_d = '{\'10\': [11, 12], \'13\': [14, 15]}'; + +SELECT + {a: UInt32}, + {b: String}, + {c: DateTime}, + {d: Map(String, Array(UInt8))}; +``` + +If you are using `clickhouse-client`, the parameters are specified as `--param_name=value`. 
For example, the following parameter has the name `message` and it is being retrieved as a `String`: + +```sql +clickhouse-client --param_message='hello' --query="SELECT {message: String}" +``` + +Result: + +```response +hello +``` + +If the query parameter represents the name of a database, table, function or other identifier, use `Identifier` for its type. For example, the following query returns rows from a table named `uk_price_paid`: + +```sql +SET param_mytablename = "uk_price_paid"; +SELECT * FROM {mytablename:Identifier}; +``` + + ## Functions Function calls are written like an identifier with a list of arguments (possibly empty) in round brackets. In contrast to standard SQL, the brackets are required, even for an empty argument list. Example: `now()`. From a389180f42a0b50ab00c99daf0b1d6d77d676690 Mon Sep 17 00:00:00 2001 From: Rich Raposa Date: Wed, 11 Jan 2023 12:05:35 -0700 Subject: [PATCH 214/262] Update syntax.md --- docs/en/sql-reference/syntax.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index 368b7d357d0..ea926d1d8c7 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -143,7 +143,7 @@ For example, the following SQL defines parameters named `a`, `b`, `c` and `d` - ```sql SET param_a = 13, param_b = 'str'; SET param_c = '2022-08-04 18:30:53'; -SET param_d = '{\'10\': [11, 12], \'13\': [14, 15]}'; +SET param_d = {'10': [11, 12], '13': [14, 15]}'; SELECT {a: UInt32}, From f8ac49bb86f69f3ffe3dac5abf03ba517383a0f9 Mon Sep 17 00:00:00 2001 From: Rich Raposa Date: Wed, 11 Jan 2023 12:09:23 -0700 Subject: [PATCH 215/262] Update syntax.md --- docs/en/sql-reference/syntax.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/en/sql-reference/syntax.md b/docs/en/sql-reference/syntax.md index ea926d1d8c7..045c9777ad7 100644 --- a/docs/en/sql-reference/syntax.md +++ b/docs/en/sql-reference/syntax.md @@ -152,6 +152,12 @@ SELECT {d: Map(String, Array(UInt8))}; ``` +Result: + +```response +13 str 2022-08-04 18:30:53 {'10':[11,12],'13':[14,15]} +``` + If you are using `clickhouse-client`, the parameters are specified as `--param_name=value`. For example, the following parameter has the name `message` and it is being retrieved as a `String`: ```sql From 1b94c839d5efc9250bdf298f2bfd0847d8fccf00 Mon Sep 17 00:00:00 2001 From: Sergei Trifonov Date: Wed, 11 Jan 2023 21:16:22 +0100 Subject: [PATCH 216/262] Add docs for `SYSTEM RELOAD USERS` --- docs/en/sql-reference/statements/system.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/en/sql-reference/statements/system.md b/docs/en/sql-reference/statements/system.md index a806cbd91a5..a82d1447453 100644 --- a/docs/en/sql-reference/statements/system.md +++ b/docs/en/sql-reference/statements/system.md @@ -111,6 +111,10 @@ This will also create system tables even if message queue is empty. Reloads ClickHouse configuration. Used when configuration is stored in ZooKeeper. +## RELOAD USERS + +Reloads all access storages, including: users.xml, local disk access storage, replicated (in ZooKeeper) access storage. Note that `SYSTEM RELOAD CONFIG` will only reload users.xml access storage. 
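A minimal usage sketch for the `RELOAD USERS` statement documented above, assuming the standard `SYSTEM ...` invocation form used elsewhere on that documentation page:

```sql
-- Re-reads every access storage: users.xml, the local disk access storage,
-- and the replicated (ZooKeeper) access storage.
SYSTEM RELOAD USERS;
```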
+ ## SHUTDOWN Normally shuts down ClickHouse (like `service clickhouse-server stop` / `kill {$pid_clickhouse-server}`) From ca367a6a64e9f821f2d6164b97ec68318a155c25 Mon Sep 17 00:00:00 2001 From: Ilya Yatsishin <2159081+qoega@users.noreply.github.com> Date: Wed, 11 Jan 2023 22:24:01 +0100 Subject: [PATCH 217/262] Add CACHE_INVALIDATOR for sqlancer builds --- docker/test/sqlancer/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/test/sqlancer/Dockerfile b/docker/test/sqlancer/Dockerfile index 2ebc61e35a9..a2d84c7689f 100644 --- a/docker/test/sqlancer/Dockerfile +++ b/docker/test/sqlancer/Dockerfile @@ -6,6 +6,7 @@ ARG apt_archive="http://archive.ubuntu.com" RUN sed -i "s|http://archive.ubuntu.com|$apt_archive|g" /etc/apt/sources.list RUN apt-get update --yes && env DEBIAN_FRONTEND=noninteractive apt-get install wget unzip git default-jdk maven python3 --yes --no-install-recommends +ARG CACHE_INVALIDATOR=0 RUN wget https://github.com/sqlancer/sqlancer/archive/master.zip -O /sqlancer.zip RUN mkdir /sqlancer && \ cd /sqlancer && \ From 34871e6934e3b92f1ecdae82c50c092c874c15ac Mon Sep 17 00:00:00 2001 From: Alexey Milovidov Date: Thu, 12 Jan 2023 00:26:03 +0300 Subject: [PATCH 218/262] Add a checkbox for documentation --- .github/PULL_REQUEST_TEMPLATE.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index d3bbefe1d65..5d09d3a9ef3 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -19,6 +19,9 @@ tests/ci/run_check.py ... ### Documentation entry for user-facing changes + +- [ ] Documentation is written (mandatory for new features) +
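A minimal sketch of how the `CACHE_INVALIDATOR` build argument introduced in `docker/test/sqlancer/Dockerfile` (PATCH 217 above) is typically exercised. Only the `--build-arg` flag, the argument name and the Dockerfile path come from that change; the image tag is a placeholder.

```bash
# Supplying a changing value for CACHE_INVALIDATOR invalidates the Docker layer cache
# for the RUN instructions that follow the ARG line, so the sqlancer sources are
# downloaded again instead of being reused from a stale cached layer.
# (the image tag below is a placeholder)
docker build \
  --build-arg CACHE_INVALIDATOR="$(date +%s)" \
  --tag sqlancer-test \
  docker/test/sqlancer
```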