From 275d6b2fcc0598c74549f27fc62d673323cefa18 Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 10 Jan 2019 21:58:55 +0300 Subject: [PATCH 01/17] some QueryNormalizer refactoring --- .../Interpreters/DatabaseAndTableWithAlias.h | 1 + .../PredicateExpressionsOptimizer.cpp | 2 +- dbms/src/Interpreters/QueryNormalizer.cpp | 62 ++++++++++++------- dbms/src/Interpreters/QueryNormalizer.h | 14 ++--- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 31 ++++++---- 5 files changed, 65 insertions(+), 45 deletions(-) diff --git a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h index ad7c1f2f8d4..8076deb5ee9 100644 --- a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h +++ b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h @@ -23,6 +23,7 @@ struct DatabaseAndTableWithAlias String table; String alias; + DatabaseAndTableWithAlias() = default; DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database = ""); DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database); diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index 408b827adae..ea50ac1d6f4 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -321,7 +321,7 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast TranslateQualifiedNamesVisitor(qn_visitor_data).visit(ast); QueryAliasesVisitor::Data query_aliases_data{aliases}; QueryAliasesVisitor(query_aliases_data).visit(ast); - QueryNormalizer(ast, aliases, settings, {}, {}).perform(); + QueryNormalizer(ast, aliases, settings).perform(); for (const auto & projection_column : select_query->select_expression_list->children) { diff --git a/dbms/src/Interpreters/QueryNormalizer.cpp b/dbms/src/Interpreters/QueryNormalizer.cpp index eff50bfb235..37d7b9221f6 100644 --- a/dbms/src/Interpreters/QueryNormalizer.cpp +++ b/dbms/src/Interpreters/QueryNormalizer.cpp @@ -21,13 +21,10 @@ namespace ErrorCodes } -QueryNormalizer::QueryNormalizer(ASTPtr & query, const QueryNormalizer::Aliases & aliases, - ExtractedSettings && settings_, const Names & all_column_names, - const TableNamesAndColumnNames & table_names_and_column_names) - : query(query), aliases(aliases), settings(settings_), all_column_names(all_column_names), - table_names_and_column_names(table_names_and_column_names) -{ -} +QueryNormalizer::QueryNormalizer(ASTPtr & query_, const QueryNormalizer::Aliases & aliases_, ExtractedSettings && settings_, + std::vector && tables_with_columns_) + : query(query_), aliases(aliases_), settings(settings_), tables_with_columns(tables_with_columns_) +{} void QueryNormalizer::perform() { @@ -138,23 +135,42 @@ void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOf else if (ASTExpressionList * expr_list = typeid_cast(ast.get())) { /// Replace *, alias.*, database.table.* with a list of columns. 
- ASTs & asts = expr_list->children; - for (ssize_t expr_idx = asts.size() - 1; expr_idx >= 0; --expr_idx) - { - if (typeid_cast(asts[expr_idx].get()) && !all_column_names.empty()) - { - asts.erase(asts.begin() + expr_idx); - for (size_t column_idx = 0; column_idx < all_column_names.size(); ++column_idx) - asts.insert(asts.begin() + column_idx + expr_idx, std::make_shared(all_column_names[column_idx])); - } - else if (typeid_cast(asts[expr_idx].get()) && !table_names_and_column_names.empty()) + ASTs old_children; + if (processAsterisks()) + { + bool has_asterisk = false; + for (const auto & child : expr_list->children) + { + if (typeid_cast(child.get()) || + typeid_cast(child.get())) + { + has_asterisk = true; + break; + } + } + + if (has_asterisk) + { + old_children.swap(expr_list->children); + expr_list->children.reserve(old_children.size()); + } + } + + for (const auto & child : old_children) + { + if (typeid_cast(child.get())) + { + for (const auto & [table_name, table_columns] : tables_with_columns) + for (const auto & column_name : table_columns) + expr_list->children.emplace_back(std::make_shared(column_name)); + } + else if (const auto * qualified_asterisk = typeid_cast(child.get())) { - const ASTQualifiedAsterisk * qualified_asterisk = static_cast(asts[expr_idx].get()); const ASTIdentifier * identifier = typeid_cast(qualified_asterisk->children[0].get()); size_t num_components = identifier->children.size(); - for (const auto & [table_name, table_all_column_names] : table_names_and_column_names) + for (const auto & [table_name, table_columns] : tables_with_columns) { if ((num_components == 2 /// database.table.* && !table_name.database.empty() /// This is normal (not a temporary) table. @@ -164,14 +180,14 @@ void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOf && ((!table_name.table.empty() && identifier->name == table_name.table) /// table.* || (!table_name.alias.empty() && identifier->name == table_name.alias)))) /// alias.* { - asts.erase(asts.begin() + expr_idx); - for (size_t column_idx = 0; column_idx < table_all_column_names.size(); ++column_idx) - asts.insert(asts.begin() + column_idx + expr_idx, std::make_shared(table_all_column_names[column_idx])); - + for (const auto & column_name : table_columns) + expr_list->children.emplace_back(std::make_shared(column_name)); break; } } } + else + expr_list->children.emplace_back(child); } } else if (ASTTablesInSelectQueryElement * tables_elem = typeid_cast(ast.get())) diff --git a/dbms/src/Interpreters/QueryNormalizer.h b/dbms/src/Interpreters/QueryNormalizer.h index 376b3ba6e07..a63fde8bb73 100644 --- a/dbms/src/Interpreters/QueryNormalizer.h +++ b/dbms/src/Interpreters/QueryNormalizer.h @@ -18,10 +18,6 @@ inline bool functionIsInOrGlobalInOperator(const String & name) } -using TableNameAndColumnNames = std::pair; -using TableNamesAndColumnNames = std::vector; - - class QueryNormalizer { /// Extracts settings, mostly to show which are used and which are not. 
@@ -41,9 +37,10 @@ class QueryNormalizer public: using Aliases = std::unordered_map; + using TableWithColumnNames = std::pair; - QueryNormalizer(ASTPtr & query, const Aliases & aliases, ExtractedSettings && settings, const Names & all_columns_name, - const TableNamesAndColumnNames & table_names_and_column_names); + QueryNormalizer(ASTPtr & query, const Aliases & aliases, ExtractedSettings && settings, + std::vector && tables_with_columns = {}); void perform(); @@ -54,8 +51,9 @@ private: ASTPtr & query; const Aliases & aliases; const ExtractedSettings settings; - const Names & all_column_names; - const TableNamesAndColumnNames & table_names_and_column_names; + const std::vector tables_with_columns; + + bool processAsterisks() const { return !tables_with_columns.empty(); } void performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOfASTs & current_asts, std::string current_alias, size_t level); }; diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 364cf221f35..0dc9bd3670e 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -42,9 +42,9 @@ namespace ErrorCodes extern const int INVALID_JOIN_ON_EXPRESSION; } -void removeDuplicateColumns(NamesAndTypesList & columns) +NameSet removeDuplicateColumns(NamesAndTypesList & columns) { - std::set names; + NameSet names; for (auto it = columns.begin(); it != columns.end();) { if (names.emplace(it->name).second) @@ -52,6 +52,7 @@ void removeDuplicateColumns(NamesAndTypesList & columns) else columns.erase(it++); } + return names; } namespace @@ -77,8 +78,6 @@ void collectSourceColumns(ASTSelectQuery * select_query, StoragePtr storage, Nam source_columns.insert(source_columns.end(), storage_aliases.begin(), storage_aliases.end()); } } - - removeDuplicateColumns(source_columns); } /// Translate qualified names such as db.table.column, table.column, table_alias.column to unqualified names. @@ -102,12 +101,11 @@ void normalizeTree( SyntaxAnalyzerResult & result, const Names & source_columns, const NameSet & source_columns_set, - const StoragePtr & storage, const Context & context, const ASTSelectQuery * select_query, bool asterisk_left_columns_only) { - Names all_columns_name = storage ? storage->getColumns().ordinary.getNames() : source_columns; + Names all_columns_name = source_columns; if (!asterisk_left_columns_only) { @@ -119,17 +117,20 @@ void normalizeTree( if (all_columns_name.empty()) throw Exception("An asterisk cannot be replaced with empty columns.", ErrorCodes::LOGICAL_ERROR); - TableNamesAndColumnNames table_names_and_column_names; + std::vector table_with_columns; if (select_query && select_query->tables && !select_query->tables->children.empty()) { std::vector tables_expression = getSelectTablesExpression(*select_query); bool first = true; + String current_database = context.getCurrentDatabase(); for (const auto * table_expression : tables_expression) { - DatabaseAndTableWithAlias table_name(*table_expression, context.getCurrentDatabase()); + DatabaseAndTableWithAlias table_name(*table_expression, current_database); NamesAndTypesList names_and_types = getNamesAndTypeListFromTableExpression(*table_expression, context); + removeDuplicateColumns(names_and_types); + if (!first) { /// For joined tables qualify duplicating names. 
@@ -140,12 +141,13 @@ void normalizeTree( first = false; - table_names_and_column_names.emplace_back(std::pair(table_name, names_and_types.getNames())); + table_with_columns.emplace_back(std::move(table_name), names_and_types.getNames()); } } + else + table_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns_name)); - auto & settings = context.getSettingsRef(); - QueryNormalizer(query, result.aliases, settings, all_columns_name, table_names_and_column_names).perform(); + QueryNormalizer(query, result.aliases, context.getSettingsRef(), std::move(table_with_columns)).perform(); } bool hasArrayJoin(const ASTPtr & ast) @@ -739,6 +741,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( result.source_columns = source_columns_; collectSourceColumns(select_query, result.storage, result.source_columns); + NameSet source_columns_set = removeDuplicateColumns(result.source_columns); const auto & settings = context.getSettingsRef(); @@ -746,7 +749,9 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( source_columns_list.reserve(result.source_columns.size()); for (const auto & type_name : result.source_columns) source_columns_list.emplace_back(type_name.name); - NameSet source_columns_set(source_columns_list.begin(), source_columns_list.end()); + + if (source_columns_set.size() != source_columns_list.size()) + throw Exception("Unexpected duplicates in source columns list.", ErrorCodes::LOGICAL_ERROR); if (select_query) { @@ -768,7 +773,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( } /// Common subexpression elimination. Rewrite rules. - normalizeTree(query, result, source_columns_list, source_columns_set, result.storage, + normalizeTree(query, result, (storage ? storage->getColumns().ordinary.getNames() : source_columns_list), source_columns_set, context, select_query, settings.asterisk_left_columns_only != 0); /// Remove unneeded columns according to 'required_result_columns'. 
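
The asterisk-expansion refactoring in this first patch changes QueryNormalizer's interface: instead of a flat list of all column names plus a separate (table, columns) vector, it now takes a single vector of TableWithColumnNames pairs and expands *, table.* and database.table.* by appending fresh ASTIdentifier children. A minimal sketch of the resulting call-site shape, assuming only the helpers visible in the diff (getSelectTablesExpression, getNamesAndTypeListFromTableExpression, removeDuplicateColumns); variable names are illustrative and the join-name qualification step is omitted:

    std::vector<QueryNormalizer::TableWithColumnNames> tables_with_columns;

    for (const auto * table_expression : getSelectTablesExpression(*select_query))
    {
        DatabaseAndTableWithAlias table_name(*table_expression, context.getCurrentDatabase());
        NamesAndTypesList names_and_types = getNamesAndTypeListFromTableExpression(*table_expression, context);
        removeDuplicateColumns(names_and_types);
        tables_with_columns.emplace_back(std::move(table_name), names_and_types.getNames());
    }

    /// The normalizer keeps the pairs only for expanding *, table.* and database.table.*.
    QueryNormalizer(query, result.aliases, context.getSettingsRef(), std::move(tables_with_columns)).perform();
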
From 0084785898118fcf288fdffc562924d394e5e1e0 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 11 Jan 2019 00:05:01 +0300 Subject: [PATCH 02/17] fix build --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 3 --- dbms/src/Interpreters/SyntaxAnalyzer.h | 2 ++ 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 4e89cabb9d5..52a6c8a5e17 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -78,9 +78,6 @@ namespace ErrorCodes extern const int EXPECTED_ALL_OR_ANY; } -/// From SyntaxAnalyzer.cpp -extern void removeDuplicateColumns(NamesAndTypesList & columns); - ExpressionAnalyzer::ExpressionAnalyzer( const ASTPtr & query_, const SyntaxAnalyzerResultPtr & syntax_analyzer_result_, diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.h b/dbms/src/Interpreters/SyntaxAnalyzer.h index 5500823b3c2..54ca4dfcf2b 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.h +++ b/dbms/src/Interpreters/SyntaxAnalyzer.h @@ -8,6 +8,8 @@ namespace DB class IStorage; using StoragePtr = std::shared_ptr; +NameSet removeDuplicateColumns(NamesAndTypesList & columns); + struct SyntaxAnalyzerResult { StoragePtr storage; From 861c225c1e95a966e21538816824a033821978c8 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 11 Jan 2019 01:04:37 +0300 Subject: [PATCH 03/17] one another build fix --- dbms/src/Interpreters/QueryNormalizer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Interpreters/QueryNormalizer.cpp b/dbms/src/Interpreters/QueryNormalizer.cpp index 37d7b9221f6..c2e614753fb 100644 --- a/dbms/src/Interpreters/QueryNormalizer.cpp +++ b/dbms/src/Interpreters/QueryNormalizer.cpp @@ -161,8 +161,8 @@ void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOf { if (typeid_cast(child.get())) { - for (const auto & [table_name, table_columns] : tables_with_columns) - for (const auto & column_name : table_columns) + for (const auto & pr : tables_with_columns) + for (const auto & column_name : pr.second) expr_list->children.emplace_back(std::make_shared(column_name)); } else if (const auto * qualified_asterisk = typeid_cast(child.get())) From c53854125f275a7867070a3154cf24732bf5ff15 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 11 Jan 2019 17:09:23 +0300 Subject: [PATCH 04/17] QueryNormalizer with visitor interface --- .../PredicateExpressionsOptimizer.cpp | 5 +- dbms/src/Interpreters/QueryNormalizer.cpp | 100 ++++++++++++------ dbms/src/Interpreters/QueryNormalizer.h | 47 +++++--- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 3 +- 4 files changed, 107 insertions(+), 48 deletions(-) diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index ea50ac1d6f4..d70ca0edd07 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -319,9 +319,12 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast TranslateQualifiedNamesVisitor::Data qn_visitor_data{{}, tables}; TranslateQualifiedNamesVisitor(qn_visitor_data).visit(ast); + QueryAliasesVisitor::Data query_aliases_data{aliases}; QueryAliasesVisitor(query_aliases_data).visit(ast); - QueryNormalizer(ast, aliases, settings).perform(); + + QueryNormalizer::Data normalizer_data(aliases, settings); + QueryNormalizer(normalizer_data).visit(ast); for (const auto & projection_column : 
select_query->select_expression_list->children) { diff --git a/dbms/src/Interpreters/QueryNormalizer.cpp b/dbms/src/Interpreters/QueryNormalizer.cpp index c2e614753fb..ea6312f84e4 100644 --- a/dbms/src/Interpreters/QueryNormalizer.cpp +++ b/dbms/src/Interpreters/QueryNormalizer.cpp @@ -20,36 +20,56 @@ namespace ErrorCodes extern const int CYCLIC_ALIASES; } - -QueryNormalizer::QueryNormalizer(ASTPtr & query_, const QueryNormalizer::Aliases & aliases_, ExtractedSettings && settings_, - std::vector && tables_with_columns_) - : query(query_), aliases(aliases_), settings(settings_), tables_with_columns(tables_with_columns_) -{} - -void QueryNormalizer::perform() +class CheckASTDepth { - SetOfASTs tmp_set; - MapOfASTs tmp_map; - performImpl(query, tmp_map, tmp_set, "", 0); - - try +public: + CheckASTDepth(QueryNormalizer::Data & data_) + : data(data_) { - query->checkSize(settings.max_expanded_ast_elements); + if (data.level > data.settings.max_ast_depth) + throw Exception("Normalized AST is too deep. Maximum: " + toString(data.settings.max_ast_depth), ErrorCodes::TOO_DEEP_AST); + ++data.level; } - catch (Exception & e) - { - e.addMessage("(after expansion of aliases)"); - throw; - } -} -/// finished_asts - already processed vertices (and by what they replaced) -/// current_asts - vertices in the current call stack of this method -/// current_alias - the alias referencing to the ancestor of ast (the deepest ancestor with aliases) -void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOfASTs & current_asts, std::string current_alias, size_t level) + ~CheckASTDepth() + { + --data.level; + } + +private: + QueryNormalizer::Data & data; +}; + + +class RestoreAliasOnExitScope { - if (level > settings.max_ast_depth) - throw Exception("Normalized AST is too deep. Maximum: " + toString(settings.max_ast_depth), ErrorCodes::TOO_DEEP_AST); +public: + RestoreAliasOnExitScope(String & alias_) + : alias(alias_) + , copy(alias_) + {} + + ~RestoreAliasOnExitScope() + { + alias = copy; + } + +private: + String & alias; + const String copy; +}; + + +void QueryNormalizer::visit(ASTPtr & ast, Data & data) +{ + CheckASTDepth scope1(data); + RestoreAliasOnExitScope scope2(data.current_alias); + + auto & aliases = data.aliases; + auto & tables_with_columns = data.tables_with_columns; + auto & finished_asts = data.finished_asts; + auto & current_asts = data.current_asts; + String & current_alias = data.current_alias; if (finished_asts.count(ast)) { @@ -87,7 +107,7 @@ void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOf /// will be sent to remote servers during distributed query execution, /// and on all remote servers, function implementation will be same. if (endsWith(func_node->name, "Distinct") && func_name_lowercase == "countdistinct") - func_node->name = settings.count_distinct_implementation; + func_node->name = data.settings.count_distinct_implementation; /// As special case, treat count(*) as count(), not as count(list of all columns). if (func_name_lowercase == "count" && func_node->arguments->children.size() == 1 @@ -137,7 +157,7 @@ void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOf /// Replace *, alias.*, database.table.* with a list of columns. 
ASTs old_children; - if (processAsterisks()) + if (data.processAsterisks()) { bool has_asterisk = false; for (const auto & child : expr_list->children) @@ -206,7 +226,7 @@ void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOf /// If we replace the root of the subtree, we will be called again for the new root, in case the alias is replaced by an alias. if (replaced) { - performImpl(ast, finished_asts, current_asts, current_alias, level + 1); + visit(ast, data); current_asts.erase(initial_ast.get()); current_asts.erase(ast.get()); finished_asts[initial_ast] = ast; @@ -227,7 +247,7 @@ void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOf if (typeid_cast(child.get()) || typeid_cast(child.get())) continue; - performImpl(child, finished_asts, current_asts, current_alias, level + 1); + visit(child, data); } } else if (identifier_node) @@ -240,7 +260,7 @@ void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOf if (typeid_cast(child.get()) || typeid_cast(child.get())) continue; - performImpl(child, finished_asts, current_asts, current_alias, level + 1); + visit(child, data); } } @@ -248,16 +268,30 @@ void QueryNormalizer::performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOf if (ASTSelectQuery * select = typeid_cast(ast.get())) { if (select->prewhere_expression) - performImpl(select->prewhere_expression, finished_asts, current_asts, current_alias, level + 1); + visit(select->prewhere_expression, data); if (select->where_expression) - performImpl(select->where_expression, finished_asts, current_asts, current_alias, level + 1); + visit(select->where_expression, data); if (select->having_expression) - performImpl(select->having_expression, finished_asts, current_asts, current_alias, level + 1); + visit(select->having_expression, data); } current_asts.erase(initial_ast.get()); current_asts.erase(ast.get()); finished_asts[initial_ast] = ast; + + /// @note can not place it in CheckASTDepth dror cause of throw. 
+ if (data.level == 1) + { + try + { + ast->checkSize(data.settings.max_expanded_ast_elements); + } + catch (Exception & e) + { + e.addMessage("(after expansion of aliases)"); + throw; + } + } } } diff --git a/dbms/src/Interpreters/QueryNormalizer.h b/dbms/src/Interpreters/QueryNormalizer.h index a63fde8bb73..55d33931d2f 100644 --- a/dbms/src/Interpreters/QueryNormalizer.h +++ b/dbms/src/Interpreters/QueryNormalizer.h @@ -39,23 +39,44 @@ public: using Aliases = std::unordered_map; using TableWithColumnNames = std::pair; - QueryNormalizer(ASTPtr & query, const Aliases & aliases, ExtractedSettings && settings, - std::vector && tables_with_columns = {}); + struct Data + { + using SetOfASTs = std::set; + using MapOfASTs = std::map; - void perform(); + const Aliases & aliases; + const ExtractedSettings settings; + const std::vector tables_with_columns; + + /// tmp data + size_t level; + MapOfASTs finished_asts; /// already processed vertices (and by what they replaced) + SetOfASTs current_asts; /// vertices in the current call stack of this method + std::string current_alias; /// the alias referencing to the ancestor of ast (the deepest ancestor with aliases) + + Data(const Aliases & aliases_, ExtractedSettings && settings_, std::vector && tables_with_columns_ = {}) + : aliases(aliases_) + , settings(settings_) + , tables_with_columns(tables_with_columns_) + , level(0) + {} + + bool processAsterisks() const { return !tables_with_columns.empty(); } + }; + + QueryNormalizer(Data & data) + : visitor_data(data) + {} + + void visit(ASTPtr & ast) + { + visit(ast, visitor_data); + } private: - using SetOfASTs = std::set; - using MapOfASTs = std::map; + Data & visitor_data; - ASTPtr & query; - const Aliases & aliases; - const ExtractedSettings settings; - const std::vector tables_with_columns; - - bool processAsterisks() const { return !tables_with_columns.empty(); } - - void performImpl(ASTPtr & ast, MapOfASTs & finished_asts, SetOfASTs & current_asts, std::string current_alias, size_t level); + void visit(ASTPtr & query, Data & data); }; } diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 0dc9bd3670e..5b40200c019 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -147,7 +147,8 @@ void normalizeTree( else table_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns_name)); - QueryNormalizer(query, result.aliases, context.getSettingsRef(), std::move(table_with_columns)).perform(); + QueryNormalizer::Data normalizer_data(result.aliases, context.getSettingsRef(), std::move(table_with_columns)); + QueryNormalizer(normalizer_data).visit(query); } bool hasArrayJoin(const ASTPtr & ast) From e098348aa4ca0d29f7b0f29459d37e3c54bf6752 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 11 Jan 2019 20:14:17 +0300 Subject: [PATCH 05/17] more QueryNormalizer refactoring: split visit function --- dbms/src/Interpreters/QueryNormalizer.cpp | 407 ++++++++++++---------- dbms/src/Interpreters/QueryNormalizer.h | 16 +- 2 files changed, 231 insertions(+), 192 deletions(-) diff --git a/dbms/src/Interpreters/QueryNormalizer.cpp b/dbms/src/Interpreters/QueryNormalizer.cpp index ea6312f84e4..328b3f6bf6a 100644 --- a/dbms/src/Interpreters/QueryNormalizer.cpp +++ b/dbms/src/Interpreters/QueryNormalizer.cpp @@ -60,16 +60,213 @@ private: }; +void QueryNormalizer::visit(ASTFunction & node, const ASTPtr &, Data & data) +{ + auto & aliases = data.aliases; + String & func_name = node.name; + ASTPtr & 
func_arguments = node.arguments; + + /// `IN t` can be specified, where t is a table, which is equivalent to `IN (SELECT * FROM t)`. + if (functionIsInOrGlobalInOperator(func_name)) + if (ASTIdentifier * right = typeid_cast(func_arguments->children.at(1).get())) + if (!aliases.count(right->name)) + right->setSpecial(); + + /// Special cases for count function. + String func_name_lowercase = Poco::toLower(func_name); + if (startsWith(func_name_lowercase, "count")) + { + /// Select implementation of countDistinct based on settings. + /// Important that it is done as query rewrite. It means rewritten query + /// will be sent to remote servers during distributed query execution, + /// and on all remote servers, function implementation will be same. + if (endsWith(func_name, "Distinct") && func_name_lowercase == "countdistinct") + func_name = data.settings.count_distinct_implementation; + + /// As special case, treat count(*) as count(), not as count(list of all columns). + if (func_name_lowercase == "count" && func_arguments->children.size() == 1 + && typeid_cast(func_arguments->children[0].get())) + { + func_arguments->children.clear(); + } + } +} + +void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) +{ + auto & current_asts = data.current_asts; + String & current_alias = data.current_alias; + + if (!node.general()) + return; + + /// If it is an alias, but not a parent alias (for constructs like "SELECT column + 1 AS column"). + auto it_alias = data.aliases.find(node.name); + if (it_alias != data.aliases.end() && current_alias != node.name) + { + auto & alias_node = it_alias->second; + + /// Let's replace it with the corresponding tree node. + if (current_asts.count(alias_node.get())) + throw Exception("Cyclic aliases", ErrorCodes::CYCLIC_ALIASES); + + String my_alias = ast->tryGetAlias(); + if (!my_alias.empty() && my_alias != alias_node->getAliasOrColumnName()) + { + /// Avoid infinite recursion here + auto replace_to_identifier = typeid_cast(alias_node.get()); + bool is_cycle = replace_to_identifier && replace_to_identifier->general() + && replace_to_identifier->name == node.name; + + if (!is_cycle) + { + /// In a construct like "a AS b", where a is an alias, you must set alias b to the result of substituting alias a. + ast = alias_node->clone(); + ast->setAlias(my_alias); + } + } + else + ast = alias_node; + } +} + +/// Replace *, alias.*, database.table.* with a list of columns. 
+void QueryNormalizer::visit(ASTExpressionList & node, const ASTPtr &, Data & data) +{ + auto & tables_with_columns = data.tables_with_columns; + + ASTs old_children; + if (data.processAsterisks()) + { + bool has_asterisk = false; + for (const auto & child : node.children) + { + if (typeid_cast(child.get()) || + typeid_cast(child.get())) + { + has_asterisk = true; + break; + } + } + + if (has_asterisk) + { + old_children.swap(node.children); + node.children.reserve(old_children.size()); + } + } + + for (const auto & child : old_children) + { + if (typeid_cast(child.get())) + { + for (const auto & pr : tables_with_columns) + for (const auto & column_name : pr.second) + node.children.emplace_back(std::make_shared(column_name)); + } + else if (const auto * qualified_asterisk = typeid_cast(child.get())) + { + const ASTIdentifier * identifier = typeid_cast(qualified_asterisk->children[0].get()); + size_t num_components = identifier->children.size(); + + for (const auto & [table_name, table_columns] : tables_with_columns) + { + if ((num_components == 2 /// database.table.* + && !table_name.database.empty() /// This is normal (not a temporary) table. + && static_cast(*identifier->children[0]).name == table_name.database + && static_cast(*identifier->children[1]).name == table_name.table) + || (num_components == 0 /// t.* + && ((!table_name.table.empty() && identifier->name == table_name.table) /// table.* + || (!table_name.alias.empty() && identifier->name == table_name.alias)))) /// alias.* + { + for (const auto & column_name : table_columns) + node.children.emplace_back(std::make_shared(column_name)); + break; + } + } + } + else + node.children.emplace_back(child); + } +} + +/// mark table identifiers as 'not columns' +void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr &, Data &) +{ + if (node.table_expression) + { + auto & database_and_table_name = static_cast(*node.table_expression).database_and_table_name; + if (database_and_table_name) + if (ASTIdentifier * right = typeid_cast(database_and_table_name.get())) + right->setSpecial(); + } +} + +/// special visitChildren() for ASTSelectQuery +void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr & ast, Data & data) +{ + for (auto & child : ast->children) + { + if (typeid_cast(child.get()) || + typeid_cast(child.get())) + continue; + + visit(child, data); + } + + /// If the WHERE clause or HAVING consists of a single alias, the reference must be replaced not only in children, + /// but also in where_expression and having_expression. + if (select.prewhere_expression) + visit(select.prewhere_expression, data); + if (select.where_expression) + visit(select.where_expression, data); + if (select.having_expression) + visit(select.having_expression, data); +} + +/// Don't go into subqueries. +/// Don't go into components of compound identifiers. +/// Don't go into select query. It processes children itself. +/// Do not go to the left argument of lambda expressions, so as not to replace the formal parameters +/// on aliases in expressions of the form 123 AS x, arrayMap(x -> 1, [2]). +void QueryNormalizer::visitChildren(const ASTPtr & node, Data & data) +{ + ASTFunction * func_node = typeid_cast(node.get()); + if (func_node && func_node->name == "lambda") + { + /// We skip the first argument. We also assume that the lambda function can not have parameters. 
+ for (size_t i = 1, size = func_node->arguments->children.size(); i < size; ++i) + { + auto & child = func_node->arguments->children[i]; + + if (typeid_cast(child.get()) || + typeid_cast(child.get())) + continue; + + visit(child, data); + } + } + else if (!typeid_cast(node.get()) && + !typeid_cast(node.get())) + { + for (auto & child : node->children) + { + if (typeid_cast(child.get()) || + typeid_cast(child.get())) + continue; + + visit(child, data); + } + } +} + void QueryNormalizer::visit(ASTPtr & ast, Data & data) { CheckASTDepth scope1(data); RestoreAliasOnExitScope scope2(data.current_alias); - auto & aliases = data.aliases; - auto & tables_with_columns = data.tables_with_columns; auto & finished_asts = data.finished_asts; auto & current_asts = data.current_asts; - String & current_alias = data.current_alias; if (finished_asts.count(ast)) { @@ -80,206 +277,34 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) ASTPtr initial_ast = ast; current_asts.insert(initial_ast.get()); - String my_alias = ast->tryGetAlias(); - if (!my_alias.empty()) - current_alias = my_alias; - - /// rewrite rules that act when you go from top to bottom. - bool replaced = false; - - ASTIdentifier * identifier_node = nullptr; - ASTFunction * func_node = nullptr; - - if ((func_node = typeid_cast(ast.get()))) { - /// `IN t` can be specified, where t is a table, which is equivalent to `IN (SELECT * FROM t)`. - if (functionIsInOrGlobalInOperator(func_node->name)) - if (ASTIdentifier * right = typeid_cast(func_node->arguments->children.at(1).get())) - if (!aliases.count(right->name)) - right->setSpecial(); - - /// Special cases for count function. - String func_name_lowercase = Poco::toLower(func_node->name); - if (startsWith(func_name_lowercase, "count")) - { - /// Select implementation of countDistinct based on settings. - /// Important that it is done as query rewrite. It means rewritten query - /// will be sent to remote servers during distributed query execution, - /// and on all remote servers, function implementation will be same. - if (endsWith(func_node->name, "Distinct") && func_name_lowercase == "countdistinct") - func_node->name = data.settings.count_distinct_implementation; - - /// As special case, treat count(*) as count(), not as count(list of all columns). - if (func_name_lowercase == "count" && func_node->arguments->children.size() == 1 - && typeid_cast(func_node->arguments->children[0].get())) - { - func_node->arguments->children.clear(); - } - } + String my_alias = ast->tryGetAlias(); + if (!my_alias.empty()) + data.current_alias = my_alias; } - else if ((identifier_node = typeid_cast(ast.get()))) - { - if (identifier_node->general()) - { - /// If it is an alias, but not a parent alias (for constructs like "SELECT column + 1 AS column"). - auto it_alias = aliases.find(identifier_node->name); - if (it_alias != aliases.end() && current_alias != identifier_node->name) - { - /// Let's replace it with the corresponding tree node. 
- if (current_asts.count(it_alias->second.get())) - throw Exception("Cyclic aliases", ErrorCodes::CYCLIC_ALIASES); - if (!my_alias.empty() && my_alias != it_alias->second->getAliasOrColumnName()) - { - /// Avoid infinite recursion here - auto replace_to_identifier = typeid_cast(it_alias->second.get()); - bool is_cycle = replace_to_identifier && replace_to_identifier->general() - && replace_to_identifier->name == identifier_node->name; - - if (!is_cycle) - { - /// In a construct like "a AS b", where a is an alias, you must set alias b to the result of substituting alias a. - ast = it_alias->second->clone(); - ast->setAlias(my_alias); - replaced = true; - } - } - else - { - ast = it_alias->second; - replaced = true; - } - } - } - } - else if (ASTExpressionList * expr_list = typeid_cast(ast.get())) - { - /// Replace *, alias.*, database.table.* with a list of columns. - - ASTs old_children; - if (data.processAsterisks()) - { - bool has_asterisk = false; - for (const auto & child : expr_list->children) - { - if (typeid_cast(child.get()) || - typeid_cast(child.get())) - { - has_asterisk = true; - break; - } - } - - if (has_asterisk) - { - old_children.swap(expr_list->children); - expr_list->children.reserve(old_children.size()); - } - } - - for (const auto & child : old_children) - { - if (typeid_cast(child.get())) - { - for (const auto & pr : tables_with_columns) - for (const auto & column_name : pr.second) - expr_list->children.emplace_back(std::make_shared(column_name)); - } - else if (const auto * qualified_asterisk = typeid_cast(child.get())) - { - const ASTIdentifier * identifier = typeid_cast(qualified_asterisk->children[0].get()); - size_t num_components = identifier->children.size(); - - for (const auto & [table_name, table_columns] : tables_with_columns) - { - if ((num_components == 2 /// database.table.* - && !table_name.database.empty() /// This is normal (not a temporary) table. - && static_cast(*identifier->children[0]).name == table_name.database - && static_cast(*identifier->children[1]).name == table_name.table) - || (num_components == 0 /// t.* - && ((!table_name.table.empty() && identifier->name == table_name.table) /// table.* - || (!table_name.alias.empty() && identifier->name == table_name.alias)))) /// alias.* - { - for (const auto & column_name : table_columns) - expr_list->children.emplace_back(std::make_shared(column_name)); - break; - } - } - } - else - expr_list->children.emplace_back(child); - } - } - else if (ASTTablesInSelectQueryElement * tables_elem = typeid_cast(ast.get())) - { - if (tables_elem->table_expression) - { - auto & database_and_table_name = static_cast(*tables_elem->table_expression).database_and_table_name; - if (database_and_table_name) - { - if (ASTIdentifier * right = typeid_cast(database_and_table_name.get())) - right->setSpecial(); - } - } - } + if (auto * node = typeid_cast(ast.get())) + visit(*node, ast, data); + if (auto * node = typeid_cast(ast.get())) + visit(*node, ast, data); + if (auto * node = typeid_cast(ast.get())) + visit(*node, ast, data); + if (auto * node = typeid_cast(ast.get())) + visit(*node, ast, data); + if (auto * node = typeid_cast(ast.get())) + visit(*node, ast, data); /// If we replace the root of the subtree, we will be called again for the new root, in case the alias is replaced by an alias. - if (replaced) - { + if (ast.get() != initial_ast.get()) visit(ast, data); - current_asts.erase(initial_ast.get()); - current_asts.erase(ast.get()); - finished_asts[initial_ast] = ast; - return; - } - - /// Recurring calls. 
Don't go into subqueries. Don't go into components of compound identifiers. - /// We also do not go to the left argument of lambda expressions, so as not to replace the formal parameters - /// on aliases in expressions of the form 123 AS x, arrayMap(x -> 1, [2]). - - if (func_node && func_node->name == "lambda") - { - /// We skip the first argument. We also assume that the lambda function can not have parameters. - for (size_t i = 1, size = func_node->arguments->children.size(); i < size; ++i) - { - auto & child = func_node->arguments->children[i]; - - if (typeid_cast(child.get()) || typeid_cast(child.get())) - continue; - - visit(child, data); - } - } - else if (identifier_node) - { - } else - { - for (auto & child : ast->children) - { - if (typeid_cast(child.get()) || typeid_cast(child.get())) - continue; - - visit(child, data); - } - } - - /// If the WHERE clause or HAVING consists of a single alias, the reference must be replaced not only in children, but also in where_expression and having_expression. - if (ASTSelectQuery * select = typeid_cast(ast.get())) - { - if (select->prewhere_expression) - visit(select->prewhere_expression, data); - if (select->where_expression) - visit(select->where_expression, data); - if (select->having_expression) - visit(select->having_expression, data); - } + visitChildren(ast, data); current_asts.erase(initial_ast.get()); current_asts.erase(ast.get()); finished_asts[initial_ast] = ast; - /// @note can not place it in CheckASTDepth dror cause of throw. + /// @note can not place it in CheckASTDepth dtor cause of exception. if (data.level == 1) { try diff --git a/dbms/src/Interpreters/QueryNormalizer.h b/dbms/src/Interpreters/QueryNormalizer.h index 55d33931d2f..3e55e0253e6 100644 --- a/dbms/src/Interpreters/QueryNormalizer.h +++ b/dbms/src/Interpreters/QueryNormalizer.h @@ -18,6 +18,12 @@ inline bool functionIsInOrGlobalInOperator(const String & name) } +class ASTFunction; +class ASTIdentifier; +class ASTExpressionList; +struct ASTTablesInSelectQueryElement; + + class QueryNormalizer { /// Extracts settings, mostly to show which are used and which are not. 
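/// Illustrative sketch, not part of the patch: with the visitor interface, callers build a
/// Data object (aliases, settings, optional table/column pairs plus traversal state) and run
/// the visitor; visit(ASTPtr &, Data &) then dispatches to the typed overloads via typeid_cast.
/// Variable names are placeholders taken from the call sites elsewhere in this series:
///
///     QueryNormalizer::Data normalizer_data(result.aliases, context.getSettingsRef(), std::move(tables_with_columns));
///     QueryNormalizer(normalizer_data).visit(query);
///
/// and inside the dispatcher each node kind gets its own handler, for example:
///
///     if (auto * node = typeid_cast<ASTFunction *>(ast.get()))
///         visit(*node, ast, data);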
@@ -76,7 +82,15 @@ public: private: Data & visitor_data; - void visit(ASTPtr & query, Data & data); + static void visit(ASTPtr & query, Data & data); + + static void visit(ASTIdentifier &, ASTPtr &, Data &); + static void visit(ASTFunction &, const ASTPtr &, Data &); + static void visit(ASTExpressionList &, const ASTPtr &, Data &); + static void visit(ASTTablesInSelectQueryElement &, const ASTPtr &, Data &); + static void visit(ASTSelectQuery &, const ASTPtr &, Data &); + + static void visitChildren(const ASTPtr &, Data & data); }; } From 9c35598373cd17755b52768be4ce75abd4851bea Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 27 Dec 2018 16:27:01 +0300 Subject: [PATCH 06/17] Add cppkafka to contrib --- .gitmodules | 3 + cmake/find_rdkafka.cmake | 4 +- contrib/CMakeLists.txt | 1 + contrib/cppkafka | 1 + contrib/cppkafka-cmake/CMakeLists.txt | 31 ++++++ contrib/librdkafka-cmake/CMakeLists.txt | 104 +++++++++--------- .../include/librdkafka/rdkafka.h | 5 + dbms/CMakeLists.txt | 1 + dbms/src/Storages/Kafka/StorageKafka.cpp | 36 +++--- dbms/src/Storages/Kafka/StorageKafka.h | 12 +- 10 files changed, 120 insertions(+), 78 deletions(-) create mode 160000 contrib/cppkafka create mode 100644 contrib/cppkafka-cmake/CMakeLists.txt create mode 100644 contrib/librdkafka-cmake/include/librdkafka/rdkafka.h diff --git a/.gitmodules b/.gitmodules index 923554a1532..24211b6707e 100644 --- a/.gitmodules +++ b/.gitmodules @@ -61,3 +61,6 @@ [submodule "contrib/libgsasl"] path = contrib/libgsasl url = https://github.com/ClickHouse-Extras/libgsasl.git +[submodule "contrib/cppkafka"] + path = contrib/cppkafka + url = https://github.com/mfontanini/cppkafka.git diff --git a/cmake/find_rdkafka.cmake b/cmake/find_rdkafka.cmake index 9ba48cadfcd..b0a0a98b382 100644 --- a/cmake/find_rdkafka.cmake +++ b/cmake/find_rdkafka.cmake @@ -25,6 +25,7 @@ endif () if (RDKAFKA_LIB AND RDKAFKA_INCLUDE_DIR) set (USE_RDKAFKA 1) set (RDKAFKA_LIBRARY ${RDKAFKA_LIB} ${OPENSSL_LIBRARIES}) + set (CPPKAFKA_LIBRARY cppkafka) if (SASL2_LIBRARY) list (APPEND RDKAFKA_LIBRARY ${SASL2_LIBRARY}) endif () @@ -35,9 +36,10 @@ elseif (NOT MISSING_INTERNAL_RDKAFKA_LIBRARY AND NOT ARCH_ARM) set (USE_INTERNAL_RDKAFKA_LIBRARY 1) set (RDKAFKA_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/librdkafka/src") set (RDKAFKA_LIBRARY rdkafka) + set (CPPKAFKA_LIBRARY cppkafka) set (USE_RDKAFKA 1) endif () endif () -message (STATUS "Using librdkafka=${USE_RDKAFKA}: ${RDKAFKA_INCLUDE_DIR} : ${RDKAFKA_LIBRARY}") +message (STATUS "Using librdkafka=${USE_RDKAFKA}: ${RDKAFKA_INCLUDE_DIR} : ${RDKAFKA_LIBRARY} ${CPPKAFKA_LIBRARY}") diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 9d964f288d8..25ad30e02eb 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -121,6 +121,7 @@ endif () if (USE_INTERNAL_RDKAFKA_LIBRARY) add_subdirectory (librdkafka-cmake) + add_subdirectory (cppkafka-cmake) target_include_directories(rdkafka BEFORE PRIVATE ${ZLIB_INCLUDE_DIR}) target_include_directories(rdkafka BEFORE PRIVATE ${OPENSSL_INCLUDE_DIR}) endif () diff --git a/contrib/cppkafka b/contrib/cppkafka new file mode 160000 index 00000000000..520465510ef --- /dev/null +++ b/contrib/cppkafka @@ -0,0 +1 @@ +Subproject commit 520465510efef7704346cf8d140967c4abb057c1 diff --git a/contrib/cppkafka-cmake/CMakeLists.txt b/contrib/cppkafka-cmake/CMakeLists.txt new file mode 100644 index 00000000000..f62fa471172 --- /dev/null +++ b/contrib/cppkafka-cmake/CMakeLists.txt @@ -0,0 +1,31 @@ +set(CPPKAFKA_DIR ${CMAKE_SOURCE_DIR}/contrib/cppkafka) + +set(SRCS + 
${CPPKAFKA_DIR}/src/configuration.cpp + ${CPPKAFKA_DIR}/src/topic_configuration.cpp + ${CPPKAFKA_DIR}/src/configuration_option.cpp + ${CPPKAFKA_DIR}/src/exceptions.cpp + ${CPPKAFKA_DIR}/src/topic.cpp + ${CPPKAFKA_DIR}/src/buffer.cpp + ${CPPKAFKA_DIR}/src/queue.cpp + ${CPPKAFKA_DIR}/src/message.cpp + ${CPPKAFKA_DIR}/src/message_timestamp.cpp + ${CPPKAFKA_DIR}/src/message_internal.cpp + ${CPPKAFKA_DIR}/src/topic_partition.cpp + ${CPPKAFKA_DIR}/src/topic_partition_list.cpp + ${CPPKAFKA_DIR}/src/metadata.cpp + ${CPPKAFKA_DIR}/src/group_information.cpp + ${CPPKAFKA_DIR}/src/error.cpp + ${CPPKAFKA_DIR}/src/event.cpp + + ${CPPKAFKA_DIR}/src/kafka_handle_base.cpp + ${CPPKAFKA_DIR}/src/producer.cpp + ${CPPKAFKA_DIR}/src/consumer.cpp +) + +add_library(cppkafka ${LINK_MODE} ${SRCS}) + +target_link_libraries(cppkafka ${RDKAFKA_LIBRARY}) +target_include_directories(cppkafka PRIVATE ${CPPKAFKA_DIR}/include/cppkafka) +target_include_directories(cppkafka PRIVATE ${Boost_INCLUDE_DIRS}) +target_include_directories(cppkafka SYSTEM PUBLIC ${CPPKAFKA_DIR}/include) diff --git a/contrib/librdkafka-cmake/CMakeLists.txt b/contrib/librdkafka-cmake/CMakeLists.txt index 90421cfb31d..3b35634dabc 100644 --- a/contrib/librdkafka-cmake/CMakeLists.txt +++ b/contrib/librdkafka-cmake/CMakeLists.txt @@ -1,60 +1,60 @@ set(RDKAFKA_SOURCE_DIR ${CMAKE_SOURCE_DIR}/contrib/librdkafka/src) set(SRCS -${RDKAFKA_SOURCE_DIR}/crc32c.c -${RDKAFKA_SOURCE_DIR}/rdaddr.c -${RDKAFKA_SOURCE_DIR}/rdavl.c -${RDKAFKA_SOURCE_DIR}/rdbuf.c -${RDKAFKA_SOURCE_DIR}/rdcrc32.c -${RDKAFKA_SOURCE_DIR}/rdkafka.c -${RDKAFKA_SOURCE_DIR}/rdkafka_assignor.c -${RDKAFKA_SOURCE_DIR}/rdkafka_broker.c -${RDKAFKA_SOURCE_DIR}/rdkafka_buf.c -${RDKAFKA_SOURCE_DIR}/rdkafka_cgrp.c -${RDKAFKA_SOURCE_DIR}/rdkafka_conf.c -${RDKAFKA_SOURCE_DIR}/rdkafka_event.c -${RDKAFKA_SOURCE_DIR}/rdkafka_feature.c -${RDKAFKA_SOURCE_DIR}/rdkafka_lz4.c -${RDKAFKA_SOURCE_DIR}/rdkafka_metadata.c -${RDKAFKA_SOURCE_DIR}/rdkafka_metadata_cache.c -${RDKAFKA_SOURCE_DIR}/rdkafka_msg.c -${RDKAFKA_SOURCE_DIR}/rdkafka_msgset_reader.c -${RDKAFKA_SOURCE_DIR}/rdkafka_msgset_writer.c -${RDKAFKA_SOURCE_DIR}/rdkafka_offset.c -${RDKAFKA_SOURCE_DIR}/rdkafka_op.c -${RDKAFKA_SOURCE_DIR}/rdkafka_partition.c -${RDKAFKA_SOURCE_DIR}/rdkafka_pattern.c -${RDKAFKA_SOURCE_DIR}/rdkafka_queue.c -${RDKAFKA_SOURCE_DIR}/rdkafka_range_assignor.c -${RDKAFKA_SOURCE_DIR}/rdkafka_request.c -${RDKAFKA_SOURCE_DIR}/rdkafka_roundrobin_assignor.c -${RDKAFKA_SOURCE_DIR}/rdkafka_sasl.c -${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_plain.c -${RDKAFKA_SOURCE_DIR}/rdkafka_subscription.c -${RDKAFKA_SOURCE_DIR}/rdkafka_timer.c -${RDKAFKA_SOURCE_DIR}/rdkafka_topic.c -${RDKAFKA_SOURCE_DIR}/rdkafka_transport.c -${RDKAFKA_SOURCE_DIR}/rdkafka_interceptor.c -${RDKAFKA_SOURCE_DIR}/rdkafka_header.c -${RDKAFKA_SOURCE_DIR}/rdlist.c -${RDKAFKA_SOURCE_DIR}/rdlog.c -${RDKAFKA_SOURCE_DIR}/rdmurmur2.c -${RDKAFKA_SOURCE_DIR}/rdports.c -${RDKAFKA_SOURCE_DIR}/rdrand.c -${RDKAFKA_SOURCE_DIR}/rdregex.c -${RDKAFKA_SOURCE_DIR}/rdstring.c -${RDKAFKA_SOURCE_DIR}/rdunittest.c -${RDKAFKA_SOURCE_DIR}/rdvarint.c -${RDKAFKA_SOURCE_DIR}/snappy.c -${RDKAFKA_SOURCE_DIR}/tinycthread.c -${RDKAFKA_SOURCE_DIR}/xxhash.c -${RDKAFKA_SOURCE_DIR}/lz4.c -${RDKAFKA_SOURCE_DIR}/lz4frame.c -${RDKAFKA_SOURCE_DIR}/lz4hc.c -${RDKAFKA_SOURCE_DIR}/rdgz.c + ${RDKAFKA_SOURCE_DIR}/crc32c.c + ${RDKAFKA_SOURCE_DIR}/rdaddr.c + ${RDKAFKA_SOURCE_DIR}/rdavl.c + ${RDKAFKA_SOURCE_DIR}/rdbuf.c + ${RDKAFKA_SOURCE_DIR}/rdcrc32.c + ${RDKAFKA_SOURCE_DIR}/rdkafka.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_assignor.c + 
${RDKAFKA_SOURCE_DIR}/rdkafka_broker.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_buf.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_cgrp.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_conf.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_event.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_feature.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_lz4.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_metadata.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_metadata_cache.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_msg.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_msgset_reader.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_msgset_writer.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_offset.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_op.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_partition.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_pattern.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_queue.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_range_assignor.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_request.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_roundrobin_assignor.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_sasl_plain.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_subscription.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_timer.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_topic.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_transport.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_interceptor.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_header.c + ${RDKAFKA_SOURCE_DIR}/rdlist.c + ${RDKAFKA_SOURCE_DIR}/rdlog.c + ${RDKAFKA_SOURCE_DIR}/rdmurmur2.c + ${RDKAFKA_SOURCE_DIR}/rdports.c + ${RDKAFKA_SOURCE_DIR}/rdrand.c + ${RDKAFKA_SOURCE_DIR}/rdregex.c + ${RDKAFKA_SOURCE_DIR}/rdstring.c + ${RDKAFKA_SOURCE_DIR}/rdunittest.c + ${RDKAFKA_SOURCE_DIR}/rdvarint.c + ${RDKAFKA_SOURCE_DIR}/snappy.c + ${RDKAFKA_SOURCE_DIR}/tinycthread.c + ${RDKAFKA_SOURCE_DIR}/xxhash.c + ${RDKAFKA_SOURCE_DIR}/lz4.c + ${RDKAFKA_SOURCE_DIR}/lz4frame.c + ${RDKAFKA_SOURCE_DIR}/lz4hc.c + ${RDKAFKA_SOURCE_DIR}/rdgz.c ) add_library(rdkafka ${LINK_MODE} ${SRCS}) -target_include_directories(rdkafka PRIVATE include) +target_include_directories(rdkafka SYSTEM PUBLIC include) target_include_directories(rdkafka SYSTEM PUBLIC ${RDKAFKA_SOURCE_DIR}) target_link_libraries(rdkafka PUBLIC ${ZLIB_LIBRARIES} ${OPENSSL_SSL_LIBRARY} ${OPENSSL_CRYPTO_LIBRARY}) diff --git a/contrib/librdkafka-cmake/include/librdkafka/rdkafka.h b/contrib/librdkafka-cmake/include/librdkafka/rdkafka.h new file mode 100644 index 00000000000..3387659281a --- /dev/null +++ b/contrib/librdkafka-cmake/include/librdkafka/rdkafka.h @@ -0,0 +1,5 @@ +#if __has_include() // maybe bundled +# include_next // Y_IGNORE +#else // system +# include_next +#endif diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt index 7e39fd2f7af..84099810164 100644 --- a/dbms/CMakeLists.txt +++ b/dbms/CMakeLists.txt @@ -287,6 +287,7 @@ endif () if (USE_RDKAFKA) target_link_libraries (dbms PRIVATE ${RDKAFKA_LIBRARY}) + target_link_libraries (dbms PRIVATE ${CPPKAFKA_LIBRARY}) if (NOT USE_INTERNAL_RDKAFKA_LIBRARY) target_include_directories (dbms SYSTEM BEFORE PRIVATE ${RDKAFKA_INCLUDE_DIR}) endif () diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index e6ccf544ba1..f855ea7e877 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -1,39 +1,35 @@ -#include -#include +#include + #if USE_RDKAFKA -#include -#include -#include -#include -#include -#include -#include -#include -#include #include #include #include #include +#include +#include #include #include #include +#include #include #include #include -#include #include -#include -#include #include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include -#if 
__has_include() // maybe bundled -#include // Y_IGNORE -#else // system -#include -#endif +#include namespace DB diff --git a/dbms/src/Storages/Kafka/StorageKafka.h b/dbms/src/Storages/Kafka/StorageKafka.h index 561349ac474..e7cce510166 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.h +++ b/dbms/src/Storages/Kafka/StorageKafka.h @@ -1,16 +1,18 @@ #pragma once + #include + #if USE_RDKAFKA -#include - -#include -#include #include -#include +#include #include +#include #include #include +#include + +#include struct rd_kafka_s; struct rd_kafka_conf_s; From 337c092c7e152ccc1bc80ea006412c1b878b8bb5 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Thu, 27 Dec 2018 17:18:20 +0300 Subject: [PATCH 07/17] Use cppkafka instead of raw C interface --- dbms/src/Storages/Kafka/StorageKafka.cpp | 173 +++++------------------ dbms/src/Storages/Kafka/StorageKafka.h | 24 +--- 2 files changed, 42 insertions(+), 155 deletions(-) diff --git a/dbms/src/Storages/Kafka/StorageKafka.cpp b/dbms/src/Storages/Kafka/StorageKafka.cpp index f855ea7e877..063fff81e2d 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.cpp +++ b/dbms/src/Storages/Kafka/StorageKafka.cpp @@ -29,8 +29,6 @@ #include #include -#include - namespace DB { @@ -58,8 +56,8 @@ static const String CONFIG_PREFIX = "kafka"; class ReadBufferFromKafkaConsumer : public ReadBuffer { - rd_kafka_t * consumer; - rd_kafka_message_t * current = nullptr; + ConsumerPtr consumer; + cppkafka::Message current; bool current_pending = false; /// We've fetched "current" message and need to process it on the next iteration. Poco::Logger * log; size_t read_messages = 0; @@ -69,42 +67,36 @@ class ReadBufferFromKafkaConsumer : public ReadBuffer { if (current_pending) { - BufferBase::set(reinterpret_cast(current->payload), current->len, 0); + // XXX: very fishy place with const casting. + BufferBase::set(reinterpret_cast(const_cast(current.get_payload().get_data())), current.get_payload().get_size(), 0); current_pending = false; return true; } // Process next buffered message - rd_kafka_message_t * msg = rd_kafka_consumer_poll(consumer, READ_POLL_MS); // XXX: use RAII. - if (msg == nullptr) + auto message = consumer->poll(std::chrono::milliseconds(READ_POLL_MS)); + if (!message) return false; - if (msg->err) + if (message.is_eof()) { - if (msg->err != RD_KAFKA_RESP_ERR__PARTITION_EOF) - { - LOG_ERROR(log, "Consumer error: " << rd_kafka_err2str(msg->err) << " " << rd_kafka_message_errstr(msg)); - rd_kafka_message_destroy(msg); - return false; - } - - // Reach EOF while reading current batch, skip it - LOG_TRACE(log, "EOF reached for partition " << msg->partition << " offset " << msg->offset); - rd_kafka_message_destroy(msg); + // Reached EOF while reading current batch, skip it. + LOG_TRACE(log, "EOF reached for partition " << message.get_partition() << " offset " << message.get_offset()); return nextImpl(); } - - if (msg->len && !msg->payload) - throw Exception("Logical error: nullptr message returned with non-zero length", ErrorCodes::LOGICAL_ERROR); + else if (auto err = message.get_error()) + { + LOG_ERROR(log, "Consumer error: " << err); + return false; + } ++read_messages; // Now we've received a new message. 
Check if we need to produce a delimiter - if (row_delimiter != '\0' && current != nullptr) + if (row_delimiter != '\0' && current) { BufferBase::set(&row_delimiter, 1, 0); - reset(); - current = msg; + current = std::move(message); current_pending = true; return true; } @@ -112,31 +104,21 @@ class ReadBufferFromKafkaConsumer : public ReadBuffer // Consume message and mark the topic/partition offset // The offsets will be committed in the readSuffix() method after the block is completed // If an exception is thrown before that would occur, the client will rejoin without committing offsets - reset(); - current = msg; - BufferBase::set(reinterpret_cast(current->payload), current->len, 0); + current = std::move(message); + + // XXX: very fishy place with const casting. + BufferBase::set(reinterpret_cast(const_cast(current.get_payload().get_data())), current.get_payload().get_size(), 0); return true; } - void reset() - { - if (current != nullptr) - { - rd_kafka_message_destroy(current); - current = nullptr; - } - } - public: - ReadBufferFromKafkaConsumer(rd_kafka_t * consumer_, Poco::Logger * log_, char row_delimiter_) + ReadBufferFromKafkaConsumer(ConsumerPtr consumer_, Poco::Logger * log_, char row_delimiter_) : ReadBuffer(nullptr, 0), consumer(consumer_), log(log_), row_delimiter(row_delimiter_) { if (row_delimiter != '\0') LOG_TRACE(log, "Row delimiter is: " << row_delimiter); } - ~ReadBufferFromKafkaConsumer() override { reset(); } - /// Commit messages read with this consumer void commit() { @@ -144,10 +126,7 @@ public: if (read_messages == 0) return; - auto err = rd_kafka_commit(consumer, nullptr, 1 /* async */); - if (err) - throw Exception("Failed to commit offsets: " + String(rd_kafka_err2str(err)), ErrorCodes::UNKNOWN_EXCEPTION); - + consumer->async_commit(); read_messages = 0; } }; @@ -211,7 +190,7 @@ public: if (consumer == nullptr) throw Exception("Failed to claim consumer: ", ErrorCodes::TIMEOUT_EXCEEDED); - read_buf = std::make_unique(consumer->stream, storage.log, storage.row_delimiter); + read_buf = std::make_unique(consumer, storage.log, storage.row_delimiter); reader = FormatFactory::instance().getInput(storage.format_name, *read_buf, storage.getSampleBlock(), context, max_block_size); } @@ -235,7 +214,7 @@ public: private: StorageKafka & storage; - StorageKafka::ConsumerPtr consumer; + ConsumerPtr consumer; Context context; size_t max_block_size; Block sample_block; @@ -247,7 +226,7 @@ private: bool hasClaimed() { return consumer != nullptr; } }; -static void loadFromConfig(struct rd_kafka_conf_s * conf, const AbstractConfiguration & config, const std::string & path) +static void loadFromConfig(cppkafka::Configuration & conf, const AbstractConfiguration & config, const std::string & path) { AbstractConfiguration::Keys keys; std::vector errstr(512); @@ -258,8 +237,7 @@ static void loadFromConfig(struct rd_kafka_conf_s * conf, const AbstractConfigur { const String key_path = path + "." 
+ key; const String key_name = boost::replace_all_copy(key, "_", "."); - if (rd_kafka_conf_set(conf, key_name.c_str(), config.getString(key_path).c_str(), errstr.data(), errstr.size()) != RD_KAFKA_CONF_OK) - throw Exception("Invalid Kafka setting " + key_path + " in config: " + String(errstr.data()), ErrorCodes::INVALID_CONFIG_PARAMETER); + conf.set(key_name, config.getString(key_path)); } } @@ -322,21 +300,8 @@ void StorageKafka::startup() { for (size_t i = 0; i < num_consumers; ++i) { - // Building configuration may throw, the consumer configuration must be destroyed in that case - auto consumer_conf = rd_kafka_conf_new(); - try - { - consumerConfiguration(consumer_conf); - } - catch (...) - { - rd_kafka_conf_destroy(consumer_conf); - throw; - } - // Create a consumer and subscribe to topics - // Note: consumer takes ownership of the configuration - auto consumer = std::make_shared(consumer_conf); + auto consumer = std::make_shared(createConsumerConfiguration()); consumer->subscribe(topics); // Make consumer available @@ -358,7 +323,7 @@ void StorageKafka::shutdown() for (size_t i = 0; i < num_created_consumers; ++i) { auto consumer = claimConsumer(); - consumer->close(); + // FIXME: not sure if really close consumers here, and if we really need to close them here. } LOG_TRACE(log, "Waiting for cleanup"); @@ -374,24 +339,20 @@ void StorageKafka::updateDependencies() } -void StorageKafka::consumerConfiguration(struct rd_kafka_conf_s * conf) +cppkafka::Configuration StorageKafka::createConsumerConfiguration() { - std::vector errstr(512); + cppkafka::Configuration conf; LOG_TRACE(log, "Setting brokers: " << brokers); - if (rd_kafka_conf_set(conf, "metadata.broker.list", brokers.c_str(), errstr.data(), errstr.size()) != RD_KAFKA_CONF_OK) - throw Exception(String(errstr.data()), ErrorCodes::INCORRECT_DATA); + conf.set("metadata.broker.list", brokers); LOG_TRACE(log, "Setting Group ID: " << group << " Client ID: clickhouse"); + conf.set("group.id", group); - if (rd_kafka_conf_set(conf, "group.id", group.c_str(), errstr.data(), errstr.size()) != RD_KAFKA_CONF_OK) - throw Exception(String(errstr.data()), ErrorCodes::INCORRECT_DATA); - - if (rd_kafka_conf_set(conf, "client.id", VERSION_FULL, errstr.data(), errstr.size()) != RD_KAFKA_CONF_OK) - throw Exception(String(errstr.data()), ErrorCodes::INCORRECT_DATA); + conf.set("client.id", VERSION_FULL); // We manually commit offsets after a stream successfully finished - rd_kafka_conf_set(conf, "enable.auto.commit", "false", nullptr, 0); + conf.set("enable.auto.commit", "false"); // Update consumer configuration from the configuration const auto & config = global_context.getConfigRef(); @@ -405,14 +366,16 @@ void StorageKafka::consumerConfiguration(struct rd_kafka_conf_s * conf) if (config.has(topic_config_key)) loadFromConfig(conf, config, topic_config_key); } + + return conf; } -StorageKafka::ConsumerPtr StorageKafka::claimConsumer() +ConsumerPtr StorageKafka::claimConsumer() { return tryClaimConsumer(-1L); } -StorageKafka::ConsumerPtr StorageKafka::tryClaimConsumer(long wait_ms) +ConsumerPtr StorageKafka::tryClaimConsumer(long wait_ms) { // Wait for the first free consumer if (wait_ms >= 0) @@ -430,7 +393,7 @@ StorageKafka::ConsumerPtr StorageKafka::tryClaimConsumer(long wait_ms) return consumer; } -void StorageKafka::pushConsumer(StorageKafka::ConsumerPtr consumer) +void StorageKafka::pushConsumer(ConsumerPtr consumer) { std::lock_guard lock(mutex); consumers.push_back(consumer); @@ -553,64 +516,6 @@ bool StorageKafka::streamToViews() } 
-StorageKafka::Consumer::Consumer(struct rd_kafka_conf_s * conf) -{ - std::vector errstr(512); - stream = rd_kafka_new(RD_KAFKA_CONSUMER, conf, errstr.data(), errstr.size()); - if (stream == nullptr) - { - rd_kafka_conf_destroy(conf); - throw Exception("Failed to create consumer handle: " + String(errstr.data()), ErrorCodes::UNKNOWN_EXCEPTION); - } - - rd_kafka_poll_set_consumer(stream); -} - - -StorageKafka::Consumer::~Consumer() -{ - close(); -} - - -void StorageKafka::Consumer::subscribe(const Names & topics_to_subscribe) -{ - if (stream == nullptr) - throw Exception("Cannot subscribe to topics when consumer is closed", ErrorCodes::UNKNOWN_EXCEPTION); - - // Create a list of partitions - auto * topic_list = rd_kafka_topic_partition_list_new(topics_to_subscribe.size()); - for (const auto & topic : topics_to_subscribe) - rd_kafka_topic_partition_list_add(topic_list, topic.c_str(), RD_KAFKA_PARTITION_UA); - - // Subscribe to requested topics - auto err = rd_kafka_subscribe(stream, topic_list); - if (err) - { - rd_kafka_topic_partition_list_destroy(topic_list); - throw Exception("Failed to subscribe: " + String(rd_kafka_err2str(err)), ErrorCodes::UNKNOWN_EXCEPTION); - } - - rd_kafka_topic_partition_list_destroy(topic_list); -} - - -void StorageKafka::Consumer::unsubscribe() -{ - if (stream != nullptr) - rd_kafka_unsubscribe(stream); -} - -void StorageKafka::Consumer::close() -{ - if (stream != nullptr) - { - rd_kafka_consumer_close(stream); - rd_kafka_destroy(stream); - stream = nullptr; - } -} - void registerStorageKafka(StorageFactory & factory) { factory.registerStorage("Kafka", [](const StorageFactory::Arguments & args) diff --git a/dbms/src/Storages/Kafka/StorageKafka.h b/dbms/src/Storages/Kafka/StorageKafka.h index e7cce510166..d6b324e8a85 100644 --- a/dbms/src/Storages/Kafka/StorageKafka.h +++ b/dbms/src/Storages/Kafka/StorageKafka.h @@ -12,15 +12,13 @@ #include #include +#include #include -struct rd_kafka_s; -struct rd_kafka_conf_s; - namespace DB { -class StorageKafka; +using ConsumerPtr = std::shared_ptr; /** Implements a Kafka queue table engine that can be used as a persistent queue / buffer, * or as a basic building block for creating pipelines with a continuous insertion / ETL. @@ -55,22 +53,6 @@ public: void updateDependencies() override; private: - /// Each engine typically has one consumer (able to process 1..N partitions) - /// It is however possible to create multiple consumers per table, as long - /// as the total number of consumers is <= number of partitions. 
- struct Consumer - { - Consumer(struct rd_kafka_conf_s * conf); - ~Consumer(); - - void subscribe(const Names & topics); - void unsubscribe(); - void close(); - - struct rd_kafka_s * stream = nullptr; - }; - using ConsumerPtr = std::shared_ptr; - // Configuration and state String table_name; String database_name; @@ -102,7 +84,7 @@ private: BackgroundSchedulePool::TaskHolder task; std::atomic stream_cancelled{false}; - void consumerConfiguration(struct rd_kafka_conf_s * conf); + cppkafka::Configuration createConsumerConfiguration(); ConsumerPtr claimConsumer(); ConsumerPtr tryClaimConsumer(long wait_ms); void pushConsumer(ConsumerPtr c); From 77daa519ff3659021ad50559f29b54e2b5f27127 Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 28 Dec 2018 12:07:58 +0300 Subject: [PATCH 08/17] Update librdkafka to v1.0.0-RC5 --- .gitignore | 3 +++ contrib/cppkafka-cmake/CMakeLists.txt | 2 +- contrib/librdkafka | 2 +- contrib/librdkafka-cmake/CMakeLists.txt | 3 +++ contrib/librdkafka-cmake/config.h | 2 ++ 5 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 9816f1cbb6c..3fa4a095a1b 100644 --- a/.gitignore +++ b/.gitignore @@ -251,3 +251,6 @@ website/package-lock.json # cquery cache /.cquery-cache + +# ccls cache +/.ccls-cache diff --git a/contrib/cppkafka-cmake/CMakeLists.txt b/contrib/cppkafka-cmake/CMakeLists.txt index f62fa471172..9fb98e35b47 100644 --- a/contrib/cppkafka-cmake/CMakeLists.txt +++ b/contrib/cppkafka-cmake/CMakeLists.txt @@ -25,7 +25,7 @@ set(SRCS add_library(cppkafka ${LINK_MODE} ${SRCS}) -target_link_libraries(cppkafka ${RDKAFKA_LIBRARY}) +target_link_libraries(cppkafka PRIVATE ${RDKAFKA_LIBRARY}) target_include_directories(cppkafka PRIVATE ${CPPKAFKA_DIR}/include/cppkafka) target_include_directories(cppkafka PRIVATE ${Boost_INCLUDE_DIRS}) target_include_directories(cppkafka SYSTEM PUBLIC ${CPPKAFKA_DIR}/include) diff --git a/contrib/librdkafka b/contrib/librdkafka index 7478b5ef16a..363dcad5a23 160000 --- a/contrib/librdkafka +++ b/contrib/librdkafka @@ -1 +1 @@ -Subproject commit 7478b5ef16aadd6543fe38bc6a2deb895c70da98 +Subproject commit 363dcad5a23dc29381cc626620e68ae418b3af19 diff --git a/contrib/librdkafka-cmake/CMakeLists.txt b/contrib/librdkafka-cmake/CMakeLists.txt index 3b35634dabc..115c916e9f4 100644 --- a/contrib/librdkafka-cmake/CMakeLists.txt +++ b/contrib/librdkafka-cmake/CMakeLists.txt @@ -8,12 +8,14 @@ set(SRCS ${RDKAFKA_SOURCE_DIR}/rdcrc32.c ${RDKAFKA_SOURCE_DIR}/rdkafka.c ${RDKAFKA_SOURCE_DIR}/rdkafka_assignor.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_background.c ${RDKAFKA_SOURCE_DIR}/rdkafka_broker.c ${RDKAFKA_SOURCE_DIR}/rdkafka_buf.c ${RDKAFKA_SOURCE_DIR}/rdkafka_cgrp.c ${RDKAFKA_SOURCE_DIR}/rdkafka_conf.c ${RDKAFKA_SOURCE_DIR}/rdkafka_event.c ${RDKAFKA_SOURCE_DIR}/rdkafka_feature.c + ${RDKAFKA_SOURCE_DIR}/rdkafka_idempotence.c ${RDKAFKA_SOURCE_DIR}/rdkafka_lz4.c ${RDKAFKA_SOURCE_DIR}/rdkafka_metadata.c ${RDKAFKA_SOURCE_DIR}/rdkafka_metadata_cache.c @@ -47,6 +49,7 @@ set(SRCS ${RDKAFKA_SOURCE_DIR}/rdvarint.c ${RDKAFKA_SOURCE_DIR}/snappy.c ${RDKAFKA_SOURCE_DIR}/tinycthread.c + ${RDKAFKA_SOURCE_DIR}/tinycthread_extra.c ${RDKAFKA_SOURCE_DIR}/xxhash.c ${RDKAFKA_SOURCE_DIR}/lz4.c ${RDKAFKA_SOURCE_DIR}/lz4frame.c diff --git a/contrib/librdkafka-cmake/config.h b/contrib/librdkafka-cmake/config.h index 68e93a10ff1..f94c6bfc630 100644 --- a/contrib/librdkafka-cmake/config.h +++ b/contrib/librdkafka-cmake/config.h @@ -71,4 +71,6 @@ #define HAVE_PTHREAD_SETNAME_GNU 1 // python //#define HAVE_PYTHON 1 +// C11 threads +#define 
WITH_C11THREADS 1 #endif /* _CONFIG_H_ */ From 07f8ef4f4c601cdd3dadf653cb29c137eb82f75a Mon Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Fri, 11 Jan 2019 16:36:30 +0300 Subject: [PATCH 09/17] Use C11 threads only if available --- contrib/librdkafka-cmake/config.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/contrib/librdkafka-cmake/config.h b/contrib/librdkafka-cmake/config.h index f94c6bfc630..2ffc5a497ae 100644 --- a/contrib/librdkafka-cmake/config.h +++ b/contrib/librdkafka-cmake/config.h @@ -1,4 +1,4 @@ -// Automatically generated by ./configure +// Automatically generated by ./configure #ifndef _CONFIG_H_ #define _CONFIG_H_ #define ARCH "x86_64" @@ -72,5 +72,7 @@ // python //#define HAVE_PYTHON 1 // C11 threads -#define WITH_C11THREADS 1 +#if (__STDC_VERSION__ >= 201112L) && !defined(__STDC_NO_THREADS__) +# define WITH_C11THREADS 1 +#endif #endif /* _CONFIG_H_ */ From 5024d1b7f762b72e7d5ca3370c4526ec204dc414 Mon Sep 17 00:00:00 2001 From: proller Date: Mon, 14 Jan 2019 14:52:12 +0300 Subject: [PATCH 10/17] Fix macos build --- dbms/src/DataTypes/DataTypeLowCardinality.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/DataTypes/DataTypeLowCardinality.cpp b/dbms/src/DataTypes/DataTypeLowCardinality.cpp index b823a9257ad..e73deaae2ca 100644 --- a/dbms/src/DataTypes/DataTypeLowCardinality.cpp +++ b/dbms/src/DataTypes/DataTypeLowCardinality.cpp @@ -713,7 +713,7 @@ void DataTypeLowCardinality::deserializeBinaryBulkWithMultipleStreams( readIntBinary(low_cardinality_state->num_pending_rows, *indexes_stream); } - size_t num_rows_to_read = std::min(limit, low_cardinality_state->num_pending_rows); + size_t num_rows_to_read = std::min(limit, low_cardinality_state->num_pending_rows); readIndexes(num_rows_to_read); limit -= num_rows_to_read; low_cardinality_state->num_pending_rows -= num_rows_to_read; From c0b72492c09164f23a68d9ad7c38dab28e6d3a63 Mon Sep 17 00:00:00 2001 From: proller Date: Mon, 14 Jan 2019 16:33:14 +0300 Subject: [PATCH 11/17] Fix macos build --- .../Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp index 1c847eb0e11..9dce4edb239 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeBaseSelectBlockInputStream.cpp @@ -91,7 +91,7 @@ Block MergeTreeBaseSelectBlockInputStream::readFromPart() UInt64 rows_to_read = current_task.size_predictor->estimateNumRows(current_preferred_block_size_bytes); if (!rows_to_read) return rows_to_read; - rows_to_read = std::max(index_granularity, rows_to_read); + rows_to_read = std::max(index_granularity, rows_to_read); if (current_preferred_max_column_in_block_size_bytes) { From af0b875f57aa65d5095563a218da85aa457a37b0 Mon Sep 17 00:00:00 2001 From: proller Date: Mon, 14 Jan 2019 17:15:39 +0300 Subject: [PATCH 12/17] Fix macos build --- .../src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index 2f1ee2a2943..d0b5636f3dc 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -199,7 +199,7 @@ void 
ReplicatedMergeTreeCleanupThread::clearOldLogs() min_saved_log_pointer = std::min(min_saved_log_pointer, min_log_pointer_lost_candidate); /// We will not touch the last `min_replicated_logs_to_keep` records. - entries.erase(entries.end() - std::min(entries.size(), storage.data.settings.min_replicated_logs_to_keep.value), entries.end()); + entries.erase(entries.end() - std::min(entries.size(), storage.data.settings.min_replicated_logs_to_keep.value), entries.end()); /// We will not touch records that are no less than `min_saved_log_pointer`. entries.erase(std::lower_bound(entries.begin(), entries.end(), "log-" + padIndex(min_saved_log_pointer)), entries.end()); From 72df7ceee6c126086b081a054b06b02569a154c2 Mon Sep 17 00:00:00 2001 From: proller Date: Mon, 14 Jan 2019 18:18:56 +0300 Subject: [PATCH 13/17] Macos build fix --- dbms/src/Formats/PrettyBlockOutputStream.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dbms/src/Formats/PrettyBlockOutputStream.cpp b/dbms/src/Formats/PrettyBlockOutputStream.cpp index 16df780993e..fe102ea5739 100644 --- a/dbms/src/Formats/PrettyBlockOutputStream.cpp +++ b/dbms/src/Formats/PrettyBlockOutputStream.cpp @@ -61,7 +61,7 @@ void PrettyBlockOutputStream::calculateWidths( elem.type->serializeText(*elem.column, j, out, format_settings); } - widths[i][j] = std::min(format_settings.pretty.max_column_pad_width, + widths[i][j] = std::min(format_settings.pretty.max_column_pad_width, UTF8::computeWidth(reinterpret_cast(serialized_value.data()), serialized_value.size(), prefix)); max_widths[i] = std::max(max_widths[i], widths[i][j]); } @@ -69,7 +69,7 @@ void PrettyBlockOutputStream::calculateWidths( /// And also calculate widths for names of columns. { // name string doesn't contain Tab, no need to pass `prefix` - name_widths[i] = std::min(format_settings.pretty.max_column_pad_width, + name_widths[i] = std::min(format_settings.pretty.max_column_pad_width, UTF8::computeWidth(reinterpret_cast(elem.name.data()), elem.name.size())); max_widths[i] = std::max(max_widths[i], name_widths[i]); } From 4b6af5788a6efe703f3402615bc0e6b1b6250417 Mon Sep 17 00:00:00 2001 From: proller Date: Mon, 14 Jan 2019 19:30:36 +0300 Subject: [PATCH 14/17] Fix macos build --- .../src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp index d0b5636f3dc..d6ae21fc8be 100644 --- a/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp +++ b/dbms/src/Storages/MergeTree/ReplicatedMergeTreeCleanupThread.cpp @@ -295,7 +295,7 @@ void ReplicatedMergeTreeCleanupThread::clearOldBlocks() /// Virtual node, all nodes that are "greater" than this one will be deleted NodeWithStat block_threshold{{}, time_threshold}; - size_t current_deduplication_window = std::min(timed_blocks.size(), storage.data.settings.replicated_deduplication_window.value); + size_t current_deduplication_window = std::min(timed_blocks.size(), storage.data.settings.replicated_deduplication_window.value); auto first_outdated_block_fixed_threshold = timed_blocks.begin() + current_deduplication_window; auto first_outdated_block_time_threshold = std::upper_bound(timed_blocks.begin(), timed_blocks.end(), block_threshold, NodeWithStat::greaterByTime); auto first_outdated_block = std::min(first_outdated_block_fixed_threshold, first_outdated_block_time_threshold); From 15eee83be7b05b2629c599322b0c0f2c24b615dc Mon 
Sep 17 00:00:00 2001 From: Ivan Lezhankin Date: Mon, 14 Jan 2019 16:06:14 +0300 Subject: [PATCH 15/17] Use internal cppkafka in unbundled configuration --- cmake/find_rdkafka.cmake | 1 + contrib/CMakeLists.txt | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/cmake/find_rdkafka.cmake b/cmake/find_rdkafka.cmake index b0a0a98b382..1c93b99a344 100644 --- a/cmake/find_rdkafka.cmake +++ b/cmake/find_rdkafka.cmake @@ -20,6 +20,7 @@ if (NOT USE_INTERNAL_RDKAFKA_LIBRARY) if (USE_STATIC_LIBRARIES AND NOT OS_FREEBSD) find_library (SASL2_LIBRARY sasl2) endif () + set (CPPKAFKA_LIBRARY cppkafka) # TODO: try to use unbundled version. endif () if (RDKAFKA_LIB AND RDKAFKA_INCLUDE_DIR) diff --git a/contrib/CMakeLists.txt b/contrib/CMakeLists.txt index 25ad30e02eb..8504e5facdb 100644 --- a/contrib/CMakeLists.txt +++ b/contrib/CMakeLists.txt @@ -121,11 +121,14 @@ endif () if (USE_INTERNAL_RDKAFKA_LIBRARY) add_subdirectory (librdkafka-cmake) - add_subdirectory (cppkafka-cmake) target_include_directories(rdkafka BEFORE PRIVATE ${ZLIB_INCLUDE_DIR}) target_include_directories(rdkafka BEFORE PRIVATE ${OPENSSL_INCLUDE_DIR}) endif () +if (USE_RDKAFKA) + add_subdirectory (cppkafka-cmake) +endif() + if (ENABLE_ODBC AND USE_INTERNAL_ODBC_LIBRARY) add_subdirectory (unixodbc-cmake) endif () From dbb88e8cb4075d9bed616d36c9bb3122e0068d8b Mon Sep 17 00:00:00 2001 From: chertus Date: Mon, 14 Jan 2019 21:15:04 +0300 Subject: [PATCH 16/17] helpers for ASTIdentifier (hide semantic and casts) --- .../DataTypes/DataTypeAggregateFunction.cpp | 4 +- dbms/src/Databases/DatabaseOrdinary.cpp | 1 + dbms/src/Databases/DatabasesCommon.cpp | 1 + dbms/src/Interpreters/AnalyzedJoin.cpp | 4 +- .../Interpreters/ArrayJoinedColumnsVisitor.h | 2 +- dbms/src/Interpreters/ColumnNamesContext.cpp | 2 +- dbms/src/Interpreters/ColumnNamesContext.h | 4 +- .../DatabaseAndTableWithAlias.cpp | 44 +++++-------- .../Interpreters/DatabaseAndTableWithAlias.h | 1 + dbms/src/Interpreters/ExpressionAnalyzer.cpp | 3 +- dbms/src/Interpreters/ExternalTablesVisitor.h | 6 +- .../InJoinSubqueriesPreprocessor.cpp | 8 +-- .../Interpreters/InterpreterDescribeQuery.cpp | 8 +-- .../JoinToSubqueryTransformVisitor.cpp | 4 +- dbms/src/Interpreters/QueryNormalizer.cpp | 22 +++---- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 15 +++-- .../TranslateQualifiedNamesVisitor.cpp | 2 +- dbms/src/Interpreters/executeQuery.cpp | 2 +- dbms/src/Interpreters/loadMetadata.cpp | 1 + dbms/src/Parsers/ASTIdentifier.cpp | 66 +++++++++++++++++++ dbms/src/Parsers/ASTIdentifier.h | 38 +++++++++-- dbms/src/Parsers/ExpressionElementParsers.cpp | 17 +++-- dbms/src/Parsers/ExpressionListParsers.cpp | 3 +- dbms/src/Parsers/ParserAlterQuery.cpp | 3 +- dbms/src/Parsers/ParserCheckQuery.cpp | 8 +-- dbms/src/Parsers/ParserCreateQuery.cpp | 29 ++++---- dbms/src/Parsers/ParserCreateQuery.h | 5 +- dbms/src/Parsers/ParserDropQuery.cpp | 10 ++- dbms/src/Parsers/ParserInsertQuery.cpp | 11 +--- dbms/src/Parsers/ParserOptimizeQuery.cpp | 8 +-- dbms/src/Parsers/ParserQueryWithOutput.cpp | 2 +- dbms/src/Parsers/ParserRenameQuery.cpp | 7 +- dbms/src/Parsers/ParserSetQuery.cpp | 2 +- dbms/src/Parsers/ParserShowTablesQuery.cpp | 3 +- .../Parsers/ParserTablePropertiesQuery.cpp | 6 +- dbms/src/Parsers/ParserUseQuery.cpp | 5 +- .../src/Parsers/parseDatabaseAndTableName.cpp | 7 +- .../parseIdentifierOrStringLiteral.cpp | 2 +- dbms/src/Storages/AlterCommands.cpp | 7 +- .../MergeTree/MergeTreeWhereOptimizer.cpp | 27 ++++---- .../MergeTree/MergeTreeWhereOptimizer.h | 2 +- 
.../MergeTree/registerStorageMergeTree.cpp | 24 ++----- dbms/src/Storages/PartitionCommands.cpp | 3 +- dbms/src/Storages/StorageFile.cpp | 10 +-- dbms/src/Storages/StorageHDFS.cpp | 2 - dbms/src/Storages/StorageJoin.cpp | 19 +++--- dbms/src/Storages/VirtualColumnUtils.cpp | 8 +-- .../transformQueryForExternalDatabase.cpp | 2 +- .../TableFunctions/TableFunctionRemote.cpp | 9 +-- 49 files changed, 253 insertions(+), 226 deletions(-) diff --git a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp index 04786922966..7a1b163f3b6 100644 --- a/dbms/src/DataTypes/DataTypeAggregateFunction.cpp +++ b/dbms/src/DataTypes/DataTypeAggregateFunction.cpp @@ -317,9 +317,9 @@ static DataTypePtr create(const ASTPtr & arguments) params_row[i] = lit->value; } } - else if (const ASTIdentifier * identifier = typeid_cast(arguments->children[0].get())) + else if (auto opt_name = getIdentifierName(arguments->children[0])) { - function_name = identifier->name; + function_name = *opt_name; } else if (typeid_cast(arguments->children[0].get())) { diff --git a/dbms/src/Databases/DatabaseOrdinary.cpp b/dbms/src/Databases/DatabaseOrdinary.cpp index cb1c7587080..958d65b7128 100644 --- a/dbms/src/Databases/DatabaseOrdinary.cpp +++ b/dbms/src/Databases/DatabaseOrdinary.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/dbms/src/Databases/DatabasesCommon.cpp b/dbms/src/Databases/DatabasesCommon.cpp index e64851bf470..3189701d13c 100644 --- a/dbms/src/Databases/DatabasesCommon.cpp +++ b/dbms/src/Databases/DatabasesCommon.cpp @@ -1,5 +1,6 @@ #include +#include #include #include #include diff --git a/dbms/src/Interpreters/AnalyzedJoin.cpp b/dbms/src/Interpreters/AnalyzedJoin.cpp index c39ea9c9495..c3ea45bf817 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.cpp +++ b/dbms/src/Interpreters/AnalyzedJoin.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include @@ -118,8 +117,7 @@ NamesAndTypesList getNamesAndTypeListFromTableExpression(const ASTTableExpressio } else if (table_expression.database_and_table_name) { - const auto & identifier = static_cast(*table_expression.database_and_table_name); - DatabaseAndTableWithAlias database_table(identifier); + DatabaseAndTableWithAlias database_table(table_expression.database_and_table_name); const auto & table = context.getTable(database_table.database, database_table.table); names_and_type_list = table->getSampleBlockNonMaterialized().getNamesAndTypesList(); } diff --git a/dbms/src/Interpreters/ArrayJoinedColumnsVisitor.h b/dbms/src/Interpreters/ArrayJoinedColumnsVisitor.h index de75f4622ef..15985eb7ba2 100644 --- a/dbms/src/Interpreters/ArrayJoinedColumnsVisitor.h +++ b/dbms/src/Interpreters/ArrayJoinedColumnsVisitor.h @@ -58,7 +58,7 @@ private: NameToNameMap & array_join_alias_to_name = data.array_join_alias_to_name; NameToNameMap & array_join_result_to_source = data.array_join_result_to_source; - if (!node.general()) + if (!getColumnIdentifierName(node)) return; auto splitted = Nested::splitName(node.name); /// ParsedParams, Key1 diff --git a/dbms/src/Interpreters/ColumnNamesContext.cpp b/dbms/src/Interpreters/ColumnNamesContext.cpp index 1d17106d3e7..246b5f5306e 100644 --- a/dbms/src/Interpreters/ColumnNamesContext.cpp +++ b/dbms/src/Interpreters/ColumnNamesContext.cpp @@ -31,7 +31,7 @@ bool ColumnNamesContext::addColumnAliasIfAny(const IAST & ast, bool is_public) void ColumnNamesContext::addColumnIdentifier(const ASTIdentifier & node, bool is_public) { - if (!node.general()) + if 
(!getColumnIdentifierName(node)) return; required_names.insert(node.name); diff --git a/dbms/src/Interpreters/ColumnNamesContext.h b/dbms/src/Interpreters/ColumnNamesContext.h index a605903580f..0827463692c 100644 --- a/dbms/src/Interpreters/ColumnNamesContext.h +++ b/dbms/src/Interpreters/ColumnNamesContext.h @@ -39,9 +39,7 @@ struct ColumnNamesContext std::optional name() const { if (expr) - if (auto * node = expr->database_and_table_name.get()) - if (auto * identifier = typeid_cast(node)) - return identifier->name; + return getIdentifierName(expr->database_and_table_name); return {}; } diff --git a/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp b/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp index df430ee0fbd..154484ab5b6 100644 --- a/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp +++ b/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp @@ -54,8 +54,6 @@ size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifie { size_t num_qualifiers_to_strip = 0; - auto get_identifier_name = [](const ASTPtr & ast) { return static_cast(*ast).name; }; - /// It is compound identifier if (!identifier.children.empty()) { @@ -64,16 +62,16 @@ size_t getNumComponentsToStripInOrderToTranslateQualifiedName(const ASTIdentifie /// database.table.column if (num_components >= 3 && !names.database.empty() - && get_identifier_name(identifier.children[0]) == names.database - && get_identifier_name(identifier.children[1]) == names.table) + && *getIdentifierName(identifier.children[0]) == names.database + && *getIdentifierName(identifier.children[1]) == names.table) { num_qualifiers_to_strip = 2; } /// table.column or alias.column. If num_components > 2, it is like table.nested.column. if (num_components >= 2 - && ((!names.table.empty() && get_identifier_name(identifier.children[0]) == names.table) - || (!names.alias.empty() && get_identifier_name(identifier.children[0]) == names.alias))) + && ((!names.table.empty() && *getIdentifierName(identifier.children[0]) == names.table) + || (!names.alias.empty() && *getIdentifierName(identifier.children[0]) == names.alias))) { num_qualifiers_to_strip = 1; } @@ -94,26 +92,24 @@ DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTIdentifier & ident if (identifier.children.size() != 2) throw Exception("Logical error: number of components in table expression not equal to two", ErrorCodes::LOGICAL_ERROR); - const ASTIdentifier * db_identifier = typeid_cast(identifier.children[0].get()); - const ASTIdentifier * table_identifier = typeid_cast(identifier.children[1].get()); - if (!db_identifier || !table_identifier) - throw Exception("Logical error: identifiers expected", ErrorCodes::LOGICAL_ERROR); - - database = db_identifier->name; - table = table_identifier->name; + getIdentifierName(identifier.children[0], database); + getIdentifierName(identifier.children[1], table); } } +DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTPtr & node, const String & current_database) +{ + const auto * identifier = typeid_cast(node.get()); + if (!identifier) + throw Exception("Logical error: identifier expected", ErrorCodes::LOGICAL_ERROR); + + *this = DatabaseAndTableWithAlias(*identifier, current_database); +} + DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database) { if (table_expression.database_and_table_name) - { - const auto * identifier = typeid_cast(table_expression.database_and_table_name.get()); - if (!identifier) - throw Exception("Logical error: 
identifier expected", ErrorCodes::LOGICAL_ERROR); - - *this = DatabaseAndTableWithAlias(*identifier, current_database); - } + *this = DatabaseAndTableWithAlias(table_expression.database_and_table_name, current_database); else if (table_expression.table_function) alias = table_expression.table_function->tryGetAlias(); else if (table_expression.subquery) @@ -207,14 +203,10 @@ std::optional getDatabaseAndTable(const ASTSelectQuer return {}; ASTPtr database_and_table_name = table_expression->database_and_table_name; - if (!database_and_table_name) + if (!database_and_table_name || !isIdentifier(database_and_table_name)) return {}; - const ASTIdentifier * identifier = typeid_cast(database_and_table_name.get()); - if (!identifier) - return {}; - - return *identifier; + return DatabaseAndTableWithAlias(database_and_table_name); } ASTPtr getTableFunctionOrSubquery(const ASTSelectQuery & select, size_t table_number) diff --git a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h index 8076deb5ee9..601bde82e2f 100644 --- a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h +++ b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h @@ -24,6 +24,7 @@ struct DatabaseAndTableWithAlias String alias; DatabaseAndTableWithAlias() = default; + DatabaseAndTableWithAlias(const ASTPtr & identifier_node, const String & current_database = ""); DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database = ""); DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database); diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 52a6c8a5e17..78b2c2cfffb 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -548,8 +548,7 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty /// TODO This syntax does not support specifying a database name. 
if (table_to_join.database_and_table_name) { - const auto & identifier = static_cast(*table_to_join.database_and_table_name); - DatabaseAndTableWithAlias database_table(identifier); + DatabaseAndTableWithAlias database_table(table_to_join.database_and_table_name); StoragePtr table = context.tryGetTable(database_table.database, database_table.table); if (table) diff --git a/dbms/src/Interpreters/ExternalTablesVisitor.h b/dbms/src/Interpreters/ExternalTablesVisitor.h index ffc51bf7890..d8b177b1ed3 100644 --- a/dbms/src/Interpreters/ExternalTablesVisitor.h +++ b/dbms/src/Interpreters/ExternalTablesVisitor.h @@ -33,9 +33,9 @@ public: private: static std::vector visit(const ASTIdentifier & node, ASTPtr &, Data & data) { - if (node.special()) - if (StoragePtr external_storage = data.context.tryGetExternalTable(node.name)) - data.external_tables[node.name] = external_storage; + if (auto opt_name = getTableIdentifierName(node)) + if (StoragePtr external_storage = data.context.tryGetExternalTable(*opt_name)) + data.external_tables[*opt_name] = external_storage; return {}; } }; diff --git a/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp b/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp index 58e28f9bfc6..0b118f98057 100644 --- a/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp +++ b/dbms/src/Interpreters/InJoinSubqueriesPreprocessor.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include @@ -82,12 +81,7 @@ void forEachTable(IAST * node, F && f) StoragePtr tryGetTable(const ASTPtr & database_and_table, const Context & context) { - const ASTIdentifier * id = typeid_cast(database_and_table.get()); - if (!id) - throw Exception("Logical error: identifier expected", ErrorCodes::LOGICAL_ERROR); - - DatabaseAndTableWithAlias db_and_table(*id); - + DatabaseAndTableWithAlias db_and_table(database_and_table); return context.tryGetTable(db_and_table.database, db_and_table.table); } diff --git a/dbms/src/Interpreters/InterpreterDescribeQuery.cpp b/dbms/src/Interpreters/InterpreterDescribeQuery.cpp index 9aec49f27f1..9e7fbec6217 100644 --- a/dbms/src/Interpreters/InterpreterDescribeQuery.cpp +++ b/dbms/src/Interpreters/InterpreterDescribeQuery.cpp @@ -95,13 +95,11 @@ BlockInputStreamPtr InterpreterDescribeQuery::executeImpl() auto database_ptr = identifier->children[0]; auto table_ptr = identifier->children[1]; - if (database_ptr) - database_name = typeid_cast(*database_ptr).name; - if (table_ptr) - table_name = typeid_cast(*table_ptr).name; + getIdentifierName(database_ptr, database_name); + getIdentifierName(table_ptr, table_name); } else - table_name = typeid_cast(*identifier).name; + getIdentifierName(identifier, table_name); table = context.getTable(database_name, table_name); } diff --git a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index 80c670c8794..a52df54e626 100644 --- a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -83,8 +83,8 @@ static void appendTableNameAndAlias(std::vector & hidden, const ASTPtr & if (!alias.empty()) hidden.push_back(alias); - if (auto * identifier = typeid_cast(table_expression->children[0].get())) - hidden.push_back(identifier->name); + if (auto opt_name = getIdentifierName(table_expression->children[0])) + hidden.push_back(*opt_name); else if (alias.empty()) throw Exception("Expected Identifier or subquery with alias", ErrorCodes::LOGICAL_ERROR); } diff --git 
a/dbms/src/Interpreters/QueryNormalizer.cpp b/dbms/src/Interpreters/QueryNormalizer.cpp index 328b3f6bf6a..e422d247a9c 100644 --- a/dbms/src/Interpreters/QueryNormalizer.cpp +++ b/dbms/src/Interpreters/QueryNormalizer.cpp @@ -68,9 +68,12 @@ void QueryNormalizer::visit(ASTFunction & node, const ASTPtr &, Data & data) /// `IN t` can be specified, where t is a table, which is equivalent to `IN (SELECT * FROM t)`. if (functionIsInOrGlobalInOperator(func_name)) - if (ASTIdentifier * right = typeid_cast(func_arguments->children.at(1).get())) - if (!aliases.count(right->name)) - right->setSpecial(); + { + auto & ast = func_arguments->children.at(1); + if (auto opt_name = getIdentifierName(ast)) + if (!aliases.count(*opt_name)) + setIdentifierSpecial(ast); + } /// Special cases for count function. String func_name_lowercase = Poco::toLower(func_name); @@ -97,7 +100,7 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) auto & current_asts = data.current_asts; String & current_alias = data.current_alias; - if (!node.general()) + if (!getColumnIdentifierName(node)) return; /// If it is an alias, but not a parent alias (for constructs like "SELECT column + 1 AS column"). @@ -114,9 +117,8 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) if (!my_alias.empty() && my_alias != alias_node->getAliasOrColumnName()) { /// Avoid infinite recursion here - auto replace_to_identifier = typeid_cast(alias_node.get()); - bool is_cycle = replace_to_identifier && replace_to_identifier->general() - && replace_to_identifier->name == node.name; + auto opt_name = getColumnIdentifierName(alias_node); + bool is_cycle = opt_name && *opt_name == node.name; if (!is_cycle) { @@ -195,10 +197,8 @@ void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr & { if (node.table_expression) { - auto & database_and_table_name = static_cast(*node.table_expression).database_and_table_name; - if (database_and_table_name) - if (ASTIdentifier * right = typeid_cast(database_and_table_name.get())) - right->setSpecial(); + auto & expr = static_cast(*node.table_expression); + setIdentifierSpecial(expr.database_and_table_name); } } diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 5b40200c019..81bcca0bbc6 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -443,7 +443,7 @@ void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const const String nested_table_name = ast->getColumnName(); const String nested_table_alias = ast->getAliasOrColumnName(); - if (nested_table_alias == nested_table_name && !typeid_cast(ast.get())) + if (nested_table_alias == nested_table_name && !isIdentifier(ast)) throw Exception("No alias for non-trivial value in ARRAY JOIN: " + nested_table_name, ErrorCodes::ALIAS_REQUIRED); @@ -471,7 +471,7 @@ void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const String result_name = expr->getAliasOrColumnName(); /// This is an array. 
- if (!typeid_cast(expr.get()) || source_columns_set.count(source_name)) + if (!isIdentifier(expr) || source_columns_set.count(source_name)) { result.array_join_result_to_source[result_name] = source_name; } @@ -528,10 +528,10 @@ void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTS std::function get_table_belonging; get_table_belonging = [&](const ASTPtr & ast) -> TableBelonging { - auto * identifier = typeid_cast(ast.get()); - if (identifier) + if (getColumnIdentifierName(ast)) { - if (identifier->general()) + auto * identifier = typeid_cast(ast.get()); + { auto left_num_components = getNumComponentsToStripInOrderToTranslateQualifiedName(*identifier, left_source_names); auto right_num_components = getNumComponentsToStripInOrderToTranslateQualifiedName(*identifier, right_source_names); @@ -567,9 +567,10 @@ void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTS std::function translate_qualified_names; translate_qualified_names = [&](ASTPtr & ast, const DatabaseAndTableWithAlias & source_names, bool right_table) { - if (auto * identifier = typeid_cast(ast.get())) + if (getColumnIdentifierName(ast)) { - if (identifier->general()) + auto * identifier = typeid_cast(ast.get()); + { auto num_components = getNumComponentsToStripInOrderToTranslateQualifiedName(*identifier, source_names); stripIdentifier(ast, num_components); diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 6ceb0cfe524..03726995f17 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -55,7 +55,7 @@ std::vector TranslateQualifiedNamesMatcher::visit(const ASTIdentifier const NameSet & source_columns = data.source_columns; const std::vector & tables = data.tables; - if (identifier.general()) + if (getColumnIdentifierName(identifier)) { /// Select first table name with max number of qualifiers which can be stripped. size_t max_num_qualifiers_to_strip = 0; diff --git a/dbms/src/Interpreters/executeQuery.cpp b/dbms/src/Interpreters/executeQuery.cpp index 75fd09f5676..3911e437fa6 100644 --- a/dbms/src/Interpreters/executeQuery.cpp +++ b/dbms/src/Interpreters/executeQuery.cpp @@ -499,7 +499,7 @@ void executeQuery( } String format_name = ast_query_with_output && (ast_query_with_output->format != nullptr) - ? typeid_cast(*ast_query_with_output->format).name + ? 
*getIdentifierName(ast_query_with_output->format) : context.getDefaultFormat(); BlockOutputStreamPtr out = context.getOutputFormat(format_name, *out_buf, streams.in->getHeader()); diff --git a/dbms/src/Interpreters/loadMetadata.cpp b/dbms/src/Interpreters/loadMetadata.cpp index 38e8407082d..2eae6ba4c29 100644 --- a/dbms/src/Interpreters/loadMetadata.cpp +++ b/dbms/src/Interpreters/loadMetadata.cpp @@ -22,6 +22,7 @@ #include #include +#include namespace DB diff --git a/dbms/src/Parsers/ASTIdentifier.cpp b/dbms/src/Parsers/ASTIdentifier.cpp index efe796c3018..b463a65a84a 100644 --- a/dbms/src/Parsers/ASTIdentifier.cpp +++ b/dbms/src/Parsers/ASTIdentifier.cpp @@ -1,4 +1,5 @@ #include +#include #include #include @@ -38,4 +39,69 @@ void ASTIdentifier::appendColumnNameImpl(WriteBuffer & ostr) const writeString(name, ostr); } +bool isIdentifier(const IAST * const ast) +{ + if (ast) + return typeid_cast(ast); + return false; +} + +std::optional getIdentifierName(const IAST * const ast) +{ + if (ast) + if (auto node = typeid_cast(ast)) + return node->name; + return {}; +} + +bool getIdentifierName(const ASTPtr & ast, String & name) +{ + if (ast) + if (auto node = typeid_cast(ast.get())) + { + name = node->name; + return true; + } + return false; +} + +std::optional getColumnIdentifierName(const ASTIdentifier & node) +{ + if (!node.special()) + return node.name; + return {}; +} + +std::optional getColumnIdentifierName(const ASTPtr & ast) +{ + if (ast) + if (auto id = typeid_cast(ast.get())) + if (!id->special()) + return id->name; + return {}; +} + +std::optional getTableIdentifierName(const ASTIdentifier & node) +{ + if (node.special()) + return node.name; + return {}; +} + +std::optional getTableIdentifierName(const ASTPtr & ast) +{ + if (ast) + if (auto id = typeid_cast(ast.get())) + if (id->special()) + return id->name; + return {}; +} + +void setIdentifierSpecial(ASTPtr & ast) +{ + if (ast) + if (ASTIdentifier * id = typeid_cast(ast.get())) + id->setSpecial(); +} + } diff --git a/dbms/src/Parsers/ASTIdentifier.h b/dbms/src/Parsers/ASTIdentifier.h index b8c56727e17..8ae65065e1d 100644 --- a/dbms/src/Parsers/ASTIdentifier.h +++ b/dbms/src/Parsers/ASTIdentifier.h @@ -1,13 +1,14 @@ #pragma once +#include + #include namespace DB { -/** Identifier (column or alias) - */ +/// Identifier (column, table or alias) class ASTIdentifier : public ASTWithAlias { enum Kind /// TODO This is semantic, not syntax. Remove it. @@ -33,10 +34,6 @@ public: set.insert(name); } - void setSpecial() { kind = Special; } - bool general() const { return kind == General; } - bool special() const { return kind == Special; } - static std::shared_ptr createSpecial(const String & name_) { return std::make_shared(name_, ASTIdentifier::Special); @@ -48,6 +45,35 @@ protected: private: Kind kind; + + void setSpecial() { kind = Special; } + bool special() const { return kind == Special; } + + friend void setIdentifierSpecial(ASTPtr &); + friend std::optional getColumnIdentifierName(const ASTIdentifier & node); + friend std::optional getColumnIdentifierName(const ASTPtr & ast); + friend std::optional getTableIdentifierName(const ASTIdentifier & node); + friend std::optional getTableIdentifierName(const ASTPtr & ast); }; + +/// ASTIdentifier Helpers: hide casts and semantic. 
+ +bool isIdentifier(const IAST * const ast); +inline bool isIdentifier(const ASTPtr & ast) { return isIdentifier(ast.get()); } + +std::optional getIdentifierName(const IAST * const ast); +inline std::optional getIdentifierName(const ASTPtr & ast) { return getIdentifierName(ast.get()); } +bool getIdentifierName(const ASTPtr & ast, String & name); + +/// @returns name for column identifiers +std::optional getColumnIdentifierName(const ASTIdentifier & node); +std::optional getColumnIdentifierName(const ASTPtr & ast); + +/// @returns name for 'not a column' identifiers +std::optional getTableIdentifierName(const ASTIdentifier & node); +std::optional getTableIdentifierName(const ASTPtr & ast); + +void setIdentifierSpecial(ASTPtr & ast); + } diff --git a/dbms/src/Parsers/ExpressionElementParsers.cpp b/dbms/src/Parsers/ExpressionElementParsers.cpp index 89081da9211..9ba848dbe1a 100644 --- a/dbms/src/Parsers/ExpressionElementParsers.cpp +++ b/dbms/src/Parsers/ExpressionElementParsers.cpp @@ -5,6 +5,7 @@ #include #include +#include #include #include @@ -173,7 +174,7 @@ bool ParserCompoundIdentifier::parseImpl(Pos & pos, ASTPtr & node, Expected & ex { if (!name.empty()) name += '.'; - name += static_cast(*child.get()).name; + name += *getIdentifierName(child); } node = std::make_shared(name); @@ -222,7 +223,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) * If you do not report that the first option is an error, then the argument will be interpreted as 2014 - 01 - 01 - some number, * and the query silently returns an unexpected result. */ - if (typeid_cast(*identifier).name == "toDate" + if (*getIdentifierName(identifier) == "toDate" && contents_end - contents_begin == strlen("2014-01-01") && contents_begin[0] >= '2' && contents_begin[0] <= '3' && contents_begin[1] >= '0' && contents_begin[1] <= '9' @@ -264,7 +265,7 @@ bool ParserFunction::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) } auto function_node = std::make_shared(); - function_node->name = typeid_cast(*identifier).name; + getIdentifierName(identifier, function_node->name); /// func(DISTINCT ...) is equivalent to funcDistinct(...) if (has_distinct_modifier) @@ -1157,7 +1158,7 @@ bool ParserAlias::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) * and in the query "SELECT x FRO FROM t", the word FRO was considered an alias. 
*/ - const String & name = static_cast(*node.get()).name; + const String name = *getIdentifierName(node); for (const char ** keyword = restricted_keywords; *keyword != nullptr; ++keyword) if (0 == strcasecmp(name.data(), *keyword)) @@ -1249,18 +1250,16 @@ bool ParserWithOptionalAlias::parseImpl(Pos & pos, ASTPtr & node, Expected & exp */ bool allow_alias_without_as_keyword_now = allow_alias_without_as_keyword; if (allow_alias_without_as_keyword) - if (const ASTIdentifier * id = typeid_cast(node.get())) - if (0 == strcasecmp(id->name.data(), "FROM")) + if (auto opt_id = getIdentifierName(node)) + if (0 == strcasecmp(opt_id->data(), "FROM")) allow_alias_without_as_keyword_now = false; ASTPtr alias_node; if (ParserAlias(allow_alias_without_as_keyword_now).parse(pos, alias_node, expected)) { - String alias_name = typeid_cast(*alias_node).name; - if (ASTWithAlias * ast_with_alias = dynamic_cast(node.get())) { - ast_with_alias->alias = alias_name; + getIdentifierName(alias_node, ast_with_alias->alias); ast_with_alias->prefer_alias_to_column_name = prefer_alias_to_column_name; } else diff --git a/dbms/src/Parsers/ExpressionListParsers.cpp b/dbms/src/Parsers/ExpressionListParsers.cpp index de6fc2dc129..9f17b3935f5 100644 --- a/dbms/src/Parsers/ExpressionListParsers.cpp +++ b/dbms/src/Parsers/ExpressionListParsers.cpp @@ -1,10 +1,9 @@ +#include #include #include #include - #include #include - #include #include diff --git a/dbms/src/Parsers/ParserAlterQuery.cpp b/dbms/src/Parsers/ParserAlterQuery.cpp index b17467ed365..91aa211065e 100644 --- a/dbms/src/Parsers/ParserAlterQuery.cpp +++ b/dbms/src/Parsers/ParserAlterQuery.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -312,7 +313,7 @@ bool ParserAssignment::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) if (!p_expression.parse(pos, assignment->expression, expected)) return false; - assignment->column_name = typeid_cast(*column).name; + getIdentifierName(column, assignment->column_name); if (assignment->expression) assignment->children.push_back(assignment->expression); diff --git a/dbms/src/Parsers/ParserCheckQuery.cpp b/dbms/src/Parsers/ParserCheckQuery.cpp index d9fd46694d6..cd25e60b887 100644 --- a/dbms/src/Parsers/ParserCheckQuery.cpp +++ b/dbms/src/Parsers/ParserCheckQuery.cpp @@ -4,8 +4,6 @@ #include #include -#include - namespace DB { @@ -31,15 +29,15 @@ bool ParserCheckQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; auto query = std::make_shared(); - query->database = typeid_cast(*database).name; - query->table = typeid_cast(*table).name; + getIdentifierName(database, query->database); + getIdentifierName(table, query->table); node = query; } else { table = database; auto query = std::make_shared(); - query->table = typeid_cast(*table).name; + getIdentifierName(table, query->table); node = query; } diff --git a/dbms/src/Parsers/ParserCreateQuery.cpp b/dbms/src/Parsers/ParserCreateQuery.cpp index f44b7d35eb6..79767218d22 100644 --- a/dbms/src/Parsers/ParserCreateQuery.cpp +++ b/dbms/src/Parsers/ParserCreateQuery.cpp @@ -35,7 +35,7 @@ bool ParserNestedTable::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) return false; auto func = std::make_shared(); - func->name = typeid_cast(*name).name; + getIdentifierName(name, func->name); func->arguments = columns; func->children.push_back(columns); node = func; @@ -70,7 +70,7 @@ bool ParserIdentifierWithOptionalParameters::parseImpl(Pos & pos, ASTPtr & node, if (non_parametric.parse(pos, ident, expected)) { auto func = std::make_shared(); - 
func->name = typeid_cast(*ident).name; + getIdentifierName(ident, func->name); node = func; return true; } @@ -257,10 +257,8 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) query->if_not_exists = if_not_exists; query->cluster = cluster_str; - if (database) - query->database = typeid_cast(*database).name; - if (table) - query->table = typeid_cast(*table).name; + getIdentifierName(database, query->database); + getIdentifierName(table, query->table); return true; } @@ -405,23 +403,18 @@ bool ParserCreateQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) query->is_populate = is_populate; query->temporary = is_temporary; - if (database) - query->database = typeid_cast(*database).name; - if (table) - query->table = typeid_cast(*table).name; + getIdentifierName(database, query->database); + getIdentifierName(table, query->table); query->cluster = cluster_str; - if (to_database) - query->to_database = typeid_cast(*to_database).name; - if (to_table) - query->to_table = typeid_cast(*to_table).name; + getIdentifierName(to_database, query->to_database); + getIdentifierName(to_table, query->to_table); query->set(query->columns, columns); query->set(query->storage, storage); - if (as_database) - query->as_database = typeid_cast(*as_database).name; - if (as_table) - query->as_table = typeid_cast(*as_table).name; + + getIdentifierName(as_database, query->as_database); + getIdentifierName(as_table, query->as_table); query->set(query->select, select); return true; diff --git a/dbms/src/Parsers/ParserCreateQuery.h b/dbms/src/Parsers/ParserCreateQuery.h index 27b4cd21fbd..1efe4e41bde 100644 --- a/dbms/src/Parsers/ParserCreateQuery.h +++ b/dbms/src/Parsers/ParserCreateQuery.h @@ -8,7 +8,6 @@ #include #include #include -#include #include @@ -74,7 +73,7 @@ bool IParserNameTypePair::parseImpl(Pos & pos, ASTPtr & node, Expect && type_parser.parse(pos, type, expected)) { auto name_type_pair = std::make_shared(); - name_type_pair->name = typeid_cast(*name).name; + getIdentifierName(name, name_type_pair->name); name_type_pair->type = type; name_type_pair->children.push_back(type); node = name_type_pair; @@ -181,7 +180,7 @@ bool IParserColumnDeclaration::parseImpl(Pos & pos, ASTPtr & node, E const auto column_declaration = std::make_shared(); node = column_declaration; - column_declaration->name = typeid_cast(*name).name; + getIdentifierName(name, column_declaration->name); if (type) { diff --git a/dbms/src/Parsers/ParserDropQuery.cpp b/dbms/src/Parsers/ParserDropQuery.cpp index 5ff69291e2f..c3a97a222d2 100644 --- a/dbms/src/Parsers/ParserDropQuery.cpp +++ b/dbms/src/Parsers/ParserDropQuery.cpp @@ -4,8 +4,6 @@ #include #include -#include - namespace DB { @@ -117,10 +115,10 @@ bool ParserDropQuery::parseDropQuery(Pos & pos, ASTPtr & node, Expected & expect query->kind = ASTDropQuery::Kind::Drop; query->if_exists = if_exists; query->temporary = temporary; - if (database) - query->database = typeid_cast(*database).name; - if (table) - query->table = typeid_cast(*table).name; + + getIdentifierName(database, query->database); + getIdentifierName(table, query->table); + query->cluster = cluster_str; return true; diff --git a/dbms/src/Parsers/ParserInsertQuery.cpp b/dbms/src/Parsers/ParserInsertQuery.cpp index 73aca09c210..017c4ad67ab 100644 --- a/dbms/src/Parsers/ParserInsertQuery.cpp +++ b/dbms/src/Parsers/ParserInsertQuery.cpp @@ -9,8 +9,6 @@ #include #include -#include - namespace DB { @@ -136,14 +134,11 @@ bool ParserInsertQuery::parseImpl(Pos & pos, ASTPtr & node, 
Expected & expected) } else { - if (database) - query->database = typeid_cast(*database).name; - - query->table = typeid_cast(*table).name; + getIdentifierName(database, query->database); + getIdentifierName(table, query->table); } - if (format) - query->format = typeid_cast(*format).name; + getIdentifierName(format, query->format); query->columns = columns; query->select = select; diff --git a/dbms/src/Parsers/ParserOptimizeQuery.cpp b/dbms/src/Parsers/ParserOptimizeQuery.cpp index 835db12cbb3..f749b316794 100644 --- a/dbms/src/Parsers/ParserOptimizeQuery.cpp +++ b/dbms/src/Parsers/ParserOptimizeQuery.cpp @@ -5,8 +5,6 @@ #include #include -#include - namespace DB { @@ -60,10 +58,8 @@ bool ParserOptimizeQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expecte auto query = std::make_shared(); node = query; - if (database) - query->database = typeid_cast(*database).name; - if (table) - query->table = typeid_cast(*table).name; + getIdentifierName(database, query->database); + getIdentifierName(table, query->table); query->cluster = cluster_str; query->partition = partition; diff --git a/dbms/src/Parsers/ParserQueryWithOutput.cpp b/dbms/src/Parsers/ParserQueryWithOutput.cpp index 48f0fd9c33b..2e1a5ff529a 100644 --- a/dbms/src/Parsers/ParserQueryWithOutput.cpp +++ b/dbms/src/Parsers/ParserQueryWithOutput.cpp @@ -76,7 +76,7 @@ bool ParserQueryWithOutput::parseImpl(Pos & pos, ASTPtr & node, Expected & expec if (!format_p.parse(pos, query_with_output.format, expected)) return false; - typeid_cast(*(query_with_output.format)).setSpecial(); + setIdentifierSpecial(query_with_output.format); query_with_output.children.push_back(query_with_output.format); } diff --git a/dbms/src/Parsers/ParserRenameQuery.cpp b/dbms/src/Parsers/ParserRenameQuery.cpp index 6eb8d768df9..aa5fb43742b 100644 --- a/dbms/src/Parsers/ParserRenameQuery.cpp +++ b/dbms/src/Parsers/ParserRenameQuery.cpp @@ -4,8 +4,6 @@ #include #include -#include - namespace DB { @@ -31,8 +29,9 @@ static bool parseDatabaseAndTable( return false; } - db_and_table.database = database ? 
typeid_cast(*database).name : ""; - db_and_table.table = typeid_cast(*table).name; + db_and_table.database.clear(); + getIdentifierName(database, db_and_table.database); + getIdentifierName(table, db_and_table.table); return true; } diff --git a/dbms/src/Parsers/ParserSetQuery.cpp b/dbms/src/Parsers/ParserSetQuery.cpp index 11f125bb955..14b5b4bec5e 100644 --- a/dbms/src/Parsers/ParserSetQuery.cpp +++ b/dbms/src/Parsers/ParserSetQuery.cpp @@ -31,7 +31,7 @@ static bool parseNameValuePair(ASTSetQuery::Change & change, IParser::Pos & pos, if (!value_p.parse(pos, value, expected)) return false; - change.name = typeid_cast(*name).name; + getIdentifierName(name, change.name); change.value = typeid_cast(*value).value; return true; diff --git a/dbms/src/Parsers/ParserShowTablesQuery.cpp b/dbms/src/Parsers/ParserShowTablesQuery.cpp index e4d6b5288d2..dc854883cfe 100644 --- a/dbms/src/Parsers/ParserShowTablesQuery.cpp +++ b/dbms/src/Parsers/ParserShowTablesQuery.cpp @@ -65,8 +65,7 @@ bool ParserShowTablesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expec return false; } - if (database) - query->from = typeid_cast(*database).name; + getIdentifierName(database, query->from); if (like) query->like = safeGet(typeid_cast(*like).value); diff --git a/dbms/src/Parsers/ParserTablePropertiesQuery.cpp b/dbms/src/Parsers/ParserTablePropertiesQuery.cpp index 45e92f9e181..f736023e0d5 100644 --- a/dbms/src/Parsers/ParserTablePropertiesQuery.cpp +++ b/dbms/src/Parsers/ParserTablePropertiesQuery.cpp @@ -75,10 +75,8 @@ bool ParserTablePropertiesQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & } } - if (database) - query->database = typeid_cast(*database).name; - if (table) - query->table = typeid_cast(*table).name; + getIdentifierName(database, query->database); + getIdentifierName(table, query->table); node = query; diff --git a/dbms/src/Parsers/ParserUseQuery.cpp b/dbms/src/Parsers/ParserUseQuery.cpp index 9e521a0d746..a7c66c570b8 100644 --- a/dbms/src/Parsers/ParserUseQuery.cpp +++ b/dbms/src/Parsers/ParserUseQuery.cpp @@ -15,16 +15,15 @@ bool ParserUseQuery::parseImpl(Pos & pos, ASTPtr & node, Expected & expected) ParserKeyword s_use("USE"); ParserIdentifier name_p; - ASTPtr database; - if (!s_use.ignore(pos, expected)) return false; + ASTPtr database; if (!name_p.parse(pos, database, expected)) return false; auto query = std::make_shared(); - query->database = typeid_cast(*database).name; + getIdentifierName(database, query->database); node = query; return true; diff --git a/dbms/src/Parsers/parseDatabaseAndTableName.cpp b/dbms/src/Parsers/parseDatabaseAndTableName.cpp index b7885eb293b..d7a199a3486 100644 --- a/dbms/src/Parsers/parseDatabaseAndTableName.cpp +++ b/dbms/src/Parsers/parseDatabaseAndTableName.cpp @@ -2,7 +2,6 @@ #include #include #include -#include namespace DB @@ -30,13 +29,13 @@ bool parseDatabaseAndTableName(IParser::Pos & pos, Expected & expected, String & return false; } - database_str = typeid_cast(*database).name; - table_str = typeid_cast(*table).name; + getIdentifierName(database, database_str); + getIdentifierName(table, table_str); } else { database_str = ""; - table_str = typeid_cast(*database).name; + getIdentifierName(database, table_str); } return true; diff --git a/dbms/src/Parsers/parseIdentifierOrStringLiteral.cpp b/dbms/src/Parsers/parseIdentifierOrStringLiteral.cpp index eaff2e85a9a..2fa71415efb 100644 --- a/dbms/src/Parsers/parseIdentifierOrStringLiteral.cpp +++ b/dbms/src/Parsers/parseIdentifierOrStringLiteral.cpp @@ -20,7 +20,7 @@ bool 
parseIdentifierOrStringLiteral(IParser::Pos & pos, Expected & expected, Str result = typeid_cast(*res).value.safeGet(); } else - result = typeid_cast(*res).name; + result = *getIdentifierName(res); return true; } diff --git a/dbms/src/Storages/AlterCommands.cpp b/dbms/src/Storages/AlterCommands.cpp index b5fbe0f3314..164ede64fab 100644 --- a/dbms/src/Storages/AlterCommands.cpp +++ b/dbms/src/Storages/AlterCommands.cpp @@ -50,7 +50,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ } if (command_ast->column) - command.after_column = typeid_cast(*command_ast->column).name; + command.after_column = *getIdentifierName(command_ast->column); command.if_not_exists = command_ast->if_not_exists; @@ -63,7 +63,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ AlterCommand command; command.type = AlterCommand::DROP_COLUMN; - command.column_name = typeid_cast(*(command_ast->column)).name; + command.column_name = *getIdentifierName(command_ast->column); command.if_exists = command_ast->if_exists; return command; } @@ -99,8 +99,7 @@ std::optional AlterCommand::parse(const ASTAlterCommand * command_ { AlterCommand command; command.type = COMMENT_COLUMN; - const auto & ast_identifier = typeid_cast(*command_ast->column); - command.column_name = ast_identifier.name; + command.column_name = *getIdentifierName(command_ast->column); const auto & ast_comment = typeid_cast(*command_ast->comment); command.comment = ast_comment.value.get(); command.if_exists = command_ast->if_exists; diff --git a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp index 5e8dbee81d4..850c696b266 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp +++ b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.cpp @@ -121,7 +121,7 @@ void MergeTreeWhereOptimizer::optimizeConjunction(ASTSelectQuery & select, ASTFu SCOPE_EXIT(++idx); - if (cannotBeMoved(condition)) + if (cannotBeMoved(conditions[idx])) continue; IdentifierNameSet identifiers{}; @@ -193,7 +193,7 @@ void MergeTreeWhereOptimizer::optimizeArbitrary(ASTSelectQuery & select) const auto & condition = select.where_expression; /// do not optimize restricted expressions - if (cannotBeMoved(select.where_expression.get())) + if (cannotBeMoved(select.where_expression)) return; IdentifierNameSet identifiers{}; @@ -250,10 +250,10 @@ bool MergeTreeWhereOptimizer::isConditionGood(const IAST * condition) const auto right_arg = function->arguments->children.back().get(); /// try to ensure left_arg points to ASTIdentifier - if (!typeid_cast(left_arg) && typeid_cast(right_arg)) + if (!isIdentifier(left_arg) && isIdentifier(right_arg)) std::swap(left_arg, right_arg); - if (typeid_cast(left_arg)) + if (isIdentifier(left_arg)) { /// condition may be "good" if only right_arg is a constant and its value is outside the threshold if (const auto literal = typeid_cast(right_arg)) @@ -286,8 +286,8 @@ bool MergeTreeWhereOptimizer::isConditionGood(const IAST * condition) const void MergeTreeWhereOptimizer::collectIdentifiersNoSubqueries(const IAST * const ast, IdentifierNameSet & set) { - if (const auto identifier = typeid_cast(ast)) - return (void) set.insert(identifier->name); + if (auto opt_name = getIdentifierName(ast)) + return (void) set.insert(*opt_name); if (typeid_cast(ast)) return; @@ -364,9 +364,9 @@ bool MergeTreeWhereOptimizer::isSubsetOfTableColumns(const IdentifierNameSet & i } -bool MergeTreeWhereOptimizer::cannotBeMoved(const IAST * ptr) const +bool 
MergeTreeWhereOptimizer::cannotBeMoved(const ASTPtr & ptr) const { - if (const auto function_ptr = typeid_cast(ptr)) + if (const auto function_ptr = typeid_cast(ptr.get())) { /// disallow arrayJoin expressions to be moved to PREWHERE for now if (array_join_function_name == function_ptr->name) @@ -381,17 +381,16 @@ bool MergeTreeWhereOptimizer::cannotBeMoved(const IAST * ptr) const if ("indexHint" == function_ptr->name) return true; } - else if (const auto identifier_ptr = typeid_cast(ptr)) + else if (auto opt_name = getColumnIdentifierName(ptr)) { /// disallow moving result of ARRAY JOIN to PREWHERE - if (identifier_ptr->general()) - if (array_joined_names.count(identifier_ptr->name) || - array_joined_names.count(Nested::extractTableName(identifier_ptr->name))) - return true; + if (array_joined_names.count(*opt_name) || + array_joined_names.count(Nested::extractTableName(*opt_name))) + return true; } for (const auto & child : ptr->children) - if (cannotBeMoved(child.get())) + if (cannotBeMoved(child)) return true; return false; diff --git a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.h b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.h index 4d0ccbcff7e..ccf4970d300 100644 --- a/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.h +++ b/dbms/src/Storages/MergeTree/MergeTreeWhereOptimizer.h @@ -68,7 +68,7 @@ private: * * Also, disallow moving expressions with GLOBAL [NOT] IN. */ - bool cannotBeMoved(const IAST * ptr) const; + bool cannotBeMoved(const ASTPtr & ptr) const; void determineArrayJoinedNames(ASTSelectQuery & select); diff --git a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp index 54b092fdb62..e0903138220 100644 --- a/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp +++ b/dbms/src/Storages/MergeTree/registerStorageMergeTree.cpp @@ -42,13 +42,13 @@ static Names extractColumnNames(const ASTPtr & node) Names res; res.reserve(elements.size()); for (const auto & elem : elements) - res.push_back(typeid_cast(*elem).name); + res.push_back(*getIdentifierName(elem)); return res; } else { - return { typeid_cast(*node).name }; + return { *getIdentifierName(node) }; } } @@ -481,9 +481,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) if (merging_params.mode == MergeTreeData::MergingParams::Collapsing) { - if (auto ast = typeid_cast(engine_args.back().get())) - merging_params.sign_column = ast->name; - else + if (!getIdentifierName(engine_args.back(), merging_params.sign_column)) throw Exception( "Sign column name must be an unquoted string" + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::BAD_ARGUMENTS); @@ -495,9 +493,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// If the last element is not index_granularity or replica_name (a literal), then this is the name of the version column. 
if (!engine_args.empty() && !typeid_cast(engine_args.back().get())) { - if (auto ast = typeid_cast(engine_args.back().get())) - merging_params.version_column = ast->name; - else + if (!getIdentifierName(engine_args.back(), merging_params.version_column)) throw Exception( "Version column name must be an unquoted string" + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::BAD_ARGUMENTS); @@ -535,18 +531,14 @@ static StoragePtr create(const StorageFactory::Arguments & args) } else if (merging_params.mode == MergeTreeData::MergingParams::VersionedCollapsing) { - if (auto ast = typeid_cast(engine_args.back().get())) - merging_params.version_column = ast->name; - else + if (!getIdentifierName(engine_args.back(), merging_params.version_column)) throw Exception( "Version column name must be an unquoted string" + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::BAD_ARGUMENTS); engine_args.pop_back(); - if (auto ast = typeid_cast(engine_args.back().get())) - merging_params.sign_column = ast->name; - else + if (!getIdentifierName(engine_args.back(), merging_params.sign_column)) throw Exception( "Sign column name must be an unquoted string" + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::BAD_ARGUMENTS); @@ -592,9 +584,7 @@ static StoragePtr create(const StorageFactory::Arguments & args) /// Now only three parameters remain - date (or partitioning expression), primary_key, index_granularity. - if (auto ast = typeid_cast(engine_args[0].get())) - date_column_name = ast->name; - else + if (!getIdentifierName(engine_args[0], date_column_name)) throw Exception( "Date column name must be an unquoted string" + getMergeTreeVerboseHelp(is_extended_storage_def), ErrorCodes::BAD_ARGUMENTS); diff --git a/dbms/src/Storages/PartitionCommands.cpp b/dbms/src/Storages/PartitionCommands.cpp index b6ea7794cbe..c7a228c3e78 100644 --- a/dbms/src/Storages/PartitionCommands.cpp +++ b/dbms/src/Storages/PartitionCommands.cpp @@ -2,7 +2,6 @@ #include #include #include -#include namespace DB @@ -66,7 +65,7 @@ std::optional PartitionCommand::parse(const ASTAlterCommand * PartitionCommand res; res.type = CLEAR_COLUMN; res.partition = command_ast->partition; - const Field & column_name = typeid_cast(*(command_ast->column)).name; + const Field & column_name = *getIdentifierName(command_ast->column); res.column_name = column_name; return res; } diff --git a/dbms/src/Storages/StorageFile.cpp b/dbms/src/Storages/StorageFile.cpp index de017c8612e..f24badfa82f 100644 --- a/dbms/src/Storages/StorageFile.cpp +++ b/dbms/src/Storages/StorageFile.cpp @@ -304,16 +304,16 @@ void registerStorageFile(StorageFactory & factory) { /// Will use FD if engine_args[1] is int literal or identifier with std* name - if (const ASTIdentifier * identifier = typeid_cast(engine_args[1].get())) + if (auto opt_name = getIdentifierName(engine_args[1])) { - if (identifier->name == "stdin") + if (*opt_name == "stdin") source_fd = STDIN_FILENO; - else if (identifier->name == "stdout") + else if (*opt_name == "stdout") source_fd = STDOUT_FILENO; - else if (identifier->name == "stderr") + else if (*opt_name == "stderr") source_fd = STDERR_FILENO; else - throw Exception("Unknown identifier '" + identifier->name + "' in second arg of File storage constructor", + throw Exception("Unknown identifier '" + *opt_name + "' in second arg of File storage constructor", ErrorCodes::UNKNOWN_IDENTIFIER); } else if (const ASTLiteral * literal = typeid_cast(engine_args[1].get())) diff --git a/dbms/src/Storages/StorageHDFS.cpp 
b/dbms/src/Storages/StorageHDFS.cpp index 97b0af65f87..6dd5cf4c92e 100644 --- a/dbms/src/Storages/StorageHDFS.cpp +++ b/dbms/src/Storages/StorageHDFS.cpp @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -16,7 +15,6 @@ #include #include #include -#include namespace DB diff --git a/dbms/src/Storages/StorageJoin.cpp b/dbms/src/Storages/StorageJoin.cpp index 8d1d6d52fbf..e188afc1dab 100644 --- a/dbms/src/Storages/StorageJoin.cpp +++ b/dbms/src/Storages/StorageJoin.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include #include #include @@ -88,11 +87,11 @@ void registerStorageJoin(StorageFactory & factory) "Storage Join requires at least 3 parameters: Join(ANY|ALL, LEFT|INNER, keys...).", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); - const ASTIdentifier * strictness_id = typeid_cast(engine_args[0].get()); - if (!strictness_id) + auto opt_strictness_id = getIdentifierName(engine_args[0]); + if (!opt_strictness_id) throw Exception("First parameter of storage Join must be ANY or ALL (without quotes).", ErrorCodes::BAD_ARGUMENTS); - const String strictness_str = Poco::toLower(strictness_id->name); + const String strictness_str = Poco::toLower(*opt_strictness_id); ASTTableJoin::Strictness strictness; if (strictness_str == "any") strictness = ASTTableJoin::Strictness::Any; @@ -101,11 +100,11 @@ void registerStorageJoin(StorageFactory & factory) else throw Exception("First parameter of storage Join must be ANY or ALL (without quotes).", ErrorCodes::BAD_ARGUMENTS); - const ASTIdentifier * kind_id = typeid_cast(engine_args[1].get()); - if (!kind_id) + auto opt_kind_id = getIdentifierName(engine_args[1]); + if (!opt_kind_id) throw Exception("Second parameter of storage Join must be LEFT or INNER (without quotes).", ErrorCodes::BAD_ARGUMENTS); - const String kind_str = Poco::toLower(kind_id->name); + const String kind_str = Poco::toLower(*opt_kind_id); ASTTableJoin::Kind kind; if (kind_str == "left") kind = ASTTableJoin::Kind::Left; @@ -122,11 +121,11 @@ void registerStorageJoin(StorageFactory & factory) key_names.reserve(engine_args.size() - 2); for (size_t i = 2, size = engine_args.size(); i < size; ++i) { - const ASTIdentifier * key = typeid_cast(engine_args[i].get()); - if (!key) + auto opt_key = getIdentifierName(engine_args[i]); + if (!opt_key) throw Exception("Parameter №" + toString(i + 1) + " of storage Join don't look like column name.", ErrorCodes::BAD_ARGUMENTS); - key_names.push_back(key->name); + key_names.push_back(*opt_key); } auto & settings = args.context.getSettingsRef(); diff --git a/dbms/src/Storages/VirtualColumnUtils.cpp b/dbms/src/Storages/VirtualColumnUtils.cpp index 6ce3e58cc75..990a587445c 100644 --- a/dbms/src/Storages/VirtualColumnUtils.cpp +++ b/dbms/src/Storages/VirtualColumnUtils.cpp @@ -96,11 +96,9 @@ static bool isValidFunction(const ASTPtr & expression, const NameSet & columns) if (!isValidFunction(expression->children[i], columns)) return false; - if (const ASTIdentifier * identifier = typeid_cast(&*expression)) - { - if (identifier->general()) - return columns.count(identifier->name); - } + if (auto opt_name = getColumnIdentifierName(expression)) + return columns.count(*opt_name); + return true; } diff --git a/dbms/src/Storages/transformQueryForExternalDatabase.cpp b/dbms/src/Storages/transformQueryForExternalDatabase.cpp index 0131d9f2162..aea176def3b 100644 --- a/dbms/src/Storages/transformQueryForExternalDatabase.cpp +++ b/dbms/src/Storages/transformQueryForExternalDatabase.cpp @@ -76,7 +76,7 @@ static bool 
isCompatible(const IAST & node)
         return true;
     }
 
-    if (typeid_cast(&node))
+    if (isIdentifier(&node))
         return true;
 
     return false;
diff --git a/dbms/src/TableFunctions/TableFunctionRemote.cpp b/dbms/src/TableFunctions/TableFunctionRemote.cpp
index fc23956ef4f..5f81a9c21ec 100644
--- a/dbms/src/TableFunctions/TableFunctionRemote.cpp
+++ b/dbms/src/TableFunctions/TableFunctionRemote.cpp
@@ -65,9 +65,7 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const C
     }
     else
     {
-        if (auto ast_cluster = typeid_cast(args[arg_num].get()))
-            cluster_name = ast_cluster->name;
-        else
+        if (!getIdentifierName(args[arg_num], cluster_name))
             cluster_description = getStringLiteral(*args[arg_num], "Hosts pattern");
     }
     ++arg_num;
@@ -132,9 +130,8 @@ StoragePtr TableFunctionRemote::executeImpl(const ASTPtr & ast_function, const C
 
     /// ExpressionAnalyzer will be created in InterpreterSelectQuery that will meet these `Identifier` when processing the request.
     /// We need to mark them as the name of the database or table, because the default value is column.
-    for (auto & arg : args)
-        if (ASTIdentifier * id = typeid_cast(arg.get()))
-            id->setSpecial();
+    for (auto ast : args)
+        setIdentifierSpecial(ast);
 
     ClusterPtr cluster;
     if (!cluster_name.empty())

From f4a18ccb32305b82ba66047c6b6507a32d25acf3 Mon Sep 17 00:00:00 2001
From: ogorbacheva
Date: Tue, 15 Jan 2019 11:04:52 +0300
Subject: [PATCH 17/17] PR2: Adding description of the functions for working with UUID (#4059)

* PR2: add description of UUID functions

* Fix type of returned value
---
 docs/en/data_types/uuid.md                    |  60 ++++++++++
 .../query_language/agg_functions/reference.md |  12 +-
 .../functions/arithmetic_functions.md         |   2 +-
 .../functions/ext_dict_functions.md           |   4 +-
 .../functions/uuid_functions.md               | 108 ++++++++++++++++++
 docs/ru/interfaces/formats.md                 |   2 +-
 docs/toc_en.yml                               |   2 +
 7 files changed, 180 insertions(+), 10 deletions(-)
 create mode 100644 docs/en/data_types/uuid.md
 create mode 100644 docs/en/query_language/functions/uuid_functions.md

diff --git a/docs/en/data_types/uuid.md b/docs/en/data_types/uuid.md
new file mode 100644
index 00000000000..5f334bb6728
--- /dev/null
+++ b/docs/en/data_types/uuid.md
@@ -0,0 +1,60 @@
+# UUID {#uuid-data-type}
+
+A universally unique identifier (UUID) is a 16-byte number used to identify records. For detailed information about UUIDs, see [Wikipedia](https://en.wikipedia.org/wiki/Universally_unique_identifier).
+
+An example of a UUID value is shown below:
+
+```
+61f0c404-5cb3-11e7-907b-a6006ad3dba0
+```
+
+If you do not specify a value for the UUID column when inserting a new record, the UUID value is filled with zeros:
+
+```
+00000000-0000-0000-0000-000000000000
+```
+
+## How to generate
+
+To generate a UUID value, ClickHouse provides the [generateUUIDv4](../query_language/functions/uuid_functions.md) function.
+
+## Usage example
+
+**Example 1**
+
+This example demonstrates creating a table with a UUID type column and inserting a value into the table.
+
+``` sql
+:) CREATE TABLE t_uuid (x UUID, y String) ENGINE=TinyLog
+
+:) INSERT INTO t_uuid SELECT generateUUIDv4(), 'Example 1'
+
+:) SELECT * FROM t_uuid
+
+┌────────────────────────────────────x─┬─y─────────┐
+│ 417ddc5d-e556-4d27-95dd-a34d84e46a50 │ Example 1 │
+└──────────────────────────────────────┴───────────┘
+```
+
+**Example 2**
+
+In this example, the UUID column value is not specified when inserting a new record.
+
+``` sql
+:) INSERT INTO t_uuid (y) VALUES ('Example 2')
+
+:) SELECT * FROM t_uuid
+
+┌────────────────────────────────────x─┬─y─────────┐
+│ 417ddc5d-e556-4d27-95dd-a34d84e46a50 │ Example 1 │
+│ 00000000-0000-0000-0000-000000000000 │ Example 2 │
+└──────────────────────────────────────┴───────────┘
+```
+
+## Restrictions
+
+The UUID data type only supports functions that the [String](string.md) data type also supports (for example, [min](../query_language/agg_functions/reference.md#agg_function-min), [max](../query_language/agg_functions/reference.md#agg_function-max), and [count](../query_language/agg_functions/reference.md#agg_function-count)).
+
+Arithmetic operations (for example, [abs](../query_language/functions/arithmetic_functions.md#arithm_func-abs)) and aggregate functions such as [sum](../query_language/agg_functions/reference.md#agg_function-sum) and [avg](../query_language/agg_functions/reference.md#agg_function-avg) do not support the UUID data type.
+
+[Original article](https://clickhouse.yandex/docs/en/data_types/uuid/)
diff --git a/docs/en/query_language/agg_functions/reference.md b/docs/en/query_language/agg_functions/reference.md
index 782be3374b1..b8bd95d376d 100644
--- a/docs/en/query_language/agg_functions/reference.md
+++ b/docs/en/query_language/agg_functions/reference.md
@@ -1,7 +1,7 @@
 # Function reference
 
-## count()
+## count() {#agg_function-count}
 
 Counts the number of rows.
 Accepts zero arguments and returns UInt64.
 The syntax `COUNT(DISTINCT x)` is not supported. The separate `uniq` aggregate function exists for this purpose.
@@ -179,15 +179,15 @@ binary decimal
 01101000 = 104
 ```
 
-## min(x)
+## min(x) {#agg_function-min}
 
 Calculates the minimum.
 
-## max(x)
+## max(x) {#agg_function-max}
 
 Calculates the maximum.
 
-## argMin(arg, val)
+## argMin(arg, val) {#agg_function-argMin}
 
 Calculates the 'arg' value for a minimal 'val' value. If there are several different values of 'arg' for minimal values of 'val', the first of these values encountered is output.
@@ -206,7 +206,7 @@ SELECT argMin(user, salary) FROM salary
 └──────────────────────┘
 ```
 
-## argMax(arg, val)
+## argMax(arg, val) {#agg_function-argMax}
 
 Calculates the 'arg' value for a maximum 'val' value. If there are several different values of 'arg' for maximum values of 'val', the first of these values encountered is output.
@@ -259,7 +259,7 @@ GROUP BY timeslot
 └─────────────────────┴──────────────────────────────────────────────┘
 ```
 
-## avg(x)
+## avg(x) {#agg_function-avg}
 
 Calculates the average.
 Only works for numbers.
diff --git a/docs/en/query_language/functions/arithmetic_functions.md b/docs/en/query_language/functions/arithmetic_functions.md
index 7420fbe532f..0f4795ec0b9 100644
--- a/docs/en/query_language/functions/arithmetic_functions.md
+++ b/docs/en/query_language/functions/arithmetic_functions.md
@@ -59,7 +59,7 @@ An exception is thrown when dividing by zero or when dividing a minimal negative
 Calculates a number with the reverse sign. The result is always signed.
 
-## abs(a)
+## abs(a) {#arithm_func-abs}
 
 Calculates the absolute value of the number (a). That is, if a < 0, it returns -a. For unsigned types it doesn't do anything. For signed integer types, it returns an unsigned number.
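The Restrictions section of the new uuid.md above only describes in prose which functions accept UUID arguments. A short SQL sketch can make this concrete; it assumes the `t_uuid` table created in the usage examples above, and the failing queries describe expected behaviour rather than output captured from a server:

``` sql
-- String-compatible functions such as count, min and max accept UUID arguments:
SELECT count(x), min(x), max(x) FROM t_uuid

-- Arithmetic functions and numeric aggregates do not support UUID;
-- queries like the following are expected to be rejected with a type error:
-- SELECT sum(x) FROM t_uuid
-- SELECT abs(x) FROM t_uuid
```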
diff --git a/docs/en/query_language/functions/ext_dict_functions.md b/docs/en/query_language/functions/ext_dict_functions.md
index 99059d79016..d370e47e3f7 100644
--- a/docs/en/query_language/functions/ext_dict_functions.md
+++ b/docs/en/query_language/functions/ext_dict_functions.md
@@ -1,6 +1,6 @@
 # Functions for working with external dictionaries {#ext_dict_functions}
 
-For information on connecting and configuring external dictionaries, see "[External dictionaries](../dicts/external_dicts.md)".
+For information on connecting and configuring external dictionaries, see [External dictionaries](../dicts/external_dicts.md).
 
 ## dictGetUInt8, dictGetUInt16, dictGetUInt32, dictGetUInt64
 
@@ -19,7 +19,7 @@ For information on connecting and configuring external dictionaries, see "[Exter
 - Get the value of the attr_name attribute from the dict_name dictionary using the 'id' key.`dict_name` and `attr_name` are constant strings.`id`must be UInt64.
 If there is no `id` key in the dictionary, it returns the default value specified in the dictionary description.
 
-## dictGetTOrDefault
+## dictGetTOrDefault {#ext_dict_functions_dictGetTOrDefault}
 
 `dictGetT('dict_name', 'attr_name', id, default)`
 
diff --git a/docs/en/query_language/functions/uuid_functions.md b/docs/en/query_language/functions/uuid_functions.md
new file mode 100644
index 00000000000..ddf3617e77a
--- /dev/null
+++ b/docs/en/query_language/functions/uuid_functions.md
@@ -0,0 +1,108 @@
+# Functions for working with UUID
+
+The functions for working with UUID are listed below.
+
+## generateUUIDv4 {#uuid_function-generate}
+
+Generates a [UUID](../../data_types/uuid.md) of [version 4](https://tools.ietf.org/html/rfc4122#section-4.4).
+
+```sql
+generateUUIDv4()
+```
+
+**Returned value**
+
+The UUID type value.
+
+**Usage example**
+
+This example demonstrates creating a table with a UUID type column and inserting a value into the table.
+
+``` sql
+:) CREATE TABLE t_uuid (x UUID) ENGINE=TinyLog
+
+:) INSERT INTO t_uuid SELECT generateUUIDv4()
+
+:) SELECT * FROM t_uuid
+
+┌────────────────────────────────────x─┐
+│ f4bf890f-f9dc-4332-ad5c-0c18e73f28e9 │
+└──────────────────────────────────────┘
+```
+
+## toUUID (x)
+
+Converts a String type value to the UUID type.
+
+```sql
+toUUID(String)
+```
+
+**Returned value**
+
+The UUID type value.
+
+**Usage example**
+
+``` sql
+:) SELECT toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0') AS uuid
+
+┌─────────────────────────────────uuid─┐
+│ 61f0c404-5cb3-11e7-907b-a6006ad3dba0 │
+└──────────────────────────────────────┘
+```
+
+## UUIDStringToNum
+
+Accepts a string containing 36 characters in the format `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, and returns it as a set of bytes in a [FixedString(16)](../../data_types/fixedstring.md).
+
+``` sql
+UUIDStringToNum(String)
+```
+
+**Returned value**
+
+FixedString(16)
+
+**Usage examples**
+
+``` sql
+:) SELECT
+    '612f3c40-5d3b-217e-707b-6a546a3d7b29' AS uuid,
+    UUIDStringToNum(uuid) AS bytes
+
+┌─uuid─────────────────────────────────┬─bytes────────────┐
+│ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │ a/<@];!~p{jTj={) │
+└──────────────────────────────────────┴──────────────────┘
+```
+
+## UUIDNumToString
+
+Accepts a [FixedString(16)](../../data_types/fixedstring.md) value, and returns a string containing 36 characters in text format.
+
+``` sql
+UUIDNumToString(FixedString(16))
+```
+
+**Returned value**
+
+String.
+
+**Usage example**
+
+``` sql
+SELECT
+    'a/<@];!~p{jTj={)' AS bytes,
+    UUIDNumToString(toFixedString(bytes, 16)) AS uuid
+
+┌─bytes────────────┬─uuid─────────────────────────────────┐
+│ a/<@];!~p{jTj={) │ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │
+└──────────────────┴──────────────────────────────────────┘
+```
+
+## See also
+
+- [dictGetUUID](ext_dict_functions.md)
+- [dictGetUUIDOrDefault](ext_dict_functions.md#ext_dict_functions_dictGetTOrDefault)
+
+[Original article](https://clickhouse.yandex/docs/en/query_language/functions/uuid_function/)
diff --git a/docs/ru/interfaces/formats.md b/docs/ru/interfaces/formats.md
index 659f3327d18..303ed85cd73 100644
--- a/docs/ru/interfaces/formats.md
+++ b/docs/ru/interfaces/formats.md
@@ -157,7 +157,7 @@ x=1 y=\N
 clickhouse-client --format_csv_delimiter="|" --query="INSERT INTO test.csv FORMAT CSV" < data.csv
 ```
 
-*По умолчанию — `,`. См. настройку [format_csv_delimiter](/operations/settings/settings/#settings-format_csv_delimiter) для дополнительной информации.
+*По умолчанию — `,`. См. настройку [format_csv_delimiter](../operations/settings/settings.md#settings-format_csv_delimiter) для дополнительной информации.
 
 При парсинге, все значения могут парситься как в кавычках, так и без кавычек. Поддерживаются как двойные, так и одинарные кавычки. В том числе, строки могут быть расположены без кавычек - тогда они парсятся до символа-разделителя или перевода строки (CR или LF). В нарушение RFC, в случае парсинга строк не в кавычках, начальные и конечные пробелы и табы игнорируются. В качестве перевода строки, поддерживаются как Unix (LF), так и Windows (CR LF) и Mac OS Classic (LF CR) варианты.
diff --git a/docs/toc_en.yml b/docs/toc_en.yml
index fd0e7dbd769..f8706e3b44e 100644
--- a/docs/toc_en.yml
+++ b/docs/toc_en.yml
@@ -39,6 +39,7 @@ nav:
     - 'Boolean': 'data_types/boolean.md'
     - 'String': 'data_types/string.md'
     - 'FixedString(N)': 'data_types/fixedstring.md'
+    - 'UUID': 'data_types/uuid.md'
     - 'Date': 'data_types/date.md'
     - 'DateTime': 'data_types/datetime.md'
     - 'Enum': 'data_types/enum.md'
@@ -81,6 +82,7 @@ nav:
     - 'Hash': 'query_language/functions/hash_functions.md'
     - 'Generating Pseudo-Random Numbers': 'query_language/functions/random_functions.md'
     - 'Encoding': 'query_language/functions/encoding_functions.md'
+    - 'Working with UUID': 'query_language/functions/uuid_functions.md'
    - 'Working with URLs': 'query_language/functions/url_functions.md'
     - 'Working with IP Addresses': 'query_language/functions/ip_address_functions.md'
     - 'Working with JSON.': 'query_language/functions/json_functions.md'