From 9a8db441c3ba9dbcc51eb9d0b7ab0f5230e9a197 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 6 Feb 2019 17:48:05 +0300 Subject: [PATCH 1/6] some mote ExpressionAnalyzer refactoring --- dbms/src/Interpreters/AnalyzedJoin.cpp | 48 +++++++++++++------- dbms/src/Interpreters/AnalyzedJoin.h | 8 ++-- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 31 +++++++++---- dbms/src/Interpreters/ExpressionAnalyzer.h | 6 --- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 25 ++-------- 5 files changed, 61 insertions(+), 57 deletions(-) diff --git a/dbms/src/Interpreters/AnalyzedJoin.cpp b/dbms/src/Interpreters/AnalyzedJoin.cpp index f249a451312..d7e1d2aed67 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.cpp +++ b/dbms/src/Interpreters/AnalyzedJoin.cpp @@ -9,6 +9,7 @@ #include #include +#include namespace DB { @@ -51,24 +52,13 @@ ExpressionActionsPtr AnalyzedJoin::createJoinedBlockActions( return analyzer.getActions(false); } -NameSet AnalyzedJoin::getRequiredColumnsFromJoinedTable(const JoinedColumnsList & columns_added_by_join, - const ExpressionActionsPtr & joined_block_actions) const +Names AnalyzedJoin::getOriginalColumnNames(const NameSet & required_columns_from_joined_table) const { - NameSet required_columns_from_joined_table; - - auto required_action_columns = joined_block_actions->getRequiredColumns(); - required_columns_from_joined_table.insert(required_action_columns.begin(), required_action_columns.end()); - auto sample = joined_block_actions->getSampleBlock(); - - for (auto & column : key_names_right) - if (!sample.has(column)) - required_columns_from_joined_table.insert(column); - - for (auto & column : columns_added_by_join) - if (!sample.has(column.name_and_type.name)) - required_columns_from_joined_table.insert(column.name_and_type.name); - - return required_columns_from_joined_table; + Names original_columns; + for (const auto & column : columns_from_joined_table) + if (required_columns_from_joined_table.count(column.name_and_type.name)) + original_columns.emplace_back(column.original_name); + return original_columns; } const JoinedColumnsList & AnalyzedJoin::getColumnsFromJoinedTable( @@ -104,6 +94,30 @@ const JoinedColumnsList & AnalyzedJoin::getColumnsFromJoinedTable( return columns_from_joined_table; } +void AnalyzedJoin::calculateAvailableJoinedColumns( + const NameSet & source_columns, const Context & context, const ASTSelectQuery * select_query_with_join, bool make_nullable) +{ + const auto & columns = getColumnsFromJoinedTable(source_columns, context, select_query_with_join); + + NameSet joined_columns; + + for (auto & column : columns) + { + auto & column_name = column.name_and_type.name; + auto & column_type = column.name_and_type.type; + auto & original_name = column.original_name; + { + if (joined_columns.count(column_name)) /// Duplicate columns in the subquery for JOIN do not make sense. + continue; + + joined_columns.insert(column_name); + + auto type = make_nullable ? makeNullable(column_type) : column_type; + available_joined_columns.emplace_back(NameAndTypePair(column_name, std::move(type)), original_name); + } + } +} + NamesAndTypesList getNamesAndTypeListFromTableExpression(const ASTTableExpression & table_expression, const Context & context) { diff --git a/dbms/src/Interpreters/AnalyzedJoin.h b/dbms/src/Interpreters/AnalyzedJoin.h index d8d8673ba15..396b92cefbd 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.h +++ b/dbms/src/Interpreters/AnalyzedJoin.h @@ -66,13 +66,15 @@ struct AnalyzedJoin const ASTSelectQuery * select_query_with_join, const Context & context) const; - /// Columns which will be used in query from joined table. - NameSet getRequiredColumnsFromJoinedTable(const JoinedColumnsList & columns_added_by_join, - const ExpressionActionsPtr & joined_block_actions) const; + Names getOriginalColumnNames(const NameSet & required_columns) const; const JoinedColumnsList & getColumnsFromJoinedTable(const NameSet & source_columns, const Context & context, const ASTSelectQuery * select_query_with_join); + void calculateAvailableJoinedColumns(const NameSet & source_columns, + const Context & context, + const ASTSelectQuery * select_query_with_join, + bool make_nullable); }; struct ASTTableExpression; diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index e3ace8aba38..7c7b3d16622 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -510,6 +510,17 @@ void ExpressionAnalyzer::addJoinAction(ExpressionActionsPtr & actions, bool only columns_added_by_join_list)); } +static void appendRequiredColumns(NameSet & required_columns, const Block & sample, const AnalyzedJoin & analyzed_join) +{ + for (auto & column : analyzed_join.key_names_right) + if (!sample.has(column)) + required_columns.insert(column); + + for (auto & column : analyzed_join.columns_from_joined_table) + if (!sample.has(column.name_and_type.name)) + required_columns.insert(column.name_and_type.name); +} + bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_types) { assertSelect(); @@ -566,6 +577,11 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty if (!subquery_for_set.join) { + auto & analyzed_join = analyzedJoin(); + /// Actions which need to be calculated on joined block. + ExpressionActionsPtr joined_block_actions = + analyzed_join.createJoinedBlockActions(columns_added_by_join, select_query, context); + /** For GLOBAL JOINs (in the case, for example, of the push method for executing GLOBAL subqueries), the following occurs * - in the addExternalStorage function, the JOIN (SELECT ...) subquery is replaced with JOIN _data1, * in the subquery_for_set object this subquery is exposed as source and the temporary table _data1 as the `table`. @@ -582,15 +598,15 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty else if (table_to_join.database_and_table_name) table = table_to_join.database_and_table_name; - const JoinedColumnsList & columns_from_joined_table = analyzedJoin().columns_from_joined_table; + Names action_columns = joined_block_actions->getRequiredColumns(); + NameSet required_columns(action_columns.begin(), action_columns.end()); - Names original_columns; - for (const auto & column : columns_from_joined_table) - if (required_columns_from_joined_table.count(column.name_and_type.name)) - original_columns.emplace_back(column.original_name); + appendRequiredColumns(required_columns, joined_block_actions->getSampleBlock(), analyzed_join); + + Names original_columns = analyzed_join.getOriginalColumnNames(required_columns); auto interpreter = interpretSubquery(table, context, subquery_depth, original_columns); - subquery_for_set.makeSource(interpreter, columns_from_joined_table, required_columns_from_joined_table); + subquery_for_set.makeSource(interpreter, analyzed_join.columns_from_joined_table, required_columns); } Block sample_block = subquery_for_set.renamedSampleBlock(); @@ -1038,9 +1054,6 @@ void ExpressionAnalyzer::collectUsedColumns() required.swap(fixed_required); } - - joined_block_actions = analyzed_join.createJoinedBlockActions(columns_added_by_join, select_query, context); - required_columns_from_joined_table = analyzed_join.getRequiredColumnsFromJoinedTable(columns_added_by_join, joined_block_actions); } if (columns_context.has_array_join) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.h b/dbms/src/Interpreters/ExpressionAnalyzer.h index d8872f1b8d1..83598aba72d 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.h +++ b/dbms/src/Interpreters/ExpressionAnalyzer.h @@ -67,12 +67,6 @@ struct ExpressionAnalyzerData /// Columns will be added to block by join. JoinedColumnsList columns_added_by_join; /// Subset of analyzed_join.available_joined_columns - /// Actions which need to be calculated on joined block. - ExpressionActionsPtr joined_block_actions; - - /// Columns which will be used in query from joined table. Duplicate names are qualified. - NameSet required_columns_from_joined_table; - protected: ExpressionAnalyzerData(const NamesAndTypesList & source_columns_, const NameSet & required_result_columns_, diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index cc9fe20a69b..fd61112d108 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -22,7 +22,6 @@ #include #include -#include #include #include @@ -641,29 +640,11 @@ void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery * s else if (table_join.on_expression) collectJoinedColumnsFromJoinOnExpr(analyzed_join, select_query, source_columns, context); - auto & columns_from_joined_table = analyzed_join.getColumnsFromJoinedTable(source_columns, context, select_query); - - NameSet joined_columns; - auto & settings = context.getSettingsRef(); + bool make_nullable = settings.join_use_nulls && (table_join.kind == ASTTableJoin::Kind::Left || + table_join.kind == ASTTableJoin::Kind::Full); - for (auto & column : columns_from_joined_table) - { - auto & column_name = column.name_and_type.name; - auto & column_type = column.name_and_type.type; - auto & original_name = column.original_name; - { - if (joined_columns.count(column_name)) /// Duplicate columns in the subquery for JOIN do not make sense. - continue; - - joined_columns.insert(column_name); - - bool make_nullable = settings.join_use_nulls && (table_join.kind == ASTTableJoin::Kind::Left || - table_join.kind == ASTTableJoin::Kind::Full); - auto type = make_nullable ? makeNullable(column_type) : column_type; - analyzed_join.available_joined_columns.emplace_back(NameAndTypePair(column_name, std::move(type)), original_name); - } - } + analyzed_join.calculateAvailableJoinedColumns(source_columns, context, select_query, make_nullable); } } From 3fd3884b321fff60d367fa166d481b429f7daa16 Mon Sep 17 00:00:00 2001 From: chertus Date: Wed, 6 Feb 2019 19:44:47 +0300 Subject: [PATCH 2/6] one more minor refactoring --- dbms/src/Interpreters/AnalyzedJoin.h | 9 ++++++++ dbms/src/Interpreters/SyntaxAnalyzer.cpp | 26 ++++++------------------ 2 files changed, 15 insertions(+), 20 deletions(-) diff --git a/dbms/src/Interpreters/AnalyzedJoin.h b/dbms/src/Interpreters/AnalyzedJoin.h index 396b92cefbd..e53363e8f73 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.h +++ b/dbms/src/Interpreters/AnalyzedJoin.h @@ -61,6 +61,15 @@ struct AnalyzedJoin /// It's columns_from_joined_table without duplicate columns and possibly modified types. JoinedColumnsList available_joined_columns; + void addSimpleKey(const ASTPtr & ast) + { + key_names_left.push_back(ast->getColumnName()); + key_names_right.push_back(ast->getAliasOrColumnName()); + + key_asts_left.push_back(ast); + key_asts_right.push_back(ast); + } + ExpressionActionsPtr createJoinedBlockActions( const JoinedColumnsList & columns_added_by_join, /// Subset of available_joined_columns. const ASTSelectQuery * select_query_with_join, diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index fd61112d108..90ed9b6065b 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -612,30 +612,16 @@ void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery * s const auto & table_expression = static_cast(*node->table_expression); DatabaseAndTableWithAlias joined_table_name(table_expression, context.getCurrentDatabase()); - auto add_name_to_join_keys = [&](Names & join_keys, ASTs & join_asts, const ASTPtr & ast, bool right_table) - { - String name; - if (right_table) - { - name = ast->getAliasOrColumnName(); - if (source_columns.count(name)) - name = joined_table_name.getQualifiedNamePrefix() + name; - } - else - name = ast->getColumnName(); - - join_keys.push_back(name); - join_asts.push_back(ast); - }; - if (table_join.using_expression_list) { auto & keys = typeid_cast(*table_join.using_expression_list); for (const auto & key : keys.children) - { - add_name_to_join_keys(analyzed_join.key_names_left, analyzed_join.key_asts_left, key, false); - add_name_to_join_keys(analyzed_join.key_names_right, analyzed_join.key_asts_right, key, true); - } + analyzed_join.addSimpleKey(key); + + /// @warning wrong qualification if the right key is an alias + for (auto & name : analyzed_join.key_names_right) + if (source_columns.count(name)) + name = joined_table_name.getQualifiedNamePrefix() + name; } else if (table_join.on_expression) collectJoinedColumnsFromJoinOnExpr(analyzed_join, select_query, source_columns, context); From db9a2060bf72c0222b68556c6788dbee5a04d5ab Mon Sep 17 00:00:00 2001 From: chertus Date: Thu, 7 Feb 2019 22:18:40 +0300 Subject: [PATCH 3/6] get rid of custom JOIN ON names qualification --- .../DatabaseAndTableWithAlias.cpp | 26 ++++++++++ .../Interpreters/DatabaseAndTableWithAlias.h | 6 +++ dbms/src/Interpreters/IdentifierSemantic.cpp | 19 +++++-- dbms/src/Interpreters/IdentifierSemantic.h | 6 ++- .../PredicateExpressionsOptimizer.cpp | 4 +- dbms/src/Interpreters/QueryNormalizer.cpp | 29 ++--------- dbms/src/Interpreters/QueryNormalizer.h | 15 +++--- dbms/src/Interpreters/SyntaxAnalyzer.cpp | 49 ++++++------------- .../TranslateQualifiedNamesVisitor.cpp | 16 +++--- .../TranslateQualifiedNamesVisitor.h | 11 ++++- dbms/src/Parsers/ASTIdentifier.h | 7 +++ 11 files changed, 106 insertions(+), 82 deletions(-) diff --git a/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp b/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp index c9afb5da722..c6fbfaad088 100644 --- a/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp +++ b/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp @@ -1,5 +1,6 @@ #include #include +#include /// for getNamesAndTypeListFromTableExpression #include #include @@ -12,6 +13,9 @@ namespace DB { +NameSet removeDuplicateColumns(NamesAndTypesList & columns); + + DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database) { alias = identifier.tryGetAlias(); @@ -144,4 +148,26 @@ ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number return nullptr; } +std::vector getDatabaseAndTablesWithColumnNames(const ASTSelectQuery & select_query, const Context & context) +{ + std::vector tables_with_columns; + + if (select_query.tables && !select_query.tables->children.empty()) + { + String current_database = context.getCurrentDatabase(); + + for (const ASTTableExpression * table_expression : getSelectTablesExpression(select_query)) + { + DatabaseAndTableWithAlias table_name(*table_expression, current_database); + + NamesAndTypesList names_and_types = getNamesAndTypeListFromTableExpression(*table_expression, context); + removeDuplicateColumns(names_and_types); + + tables_with_columns.emplace_back(std::move(table_name), names_and_types.getNames()); + } + } + + return tables_with_columns; +} + } diff --git a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h index 79e8da3f156..e9d8ee409a6 100644 --- a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h +++ b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h @@ -4,6 +4,7 @@ #include #include +#include namespace DB @@ -15,6 +16,7 @@ using ASTPtr = std::shared_ptr; class ASTSelectQuery; class ASTIdentifier; struct ASTTableExpression; +class Context; /// Extracts database name (and/or alias) from table expression or identifier @@ -36,9 +38,13 @@ struct DatabaseAndTableWithAlias bool satisfies(const DatabaseAndTableWithAlias & table, bool table_may_be_an_alias); }; +using TableWithColumnNames = std::pair; + std::vector getDatabaseAndTables(const ASTSelectQuery & select_query, const String & current_database); std::optional getDatabaseAndTable(const ASTSelectQuery & select, size_t table_number); +std::vector getDatabaseAndTablesWithColumnNames(const ASTSelectQuery & select_query, const Context & context); + std::vector getSelectTablesExpression(const ASTSelectQuery & select_query); ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number); diff --git a/dbms/src/Interpreters/IdentifierSemantic.cpp b/dbms/src/Interpreters/IdentifierSemantic.cpp index e6fe2257d20..337bbc27cfb 100644 --- a/dbms/src/Interpreters/IdentifierSemantic.cpp +++ b/dbms/src/Interpreters/IdentifierSemantic.cpp @@ -37,6 +37,12 @@ std::optional IdentifierSemantic::getTableName(const ASTPtr & ast) return {}; } + +void IdentifierSemantic::setNeedLongName(ASTIdentifier & identifier, bool value) +{ + identifier.semantic->need_long_name = value; +} + std::pair IdentifierSemantic::extractDatabaseAndTable(const ASTIdentifier & identifier) { if (identifier.name_parts.size() > 2) @@ -97,10 +103,17 @@ void IdentifierSemantic::setColumnShortName(ASTIdentifier & identifier, size_t t identifier.name.swap(new_name); } -void IdentifierSemantic::setColumnQualifiedName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table) +void IdentifierSemantic::setColumnNormalName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table) { - String prefix = db_and_table.getQualifiedNamePrefix(); - identifier.name.insert(identifier.name.begin(), prefix.begin(), prefix.end()); + size_t match = IdentifierSemantic::canReferColumnToTable(identifier, db_and_table); + + setColumnShortName(identifier, match); + + if (identifier.semantic->need_long_name) + { + String prefix = db_and_table.getQualifiedNamePrefix(); + identifier.name.insert(identifier.name.begin(), prefix.begin(), prefix.end()); + } } } diff --git a/dbms/src/Interpreters/IdentifierSemantic.h b/dbms/src/Interpreters/IdentifierSemantic.h index 895a51899fe..8bef3543a43 100644 --- a/dbms/src/Interpreters/IdentifierSemantic.h +++ b/dbms/src/Interpreters/IdentifierSemantic.h @@ -9,6 +9,7 @@ namespace DB struct IdentifierSemanticImpl { bool special = false; + bool need_long_name = false; }; /// Static calss to manipulate IdentifierSemanticImpl via ASTIdentifier @@ -24,12 +25,13 @@ struct IdentifierSemantic static std::pair extractDatabaseAndTable(const ASTIdentifier & identifier); static size_t canReferColumnToTable(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); - static void setColumnShortName(ASTIdentifier & identifier, size_t match); - static void setColumnQualifiedName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); + static void setColumnNormalName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); + static void setNeedLongName(ASTIdentifier & identifier, bool); /// if set setColumnNormalName makes qualified name private: static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table); static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & table); + static void setColumnShortName(ASTIdentifier & identifier, size_t match); }; } diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index 3154e3665c2..af556ebc01e 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -326,7 +326,9 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast std::unordered_map aliases; std::vector tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase()); - TranslateQualifiedNamesVisitor::Data qn_visitor_data{{}, tables}; + std::vector tables_with_columns; + TranslateQualifiedNamesVisitor::Data::setTablesOnly(tables, tables_with_columns); + TranslateQualifiedNamesVisitor::Data qn_visitor_data{{}, tables_with_columns}; TranslateQualifiedNamesVisitor(qn_visitor_data).visit(ast); QueryAliasesVisitor::Data query_aliases_data{aliases}; diff --git a/dbms/src/Interpreters/QueryNormalizer.cpp b/dbms/src/Interpreters/QueryNormalizer.cpp index 56529ae595c..1a0f5bb5ac8 100644 --- a/dbms/src/Interpreters/QueryNormalizer.cpp +++ b/dbms/src/Interpreters/QueryNormalizer.cpp @@ -25,8 +25,6 @@ namespace ErrorCodes extern const int CYCLIC_ALIASES; } -NameSet removeDuplicateColumns(NamesAndTypesList & columns); - class CheckASTDepth { @@ -143,7 +141,10 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) /// Replace *, alias.*, database.table.* with a list of columns. void QueryNormalizer::visit(ASTExpressionList & node, const ASTPtr &, Data & data) { - const auto & tables_with_columns = data.tables_with_columns; + if (!data.tables_with_columns) + return; + + const auto & tables_with_columns = *data.tables_with_columns; const auto & source_columns_set = data.source_columns_set; ASTs old_children; @@ -228,8 +229,6 @@ void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr & /// special visitChildren() for ASTSelectQuery void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr & ast, Data & data) { - extractTablesWithColumns(select, data); - if (auto join = select.join()) extractJoinUsingColumns(join->table_join, data); @@ -253,7 +252,6 @@ void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr & ast, Data & } /// Don't go into subqueries. -/// Don't go into components of compound identifiers. /// Don't go into select query. It processes children itself. /// Do not go to the left argument of lambda expressions, so as not to replace the formal parameters /// on aliases in expressions of the form 123 AS x, arrayMap(x -> 1, [2]). @@ -346,25 +344,6 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) } } -void QueryNormalizer::extractTablesWithColumns(const ASTSelectQuery & select_query, Data & data) -{ - if (data.context && select_query.tables && !select_query.tables->children.empty()) - { - data.tables_with_columns.clear(); - String current_database = data.context->getCurrentDatabase(); - - for (const ASTTableExpression * table_expression : getSelectTablesExpression(select_query)) - { - DatabaseAndTableWithAlias table_name(*table_expression, current_database); - - NamesAndTypesList names_and_types = getNamesAndTypeListFromTableExpression(*table_expression, *data.context); - removeDuplicateColumns(names_and_types); - - data.tables_with_columns.emplace_back(std::move(table_name), names_and_types.getNames()); - } - } -} - /// 'select * from a join b using id' should result one 'id' column void QueryNormalizer::extractJoinUsingColumns(const ASTPtr ast, Data & data) { diff --git a/dbms/src/Interpreters/QueryNormalizer.h b/dbms/src/Interpreters/QueryNormalizer.h index 62aaa09bb34..517f9416959 100644 --- a/dbms/src/Interpreters/QueryNormalizer.h +++ b/dbms/src/Interpreters/QueryNormalizer.h @@ -46,8 +46,6 @@ class QueryNormalizer }; public: - using TableWithColumnNames = std::pair; - struct Data { using SetOfASTs = std::set; @@ -57,7 +55,7 @@ public: const ExtractedSettings settings; const Context * context; const NameSet * source_columns_set; - std::vector tables_with_columns; + const std::vector * tables_with_columns; std::unordered_set join_using_columns; /// tmp data @@ -67,25 +65,25 @@ public: std::string current_alias; /// the alias referencing to the ancestor of ast (the deepest ancestor with aliases) Data(const Aliases & aliases_, ExtractedSettings && settings_, const Context & context_, - const NameSet & source_columns_set, Names && all_columns) + const NameSet & source_columns_set, const std::vector & tables_with_columns_) : aliases(aliases_) , settings(settings_) , context(&context_) , source_columns_set(&source_columns_set) + , tables_with_columns(&tables_with_columns_) , level(0) - { - tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns)); - } + {} Data(const Aliases & aliases_, ExtractedSettings && settings_) : aliases(aliases_) , settings(settings_) , context(nullptr) , source_columns_set(nullptr) + , tables_with_columns(nullptr) , level(0) {} - bool processAsterisks() const { return !tables_with_columns.empty(); } + bool processAsterisks() const { return tables_with_columns && !tables_with_columns->empty(); } }; QueryNormalizer(Data & data) @@ -110,7 +108,6 @@ private: static void visitChildren(const ASTPtr &, Data & data); - static void extractTablesWithColumns(const ASTSelectQuery & select_query, Data & data); static void extractJoinUsingColumns(const ASTPtr ast, Data & data); }; diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 90ed9b6065b..b4dc9a31211 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -79,16 +79,14 @@ void collectSourceColumns(ASTSelectQuery * select_query, StoragePtr storage, Nam } /// Translate qualified names such as db.table.column, table.column, table_alias.column to unqualified names. -void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query, - const NameSet & source_columns, const Context & context) +void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query, const NameSet & source_columns, + const std::vector & tables_with_columns) { if (!select_query->tables || select_query->tables->children.empty()) return; - std::vector tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase()); - LogAST log; - TranslateQualifiedNamesVisitor::Data visitor_data{source_columns, tables}; + TranslateQualifiedNamesVisitor::Data visitor_data{source_columns, tables_with_columns}; TranslateQualifiedNamesVisitor visitor(visitor_data, log.stream()); visitor.visit(query); } @@ -100,7 +98,8 @@ void normalizeTree( const Names & source_columns, const NameSet & source_columns_set, const Context & context, - const ASTSelectQuery * select_query) + const ASTSelectQuery * select_query, + std::vector & tables_with_columns) { const auto & settings = context.getSettingsRef(); @@ -116,10 +115,12 @@ void normalizeTree( if (all_columns_name.empty()) throw Exception("An asterisk cannot be replaced with empty columns.", ErrorCodes::LOGICAL_ERROR); - QueryNormalizer::Data normalizer_data(result.aliases, settings, context, source_columns_set, std::move(all_columns_name)); + if (tables_with_columns.empty()) + tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns_name)); + + QueryNormalizer::Data normalizer_data(result.aliases, settings, context, source_columns_set, tables_with_columns); QueryNormalizer(normalizer_data).visit(query); } - bool hasArrayJoin(const ASTPtr & ast) { if (const ASTFunction * function = typeid_cast(&*ast)) @@ -446,7 +447,7 @@ void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const /// Parse JOIN ON expression and collect ASTs for joined columns. void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTSelectQuery * select_query, - const NameSet & source_columns, const Context & context) + const Context & context) { const auto & tables = static_cast(*select_query->tables); const auto * left_tables_element = static_cast(tables.children.at(0).get()); @@ -511,24 +512,6 @@ void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTS return table_belonging; }; - std::function translate_qualified_names; - translate_qualified_names = [&](ASTPtr & ast, const DatabaseAndTableWithAlias & source_names, bool right_table) - { - if (IdentifierSemantic::getColumnName(ast)) - { - auto * identifier = typeid_cast(ast.get()); - - size_t match = IdentifierSemantic::canReferColumnToTable(*identifier, source_names); - IdentifierSemantic::setColumnShortName(*identifier, match); - - if (right_table && source_columns.count(ast->getColumnName())) - IdentifierSemantic::setColumnQualifiedName(*identifier, source_names); - } - - for (auto & child : ast->children) - translate_qualified_names(child, source_names, right_table); - }; - const auto supported_syntax = " Supported syntax: JOIN ON Expr([table.]column, ...) = Expr([table.]column, ...) " "[AND Expr([table.]column, ...) = Expr([table.]column, ...) ...]"; auto throwSyntaxException = [&](const String & msg) @@ -556,9 +539,6 @@ void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTS auto add_join_keys = [&](ASTPtr & ast_to_left_table, ASTPtr & ast_to_right_table) { - translate_qualified_names(ast_to_left_table, left_source_names, false); - translate_qualified_names(ast_to_right_table, right_source_names, true); - analyzed_join.key_asts_left.push_back(ast_to_left_table); analyzed_join.key_names_left.push_back(ast_to_left_table->getColumnName()); analyzed_join.key_asts_right.push_back(ast_to_right_table); @@ -624,7 +604,7 @@ void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery * s name = joined_table_name.getQualifiedNamePrefix() + name; } else if (table_join.on_expression) - collectJoinedColumnsFromJoinOnExpr(analyzed_join, select_query, source_columns, context); + collectJoinedColumnsFromJoinOnExpr(analyzed_join, select_query, context); auto & settings = context.getSettingsRef(); bool make_nullable = settings.join_use_nulls && (table_join.kind == ASTTableJoin::Kind::Left || @@ -666,9 +646,12 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( if (source_columns_set.size() != source_columns_list.size()) throw Exception("Unexpected duplicates in source columns list.", ErrorCodes::LOGICAL_ERROR); + std::vector tables_with_columns; + if (select_query) { - translateQualifiedNames(query, select_query, source_columns_set, context); + tables_with_columns = getDatabaseAndTablesWithColumnNames(*select_query, context); + translateQualifiedNames(query, select_query, source_columns_set, tables_with_columns); /// Depending on the user's profile, check for the execution rights /// distributed subqueries inside the IN or JOIN sections and process these subqueries. @@ -687,7 +670,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( /// Common subexpression elimination. Rewrite rules. normalizeTree(query, result, (storage ? storage->getColumns().ordinary.getNames() : source_columns_list), source_columns_set, - context, select_query); + context, select_query, tables_with_columns); /// Remove unneeded columns according to 'required_result_columns'. /// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside. diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 016d176caba..f6e5ebe956a 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -50,25 +50,24 @@ std::vector TranslateQualifiedNamesMatcher::visit(ASTPtr & ast, Data & return {}; } -std::vector TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr & ast, Data & data) +std::vector TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &, Data & data) { if (IdentifierSemantic::getColumnName(identifier)) { size_t best_table_pos = 0; size_t best_match = 0; for (size_t i = 0; i < data.tables.size(); ++i) - if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, data.tables[i])) + if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, data.tables[i].first)) if (match > best_match) { best_match = match; best_table_pos = i; } - IdentifierSemantic::setColumnShortName(identifier, best_match); - /// In case if column from the joined table are in source columns, change it's name to qualified. - if (best_table_pos && data.source_columns.count(ast->getColumnName())) - IdentifierSemantic::setColumnQualifiedName(identifier, data.tables[best_table_pos]); + if (best_table_pos && data.source_columns.count(identifier.shortName())) + IdentifierSemantic::setNeedLongName(identifier, true); + IdentifierSemantic::setColumnNormalName(identifier, data.tables[best_table_pos].first); } return {}; @@ -85,7 +84,7 @@ std::vector TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAs DatabaseAndTableWithAlias db_and_table(ident); for (const auto & known_table : data.tables) - if (db_and_table.satisfies(known_table, true)) + if (db_and_table.satisfies(known_table.first, true)) return {}; throw Exception("Unknown qualified identifier: " + ident->getAliasOrColumnName(), ErrorCodes::UNKNOWN_IDENTIFIER); @@ -93,10 +92,11 @@ std::vector TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAs std::vector TranslateQualifiedNamesMatcher::visit(ASTTableJoin & join, const ASTPtr & , Data &) { - /// Don't translate on_expression here in order to resolve equation parts later. std::vector out; if (join.using_expression_list) out.push_back(&join.using_expression_list); + else if (join.on_expression) + out.push_back(&join.on_expression); return out; } diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h index 48d41213cb8..bee5e7022f4 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -21,7 +21,16 @@ public: struct Data { const NameSet & source_columns; - const std::vector & tables; + const std::vector & tables; + + static void setTablesOnly(const std::vector & tables, + std::vector & tables_with_columns) + { + tables_with_columns.clear(); + tables_with_columns.reserve(tables.size()); + for (const auto & table : tables) + tables_with_columns.emplace_back(TableWithColumnNames{table, {}}); + } }; static constexpr const char * label = "TranslateQualifiedNames"; diff --git a/dbms/src/Parsers/ASTIdentifier.h b/dbms/src/Parsers/ASTIdentifier.h index 5c287eb9da4..6bf7eac5260 100644 --- a/dbms/src/Parsers/ASTIdentifier.h +++ b/dbms/src/Parsers/ASTIdentifier.h @@ -42,6 +42,13 @@ public: name_parts.clear(); } + const String & shortName() const + { + if (!name_parts.empty()) + return name_parts.back(); + return name; + } + protected: void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; void appendColumnNameImpl(WriteBuffer & ostr) const override; From 630937732317c8e444e517d93dc4a7bb942a7809 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 8 Feb 2019 14:14:48 +0300 Subject: [PATCH 4/6] remove fix for JOIN ON columns in collectUsedColumns --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 54 -------------------- 1 file changed, 54 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 7c7b3d16622..55f3b901e8a 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -941,15 +941,6 @@ void ExpressionAnalyzer::getAggregateInfo(Names & key_names, AggregateDescriptio aggregates = aggregate_descriptions; } -/// db.table.column -> table.column / table.column -> column -static String cropDatabaseOrTableName(const String & name) -{ - size_t pos = name.find('.', 0); - if (pos != std::string::npos) - return name.substr(pos + 1, name.size() - pos - 1); - return name; -} - void ExpressionAnalyzer::collectUsedColumns() { /** Calculate which columns are required to execute the expression. @@ -1009,51 +1000,6 @@ void ExpressionAnalyzer::collectUsedColumns() required.erase(name); } } - - /// @fix filter required columns according to misqualified names in JOIN ON - if (columns_context.has_table_join && - columns_context.tables.size() >= 2 && - columns_context.tables[1].join && - columns_context.tables[1].join->on_expression) - { - NameSet fixed_required; - - for (const auto & req_name : required) - { - bool collated = false; - String cropped_name = req_name; - static const constexpr size_t max_column_prefix = 2; - - for (size_t i = 0; i < max_column_prefix && !collated; ++i) - { - cropped_name = cropDatabaseOrTableName(cropped_name); - - if (avaliable_columns.count(cropped_name)) - { - fixed_required.insert(cropped_name); - collated = true; - break; - } - - for (const auto & joined_column : analyzed_join.available_joined_columns) - { - auto & name = joined_column.name_and_type.name; - - if (cropped_name == name) - { - columns_added_by_join.push_back(joined_column); - collated = true; - break; - } - } - } - - if (!collated) - fixed_required.insert(req_name); - } - - required.swap(fixed_required); - } } if (columns_context.has_array_join) From 668b22025886acbc921b41a2e426c71fea410723 Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 8 Feb 2019 18:37:43 +0300 Subject: [PATCH 5/6] fix push down --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 1 + .../FindIdentifierBestTableVisitor.h | 4 +++- dbms/src/Interpreters/IdentifierSemantic.cpp | 15 ++++++++++++++- dbms/src/Interpreters/IdentifierSemantic.h | 1 + .../PredicateExpressionsOptimizer.cpp | 18 +++++++++++++++--- dbms/src/Parsers/ASTIdentifier.h | 2 +- 6 files changed, 35 insertions(+), 6 deletions(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 55f3b901e8a..02a74ab094f 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -1049,6 +1049,7 @@ void ExpressionAnalyzer::collectUsedColumns() if (!unknown_required_source_columns.empty()) { std::stringstream ss; + ss << "query: '" << query << "'" << std::endl; ss << columns_context; ss << "source_columns: "; for (const auto & name : source_columns) diff --git a/dbms/src/Interpreters/FindIdentifierBestTableVisitor.h b/dbms/src/Interpreters/FindIdentifierBestTableVisitor.h index 4ad4fc09ff6..96e801f7ed2 100644 --- a/dbms/src/Interpreters/FindIdentifierBestTableVisitor.h +++ b/dbms/src/Interpreters/FindIdentifierBestTableVisitor.h @@ -10,8 +10,10 @@ namespace DB struct FindIdentifierBestTableData { using TypeToVisit = ASTIdentifier; + using IdentifierWithTable = std::pair; + const std::vector & tables; - std::vector> identifier_table; + std::vector identifier_table; FindIdentifierBestTableData(const std::vector & tables_); diff --git a/dbms/src/Interpreters/IdentifierSemantic.cpp b/dbms/src/Interpreters/IdentifierSemantic.cpp index 337bbc27cfb..13a9c49c3e0 100644 --- a/dbms/src/Interpreters/IdentifierSemantic.cpp +++ b/dbms/src/Interpreters/IdentifierSemantic.cpp @@ -112,8 +112,21 @@ void IdentifierSemantic::setColumnNormalName(ASTIdentifier & identifier, const D if (identifier.semantic->need_long_name) { String prefix = db_and_table.getQualifiedNamePrefix(); - identifier.name.insert(identifier.name.begin(), prefix.begin(), prefix.end()); + if (!prefix.empty()) + { + String short_name = identifier.shortName(); + identifier.name = prefix + short_name; + prefix.resize(prefix.size() - 1); /// crop dot + identifier.name_parts = {prefix, short_name}; + } } } +String IdentifierSemantic::columnNormalName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table) +{ + ASTPtr copy = identifier.clone(); + setColumnNormalName(typeid_cast(*copy), db_and_table); + return copy->getAliasOrColumnName(); +} + } diff --git a/dbms/src/Interpreters/IdentifierSemantic.h b/dbms/src/Interpreters/IdentifierSemantic.h index 8bef3543a43..be721627e1a 100644 --- a/dbms/src/Interpreters/IdentifierSemantic.h +++ b/dbms/src/Interpreters/IdentifierSemantic.h @@ -25,6 +25,7 @@ struct IdentifierSemantic static std::pair extractDatabaseAndTable(const ASTIdentifier & identifier); static size_t canReferColumnToTable(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); + static String columnNormalName(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); static void setColumnNormalName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); static void setNeedLongName(ASTIdentifier & identifier, bool); /// if set setColumnNormalName makes qualified name diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index af556ebc01e..b49f02a14fa 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -236,10 +236,22 @@ void PredicateExpressionsOptimizer::setNewAliasesForInnerPredicate( { if (alias == qualified_name) { - if (!isIdentifier(ast) && ast->tryGetAlias().empty()) - ast->setAlias(ast->getColumnName()); + String name; + if (auto * id = typeid_cast(ast.get())) + { + name = id->tryGetAlias(); + if (name.empty()) + name = id->shortName(); + } + else + { + if (ast->tryGetAlias().empty()) + ast->setAlias(ast->getColumnName()); + name = ast->getAliasOrColumnName(); + } - identifier->resetWithAlias(ast->getAliasOrColumnName()); + IdentifierSemantic::setNeedLongName(*identifier, false); + identifier->setShortName(name); } } } diff --git a/dbms/src/Parsers/ASTIdentifier.h b/dbms/src/Parsers/ASTIdentifier.h index 6bf7eac5260..9457b7d9156 100644 --- a/dbms/src/Parsers/ASTIdentifier.h +++ b/dbms/src/Parsers/ASTIdentifier.h @@ -36,7 +36,7 @@ public: bool compound() const { return !name_parts.empty(); } bool isShort() const { return name_parts.empty() || name == name_parts.back(); } - void resetWithAlias(const String & new_name) + void setShortName(const String & new_name) { name = new_name; name_parts.clear(); From c24ba390b38b7d8c8467eac776e9dabcd6137d9f Mon Sep 17 00:00:00 2001 From: chertus Date: Fri, 8 Feb 2019 20:21:28 +0300 Subject: [PATCH 6/6] fix test --- dbms/src/Interpreters/ExpressionAnalyzer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Interpreters/ExpressionAnalyzer.cpp b/dbms/src/Interpreters/ExpressionAnalyzer.cpp index 02a74ab094f..31e08a95ec3 100644 --- a/dbms/src/Interpreters/ExpressionAnalyzer.cpp +++ b/dbms/src/Interpreters/ExpressionAnalyzer.cpp @@ -1049,7 +1049,7 @@ void ExpressionAnalyzer::collectUsedColumns() if (!unknown_required_source_columns.empty()) { std::stringstream ss; - ss << "query: '" << query << "'" << std::endl; + ss << "query: '" << query << "' "; ss << columns_context; ss << "source_columns: "; for (const auto & name : source_columns)