diff --git a/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp b/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp index c9afb5da722..c6fbfaad088 100644 --- a/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp +++ b/dbms/src/Interpreters/DatabaseAndTableWithAlias.cpp @@ -1,5 +1,6 @@ #include #include +#include /// for getNamesAndTypeListFromTableExpression #include #include @@ -12,6 +13,9 @@ namespace DB { +NameSet removeDuplicateColumns(NamesAndTypesList & columns); + + DatabaseAndTableWithAlias::DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database) { alias = identifier.tryGetAlias(); @@ -144,4 +148,26 @@ ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number return nullptr; } +std::vector getDatabaseAndTablesWithColumnNames(const ASTSelectQuery & select_query, const Context & context) +{ + std::vector tables_with_columns; + + if (select_query.tables && !select_query.tables->children.empty()) + { + String current_database = context.getCurrentDatabase(); + + for (const ASTTableExpression * table_expression : getSelectTablesExpression(select_query)) + { + DatabaseAndTableWithAlias table_name(*table_expression, current_database); + + NamesAndTypesList names_and_types = getNamesAndTypeListFromTableExpression(*table_expression, context); + removeDuplicateColumns(names_and_types); + + tables_with_columns.emplace_back(std::move(table_name), names_and_types.getNames()); + } + } + + return tables_with_columns; +} + } diff --git a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h index 79e8da3f156..e9d8ee409a6 100644 --- a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h +++ b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h @@ -4,6 +4,7 @@ #include #include +#include namespace DB @@ -15,6 +16,7 @@ using ASTPtr = std::shared_ptr; class ASTSelectQuery; class ASTIdentifier; struct ASTTableExpression; +class Context; /// Extracts database name (and/or 
alias) from table expression or identifier @@ -36,9 +38,13 @@ struct DatabaseAndTableWithAlias bool satisfies(const DatabaseAndTableWithAlias & table, bool table_may_be_an_alias); }; +using TableWithColumnNames = std::pair; + std::vector getDatabaseAndTables(const ASTSelectQuery & select_query, const String & current_database); std::optional getDatabaseAndTable(const ASTSelectQuery & select, size_t table_number); +std::vector getDatabaseAndTablesWithColumnNames(const ASTSelectQuery & select_query, const Context & context); + std::vector getSelectTablesExpression(const ASTSelectQuery & select_query); ASTPtr extractTableExpression(const ASTSelectQuery & select, size_t table_number); diff --git a/dbms/src/Interpreters/IdentifierSemantic.cpp b/dbms/src/Interpreters/IdentifierSemantic.cpp index e6fe2257d20..337bbc27cfb 100644 --- a/dbms/src/Interpreters/IdentifierSemantic.cpp +++ b/dbms/src/Interpreters/IdentifierSemantic.cpp @@ -37,6 +37,12 @@ std::optional IdentifierSemantic::getTableName(const ASTPtr & ast) return {}; } + +void IdentifierSemantic::setNeedLongName(ASTIdentifier & identifier, bool value) +{ + identifier.semantic->need_long_name = value; +} + std::pair IdentifierSemantic::extractDatabaseAndTable(const ASTIdentifier & identifier) { if (identifier.name_parts.size() > 2) @@ -97,10 +103,17 @@ void IdentifierSemantic::setColumnShortName(ASTIdentifier & identifier, size_t t identifier.name.swap(new_name); } -void IdentifierSemantic::setColumnQualifiedName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table) +void IdentifierSemantic::setColumnNormalName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table) { - String prefix = db_and_table.getQualifiedNamePrefix(); - identifier.name.insert(identifier.name.begin(), prefix.begin(), prefix.end()); + size_t match = IdentifierSemantic::canReferColumnToTable(identifier, db_and_table); + + setColumnShortName(identifier, match); + + if (identifier.semantic->need_long_name) + 
{ + String prefix = db_and_table.getQualifiedNamePrefix(); + identifier.name.insert(identifier.name.begin(), prefix.begin(), prefix.end()); + } } } diff --git a/dbms/src/Interpreters/IdentifierSemantic.h b/dbms/src/Interpreters/IdentifierSemantic.h index 895a51899fe..8bef3543a43 100644 --- a/dbms/src/Interpreters/IdentifierSemantic.h +++ b/dbms/src/Interpreters/IdentifierSemantic.h @@ -9,6 +9,7 @@ namespace DB struct IdentifierSemanticImpl { bool special = false; + bool need_long_name = false; }; /// Static calss to manipulate IdentifierSemanticImpl via ASTIdentifier @@ -24,12 +25,13 @@ struct IdentifierSemantic static std::pair extractDatabaseAndTable(const ASTIdentifier & identifier); static size_t canReferColumnToTable(const ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); - static void setColumnShortName(ASTIdentifier & identifier, size_t match); - static void setColumnQualifiedName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); + static void setColumnNormalName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); + static void setNeedLongName(ASTIdentifier & identifier, bool); /// if set setColumnNormalName makes qualified name private: static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & database, const String & table); static bool doesIdentifierBelongTo(const ASTIdentifier & identifier, const String & table); + static void setColumnShortName(ASTIdentifier & identifier, size_t match); }; } diff --git a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp index 3154e3665c2..af556ebc01e 100644 --- a/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp +++ b/dbms/src/Interpreters/PredicateExpressionsOptimizer.cpp @@ -326,7 +326,9 @@ ASTs PredicateExpressionsOptimizer::getSelectQueryProjectionColumns(ASTPtr & ast std::unordered_map aliases; std::vector tables = 
getDatabaseAndTables(*select_query, context.getCurrentDatabase()); - TranslateQualifiedNamesVisitor::Data qn_visitor_data{{}, tables}; + std::vector tables_with_columns; + TranslateQualifiedNamesVisitor::Data::setTablesOnly(tables, tables_with_columns); + TranslateQualifiedNamesVisitor::Data qn_visitor_data{{}, tables_with_columns}; TranslateQualifiedNamesVisitor(qn_visitor_data).visit(ast); QueryAliasesVisitor::Data query_aliases_data{aliases}; diff --git a/dbms/src/Interpreters/QueryNormalizer.cpp b/dbms/src/Interpreters/QueryNormalizer.cpp index 56529ae595c..1a0f5bb5ac8 100644 --- a/dbms/src/Interpreters/QueryNormalizer.cpp +++ b/dbms/src/Interpreters/QueryNormalizer.cpp @@ -25,8 +25,6 @@ namespace ErrorCodes extern const int CYCLIC_ALIASES; } -NameSet removeDuplicateColumns(NamesAndTypesList & columns); - class CheckASTDepth { @@ -143,7 +141,10 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) /// Replace *, alias.*, database.table.* with a list of columns. void QueryNormalizer::visit(ASTExpressionList & node, const ASTPtr &, Data & data) { - const auto & tables_with_columns = data.tables_with_columns; + if (!data.tables_with_columns) + return; + + const auto & tables_with_columns = *data.tables_with_columns; const auto & source_columns_set = data.source_columns_set; ASTs old_children; @@ -228,8 +229,6 @@ void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr & /// special visitChildren() for ASTSelectQuery void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr & ast, Data & data) { - extractTablesWithColumns(select, data); - if (auto join = select.join()) extractJoinUsingColumns(join->table_join, data); @@ -253,7 +252,6 @@ void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr & ast, Data & } /// Don't go into subqueries. -/// Don't go into components of compound identifiers. /// Don't go into select query. It processes children itself. 
/// Do not go to the left argument of lambda expressions, so as not to replace the formal parameters /// on aliases in expressions of the form 123 AS x, arrayMap(x -> 1, [2]). @@ -346,25 +344,6 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) } } -void QueryNormalizer::extractTablesWithColumns(const ASTSelectQuery & select_query, Data & data) -{ - if (data.context && select_query.tables && !select_query.tables->children.empty()) - { - data.tables_with_columns.clear(); - String current_database = data.context->getCurrentDatabase(); - - for (const ASTTableExpression * table_expression : getSelectTablesExpression(select_query)) - { - DatabaseAndTableWithAlias table_name(*table_expression, current_database); - - NamesAndTypesList names_and_types = getNamesAndTypeListFromTableExpression(*table_expression, *data.context); - removeDuplicateColumns(names_and_types); - - data.tables_with_columns.emplace_back(std::move(table_name), names_and_types.getNames()); - } - } -} - /// 'select * from a join b using id' should result one 'id' column void QueryNormalizer::extractJoinUsingColumns(const ASTPtr ast, Data & data) { diff --git a/dbms/src/Interpreters/QueryNormalizer.h b/dbms/src/Interpreters/QueryNormalizer.h index 62aaa09bb34..517f9416959 100644 --- a/dbms/src/Interpreters/QueryNormalizer.h +++ b/dbms/src/Interpreters/QueryNormalizer.h @@ -46,8 +46,6 @@ class QueryNormalizer }; public: - using TableWithColumnNames = std::pair; - struct Data { using SetOfASTs = std::set; @@ -57,7 +55,7 @@ public: const ExtractedSettings settings; const Context * context; const NameSet * source_columns_set; - std::vector tables_with_columns; + const std::vector * tables_with_columns; std::unordered_set join_using_columns; /// tmp data @@ -67,25 +65,25 @@ public: std::string current_alias; /// the alias referencing to the ancestor of ast (the deepest ancestor with aliases) Data(const Aliases & aliases_, ExtractedSettings && settings_, const Context & context_, - const NameSet & 
source_columns_set, Names && all_columns) + const NameSet & source_columns_set, const std::vector & tables_with_columns_) : aliases(aliases_) , settings(settings_) , context(&context_) , source_columns_set(&source_columns_set) + , tables_with_columns(&tables_with_columns_) , level(0) - { - tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns)); - } + {} Data(const Aliases & aliases_, ExtractedSettings && settings_) : aliases(aliases_) , settings(settings_) , context(nullptr) , source_columns_set(nullptr) + , tables_with_columns(nullptr) , level(0) {} - bool processAsterisks() const { return !tables_with_columns.empty(); } + bool processAsterisks() const { return tables_with_columns && !tables_with_columns->empty(); } }; QueryNormalizer(Data & data) @@ -110,7 +108,6 @@ private: static void visitChildren(const ASTPtr &, Data & data); - static void extractTablesWithColumns(const ASTSelectQuery & select_query, Data & data); static void extractJoinUsingColumns(const ASTPtr ast, Data & data); }; diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index 90ed9b6065b..b4dc9a31211 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -79,16 +79,14 @@ void collectSourceColumns(ASTSelectQuery * select_query, StoragePtr storage, Nam } /// Translate qualified names such as db.table.column, table.column, table_alias.column to unqualified names. 
-void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query, - const NameSet & source_columns, const Context & context) +void translateQualifiedNames(ASTPtr & query, ASTSelectQuery * select_query, const NameSet & source_columns, + const std::vector & tables_with_columns) { if (!select_query->tables || select_query->tables->children.empty()) return; - std::vector tables = getDatabaseAndTables(*select_query, context.getCurrentDatabase()); - LogAST log; - TranslateQualifiedNamesVisitor::Data visitor_data{source_columns, tables}; + TranslateQualifiedNamesVisitor::Data visitor_data{source_columns, tables_with_columns}; TranslateQualifiedNamesVisitor visitor(visitor_data, log.stream()); visitor.visit(query); } @@ -100,7 +98,8 @@ void normalizeTree( const Names & source_columns, const NameSet & source_columns_set, const Context & context, - const ASTSelectQuery * select_query) + const ASTSelectQuery * select_query, + std::vector & tables_with_columns) { const auto & settings = context.getSettingsRef(); @@ -116,10 +115,12 @@ void normalizeTree( if (all_columns_name.empty()) throw Exception("An asterisk cannot be replaced with empty columns.", ErrorCodes::LOGICAL_ERROR); - QueryNormalizer::Data normalizer_data(result.aliases, settings, context, source_columns_set, std::move(all_columns_name)); + if (tables_with_columns.empty()) + tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns_name)); + + QueryNormalizer::Data normalizer_data(result.aliases, settings, context, source_columns_set, tables_with_columns); QueryNormalizer(normalizer_data).visit(query); } - bool hasArrayJoin(const ASTPtr & ast) { if (const ASTFunction * function = typeid_cast(&*ast)) @@ -446,7 +447,7 @@ void getArrayJoinedColumns(ASTPtr & query, SyntaxAnalyzerResult & result, const /// Parse JOIN ON expression and collect ASTs for joined columns. 
void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTSelectQuery * select_query, - const NameSet & source_columns, const Context & context) + const Context & context) { const auto & tables = static_cast(*select_query->tables); const auto * left_tables_element = static_cast(tables.children.at(0).get()); @@ -511,24 +512,6 @@ void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTS return table_belonging; }; - std::function translate_qualified_names; - translate_qualified_names = [&](ASTPtr & ast, const DatabaseAndTableWithAlias & source_names, bool right_table) - { - if (IdentifierSemantic::getColumnName(ast)) - { - auto * identifier = typeid_cast(ast.get()); - - size_t match = IdentifierSemantic::canReferColumnToTable(*identifier, source_names); - IdentifierSemantic::setColumnShortName(*identifier, match); - - if (right_table && source_columns.count(ast->getColumnName())) - IdentifierSemantic::setColumnQualifiedName(*identifier, source_names); - } - - for (auto & child : ast->children) - translate_qualified_names(child, source_names, right_table); - }; - const auto supported_syntax = " Supported syntax: JOIN ON Expr([table.]column, ...) = Expr([table.]column, ...) " "[AND Expr([table.]column, ...) = Expr([table.]column, ...) 
...]"; auto throwSyntaxException = [&](const String & msg) @@ -556,9 +539,6 @@ void collectJoinedColumnsFromJoinOnExpr(AnalyzedJoin & analyzed_join, const ASTS auto add_join_keys = [&](ASTPtr & ast_to_left_table, ASTPtr & ast_to_right_table) { - translate_qualified_names(ast_to_left_table, left_source_names, false); - translate_qualified_names(ast_to_right_table, right_source_names, true); - analyzed_join.key_asts_left.push_back(ast_to_left_table); analyzed_join.key_names_left.push_back(ast_to_left_table->getColumnName()); analyzed_join.key_asts_right.push_back(ast_to_right_table); @@ -624,7 +604,7 @@ void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery * s name = joined_table_name.getQualifiedNamePrefix() + name; } else if (table_join.on_expression) - collectJoinedColumnsFromJoinOnExpr(analyzed_join, select_query, source_columns, context); + collectJoinedColumnsFromJoinOnExpr(analyzed_join, select_query, context); auto & settings = context.getSettingsRef(); bool make_nullable = settings.join_use_nulls && (table_join.kind == ASTTableJoin::Kind::Left || @@ -666,9 +646,12 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( if (source_columns_set.size() != source_columns_list.size()) throw Exception("Unexpected duplicates in source columns list.", ErrorCodes::LOGICAL_ERROR); + std::vector tables_with_columns; + if (select_query) { - translateQualifiedNames(query, select_query, source_columns_set, context); + tables_with_columns = getDatabaseAndTablesWithColumnNames(*select_query, context); + translateQualifiedNames(query, select_query, source_columns_set, tables_with_columns); /// Depending on the user's profile, check for the execution rights /// distributed subqueries inside the IN or JOIN sections and process these subqueries. @@ -687,7 +670,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( /// Common subexpression elimination. Rewrite rules. normalizeTree(query, result, (storage ? 
storage->getColumns().ordinary.getNames() : source_columns_list), source_columns_set, - context, select_query); + context, select_query, tables_with_columns); /// Remove unneeded columns according to 'required_result_columns'. /// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside. diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 016d176caba..f6e5ebe956a 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -50,25 +50,26 @@ std::vector TranslateQualifiedNamesMatcher::visit(ASTPtr & ast, Data & return {}; } -std::vector TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr & ast, Data & data) +std::vector TranslateQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &, Data & data) { if (IdentifierSemantic::getColumnName(identifier)) { size_t best_table_pos = 0; size_t best_match = 0; for (size_t i = 0; i < data.tables.size(); ++i) - if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, data.tables[i])) + if (size_t match = IdentifierSemantic::canReferColumnToTable(identifier, data.tables[i].first)) if (match > best_match) { best_match = match; best_table_pos = i; } - IdentifierSemantic::setColumnShortName(identifier, best_match); - /// In case if column from the joined table are in source columns, change it's name to qualified. 
- if (best_table_pos && data.source_columns.count(ast->getColumnName())) - IdentifierSemantic::setColumnQualifiedName(identifier, data.tables[best_table_pos]); + if (best_table_pos && data.source_columns.count(identifier.shortName())) + IdentifierSemantic::setNeedLongName(identifier, true); + /// With no known tables best_table_pos stays 0, so guard the lookup against an empty list. + if (!data.tables.empty()) + IdentifierSemantic::setColumnNormalName(identifier, data.tables[best_table_pos].first); } return {}; @@ -85,7 +86,7 @@ std::vector TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAs DatabaseAndTableWithAlias db_and_table(ident); for (const auto & known_table : data.tables) - if (db_and_table.satisfies(known_table, true)) + if (db_and_table.satisfies(known_table.first, true)) return {}; throw Exception("Unknown qualified identifier: " + ident->getAliasOrColumnName(), ErrorCodes::UNKNOWN_IDENTIFIER); @@ -93,10 +94,11 @@ std::vector TranslateQualifiedNamesMatcher::visit(const ASTQualifiedAs std::vector TranslateQualifiedNamesMatcher::visit(ASTTableJoin & join, const ASTPtr & , Data &) { - /// Don't translate on_expression here in order to resolve equation parts later. 
std::vector out; if (join.using_expression_list) out.push_back(&join.using_expression_list); + else if (join.on_expression) + out.push_back(&join.on_expression); return out; } diff --git a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h index 48d41213cb8..bee5e7022f4 100644 --- a/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/dbms/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -21,7 +21,16 @@ public: struct Data { const NameSet & source_columns; - const std::vector & tables; + const std::vector & tables; + + static void setTablesOnly(const std::vector & tables, + std::vector & tables_with_columns) + { + tables_with_columns.clear(); + tables_with_columns.reserve(tables.size()); + for (const auto & table : tables) + tables_with_columns.emplace_back(TableWithColumnNames{table, {}}); + } }; static constexpr const char * label = "TranslateQualifiedNames"; diff --git a/dbms/src/Parsers/ASTIdentifier.h b/dbms/src/Parsers/ASTIdentifier.h index 5c287eb9da4..6bf7eac5260 100644 --- a/dbms/src/Parsers/ASTIdentifier.h +++ b/dbms/src/Parsers/ASTIdentifier.h @@ -42,6 +42,13 @@ public: name_parts.clear(); } + const String & shortName() const + { + if (!name_parts.empty()) + return name_parts.back(); + return name; + } + protected: void formatImplWithoutAlias(const FormatSettings & settings, FormatState & state, FormatStateStacked frame) const override; void appendColumnNameImpl(WriteBuffer & ostr) const override;