diff --git a/dbms/src/Interpreters/AnalyzedJoin.cpp b/dbms/src/Interpreters/AnalyzedJoin.cpp index 772fcad3eaf..5e4bf1fe53b 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.cpp +++ b/dbms/src/Interpreters/AnalyzedJoin.cpp @@ -98,14 +98,6 @@ NameSet AnalyzedJoin::getQualifiedColumnsSet() const return out; } -NameSet AnalyzedJoin::getOriginalColumnsSet() const -{ - NameSet out; - for (const auto & names : original_names) - out.insert(names.second); - return out; -} - NamesWithAliases AnalyzedJoin::getNamesWithAliases(const NameSet & required_columns) const { NamesWithAliases out; diff --git a/dbms/src/Interpreters/AnalyzedJoin.h b/dbms/src/Interpreters/AnalyzedJoin.h index 9503e2c28e9..677662d949c 100644 --- a/dbms/src/Interpreters/AnalyzedJoin.h +++ b/dbms/src/Interpreters/AnalyzedJoin.h @@ -96,7 +96,6 @@ public: bool hasOn() const { return table_join.on_expression != nullptr; } NameSet getQualifiedColumnsSet() const; - NameSet getOriginalColumnsSet() const; NamesWithAliases getNamesWithAliases(const NameSet & required_columns) const; NamesWithAliases getRequiredColumns(const Block & sample, const Names & action_columns) const; diff --git a/dbms/src/Interpreters/CollectJoinOnKeysVisitor.cpp b/dbms/src/Interpreters/CollectJoinOnKeysVisitor.cpp index 894e1ea3a5a..04484aebe0b 100644 --- a/dbms/src/Interpreters/CollectJoinOnKeysVisitor.cpp +++ b/dbms/src/Interpreters/CollectJoinOnKeysVisitor.cpp @@ -170,11 +170,22 @@ size_t CollectJoinOnKeysMatcher::getTableForIdentifiers(std::vectorname; - bool in_left_table = data.source_columns.count(name); - bool in_right_table = data.joined_columns.count(name); + bool in_left_table = data.left_table.hasColumn(name); + bool in_right_table = data.right_table.hasColumn(name); if (in_left_table && in_right_table) - throw Exception("Column '" + name + "' is ambiguous", ErrorCodes::AMBIGUOUS_COLUMN_NAME); + { + /// Relax ambiguous check for multiple JOINs + if (auto original_name = IdentifierSemantic::uncover(*identifier)) + { + auto match = IdentifierSemantic::canReferColumnToTable(*original_name, data.right_table.table); + if (match == IdentifierSemantic::ColumnMatch::NoMatch) + in_right_table = false; + in_left_table = !in_right_table; + } + else + throw Exception("Column '" + name + "' is ambiguous", ErrorCodes::AMBIGUOUS_COLUMN_NAME); + } if (in_left_table) membership = 1; diff --git a/dbms/src/Interpreters/CollectJoinOnKeysVisitor.h b/dbms/src/Interpreters/CollectJoinOnKeysVisitor.h index 0b4cb1fe857..68109e460e5 100644 --- a/dbms/src/Interpreters/CollectJoinOnKeysVisitor.h +++ b/dbms/src/Interpreters/CollectJoinOnKeysVisitor.h @@ -3,6 +3,7 @@ #include #include #include +#include #include @@ -25,8 +26,8 @@ public: struct Data { AnalyzedJoin & analyzed_join; - const NameSet & source_columns; - const NameSet & joined_columns; + const TableWithColumnNames & left_table; + const TableWithColumnNames & right_table; const Aliases & aliases; const bool is_asof{false}; ASTPtr asof_left_key{}; diff --git a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h index 82f0f1fa966..3567a351b14 100644 --- a/dbms/src/Interpreters/DatabaseAndTableWithAlias.h +++ b/dbms/src/Interpreters/DatabaseAndTableWithAlias.h @@ -53,6 +53,20 @@ struct TableWithColumnNames for (auto & column : addition) hidden_columns.push_back(column.name); } + + bool hasColumn(const String & name) const + { + if (columns_set.empty()) + { + columns_set.insert(columns.begin(), columns.end()); + columns_set.insert(hidden_columns.begin(), hidden_columns.end()); + } + + return columns_set.count(name); + } + +private: + mutable NameSet columns_set; }; std::vector getDatabaseAndTables(const ASTSelectQuery & select_query, const String & current_database); diff --git a/dbms/src/Interpreters/IdentifierSemantic.cpp b/dbms/src/Interpreters/IdentifierSemantic.cpp index 959a750c637..17349ab81cd 100644 --- a/dbms/src/Interpreters/IdentifierSemantic.cpp +++ b/dbms/src/Interpreters/IdentifierSemantic.cpp @@ -92,6 +92,22 @@ std::optional IdentifierSemantic::getTableName(const ASTPtr & ast) return {}; } +std::optional IdentifierSemantic::uncover(const ASTIdentifier & identifier) +{ + if (identifier.semantic->covered) + { + std::vector name_parts = identifier.name_parts; + return ASTIdentifier(std::move(name_parts)); + } + return {}; +} + +void IdentifierSemantic::coverName(ASTIdentifier & identifier, const String & alias) +{ + identifier.setShortName(alias); + identifier.semantic->covered = true; +} + bool IdentifierSemantic::canBeAlias(const ASTIdentifier & identifier) { return identifier.semantic->can_be_alias; diff --git a/dbms/src/Interpreters/IdentifierSemantic.h b/dbms/src/Interpreters/IdentifierSemantic.h index dc5859dc05e..e3b69abc61e 100644 --- a/dbms/src/Interpreters/IdentifierSemantic.h +++ b/dbms/src/Interpreters/IdentifierSemantic.h @@ -12,6 +12,7 @@ struct IdentifierSemanticImpl { bool special = false; /// for now it's 'not a column': tables, subselects and some special stuff like FORMAT bool can_be_alias = true; /// if it's a cropped name it could not be an alias + bool covered = false; /// real (compound) name is hidden by an alias (short name) std::optional membership; /// table position in join }; @@ -43,6 +44,8 @@ struct IdentifierSemantic static void setColumnLongName(ASTIdentifier & identifier, const DatabaseAndTableWithAlias & db_and_table); static bool canBeAlias(const ASTIdentifier & identifier); static void setMembership(ASTIdentifier &, size_t table_no); + static void coverName(ASTIdentifier &, const String & alias); + static std::optional uncover(const ASTIdentifier & identifier); static std::optional getMembership(const ASTIdentifier & identifier); static bool chooseTable(const ASTIdentifier &, const std::vector & tables, size_t & best_table_pos, bool ambiguous = false); diff --git a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index 7e8174b7955..60d8651f453 100644 --- a/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/dbms/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -159,7 +159,7 @@ struct ColumnAliasesMatcher aliases[alias] = long_name; rev_aliases[long_name].push_back(alias); - identifier->setShortName(alias); + IdentifierSemantic::coverName(*identifier, alias); if (is_public) { identifier->setAlias(long_name); @@ -177,7 +177,7 @@ struct ColumnAliasesMatcher if (is_public && allowed_long_names.count(long_name)) ; /// leave original name unchanged for correct output else - identifier->setShortName(it->second[0]); + IdentifierSemantic::coverName(*identifier, it->second[0]); } } } @@ -229,7 +229,7 @@ struct ColumnAliasesMatcher if (!last_table) { - node.setShortName(alias); + IdentifierSemantic::coverName(node, alias); node.setAlias(""); } } diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index aa244909109..a26d8b8253a 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -532,8 +532,8 @@ void setJoinStrictness(ASTSelectQuery & select_query, JoinStrictness join_defaul } /// Find the columns that are obtained by JOIN. -void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery & select_query, const NameSet & source_columns, - const Aliases & aliases) +void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery & select_query, + const std::vector & tables, const Aliases & aliases) { const ASTTablesInSelectQueryElement * node = select_query.join(); if (!node) @@ -551,7 +551,7 @@ void collectJoinedColumns(AnalyzedJoin & analyzed_join, const ASTSelectQuery & s { bool is_asof = (table_join.strictness == ASTTableJoin::Strictness::Asof); - CollectJoinOnKeysVisitor::Data data{analyzed_join, source_columns, analyzed_join.getOriginalColumnsSet(), aliases, is_asof}; + CollectJoinOnKeysVisitor::Data data{analyzed_join, tables[0], tables[1], aliases, is_asof}; CollectJoinOnKeysVisitor(data).visit(table_join.on_expression); if (!data.has_some) throw Exception("Cannot get JOIN keys from JOIN ON section: " + queryToString(table_join.on_expression), @@ -820,6 +820,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( if (storage) collectSourceColumns(storage->getColumns(), result.source_columns, (select_query != nullptr)); NameSet source_columns_set = removeDuplicateColumns(result.source_columns); + std::vector tables_with_columns; if (select_query) { @@ -837,7 +838,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( } std::vector table_expressions = getTableExpressions(*select_query); - auto tables_with_columns = getTablesWithColumns(table_expressions, context); + tables_with_columns = getTablesWithColumns(table_expressions, context); if (tables_with_columns.empty()) { @@ -935,7 +936,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( setJoinStrictness(*select_query, settings.join_default_strictness, settings.any_join_distinct_right_table_keys, result.analyzed_join->table_join); - collectJoinedColumns(*result.analyzed_join, *select_query, source_columns_set, result.aliases); + collectJoinedColumns(*result.analyzed_join, *select_query, tables_with_columns, result.aliases); } result.aggregates = getAggregates(query); diff --git a/dbms/tests/queries/0_stateless/01051_same_name_alias_with_joins.reference b/dbms/tests/queries/0_stateless/01051_same_name_alias_with_joins.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/dbms/tests/queries/0_stateless/01051_same_name_alias_with_joins.sql b/dbms/tests/queries/0_stateless/01051_same_name_alias_with_joins.sql new file mode 100644 index 00000000000..f42eea468fa --- /dev/null +++ b/dbms/tests/queries/0_stateless/01051_same_name_alias_with_joins.sql @@ -0,0 +1,29 @@ +DROP TABLE IF EXISTS a; +DROP TABLE IF EXISTS b; +DROP TABLE IF EXISTS c; + +CREATE TABLE a (x UInt64) ENGINE = Memory; +CREATE TABLE b (x UInt64) ENGINE = Memory; +CREATE TABLE c (x UInt64) ENGINE = Memory; + +SET enable_optimize_predicate_expression = 0; + +SELECT a.x AS x FROM a +LEFT JOIN b ON a.x = b.x +LEFT JOIN c ON a.x = c.x; + +SELECT a.x AS x FROM a +LEFT JOIN b ON a.x = b.x +LEFT JOIN c ON b.x = c.x; + +SELECT b.x AS x FROM a +LEFT JOIN b ON a.x = b.x +LEFT JOIN c ON b.x = c.x; + +SELECT c.x AS x FROM a +LEFT JOIN b ON a.x = b.x +LEFT JOIN c ON b.x = c.x; + +DROP TABLE a; +DROP TABLE b; +DROP TABLE c;