diff --git a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp index bac82d967f2..bf2d1eb79cd 100644 --- a/src/Interpreters/JoinToSubqueryTransformVisitor.cpp +++ b/src/Interpreters/JoinToSubqueryTransformVisitor.cpp @@ -37,10 +37,13 @@ namespace /// @note we use `--` prefix for unique short names and `--.` for subqueries. /// It expects that user do not use names starting with `--` and column names starting with dot. -ASTPtr makeSubqueryTemplate() +ASTPtr makeSubqueryTemplate(const String & table_alias) { ParserTablesInSelectQueryElement parser(true); - ASTPtr subquery_template = parseQuery(parser, "(select * from _t) as `--.s`", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + String query_template = "(select * from _t)"; + if (!table_alias.empty()) + query_template += " as " + table_alias; + ASTPtr subquery_template = parseQuery(parser, query_template, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); if (!subquery_template) throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot parse subquery template"); return subquery_template; @@ -232,8 +235,7 @@ struct RewriteTablesVisitorData { using TypeToVisit = ASTTablesInSelectQuery; - ASTPtr left; - ASTPtr right; + ASTs new_tables; bool done = false; /// @note Do not change ASTTablesInSelectQuery itself. No need to change select.tables. @@ -241,7 +243,6 @@ struct RewriteTablesVisitorData { if (done) return; - ASTs new_tables{left, right}; ast->children.swap(new_tables); done = true; } @@ -483,7 +484,11 @@ struct TableNeededColumns /// t.x as `some` static void addAliasedName(const String & table, const String & column, const String & alias, ASTExpressionList & expression_list) { - auto ident = std::make_shared(std::vector{table, column}); + std::vector name_parts; + if (!table.empty()) + name_parts.push_back(table); + name_parts.push_back(column); + auto ident = std::make_shared(std::move(name_parts)); ident->setAlias(alias); expression_list.children.emplace_back(std::move(ident)); } @@ -494,7 +499,8 @@ class UniqueShortNames public: /// We know that long names are unique (do not clashes with others). /// So we could make unique names base on this knolage by adding some unused prefix. - static constexpr const char * pattern = "--"; + /// Add a heading underscore to make unique names valid for `isValidIdentifierBegin` + static constexpr const char * pattern = "_--"; String longToShort(const String & long_name) { @@ -558,8 +564,6 @@ std::vector normalizeColumnNamesExtractNeeded( const std::unordered_set & public_identifiers, UniqueShortNames & unique_names) { - size_t last_table_pos = tables.size() - 1; - std::vector needed_columns; needed_columns.reserve(tables.size()); for (const auto & table : tables) @@ -602,12 +606,9 @@ std::vector normalizeColumnNamesExtractNeeded( const auto & unique_long_name = ident->name(); /// For tables moved into subselects we need unique short names for clashed names - if (*table_pos != last_table_pos) - { - String unique_short_name = unique_names.longToShort(unique_long_name); - ident->setShortName(unique_short_name); - needed_columns[*table_pos].column_clashes.emplace(short_name, unique_short_name); - } + String unique_short_name = unique_names.longToShort(unique_long_name); + ident->setShortName(unique_short_name); + needed_columns[*table_pos].column_clashes.emplace(short_name, unique_short_name); } else { @@ -625,9 +626,47 @@ std::vector normalizeColumnNamesExtractNeeded( restoreName(*ident, original_long_name, restored_names); } else if (got_alias) - needed_columns[*table_pos].alias_clashes.emplace(ident->shortName()); + { + String short_name = ident->shortName(); + if (!isValidIdentifierBegin(short_name.at(0))) + { + String original_long_name; + if (public_identifiers.contains(ident)) + original_long_name = ident->name(); + + const auto & table = tables[*table_pos]; + IdentifierSemantic::setColumnLongName(*ident, table.table); /// table.column -> table_alias.column + const auto & unique_long_name = ident->name(); + + String unique_short_name = unique_names.longToShort(unique_long_name); + ident->setShortName(unique_short_name); + needed_columns[*table_pos].column_clashes.emplace(short_name, unique_short_name); + restoreName(*ident, original_long_name, restored_names); + } + else + needed_columns[*table_pos].alias_clashes.emplace(ident->shortName()); + } else - needed_columns[*table_pos].no_clashes.emplace(ident->shortName()); + { + String short_name = ident->shortName(); + if (!isValidIdentifierBegin(short_name.at(0))) + { + String original_long_name; + if (public_identifiers.contains(ident)) + original_long_name = ident->name(); + + const auto & table = tables[*table_pos]; + IdentifierSemantic::setColumnLongName(*ident, table.table); /// table.column -> table_alias.column + const auto & unique_long_name = ident->name(); + + String unique_short_name = unique_names.longToShort(unique_long_name); + ident->setShortName(unique_short_name); + needed_columns[*table_pos].column_clashes.emplace(short_name, unique_short_name); + restoreName(*ident, original_long_name, restored_names); + } + else + needed_columns[*table_pos].no_clashes.emplace(ident->shortName()); + } } } @@ -785,7 +824,7 @@ void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast ASTPtr left_table = src_tables[0]; - static ASTPtr subquery_template = makeSubqueryTemplate(); + static ASTPtr subquery_template = makeSubqueryTemplate("`--.s`"); for (size_t i = 1; i < src_tables.size() - 1; ++i) { @@ -798,7 +837,36 @@ void JoinToSubqueryTransformMatcher::visit(ASTSelectQuery & select, ASTPtr & ast left_table = replaceJoin(left_table, src_tables[i], subquery); } - RewriteVisitor::Data visitor_data{left_table, src_tables.back()}; + // expand the last table into a subselect, to resolve alias clashes inside it + static ASTPtr last_select_template = makeSubqueryTemplate("`--.t`"); + auto last_select = last_select_template->clone(); + { + auto expression_list = std::make_shared(); + needed_columns[src_tables.size() - 1].fillExpressionList(*expression_list); + + SubqueryExpressionsRewriteVisitor::Data expr_rewrite_data{std::move(expression_list)}; + SubqueryExpressionsRewriteVisitor(expr_rewrite_data).visit(last_select); + + // move ASTTableJoin out of subquery + auto * last_table_elem = src_tables.back()->as(); + auto * last_select_elem = last_select->as(); + if (!last_table_elem || !last_select_elem) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Two TablesInSelectQueryElements expected"); + + if (!last_table_elem->table_join) + throw Exception(ErrorCodes::LOGICAL_ERROR, "Table join expected"); + + last_select_elem->table_join = std::move(last_table_elem->table_join); + last_select_elem->children.emplace_back(last_select_elem->table_join); + last_table_elem->children.erase( + std::remove(last_table_elem->children.begin(), last_table_elem->children.end(), last_select_elem->table_join), + last_table_elem->children.end()); + + RewriteVisitor::Data visitor_data{{src_tables.back()}}; + RewriteVisitor(visitor_data).visit(last_select); + } + + RewriteVisitor::Data visitor_data{{left_table, last_select}}; RewriteVisitor(visitor_data).visit(select.refTables()); data.done = true; @@ -815,7 +883,7 @@ ASTPtr JoinToSubqueryTransformMatcher::replaceJoin(ASTPtr ast_left, ASTPtr ast_r throw Exception(ErrorCodes::LOGICAL_ERROR, "Table join expected"); /// replace '_t' with pair of joined tables - RewriteVisitor::Data visitor_data{ast_left, ast_right}; + RewriteVisitor::Data visitor_data{{ast_left, ast_right}}; RewriteVisitor(visitor_data).visit(subquery_template); return subquery_template; } diff --git a/tests/queries/0_stateless/02871_multiple_joins_rewriter_v2_handle_last_table_columns.reference b/tests/queries/0_stateless/02871_multiple_joins_rewriter_v2_handle_last_table_columns.reference new file mode 100644 index 00000000000..76294a66748 --- /dev/null +++ b/tests/queries/0_stateless/02871_multiple_joins_rewriter_v2_handle_last_table_columns.reference @@ -0,0 +1,48 @@ +-- { echo } + +-- no clash name +SELECT + c + 1, + Z.c + 1 +FROM + (SELECT 10 a) X +CROSS JOIN + (SELECT 20 b) Y +CROSS JOIN + (SELECT 30 c) Z; +31 31 +-- alias clash +SELECT + (a + 1) AS c, + Z.c + 1 +FROM + (SELECT 10 a) X +CROSS JOIN + (SELECT 20 b) Y +CROSS JOIN + (SELECT 30 c) Z; +11 31 +-- column clash +SELECT + (X.c + 1) AS c, + Z.c + 1 +FROM + (SELECT 10 c) X +CROSS JOIN + (SELECT 20 b) Y +CROSS JOIN + (SELECT 30 c) Z; +11 31 +SELECT + (X.a + 1) AS a, + (Y.a + 1) AS Y_a, + (Z.a + 1) AS Z_a, + (Y.b + 1) AS b, + (Z.b + 1) AS Z_b +FROM + (SELECT 10 a) X +CROSS JOIN + (SELECT 20 a, 21 as b) Y +CROSS JOIN + (SELECT 30 a, 31 as b, 32 as c) Z; +11 21 31 22 32 diff --git a/tests/queries/0_stateless/02871_multiple_joins_rewriter_v2_handle_last_table_columns.sql b/tests/queries/0_stateless/02871_multiple_joins_rewriter_v2_handle_last_table_columns.sql new file mode 100644 index 00000000000..15a1d36c082 --- /dev/null +++ b/tests/queries/0_stateless/02871_multiple_joins_rewriter_v2_handle_last_table_columns.sql @@ -0,0 +1,47 @@ +-- { echo } + +-- no clash name +SELECT + c + 1, + Z.c + 1 +FROM + (SELECT 10 a) X +CROSS JOIN + (SELECT 20 b) Y +CROSS JOIN + (SELECT 30 c) Z; + +-- alias clash +SELECT + (a + 1) AS c, + Z.c + 1 +FROM + (SELECT 10 a) X +CROSS JOIN + (SELECT 20 b) Y +CROSS JOIN + (SELECT 30 c) Z; + +-- column clash +SELECT + (X.c + 1) AS c, + Z.c + 1 +FROM + (SELECT 10 c) X +CROSS JOIN + (SELECT 20 b) Y +CROSS JOIN + (SELECT 30 c) Z; + +SELECT + (X.a + 1) AS a, + (Y.a + 1) AS Y_a, + (Z.a + 1) AS Z_a, + (Y.b + 1) AS b, + (Z.b + 1) AS Z_b +FROM + (SELECT 10 a) X +CROSS JOIN + (SELECT 20 a, 21 as b) Y +CROSS JOIN + (SELECT 30 a, 31 as b, 32 as c) Z;