diff --git a/dbms/src/Interpreters/QueryNormalizer.cpp b/dbms/src/Interpreters/QueryNormalizer.cpp index b8f11ddbf79..a5151be7c64 100644 --- a/dbms/src/Interpreters/QueryNormalizer.cpp +++ b/dbms/src/Interpreters/QueryNormalizer.cpp @@ -1,13 +1,16 @@ +#include #include #include +#include +#include #include #include #include +#include #include #include #include #include -#include #include #include @@ -16,10 +19,14 @@ namespace DB namespace ErrorCodes { + extern const int LOGICAL_ERROR; extern const int TOO_DEEP_AST; extern const int CYCLIC_ALIASES; } +NameSet removeDuplicateColumns(NamesAndTypesList & columns); + + class CheckASTDepth { public: @@ -135,7 +142,8 @@ void QueryNormalizer::visit(ASTIdentifier & node, ASTPtr & ast, Data & data) /// Replace *, alias.*, database.table.* with a list of columns. void QueryNormalizer::visit(ASTExpressionList & node, const ASTPtr &, Data & data) { - auto & tables_with_columns = data.tables_with_columns; + const auto & tables_with_columns = data.tables_with_columns; + const auto & source_columns_set = data.source_columns_set; ASTs old_children; if (data.processAsterisks()) @@ -162,22 +170,43 @@ void QueryNormalizer::visit(ASTExpressionList & node, const ASTPtr &, Data & dat { if (typeid_cast(child.get())) { - for (const auto & pr : tables_with_columns) - for (const auto & column_name : pr.second) - node.children.emplace_back(std::make_shared(column_name)); + bool first_table = true; + for (const auto & [table_name, table_columns] : tables_with_columns) + { + for (const auto & column_name : table_columns) + if (first_table || !data.join_using_columns.count(column_name)) + { + /// qualified names for duplicates + if (!first_table && source_columns_set && source_columns_set->count(column_name)) + node.children.emplace_back(std::make_shared(table_name.getQualifiedNamePrefix() + column_name)); + else + node.children.emplace_back(std::make_shared(column_name)); + } + + first_table = false; + } } else if (const auto * 
qualified_asterisk = typeid_cast(child.get())) { DatabaseAndTableWithAlias ident_db_and_name(qualified_asterisk->children[0]); + bool first_table = true; for (const auto & [table_name, table_columns] : tables_with_columns) { if (ident_db_and_name.satisfies(table_name, true)) { for (const auto & column_name : table_columns) - node.children.emplace_back(std::make_shared(column_name)); + { + /// qualified names for duplicates + if (!first_table && source_columns_set && source_columns_set->count(column_name)) + node.children.emplace_back(std::make_shared(table_name.getQualifiedNamePrefix() + column_name)); + else + node.children.emplace_back(std::make_shared(column_name)); + } break; } + + first_table = false; } } else @@ -198,6 +227,11 @@ void QueryNormalizer::visit(ASTTablesInSelectQueryElement & node, const ASTPtr & /// special visitChildren() for ASTSelectQuery void QueryNormalizer::visit(ASTSelectQuery & select, const ASTPtr & ast, Data & data) { + extractTablesWithColumns(select, data); + + if (auto join = select.join()) + extractJoinUsingColumns(join->table_join, data); + for (auto & child : ast->children) { if (typeid_cast(child.get()) || @@ -312,4 +346,46 @@ void QueryNormalizer::visit(ASTPtr & ast, Data & data) } } +void QueryNormalizer::extractTablesWithColumns(const ASTSelectQuery & select_query, Data & data) +{ + if (data.context && select_query.tables && !select_query.tables->children.empty()) + { + data.tables_with_columns.clear(); + String current_database = data.context->getCurrentDatabase(); + + for (const ASTTableExpression * table_expression : getSelectTablesExpression(select_query)) + { + DatabaseAndTableWithAlias table_name(*table_expression, current_database); + + NamesAndTypesList names_and_types = getNamesAndTypeListFromTableExpression(*table_expression, *data.context); + removeDuplicateColumns(names_and_types); + + data.tables_with_columns.emplace_back(std::move(table_name), names_and_types.getNames()); + } + } +} + +/// 'select * from a join b 
using id' should result in one 'id' column +void QueryNormalizer::extractJoinUsingColumns(const ASTPtr ast, Data & data) +{ + const auto & table_join = typeid_cast(*ast); + + if (table_join.using_expression_list) + { + auto & keys = typeid_cast(*table_join.using_expression_list); + for (const auto & key : keys.children) + if (auto opt_column = getIdentifierName(key)) + data.join_using_columns.insert(*opt_column); + else if (typeid_cast(key.get())) + data.join_using_columns.insert(key->getColumnName()); + else + { + String alias = key->tryGetAlias(); + if (alias.empty()) + throw Exception("Logical error: expected identifier or alias, got: " + key->getID(), ErrorCodes::LOGICAL_ERROR); + data.join_using_columns.insert(alias); + } + } +} + } diff --git a/dbms/src/Interpreters/QueryNormalizer.h b/dbms/src/Interpreters/QueryNormalizer.h index 57f4645569a..671ddf1b106 100644 --- a/dbms/src/Interpreters/QueryNormalizer.h +++ b/dbms/src/Interpreters/QueryNormalizer.h @@ -1,5 +1,7 @@ #pragma once +#include + #include #include #include @@ -18,11 +20,11 @@ inline bool functionIsInOrGlobalInOperator(const String & name) return functionIsInOperator(name) || name == "globalIn" || name == "globalNotIn"; } - class ASTFunction; class ASTIdentifier; class ASTExpressionList; struct ASTTablesInSelectQueryElement; +class Context; class QueryNormalizer @@ -53,7 +55,10 @@ public: const Aliases & aliases; const ExtractedSettings settings; - const std::vector tables_with_columns; + const Context * context; + const NameSet * source_columns_set; + std::vector tables_with_columns; + std::unordered_set join_using_columns; /// tmp data size_t level; @@ -61,10 +66,22 @@ public: SetOfASTs current_asts; /// vertices in the current call stack of this method std::string current_alias; /// the alias referencing to the ancestor of ast (the deepest ancestor with aliases) - Data(const Aliases & aliases_, ExtractedSettings && settings_, std::vector && tables_with_columns_ = {}) + Data(const Aliases & 
aliases_, ExtractedSettings && settings_, const Context & context_, + const NameSet & source_columns_set, Names && all_columns) : aliases(aliases_) , settings(settings_) - , tables_with_columns(tables_with_columns_) + , context(&context_) + , source_columns_set(&source_columns_set) + , level(0) + { + tables_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns)); + } + + Data(const Aliases & aliases_, ExtractedSettings && settings_) + : aliases(aliases_) + , settings(settings_) + , context(nullptr) + , source_columns_set(nullptr) , level(0) {} @@ -92,6 +109,9 @@ private: static void visit(ASTSelectQuery &, const ASTPtr &, Data &); static void visitChildren(const ASTPtr &, Data & data); + + static void extractTablesWithColumns(const ASTSelectQuery & select_query, Data & data); + static void extractJoinUsingColumns(const ASTPtr ast, Data & data); }; } diff --git a/dbms/src/Interpreters/SyntaxAnalyzer.cpp b/dbms/src/Interpreters/SyntaxAnalyzer.cpp index e09f1b3a95b..fb6a34d37b9 100644 --- a/dbms/src/Interpreters/SyntaxAnalyzer.cpp +++ b/dbms/src/Interpreters/SyntaxAnalyzer.cpp @@ -100,12 +100,13 @@ void normalizeTree( const Names & source_columns, const NameSet & source_columns_set, const Context & context, - const ASTSelectQuery * select_query, - bool asterisk_left_columns_only) + const ASTSelectQuery * select_query) { + const auto & settings = context.getSettingsRef(); + Names all_columns_name = source_columns; - if (!asterisk_left_columns_only) + if (!settings.asterisk_left_columns_only) { auto columns_from_joined_table = result.analyzed_join.getColumnsFromJoinedTable(source_columns_set, context, select_query); for (auto & column : columns_from_joined_table) @@ -115,37 +116,7 @@ void normalizeTree( if (all_columns_name.empty()) throw Exception("An asterisk cannot be replaced with empty columns.", ErrorCodes::LOGICAL_ERROR); - std::vector table_with_columns; - if (select_query && select_query->tables && !select_query->tables->children.empty()) 
- { - std::vector tables_expression = getSelectTablesExpression(*select_query); - - bool first = true; - String current_database = context.getCurrentDatabase(); - for (const auto * table_expression : tables_expression) - { - DatabaseAndTableWithAlias table_name(*table_expression, current_database); - NamesAndTypesList names_and_types = getNamesAndTypeListFromTableExpression(*table_expression, context); - - removeDuplicateColumns(names_and_types); - - if (!first) - { - /// For joined tables qualify duplicating names. - for (auto & name_and_type : names_and_types) - if (source_columns_set.count(name_and_type.name)) - name_and_type.name = table_name.getQualifiedNamePrefix() + name_and_type.name; - } - - first = false; - - table_with_columns.emplace_back(std::move(table_name), names_and_types.getNames()); - } - } - else - table_with_columns.emplace_back(DatabaseAndTableWithAlias{}, std::move(all_columns_name)); - - QueryNormalizer::Data normalizer_data(result.aliases, context.getSettingsRef(), std::move(table_with_columns)); + QueryNormalizer::Data normalizer_data(result.aliases, settings, context, source_columns_set, std::move(all_columns_name)); QueryNormalizer(normalizer_data).visit(query); } @@ -754,7 +725,7 @@ SyntaxAnalyzerResultPtr SyntaxAnalyzer::analyze( /// Common subexpression elimination. Rewrite rules. normalizeTree(query, result, (storage ? storage->getColumns().ordinary.getNames() : source_columns_list), source_columns_set, - context, select_query, settings.asterisk_left_columns_only != 0); + context, select_query); /// Remove unneeded columns according to 'required_result_columns'. /// Leave all selected columns in case of DISTINCT; columns that contain arrayJoin function inside. 
diff --git a/dbms/tests/queries/0_stateless/00050_any_left_join.reference b/dbms/tests/queries/0_stateless/00050_any_left_join.reference index dc46f7e8a75..98dc95d69e1 100644 --- a/dbms/tests/queries/0_stateless/00050_any_left_join.reference +++ b/dbms/tests/queries/0_stateless/00050_any_left_join.reference @@ -1,10 +1,10 @@ 0 0 0 -1 1 0 +1 0 0 2 2 1 -3 3 0 +3 0 0 4 4 2 -5 5 0 +5 0 0 6 6 3 -7 7 0 +7 0 0 8 8 4 -9 9 0 +9 0 0 diff --git a/dbms/tests/queries/0_stateless/00050_any_left_join.sql b/dbms/tests/queries/0_stateless/00050_any_left_join.sql index 2440a9f93ab..61ccb200307 100644 --- a/dbms/tests/queries/0_stateless/00050_any_left_join.sql +++ b/dbms/tests/queries/0_stateless/00050_any_left_join.sql @@ -1,9 +1,9 @@ -SELECT * FROM +SELECT a.*, b.* FROM ( SELECT number AS k FROM system.numbers LIMIT 10 -) +) AS a ANY LEFT JOIN ( SELECT number * 2 AS k, number AS joined FROM system.numbers LIMIT 10 -) +) AS b USING k; diff --git a/dbms/tests/queries/0_stateless/00051_any_inner_join.sql b/dbms/tests/queries/0_stateless/00051_any_inner_join.sql index a3ff3c437cf..986c798d763 100644 --- a/dbms/tests/queries/0_stateless/00051_any_inner_join.sql +++ b/dbms/tests/queries/0_stateless/00051_any_inner_join.sql @@ -1,9 +1,9 @@ -SELECT * FROM +SELECT a.*, b.* FROM ( SELECT number AS k FROM system.numbers LIMIT 10 -) +) AS a ANY INNER JOIN ( SELECT number * 2 AS k, number AS joined FROM system.numbers LIMIT 10 -) +) AS b USING k; diff --git a/dbms/tests/queries/0_stateless/00052_all_left_join.reference b/dbms/tests/queries/0_stateless/00052_all_left_join.reference index 7d48d304488..a33465fcd1d 100644 --- a/dbms/tests/queries/0_stateless/00052_all_left_join.reference +++ b/dbms/tests/queries/0_stateless/00052_all_left_join.reference @@ -1,15 +1,15 @@ -0 0 0 -0 0 1 -1 1 2 -1 1 3 -2 2 4 -2 2 5 -3 3 6 -3 3 7 -4 4 8 -4 4 9 -5 5 0 -6 6 0 -7 7 0 -8 8 0 -9 9 0 +0 0 +0 1 +1 2 +1 3 +2 4 +2 5 +3 6 +3 7 +4 8 +4 9 +5 0 +6 0 +7 0 +8 0 +9 0 diff --git 
a/dbms/tests/queries/0_stateless/00053_all_inner_join.reference b/dbms/tests/queries/0_stateless/00053_all_inner_join.reference index 24857668974..15bed0fbe0c 100644 --- a/dbms/tests/queries/0_stateless/00053_all_inner_join.reference +++ b/dbms/tests/queries/0_stateless/00053_all_inner_join.reference @@ -1,10 +1,10 @@ 0 0 0 -0 0 1 -1 1 2 -1 1 3 -2 2 4 -2 2 5 -3 3 6 -3 3 7 -4 4 8 -4 4 9 +0 1 1 +1 2 2 +1 3 3 +2 4 4 +2 0 5 +3 0 6 +3 0 7 +4 0 8 +4 0 9 diff --git a/dbms/tests/queries/0_stateless/00053_all_inner_join.sql b/dbms/tests/queries/0_stateless/00053_all_inner_join.sql index 18a94588033..b2e65c80574 100644 --- a/dbms/tests/queries/0_stateless/00053_all_inner_join.sql +++ b/dbms/tests/queries/0_stateless/00053_all_inner_join.sql @@ -1,9 +1,9 @@ -SELECT * FROM +SELECT a.*, b.* FROM ( SELECT number AS k FROM system.numbers LIMIT 10 -) +) AS a ALL INNER JOIN ( SELECT intDiv(number, 2) AS k, number AS joined FROM system.numbers LIMIT 10 -) +) AS b USING k; diff --git a/dbms/tests/queries/0_stateless/00054_join_string.reference b/dbms/tests/queries/0_stateless/00054_join_string.reference index 4c35b240b32..75a0a5bc5fc 100644 --- a/dbms/tests/queries/0_stateless/00054_join_string.reference +++ b/dbms/tests/queries/0_stateless/00054_join_string.reference @@ -1,15 +1,15 @@ -A A 0 -A A 1 -B B 2 -B B 3 -C C 4 -C C 5 -D D 6 -D D 7 -E E 8 -E E 9 -F F 0 -G G 0 -H H 0 -I I 0 -J J 0 +A 0 +A 1 +B 2 +B 3 +C 4 +C 5 +D 6 +D 7 +E 8 +E 9 +F 0 +G 0 +H 0 +I 0 +J 0 diff --git a/dbms/tests/queries/0_stateless/00555_right_join_excessive_rows.sql b/dbms/tests/queries/0_stateless/00555_right_join_excessive_rows.sql index 668ad5e1bc9..1ec82d5f1b4 100644 --- a/dbms/tests/queries/0_stateless/00555_right_join_excessive_rows.sql +++ b/dbms/tests/queries/0_stateless/00555_right_join_excessive_rows.sql @@ -1,2 +1,2 @@ SET max_block_size = 10; -SELECT * FROM (select toUInt64(1) s limit 1) any right join (select number s from numbers(11)) using (s) ORDER BY s; +SELECT * FROM (select toUInt64(1) s 
limit 1) any right join (select number s, s as x from numbers(11)) using (s) ORDER BY s; diff --git a/dbms/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.reference b/dbms/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.reference index 936106d559e..a0265bdb7ed 100644 --- a/dbms/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.reference +++ b/dbms/tests/queries/0_stateless/00585_union_all_subquery_aggregation_column_removal.reference @@ -14,10 +14,10 @@ 2 facebook.com 1 google.com 2 yandex.ru -1 baidu.com 1 baidu.com -1 google.com 1 google.com -2 facebook.com 2 facebook.com -2 yandex.ru 2 yandex.ru +1 baidu.com +1 google.com +2 facebook.com +2 yandex.ru 1 1 2 diff --git a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference index 3dd49a0bede..df5aebabc89 100644 --- a/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference +++ b/dbms/tests/queries/0_stateless/00597_push_down_predicate.reference @@ -3,7 +3,7 @@ 1 1 -------Need push down------- -0 0 +0 1 1 1 @@ -23,13 +23,13 @@ 1 2000-01-01 1 2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1 -2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1 +2000-01-01 1 test string 1 1 2000-01-01 test string 1 1 2000-01-01 1 test string 1 1 -1 2000-01-01 2000-01-01 1 test string 1 1 +1 2000-01-01 1 test string 1 1 2000-01-01 1 test string 1 1 2000-01-01 2 test string 2 2 1 1 -------Push to having expression, need check.------- -------Compatibility test------- -1 2000-01-01 2000-01-01 1 test string 1 1 +1 2000-01-01 test string 1 1 diff --git a/dbms/tests/queries/0_stateless/00679_replace_asterisk.reference b/dbms/tests/queries/0_stateless/00679_replace_asterisk.reference index ab22461f6f2..d05e3183147 100644 --- a/dbms/tests/queries/0_stateless/00679_replace_asterisk.reference +++ 
b/dbms/tests/queries/0_stateless/00679_replace_asterisk.reference @@ -1,4 +1,4 @@ 1 2 -1 2 3 1 4 5 -1 2 1 3 1 3 -1 2 1 3 1 3 3 +1 2 3 4 5 +1 2 3 1 3 +1 2 3 1 3 3 diff --git a/dbms/tests/queries/0_stateless/00702_join_with_using.reference b/dbms/tests/queries/0_stateless/00702_join_with_using.reference index 53594f8d906..c9035acc656 100644 --- a/dbms/tests/queries/0_stateless/00702_join_with_using.reference +++ b/dbms/tests/queries/0_stateless/00702_join_with_using.reference @@ -1,3 +1,39 @@ -1 1 1 1 -2 2 2 2 -3 3 3 3 +1 1 +2 2 +3 3 +1 John Robert +1 John Susan +3 Daniel Sarah +4 James David +4 James Joseph +5 Amanda Robert +1 John Robert +1 John Susan +3 Daniel Sarah +4 James David +4 James Joseph +5 Amanda Robert +1 John Robert +1 John Susan +3 Daniel Sarah +4 James David +4 James Joseph +5 Amanda Robert +1 John Robert +1 John Susan +3 Daniel Sarah +4 James David +4 James Joseph +5 Amanda Robert +1 John Robert +1 John Susan +3 Daniel Sarah +4 James David +4 James Joseph +5 Amanda Robert +1 John Robert +1 John Susan +3 Daniel Sarah +4 James David +4 James Joseph +5 Amanda Robert diff --git a/dbms/tests/queries/0_stateless/00702_join_with_using.sql b/dbms/tests/queries/0_stateless/00702_join_with_using.sql index 4dcb4461018..6956730eabf 100644 --- a/dbms/tests/queries/0_stateless/00702_join_with_using.sql +++ b/dbms/tests/queries/0_stateless/00702_join_with_using.sql @@ -11,3 +11,29 @@ SELECT * FROM test.using1 ALL LEFT JOIN (SELECT * FROM test.using2) USING (a, a, DROP TABLE test.using1; DROP TABLE test.using2; + +-- + +use test; +drop table if exists persons; +drop table if exists children; + +create table persons (id String, name String) engine MergeTree order by id; +create table children (id String, childName String) engine MergeTree order by id; + +insert into persons (id, name) +values ('1', 'John'), ('2', 'Jack'), ('3', 'Daniel'), ('4', 'James'), ('5', 'Amanda'); + +insert into children (id, childName) +values ('1', 'Robert'), ('1', 'Susan'), ('3', 'Sarah'), 
('4', 'David'), ('4', 'Joseph'), ('5', 'Robert'); + +select * from persons all inner join children using id; +select * from persons all inner join (select * from children) as j using id; +select * from (select * from persons) as s all inner join (select * from children ) as j using id; +-- +select * from persons all inner join (select * from children) using id; +select * from (select * from persons) all inner join (select * from children) using id; +select * from (select * from persons) as s all inner join (select * from children) using id; + +drop table persons; +drop table children; diff --git a/dbms/tests/queries/0_stateless/00725_join_on_bug_2.sql b/dbms/tests/queries/0_stateless/00725_join_on_bug_2.sql index 116295d967f..7e95aa4a1d3 100644 --- a/dbms/tests/queries/0_stateless/00725_join_on_bug_2.sql +++ b/dbms/tests/queries/0_stateless/00725_join_on_bug_2.sql @@ -9,7 +9,7 @@ insert into test.s values(1,1); select a, b, s_a, s_b from test.t all left join (select a,b,a s_a, b s_b from test.s) using (a,b); select '-'; -select * from test.t all left join test.s using (a,b); +select t.*, s.* from test.t all left join test.s using (a,b); select '-'; select a,b,s_a,s_b from test.t all left join (select a, b, a s_a, b s_b from test.s) s on (s.a = t.a and s.b = t.b); select '-'; diff --git a/dbms/tests/queries/0_stateless/00740_database_in_nested_view.sql b/dbms/tests/queries/0_stateless/00740_database_in_nested_view.sql index e4dabc3a5a6..afaf23b4950 100644 --- a/dbms/tests/queries/0_stateless/00740_database_in_nested_view.sql +++ b/dbms/tests/queries/0_stateless/00740_database_in_nested_view.sql @@ -7,7 +7,7 @@ USE test; CREATE VIEW test AS SELECT 1 AS N; CREATE VIEW test_view AS SELECT * FROM test; CREATE VIEW test_nested_view AS SELECT * FROM (SELECT * FROM test); -CREATE VIEW test_joined_view AS SELECT * FROM test ANY LEFT JOIN test USING N; +CREATE VIEW test_joined_view AS SELECT *, N AS x FROM test ANY LEFT JOIN test USING N; SELECT * FROM test_view; SELECT * 
FROM test_nested_view; diff --git a/dbms/tests/queries/0_stateless/00800_low_cardinality_join.reference b/dbms/tests/queries/0_stateless/00800_low_cardinality_join.reference index 31d1de2d8c7..8e032c0a542 100644 --- a/dbms/tests/queries/0_stateless/00800_low_cardinality_join.reference +++ b/dbms/tests/queries/0_stateless/00800_low_cardinality_join.reference @@ -1,12 +1,12 @@ -0 0 -0 0 -0 0 -0 0 -0 0 -0 0 -0 0 -0 0 -0 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 - 0 0 0 0