From 0a9b1ee8031dbb502cd623d130c0baae4c951111 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 13 Jan 2022 20:23:44 +0300 Subject: [PATCH 1/4] Remove RestoreQualifiedNamesMatcher::Data::rename (always true) Signed-off-by: Azat Khuzhin --- src/Interpreters/TranslateQualifiedNamesVisitor.cpp | 3 +-- src/Interpreters/TranslateQualifiedNamesVisitor.h | 1 - src/Storages/StorageDistributed.cpp | 1 - 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp index 2d1b6b3f239..0d7d56058b9 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.cpp +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.cpp @@ -363,8 +363,7 @@ void RestoreQualifiedNamesMatcher::visit(ASTIdentifier & identifier, ASTPtr &, D if (IdentifierSemantic::getMembership(identifier)) { identifier.restoreTable(); // TODO(ilezhankin): should restore qualified name here - why exactly here? - if (data.rename) - data.changeTable(identifier); + data.changeTable(identifier); } } } diff --git a/src/Interpreters/TranslateQualifiedNamesVisitor.h b/src/Interpreters/TranslateQualifiedNamesVisitor.h index 1ed4da57a93..0f35d052ed2 100644 --- a/src/Interpreters/TranslateQualifiedNamesVisitor.h +++ b/src/Interpreters/TranslateQualifiedNamesVisitor.h @@ -67,7 +67,6 @@ struct RestoreQualifiedNamesMatcher { DatabaseAndTableWithAlias distributed_table; DatabaseAndTableWithAlias remote_table; - bool rename = false; void changeTable(ASTIdentifier & identifier) const; }; diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 19869b77106..177a2e39741 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -150,7 +150,6 @@ ASTPtr rewriteSelectQuery(const ASTPtr & query, const std::string & database, co data.distributed_table = DatabaseAndTableWithAlias(*getTableExpression(query->as(), 0)); data.remote_table.database = database; data.remote_table.table = table; - data.rename = true; RestoreQualifiedNamesVisitor(data).visit(modified_query_ast); } From 249bdce4539a48ec4c84d0bf1e4e8f323e6b402f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 13 Jan 2022 20:23:44 +0300 Subject: [PATCH 2/4] Fix AddDefaultDatabaseVisitor for temporary tables This will fix rewriting of distributed queries with GLOBAL IN v2: get external tables only for non-global context Signed-off-by: Azat Khuzhin --- src/Interpreters/AddDefaultDatabaseVisitor.h | 30 ++++++++++++++------ 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/src/Interpreters/AddDefaultDatabaseVisitor.h b/src/Interpreters/AddDefaultDatabaseVisitor.h index d5039a2f19e..01da1c45b00 100644 --- a/src/Interpreters/AddDefaultDatabaseVisitor.h +++ b/src/Interpreters/AddDefaultDatabaseVisitor.h @@ -19,6 +19,7 @@ #include #include #include +#include namespace DB { @@ -37,7 +38,15 @@ public: : context(context_) , database_name(database_name_) , only_replace_current_database_function(only_replace_current_database_function_) - {} + { + if (!context->isGlobalContext()) + { + for (const auto & [table_name, _ /* storage */] : context->getExternalTables()) + { + external_tables.insert(table_name); + } + } + } void visitDDL(ASTPtr & ast) const { @@ -81,6 +90,7 @@ private: ContextPtr context; const String database_name; + std::set external_tables; bool only_replace_current_database_function = false; @@ -138,13 +148,17 @@ private: void visit(const ASTTableIdentifier & identifier, ASTPtr & ast) const { - if (!identifier.compound()) - { - auto qualified_identifier = std::make_shared(database_name, identifier.name()); - if (!identifier.alias.empty()) - qualified_identifier->setAlias(identifier.alias); - ast = qualified_identifier; - } + /// Already has database. + if (identifier.compound()) + return; + /// There is temporary table with such name, should not be rewritten. + if (external_tables.count(identifier.shortName())) + return; + + auto qualified_identifier = std::make_shared(database_name, identifier.name()); + if (!identifier.alias.empty()) + qualified_identifier->setAlias(identifier.alias); + ast = qualified_identifier; } void visit(ASTSubquery & subquery, ASTPtr &) const From 4dcb332be9efcad9a2b7586fb7d1313827c1005d Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 14 Jan 2022 11:16:00 +0300 Subject: [PATCH 3/4] Add ability to rewrite only JOIN tables in AddDefaultDatabaseVisitor Signed-off-by: Azat Khuzhin --- src/Interpreters/AddDefaultDatabaseVisitor.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Interpreters/AddDefaultDatabaseVisitor.h b/src/Interpreters/AddDefaultDatabaseVisitor.h index 01da1c45b00..2e63e6e1b43 100644 --- a/src/Interpreters/AddDefaultDatabaseVisitor.h +++ b/src/Interpreters/AddDefaultDatabaseVisitor.h @@ -34,10 +34,12 @@ public: explicit AddDefaultDatabaseVisitor( ContextPtr context_, const String & database_name_, - bool only_replace_current_database_function_ = false) + bool only_replace_current_database_function_ = false, + bool only_replace_in_join_ = false) : context(context_) , database_name(database_name_) , only_replace_current_database_function(only_replace_current_database_function_) + , only_replace_in_join(only_replace_in_join_) { if (!context->isGlobalContext()) { @@ -93,6 +95,7 @@ private: std::set external_tables; bool only_replace_current_database_function = false; + bool only_replace_in_join = false; void visit(ASTSelectWithUnionQuery & select, ASTPtr &) const { @@ -134,6 +137,9 @@ private: void visit(ASTTablesInSelectQueryElement & tables_element, ASTPtr &) const { + if (only_replace_in_join && !tables_element.table_join) + return; + if (tables_element.table_expression) tryVisit(tables_element.table_expression); } From c341b3b237bc048b2af670260e8e0f4784cc6ca4 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 13 Jan 2022 20:23:44 +0300 Subject: [PATCH 4/4] Add current database to table names in JOIN section for distributed queries This should fix JOIN w/o explicit database. v2: rewrite only JOIN section, since there is old behavior that relies on default_database for IN section, see [1]: - 01487_distributed_in_not_default_db - 01152_cross_replication [1]: https://s3.amazonaws.com/clickhouse-test-reports/33611/d0ea3c76fa51131171b1825939680867eb1c04da/fast_test__actions_.html Signed-off-by: Azat Khuzhin --- src/Storages/StorageDistributed.cpp | 26 +++++++++++++++---- ...istributed_join_current_database.reference | 14 ++++++++++ ...2175_distributed_join_current_database.sql | 19 ++++++++++++++ 3 files changed, 54 insertions(+), 5 deletions(-) create mode 100644 tests/queries/0_stateless/02175_distributed_join_current_database.reference create mode 100644 tests/queries/0_stateless/02175_distributed_join_current_database.sql diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 177a2e39741..6f165dfb4a5 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -126,7 +127,12 @@ namespace /// select query has database, table and table function names as AST pointers /// Creates a copy of query, changes database, table and table function names. -ASTPtr rewriteSelectQuery(const ASTPtr & query, const std::string & database, const std::string & table, ASTPtr table_function_ptr = nullptr) +ASTPtr rewriteSelectQuery( + ContextPtr context, + const ASTPtr & query, + const std::string & remote_database, + const std::string & remote_table, + ASTPtr table_function_ptr = nullptr) { auto modified_query_ast = query->clone(); @@ -140,7 +146,7 @@ ASTPtr rewriteSelectQuery(const ASTPtr & query, const std::string & database, co if (table_function_ptr) select_query.addTableFunction(table_function_ptr); else - select_query.replaceDatabaseAndTable(database, table); + select_query.replaceDatabaseAndTable(remote_database, remote_table); /// Restore long column names (cause our short names are ambiguous). /// TODO: aliased table functions & CREATE TABLE AS table function cases @@ -148,11 +154,20 @@ ASTPtr rewriteSelectQuery(const ASTPtr & query, const std::string & database, co { RestoreQualifiedNamesVisitor::Data data; data.distributed_table = DatabaseAndTableWithAlias(*getTableExpression(query->as(), 0)); - data.remote_table.database = database; - data.remote_table.table = table; + data.remote_table.database = remote_database; + data.remote_table.table = remote_table; RestoreQualifiedNamesVisitor(data).visit(modified_query_ast); } + /// To make local JOIN works, default database should be added to table names. + /// But only for JOIN section, since the following should work using default_database: + /// - SELECT * FROM d WHERE value IN (SELECT l.value FROM l) ORDER BY value + /// (see 01487_distributed_in_not_default_db) + AddDefaultDatabaseVisitor visitor(context, context->getCurrentDatabase(), + /* only_replace_current_database_function_= */false, + /* only_replace_in_join_= */true); + visitor.visit(modified_query_ast); + return modified_query_ast; } @@ -600,7 +615,8 @@ void StorageDistributed::read( throw Exception(ErrorCodes::ILLEGAL_FINAL, "Final modifier is not allowed together with parallel reading from replicas feature"); const auto & modified_query_ast = rewriteSelectQuery( - query_info.query, remote_database, remote_table, remote_table_function_ptr); + local_context, query_info.query, + remote_database, remote_table, remote_table_function_ptr); Block header = InterpreterSelectQuery(query_info.query, local_context, SelectQueryOptions(processed_stage).analyze()).getSampleBlock(); diff --git a/tests/queries/0_stateless/02175_distributed_join_current_database.reference b/tests/queries/0_stateless/02175_distributed_join_current_database.reference new file mode 100644 index 00000000000..cfa2db0708e --- /dev/null +++ b/tests/queries/0_stateless/02175_distributed_join_current_database.reference @@ -0,0 +1,14 @@ +-- { echoOn } +select * from dist_02175 l join local_02175 r using dummy; +0 +0 +select * from dist_02175 l global join local_02175 r using dummy; +0 +0 +-- explicit database for distributed table +select * from remote('127.1', currentDatabase(), dist_02175) l join local_02175 r using dummy; +0 +0 +select * from remote('127.1', currentDatabase(), dist_02175) l global join local_02175 r using dummy; +0 +0 diff --git a/tests/queries/0_stateless/02175_distributed_join_current_database.sql b/tests/queries/0_stateless/02175_distributed_join_current_database.sql new file mode 100644 index 00000000000..94b949df0de --- /dev/null +++ b/tests/queries/0_stateless/02175_distributed_join_current_database.sql @@ -0,0 +1,19 @@ +-- Tags: shard + +drop table if exists local_02175; +drop table if exists dist_02175; + +create table local_02175 engine=Memory() as select * from system.one; +create table dist_02175 as local_02175 engine=Distributed(test_cluster_two_shards, currentDatabase(), local_02175); + +-- { echoOn } +select * from dist_02175 l join local_02175 r using dummy; +select * from dist_02175 l global join local_02175 r using dummy; + +-- explicit database for distributed table +select * from remote('127.1', currentDatabase(), dist_02175) l join local_02175 r using dummy; +select * from remote('127.1', currentDatabase(), dist_02175) l global join local_02175 r using dummy; + +-- { echoOff } +drop table local_02175; +drop table dist_02175;