From 2156401506b546f1dd4dac961c70aa769e7aec35 Mon Sep 17 00:00:00 2001
From: vdimir
Date: Thu, 11 Mar 2021 20:53:29 +0300
Subject: [PATCH] Remove unknown columns from joined table in where for queries to external database engines

Only arguments of AND are removed selectively: dropping a conjunct can only
make the filter weaker. An expression of any other kind (including OR) is
sent to the external database only if all columns in it are known, otherwise
it is dropped as a whole.
---
 ..._transform_query_for_external_database.cpp |  9 +++
 .../transformQueryForExternalDatabase.cpp     | 72 ++++++++++++++++++-
 .../test_mysql_database_engine/test.py        |  6 +-
 3 files changed, 85 insertions(+), 2 deletions(-)

diff --git a/src/Storages/tests/gtest_transform_query_for_external_database.cpp b/src/Storages/tests/gtest_transform_query_for_external_database.cpp
index 99dfc55ed69..9794d4d3a26 100644
--- a/src/Storages/tests/gtest_transform_query_for_external_database.cpp
+++ b/src/Storages/tests/gtest_transform_query_for_external_database.cpp
@@ -148,3 +148,12 @@ TEST(TransformQueryForExternalDatabase, Aliases)
           R"(SELECT "field" FROM "test"."table" WHERE ("field" NOT IN ('')) AND ("field" LIKE '%test%'))",
           state.context, state.columns);
 }
+
+TEST(TransformQueryForExternalDatabase, ForeignColumnInWhere)
+{
+    const State & state = State::instance();
+
+    check("SELECT column FROM test.table WHERE column > 2 AND apply_id = 1 AND (apply_id = 2 OR joined_table.foo = 1)",
+          R"(SELECT "column" FROM "test"."table" WHERE ("column" > 2) AND ("apply_id" = 1))",
+          state.context, state.columns);
+}
diff --git a/src/Storages/transformQueryForExternalDatabase.cpp b/src/Storages/transformQueryForExternalDatabase.cpp
index 42daf8cfc26..ac1187ba0e2 100644
--- a/src/Storages/transformQueryForExternalDatabase.cpp
+++ b/src/Storages/transformQueryForExternalDatabase.cpp
@@ -160,8 +160,77 @@ bool isCompatible(const IAST & node)
     return node.as<ASTIdentifier>();
 }
 
+bool removeUnknownSubexpressions(ASTPtr & node, const NameSet & known_names);
+
+/// Keeps only the children for which removeUnknownSubexpressions returns `true`.
+void removeUnknownChildren(ASTs & children, const NameSet & known_names)
+{
+    ASTs new_children;
+    for (auto & child : children)
+    {
+        bool leave_child = removeUnknownSubexpressions(child, known_names);
+        if (leave_child)
+            new_children.push_back(child);
+    }
+    children = std::move(new_children);
 }
 
+/// Returns `true` if every identifier in the subtree is a known column. Does not modify the tree.
+bool hasOnlyKnownIdentifiers(const ASTPtr & node, const NameSet & known_names)
+{
+    if (const auto * ident = node->as<ASTIdentifier>())
+        return known_names.contains(ident->name());
+
+    for (const auto & child : node->children)
+        if (!hasOnlyKnownIdentifiers(child, known_names))
+            return false;
+    return true;
+}
+
+/// Returns `true` if we should leave node in tree.
+/// Only arguments of `and` are dropped selectively: removing a conjunct can only make the filter weaker,
+/// so the external database returns a superset of the needed rows.
+/// Every other expression (including `or`) is kept only if all identifiers inside it are known:
+/// dropping a part of it could make the filter stricter and lose rows.
+bool removeUnknownSubexpressions(ASTPtr & node, const NameSet & known_names)
+{
+    auto * func = node->as<ASTFunction>();
+    if (func && func->name == "and")
+    {
+        removeUnknownChildren(func->arguments->children, known_names);
+        if (func->arguments->children.size() == 1)
+        {
+            /// if only one child left, pull it on top level
+            node = func->arguments->children[0];
+            return true;
+        }
+        /// if all children were removed, current node can be removed too
+        return !func->arguments->children.empty();
+    }
+
+    return hasOnlyKnownIdentifiers(node, known_names);
+}
+
+/// Returns `true` if something is left in WHERE after removing expressions with unknown columns.
+bool removeUnknownSubexpressionsFromWhere(ASTPtr & node, const NamesAndTypesList & available_columns)
+{
+    if (!node)
+        return false;
+
+    NameSet known_names;
+    for (const auto & col : available_columns)
+        known_names.insert(col.name);
+
+    if (auto * expr_list = node->as<ASTExpressionList>(); expr_list && !expr_list->children.empty())
+    {
+        /// traverse expression list on top level
+        removeUnknownChildren(expr_list->children, known_names);
+        return !expr_list->children.empty();
+    }
+    return removeUnknownSubexpressions(node, known_names);
+}
+
+}
 
 String transformQueryForExternalDatabase(
     const SelectQueryInfo & query_info,
@@ -191,7 +260,8 @@ String transformQueryForExternalDatabase(
       */
 
     ASTPtr original_where = clone_query->as<ASTSelectQuery &>().where();
-    if (original_where)
+    bool where_has_known_columns = removeUnknownSubexpressionsFromWhere(original_where, available_columns);
+    if (original_where && where_has_known_columns)
     {
         replaceConstantExpressions(original_where, context, available_columns);
 
diff --git a/tests/integration/test_mysql_database_engine/test.py b/tests/integration/test_mysql_database_engine/test.py
index 4d10e2ea6f5..f4b0bb1b2fc 100644
--- a/tests/integration/test_mysql_database_engine/test.py
+++ b/tests/integration/test_mysql_database_engine/test.py
@@ -146,10 +146,14 @@ def test_clickhouse_join_for_mysql_database(started_cluster):
             "CREATE TABLE default.t1_remote_mysql AS mysql('mysql1:3306','test','t1_mysql_local','root','clickhouse')")
         clickhouse_node.query(
             "CREATE TABLE default.t2_remote_mysql AS mysql('mysql1:3306','test','t2_mysql_local','root','clickhouse')")
+        clickhouse_node.query("INSERT INTO `default`.`t1_remote_mysql` VALUES ('EN','A',''),('RU','B','AAA')")
+        clickhouse_node.query("INSERT INTO `default`.`t2_remote_mysql` VALUES ('A','AAA'),('Z','')")
+
         assert clickhouse_node.query("SELECT s.pays "
                                      "FROM default.t1_remote_mysql AS s "
                                      "LEFT JOIN default.t1_remote_mysql AS s_ref "
-                                     "ON (s_ref.opco = s.opco AND s_ref.service = s.service)") == ''
+                                     "ON (s_ref.opco = s.opco AND s_ref.service = s.service) "
+                                     "WHERE s_ref.opco != '' AND s.opco != '' ").rstrip() == 'RU'
         mysql_node.query("DROP DATABASE test")
 
 