Remove unknown columns from joined table in where for queries to external database engines

This commit is contained in:
vdimir 2021-03-11 20:53:29 +03:00
parent 08f312b1c4
commit 2156401506
No known key found for this signature in database
GPG Key ID: F57B3E10A21DBB31
3 changed files with 81 additions and 2 deletions

View File

@ -148,3 +148,12 @@ TEST(TransformQueryForExternalDatabase, Aliases)
R"(SELECT "field" FROM "test"."table" WHERE ("field" NOT IN ('')) AND ("field" LIKE '%test%'))", R"(SELECT "field" FROM "test"."table" WHERE ("field" NOT IN ('')) AND ("field" LIKE '%test%'))",
state.context, state.columns); state.context, state.columns);
} }
TEST(TransformQueryForExternalDatabase, ForeignColumnInWhere)
{
const State & state = State::instance();
check("SELECT column FROM test.table WHERE column > 2 AND (apply_id = 1 OR joined_table.foo = 1)",
R"(SELECT "column" FROM "test"."table" WHERE ("column" > 2) AND ("apply_id" = 1))",
state.context, state.columns);
}

View File

@ -160,8 +160,73 @@ bool isCompatible(const IAST & node)
return node.as<ASTIdentifier>(); return node.as<ASTIdentifier>();
} }
bool removeUnknownSubexpressions(ASTPtr & node, const NameSet & known_names);
void removeUnknownChildren(ASTs & children, const NameSet & known_names)
{
ASTs new_children;
for (auto & child : children)
{
bool leave_child = removeUnknownSubexpressions(child, known_names);
if (leave_child)
new_children.push_back(child);
}
children = std::move(new_children);
} }
/// return `true` if we should leave node in tree
bool removeUnknownSubexpressions(ASTPtr & node, const NameSet & known_names)
{
if (const auto * ident = node->as<ASTIdentifier>())
return known_names.contains(ident->name());
if (const auto * lit = node->as<ASTLiteral>())
return true;
auto * func = node->as<ASTFunction>();
if (func && (func->name == "and" || func->name == "or"))
{
removeUnknownChildren(func->arguments->children, known_names);
/// all children removed, current node can be removed too
if (func->arguments->children.size() == 1)
{
/// if only one child left, pull it on top level
node = func->arguments->children[0];
return true;
}
return !func->arguments->children.empty();
}
bool leave_child = true;
for (auto & child : node->children)
{
leave_child = leave_child && removeUnknownSubexpressions(child, known_names);
if (!leave_child)
break;
}
return leave_child;
}
bool removeUnknownSubexpressionsFromWhere(ASTPtr & node, const NamesAndTypesList & available_columns)
{
if (!node)
return false;
NameSet known_names;
for (const auto & col : available_columns)
known_names.insert(col.name);
if (auto * expr_list = node->as<ASTExpressionList>(); expr_list && !expr_list->children.empty())
{
/// traverse expression list on top level
removeUnknownChildren(expr_list->children, known_names);
return !expr_list->children.empty();
}
return removeUnknownSubexpressions(node, known_names);
}
}
String transformQueryForExternalDatabase( String transformQueryForExternalDatabase(
const SelectQueryInfo & query_info, const SelectQueryInfo & query_info,
@ -191,7 +256,8 @@ String transformQueryForExternalDatabase(
*/ */
ASTPtr original_where = clone_query->as<ASTSelectQuery &>().where(); ASTPtr original_where = clone_query->as<ASTSelectQuery &>().where();
if (original_where) bool where_has_known_columns = removeUnknownSubexpressionsFromWhere(original_where, available_columns);
if (original_where && where_has_known_columns)
{ {
replaceConstantExpressions(original_where, context, available_columns); replaceConstantExpressions(original_where, context, available_columns);

View File

@ -146,10 +146,14 @@ def test_clickhouse_join_for_mysql_database(started_cluster):
"CREATE TABLE default.t1_remote_mysql AS mysql('mysql1:3306','test','t1_mysql_local','root','clickhouse')") "CREATE TABLE default.t1_remote_mysql AS mysql('mysql1:3306','test','t1_mysql_local','root','clickhouse')")
clickhouse_node.query( clickhouse_node.query(
"CREATE TABLE default.t2_remote_mysql AS mysql('mysql1:3306','test','t2_mysql_local','root','clickhouse')") "CREATE TABLE default.t2_remote_mysql AS mysql('mysql1:3306','test','t2_mysql_local','root','clickhouse')")
clickhouse_node.query("INSERT INTO `default`.`t1_remote_mysql` VALUES ('EN','A',''),('RU','B','AAA')")
clickhouse_node.query("INSERT INTO `default`.`t2_remote_mysql` VALUES ('A','AAA'),('Z','')")
assert clickhouse_node.query("SELECT s.pays " assert clickhouse_node.query("SELECT s.pays "
"FROM default.t1_remote_mysql AS s " "FROM default.t1_remote_mysql AS s "
"LEFT JOIN default.t1_remote_mysql AS s_ref " "LEFT JOIN default.t1_remote_mysql AS s_ref "
"ON (s_ref.opco = s.opco AND s_ref.service = s.service)") == '' "ON (s_ref.opco = s.opco AND s_ref.service = s.service) "
"WHERE s_ref.opco != '' AND s.opco != '' ").rstrip() == 'RU'
mysql_node.query("DROP DATABASE test") mysql_node.query("DROP DATABASE test")