mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-24 16:42:05 +00:00
CROSS JOIN to INNER JOIN converter
This commit is contained in:
parent
2f833d4753
commit
170c108a59
128
dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp
Normal file
128
dbms/src/Interpreters/CrossToInnerJoinVisitor.cpp
Normal file
@ -0,0 +1,128 @@
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Interpreters/CrossToInnerJoinVisitor.h>
|
||||
#include <Interpreters/DatabaseAndTableWithAlias.h>
|
||||
#include <Interpreters/IdentifierSemantic.h>
|
||||
#include <Parsers/ASTSelectQuery.h>
|
||||
#include <Parsers/ASTTablesInSelectQuery.h>
|
||||
#include <Parsers/ASTIdentifier.h>
|
||||
#include <Parsers/ASTExpressionList.h>
|
||||
#include <Parsers/ParserTablesInSelectQuery.h>
|
||||
#include <Parsers/ExpressionListParsers.h>
|
||||
#include <Parsers/parseQuery.h>
|
||||
#include <IO/WriteHelpers.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
/// TODO: array join aliases?
|
||||
struct CheckColumnsVisitorData
|
||||
{
|
||||
using TypeToVisit = ASTIdentifier;
|
||||
|
||||
const std::vector<DatabaseAndTableWithAlias> & tables;
|
||||
size_t visited;
|
||||
size_t found;
|
||||
|
||||
size_t allMatch() const { return visited == found; }
|
||||
|
||||
void visit(ASTIdentifier & node, ASTPtr &)
|
||||
{
|
||||
++visited;
|
||||
for (const auto & t : tables)
|
||||
if (IdentifierSemantic::canReferColumnToTable(node, t))
|
||||
++found;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
static bool extractTableName(const ASTTableExpression & expr, std::vector<DatabaseAndTableWithAlias> & names)
|
||||
{
|
||||
/// Subselects are not supported.
|
||||
if (!expr.database_and_table_name)
|
||||
return false;
|
||||
|
||||
names.emplace_back(DatabaseAndTableWithAlias(expr));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
static ASTPtr getCrossJoin(ASTSelectQuery & select, std::vector<DatabaseAndTableWithAlias> & table_names)
|
||||
{
|
||||
if (!select.tables)
|
||||
return {};
|
||||
|
||||
auto tables = typeid_cast<const ASTTablesInSelectQuery *>(select.tables.get());
|
||||
if (!tables)
|
||||
return {};
|
||||
|
||||
size_t num_tables = tables->children.size();
|
||||
if (num_tables != 2)
|
||||
return {};
|
||||
|
||||
auto left = typeid_cast<const ASTTablesInSelectQueryElement *>(tables->children[0].get());
|
||||
auto right = typeid_cast<const ASTTablesInSelectQueryElement *>(tables->children[1].get());
|
||||
if (!left || !right || !right->table_join)
|
||||
return {};
|
||||
|
||||
if (auto join = typeid_cast<const ASTTableJoin *>(right->table_join.get()))
|
||||
{
|
||||
if (join->kind == ASTTableJoin::Kind::Cross)
|
||||
{
|
||||
if (!join->children.empty())
|
||||
throw Exception("Logical error: CROSS JOIN has expressions", ErrorCodes::LOGICAL_ERROR);
|
||||
|
||||
auto & left_expr = typeid_cast<const ASTTableExpression &>(*left->table_expression);
|
||||
auto & right_expr = typeid_cast<const ASTTableExpression &>(*right->table_expression);
|
||||
|
||||
table_names.reserve(2);
|
||||
if (extractTableName(left_expr, table_names) &&
|
||||
extractTableName(right_expr, table_names))
|
||||
return right->table_join;
|
||||
}
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
|
||||
std::vector<ASTPtr *> CrossToInnerJoinMatcher::visit(ASTPtr & ast, Data & data)
|
||||
{
|
||||
if (auto * t = typeid_cast<ASTSelectQuery *>(ast.get()))
|
||||
visit(*t, ast, data);
|
||||
return {};
|
||||
}
|
||||
|
||||
void CrossToInnerJoinMatcher::visit(ASTSelectQuery & select, ASTPtr & ast, Data & data)
|
||||
{
|
||||
using CheckColumnsMatcher = OneTypeMatcher<CheckColumnsVisitorData>;
|
||||
using CheckColumnsVisitor = InDepthNodeVisitor<CheckColumnsMatcher, true>;
|
||||
|
||||
std::vector<DatabaseAndTableWithAlias> table_names;
|
||||
ASTPtr ast_join = getCrossJoin(select, table_names);
|
||||
if (!ast_join)
|
||||
return;
|
||||
|
||||
/// check Identifier names from where expression
|
||||
CheckColumnsVisitor::Data columns_data{table_names, 0, 0};
|
||||
CheckColumnsVisitor(columns_data).visit(select.where_expression);
|
||||
|
||||
if (!columns_data.allMatch())
|
||||
return;
|
||||
|
||||
auto & join = typeid_cast<ASTTableJoin &>(*ast_join);
|
||||
join.kind = ASTTableJoin::Kind::Inner;
|
||||
join.strictness = ASTTableJoin::Strictness::All; /// TODO: do we need it?
|
||||
|
||||
join.on_expression.swap(select.where_expression);
|
||||
join.children.push_back(join.on_expression);
|
||||
|
||||
ast = ast->clone(); /// rewrite AST in right manner
|
||||
data.done = true;
|
||||
}
|
||||
|
||||
}
|
30
dbms/src/Interpreters/CrossToInnerJoinVisitor.h
Normal file
30
dbms/src/Interpreters/CrossToInnerJoinVisitor.h
Normal file
@ -0,0 +1,30 @@
|
||||
#pragma once
|
||||
|
||||
#include <Interpreters/InDepthNodeVisitor.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
class ASTSelectQuery;
|
||||
|
||||
/// AST transformer. It replaces cross joins with equivalented inner join if possible.
|
||||
class CrossToInnerJoinMatcher
|
||||
{
|
||||
public:
|
||||
struct Data
|
||||
{
|
||||
bool done = false;
|
||||
};
|
||||
|
||||
static constexpr const char * label = "JoinToSubqueryTransform";
|
||||
|
||||
static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; }
|
||||
static std::vector<ASTPtr *> visit(ASTPtr & ast, Data & data);
|
||||
|
||||
private:
|
||||
static void visit(ASTSelectQuery & select, ASTPtr & ast, Data & data);
|
||||
};
|
||||
|
||||
using CrossToInnerJoinVisitor = InDepthNodeVisitor<CrossToInnerJoinMatcher, true>;
|
||||
|
||||
}
|
@ -27,7 +27,7 @@ struct DatabaseAndTableWithAlias
|
||||
DatabaseAndTableWithAlias() = default;
|
||||
DatabaseAndTableWithAlias(const ASTPtr & identifier_node, const String & current_database = "");
|
||||
DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database = "");
|
||||
DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database);
|
||||
DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database = "");
|
||||
|
||||
/// "alias." or "table." if alias is empty
|
||||
String getQualifiedNamePrefix() const;
|
||||
|
@ -298,6 +298,7 @@ struct Settings
|
||||
M(SettingBool, enable_unaligned_array_join, false, "Allow ARRAY JOIN with multiple arrays that have different sizes. When this settings is enabled, arrays will be resized to the longest one.") \
|
||||
M(SettingBool, low_cardinality_allow_in_native_format, true, "Use LowCardinality type in Native format. Otherwise, convert LowCardinality columns to ordinary for select query, and convert ordinary columns to required LowCardinality for insert query.") \
|
||||
M(SettingBool, allow_experimental_multiple_joins_emulation, false, "Emulate multiple joins using subselects") \
|
||||
M(SettingBool, allow_experimental_cross_to_join_conversion, false, "Convert CROSS JOIN to INNER JOIN if possible") \
|
||||
|
||||
#define DECLARE(TYPE, NAME, DEFAULT, DESCRIPTION) \
|
||||
TYPE NAME {DEFAULT};
|
||||
|
@ -22,6 +22,7 @@
|
||||
#include <Parsers/queryToString.h>
|
||||
|
||||
#include <Interpreters/JoinToSubqueryTransformVisitor.h>
|
||||
#include <Interpreters/CrossToInnerJoinVisitor.h>
|
||||
#include <Interpreters/Quota.h>
|
||||
#include <Interpreters/InterpreterFactory.h>
|
||||
#include <Interpreters/ProcessList.h>
|
||||
@ -199,6 +200,14 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
|
||||
logQuery(queryToString(*ast), context);
|
||||
}
|
||||
|
||||
if (settings.allow_experimental_cross_to_join_conversion)
|
||||
{
|
||||
CrossToInnerJoinVisitor::Data cross_to_inner;
|
||||
CrossToInnerJoinVisitor(cross_to_inner).visit(ast);
|
||||
if (cross_to_inner.done)
|
||||
logQuery(queryToString(*ast), context);
|
||||
}
|
||||
|
||||
/// Check the limits.
|
||||
checkASTSizeLimits(*ast, settings);
|
||||
|
||||
|
35
dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql
Normal file
35
dbms/tests/queries/0_stateless/00826_cross_to_inner_join.sql
Normal file
@ -0,0 +1,35 @@
|
||||
USE test;
|
||||
|
||||
DROP TABLE IF EXISTS t1;
|
||||
DROP TABLE IF EXISTS t2;
|
||||
|
||||
CREATE TABLE t1 (a Int8, b Nullable(Int8)) ENGINE = Memory;
|
||||
CREATE TABLE t2 (a Int8, b Nullable(Int8)) ENGINE = Memory;
|
||||
|
||||
INSERT INTO t1 values (1,1), (2,2);
|
||||
INSERT INTO t2 values (1,1);
|
||||
INSERT INTO t2 (a) values (2), (3);
|
||||
|
||||
SELECT 'cross';
|
||||
SELECT * FROM t1 cross join t2 where t1.a = t2.a;
|
||||
SELECT 'cross nullable';
|
||||
SELECT * FROM t1 cross join t2 where t1.b = t2.b;
|
||||
SELECT 'cross nullable vs not nullable';
|
||||
SELECT * FROM t1 cross join t2 where t1.a = t2.b;
|
||||
|
||||
SET enable_debug_queries = 1;
|
||||
AST SELECT * FROM t1 cross join t2 where t1.a = t2.a;
|
||||
|
||||
SET allow_experimental_cross_to_join_conversion = 1;
|
||||
|
||||
AST SELECT * FROM t1 cross join t2 where t1.a = t2.a;
|
||||
|
||||
SELECT 'cross';
|
||||
SELECT * FROM t1 cross join t2 where t1.a = t2.a;
|
||||
SELECT 'cross nullable';
|
||||
SELECT * FROM t1 cross join t2 where t1.b = t2.b;
|
||||
SELECT 'cross nullable vs not nullable';
|
||||
SELECT * FROM t1 cross join t2 where t1.a = t2.b;
|
||||
|
||||
DROP TABLE t1;
|
||||
DROP TABLE t2;
|
Loading…
Reference in New Issue
Block a user