Merge pull request #4221 from 4ertus2/cross

CROSS JOIN to INNER JOIN converter
This commit is contained in:
alexey-milovidov 2019-02-02 14:27:05 +03:00 committed by GitHub
commit 25502718ae
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 235 additions and 1 deletions

View File

@ -0,0 +1,129 @@
#include <Common/typeid_cast.h>
#include <Interpreters/CrossToInnerJoinVisitor.h>
#include <Interpreters/DatabaseAndTableWithAlias.h>
#include <Interpreters/IdentifierSemantic.h>
#include <Parsers/ASTSelectQuery.h>
#include <Parsers/ASTTablesInSelectQuery.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTExpressionList.h>
#include <Parsers/ParserTablesInSelectQuery.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/parseQuery.h>
#include <IO/WriteHelpers.h>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
/// TODO: array join aliases?
struct CheckColumnsVisitorData
{
using TypeToVisit = ASTIdentifier;
const std::vector<DatabaseAndTableWithAlias> & tables;
size_t visited;
size_t found;
size_t allMatch() const { return visited == found; }
void visit(ASTIdentifier & node, ASTPtr &)
{
++visited;
for (const auto & t : tables)
if (IdentifierSemantic::canReferColumnToTable(node, t))
++found;
}
};
static bool extractTableName(const ASTTableExpression & expr, std::vector<DatabaseAndTableWithAlias> & names)
{
/// Subselects are not supported.
if (!expr.database_and_table_name)
return false;
names.emplace_back(DatabaseAndTableWithAlias(expr));
return true;
}
static ASTPtr getCrossJoin(ASTSelectQuery & select, std::vector<DatabaseAndTableWithAlias> & table_names)
{
if (!select.tables)
return {};
auto tables = typeid_cast<const ASTTablesInSelectQuery *>(select.tables.get());
if (!tables)
return {};
size_t num_tables = tables->children.size();
if (num_tables != 2)
return {};
auto left = typeid_cast<const ASTTablesInSelectQueryElement *>(tables->children[0].get());
auto right = typeid_cast<const ASTTablesInSelectQueryElement *>(tables->children[1].get());
if (!left || !right || !right->table_join)
return {};
if (auto join = typeid_cast<const ASTTableJoin *>(right->table_join.get()))
{
if (join->kind == ASTTableJoin::Kind::Cross ||
join->kind == ASTTableJoin::Kind::Comma)
{
if (!join->children.empty())
throw Exception("Logical error: CROSS JOIN has expressions", ErrorCodes::LOGICAL_ERROR);
auto & left_expr = typeid_cast<const ASTTableExpression &>(*left->table_expression);
auto & right_expr = typeid_cast<const ASTTableExpression &>(*right->table_expression);
table_names.reserve(2);
if (extractTableName(left_expr, table_names) &&
extractTableName(right_expr, table_names))
return right->table_join;
}
}
return {};
}
std::vector<ASTPtr *> CrossToInnerJoinMatcher::visit(ASTPtr & ast, Data & data)
{
if (auto * t = typeid_cast<ASTSelectQuery *>(ast.get()))
visit(*t, ast, data);
return {};
}
void CrossToInnerJoinMatcher::visit(ASTSelectQuery & select, ASTPtr & ast, Data & data)
{
using CheckColumnsMatcher = OneTypeMatcher<CheckColumnsVisitorData>;
using CheckColumnsVisitor = InDepthNodeVisitor<CheckColumnsMatcher, true>;
std::vector<DatabaseAndTableWithAlias> table_names;
ASTPtr ast_join = getCrossJoin(select, table_names);
if (!ast_join)
return;
/// check Identifier names from where expression
CheckColumnsVisitor::Data columns_data{table_names, 0, 0};
CheckColumnsVisitor(columns_data).visit(select.where_expression);
if (!columns_data.allMatch())
return;
auto & join = typeid_cast<ASTTableJoin &>(*ast_join);
join.kind = ASTTableJoin::Kind::Inner;
join.strictness = ASTTableJoin::Strictness::All; /// TODO: do we need it?
join.on_expression.swap(select.where_expression);
join.children.push_back(join.on_expression);
ast = ast->clone(); /// rewrite AST in right manner
data.done = true;
}
}

View File

@ -0,0 +1,30 @@
#pragma once
#include <Interpreters/InDepthNodeVisitor.h>
namespace DB
{
class ASTSelectQuery;
/// AST transformer. It replaces cross joins with equivalented inner join if possible.
class CrossToInnerJoinMatcher
{
public:
struct Data
{
bool done = false;
};
static constexpr const char * label = "JoinToSubqueryTransform";
static bool needChildVisit(ASTPtr &, const ASTPtr &) { return true; }
static std::vector<ASTPtr *> visit(ASTPtr & ast, Data & data);
private:
static void visit(ASTSelectQuery & select, ASTPtr & ast, Data & data);
};
using CrossToInnerJoinVisitor = InDepthNodeVisitor<CrossToInnerJoinMatcher, true>;
}

View File

@ -27,7 +27,7 @@ struct DatabaseAndTableWithAlias
DatabaseAndTableWithAlias() = default;
DatabaseAndTableWithAlias(const ASTPtr & identifier_node, const String & current_database = "");
DatabaseAndTableWithAlias(const ASTIdentifier & identifier, const String & current_database = "");
DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database);
DatabaseAndTableWithAlias(const ASTTableExpression & table_expression, const String & current_database = "");
/// "alias." or "table." if alias is empty
String getQualifiedNamePrefix() const;

View File

@ -298,6 +298,7 @@ struct Settings
M(SettingBool, enable_unaligned_array_join, false, "Allow ARRAY JOIN with multiple arrays that have different sizes. When this settings is enabled, arrays will be resized to the longest one.") \
M(SettingBool, low_cardinality_allow_in_native_format, true, "Use LowCardinality type in Native format. Otherwise, convert LowCardinality columns to ordinary for select query, and convert ordinary columns to required LowCardinality for insert query.") \
M(SettingBool, allow_experimental_multiple_joins_emulation, false, "Emulate multiple joins using subselects") \
M(SettingBool, allow_experimental_cross_to_join_conversion, false, "Convert CROSS JOIN to INNER JOIN if possible") \
#define DECLARE(TYPE, NAME, DEFAULT, DESCRIPTION) \
TYPE NAME {DEFAULT};

View File

@ -22,6 +22,7 @@
#include <Parsers/queryToString.h>
#include <Interpreters/JoinToSubqueryTransformVisitor.h>
#include <Interpreters/CrossToInnerJoinVisitor.h>
#include <Interpreters/Quota.h>
#include <Interpreters/InterpreterFactory.h>
#include <Interpreters/ProcessList.h>
@ -199,6 +200,14 @@ static std::tuple<ASTPtr, BlockIO> executeQueryImpl(
logQuery(queryToString(*ast), context);
}
if (settings.allow_experimental_cross_to_join_conversion)
{
CrossToInnerJoinVisitor::Data cross_to_inner;
CrossToInnerJoinVisitor(cross_to_inner).visit(ast);
if (cross_to_inner.done)
logQuery(queryToString(*ast), context);
}
/// Check the limits.
checkASTSizeLimits(*ast, settings);

View File

@ -0,0 +1,23 @@
cross
1 1 1 1
2 2 2 \N
cross nullable
1 1 1 1
cross nullable vs not nullable
1 1 1 1
Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableExpression (children 1)\n Identifier t2\n TableJoin\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n
Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 3)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableExpression (children 1)\n Identifier t2\n TableJoin\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n
Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 2)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableJoin (children 1)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n TableExpression (children 1)\n Identifier t2\n
Explain ParsedAST (children 1)\n SelectWithUnionQuery (children 1)\n ExpressionList (children 1)\n SelectQuery (children 2)\n ExpressionList (children 1)\n Asterisk\n TablesInSelectQuery (children 2)\n TablesInSelectQueryElement (children 1)\n TableExpression (children 1)\n Identifier t1\n TablesInSelectQueryElement (children 2)\n TableJoin (children 1)\n Function equals (children 1)\n ExpressionList (children 2)\n Identifier t1.a\n Identifier t2.a\n TableExpression (children 1)\n Identifier t2\n
cross
1 1 1 1
2 2 2 \N
cross nullable
1 1 1 1
cross nullable vs not nullable
1 1 1 1
comma
1 1 1 1
2 2 2 \N
comma nullable
1 1 1 1

View File

@ -0,0 +1,42 @@
USE test;
DROP TABLE IF EXISTS t1;
DROP TABLE IF EXISTS t2;
CREATE TABLE t1 (a Int8, b Nullable(Int8)) ENGINE = Memory;
CREATE TABLE t2 (a Int8, b Nullable(Int8)) ENGINE = Memory;
INSERT INTO t1 values (1,1), (2,2);
INSERT INTO t2 values (1,1);
INSERT INTO t2 (a) values (2), (3);
SELECT 'cross';
SELECT * FROM t1 cross join t2 where t1.a = t2.a;
SELECT 'cross nullable';
SELECT * FROM t1 cross join t2 where t1.b = t2.b;
SELECT 'cross nullable vs not nullable';
SELECT * FROM t1 cross join t2 where t1.a = t2.b;
SET enable_debug_queries = 1;
AST SELECT * FROM t1 cross join t2 where t1.a = t2.a;
AST SELECT * FROM t1, t2 where t1.a = t2.a;
SET allow_experimental_cross_to_join_conversion = 1;
AST SELECT * FROM t1 cross join t2 where t1.a = t2.a;
AST SELECT * FROM t1, t2 where t1.a = t2.a;
SELECT 'cross';
SELECT * FROM t1 cross join t2 where t1.a = t2.a;
SELECT 'cross nullable';
SELECT * FROM t1 cross join t2 where t1.b = t2.b;
SELECT 'cross nullable vs not nullable';
SELECT * FROM t1 cross join t2 where t1.a = t2.b;
SELECT 'comma';
SELECT * FROM t1, t2 where t1.a = t2.a;
SELECT 'comma nullable';
SELECT * FROM t1, t2 where t1.b = t2.b;
DROP TABLE t1;
DROP TABLE t2;