crash fix, style fixes, ASTs moved out of TableJoin in ORs in JOIN

This commit is contained in:
Ilya Golshtein 2021-09-16 00:51:15 +03:00
parent 4c043a0157
commit 8057e052a6
7 changed files with 46 additions and 43 deletions

View File

@ -36,9 +36,17 @@ void CollectJoinOnKeysMatcher::Data::setDisjuncts(const ASTPtr & or_func_ast)
{ {
const auto * func = or_func_ast->as<ASTFunction>(); const auto * func = or_func_ast->as<ASTFunction>();
const auto * func_args = func->arguments->as<ASTExpressionList>(); const auto * func_args = func->arguments->as<ASTExpressionList>();
TableJoin::Disjuncts v = func_args->children; ASTs v = func_args->children;
analyzed_join.setDisjuncts(std::move(v)); disjuncts = std::move(v);
}
void CollectJoinOnKeysMatcher::Data::addDisjunct(const ASTPtr & ast)
{
const IAST * addr = ast.get();
if (std::find_if(disjuncts.begin(), disjuncts.end(), [addr](const ASTPtr & ast_){return ast_.get() == addr;}) != disjuncts.end())
analyzed_join.newClauseIfPopulated();
} }
void CollectJoinOnKeysMatcher::Data::addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, JoinIdentifierPosPair table_pos) void CollectJoinOnKeysMatcher::Data::addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, JoinIdentifierPosPair table_pos)
@ -87,7 +95,7 @@ void CollectJoinOnKeysMatcher::Data::asofToJoinKeys()
void CollectJoinOnKeysMatcher::Data::optimize() void CollectJoinOnKeysMatcher::Data::optimize()
{ {
analyzed_join.optimizeDisjuncts(); analyzed_join.optimizeClauses();
} }
void CollectJoinOnKeysMatcher::visit(const ASTIdentifier & ident, const ASTPtr & ast, CollectJoinOnKeysMatcher::Data & data) void CollectJoinOnKeysMatcher::visit(const ASTIdentifier & ident, const ASTPtr & ast, CollectJoinOnKeysMatcher::Data & data)
@ -107,7 +115,7 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as
return; return;
} }
data.analyzed_join.addDisjunct(ast); data.addDisjunct(ast);
if (func.name == "and") if (func.name == "and")
return; /// go into children return; /// go into children

View File

@ -47,12 +47,17 @@ public:
const bool is_asof{false}; const bool is_asof{false};
ASTPtr asof_left_key{}; ASTPtr asof_left_key{};
ASTPtr asof_right_key{}; ASTPtr asof_right_key{};
ASTs disjuncts{};
void addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, JoinIdentifierPosPair table_pos); void addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, JoinIdentifierPosPair table_pos);
void addAsofJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, JoinIdentifierPosPair table_pos, void addAsofJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, JoinIdentifierPosPair table_pos,
const ASOF::Inequality & asof_inequality); const ASOF::Inequality & asof_inequality);
void setDisjuncts(const ASTPtr & or_func_ast);
void asofToJoinKeys(); void asofToJoinKeys();
/// remember OR's children
void setDisjuncts(const ASTPtr & or_func_ast);
/// create new disjunct when see a direct child of a previously discovered OR
void addDisjunct(const ASTPtr & ast);
void optimize(); void optimize();
}; };

View File

@ -3,8 +3,6 @@
#include <unordered_map> #include <unordered_map>
#include <vector> #include <vector>
#include <common/logger_useful.h>
#include <Columns/ColumnConst.h> #include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h> #include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h> #include <Columns/ColumnVector.h>

View File

@ -1,6 +1,5 @@
#include <Interpreters/TableJoin.h> #include <Interpreters/TableJoin.h>
#include <Common/StringUtils/StringUtils.h> #include <Common/StringUtils/StringUtils.h>
#include <Core/Block.h> #include <Core/Block.h>
@ -23,6 +22,7 @@
#include <Storages/StorageJoin.h> #include <Storages/StorageJoin.h>
#include <common/logger_useful.h> #include <common/logger_useful.h>
#include <algorithm>
namespace DB namespace DB
@ -32,6 +32,7 @@ namespace ErrorCodes
{ {
extern const int TYPE_MISMATCH; extern const int TYPE_MISMATCH;
extern const int LOGICAL_ERROR; extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
} }
namespace namespace
@ -123,12 +124,7 @@ void TableJoin::addUsingKey(const ASTPtr & ast)
addKey(ast->getColumnName(), renamedRightColumnName(ast->getAliasOrColumnName()), ast); addKey(ast->getColumnName(), renamedRightColumnName(ast->getAliasOrColumnName()), ast);
} }
/// create new disjunct when see a direct child of a previously discovered OR void TableJoin::newClauseIfPopulated()
void TableJoin::addDisjunct(const ASTPtr & ast)
{
const IAST * addr = ast.get();
if (std::find_if(disjuncts.begin(), disjuncts.end(), [addr](const ASTPtr & ast_){return ast_.get() == addr;}) != disjuncts.end())
{ {
const auto & clause = clauses.back(); const auto & clause = clauses.back();
if (!clause.key_names_left.empty() || !clause.key_names_right.empty() || if (!clause.key_names_left.empty() || !clause.key_names_right.empty() ||
@ -136,7 +132,6 @@ void TableJoin::addDisjunct(const ASTPtr & ast)
{ {
clauses.emplace_back(); clauses.emplace_back();
} }
}
if (getStorageJoin() && clauses.size() > 1) if (getStorageJoin() && clauses.size() > 1)
throw Exception("StorageJoin with ORs is not supported", ErrorCodes::NOT_IMPLEMENTED); throw Exception("StorageJoin with ORs is not supported", ErrorCodes::NOT_IMPLEMENTED);
} }
@ -149,11 +144,6 @@ bool operator==(const TableJoin::JoinOnClause & l, const TableJoin::JoinOnClause
return l.key_names_left == r.key_names_left && l.key_names_right == r.key_names_right; return l.key_names_left == r.key_names_left && l.key_names_right == r.key_names_right;
} }
bool operator!=(const TableJoin::JoinOnClause & l, const TableJoin::JoinOnClause & r)
{
return !(l == r);
}
TableJoin::JoinOnClause & operator+=(TableJoin::JoinOnClause & l, const TableJoin::JoinOnClause & r) TableJoin::JoinOnClause & operator+=(TableJoin::JoinOnClause & l, const TableJoin::JoinOnClause & r)
{ {
for (auto & cp : for (auto & cp :
@ -163,23 +153,33 @@ TableJoin::JoinOnClause & operator+=(TableJoin::JoinOnClause & l, const TableJoi
if (*cp.first == nullptr) if (*cp.first == nullptr)
*cp.first = *cp.second; *cp.first = *cp.second;
else if (const auto * func = (*cp.first)->as<ASTFunction>(); func && func->name == "or") else if (const auto * func = (*cp.first)->as<ASTFunction>(); func && func->name == "or")
{
/// already have `or` in condition, just add new argument /// already have `or` in condition, just add new argument
if (*cp.second != nullptr)
func->arguments->children.push_back(*cp.second); func->arguments->children.push_back(*cp.second);
}
else else
{
/// already have some conditions, unite it with `or` /// already have some conditions, unite it with `or`
if (*cp.second != nullptr)
*cp.first = makeASTFunction("or", *cp.first, *cp.second); *cp.first = makeASTFunction("or", *cp.first, *cp.second);
} }
}
return l; return l;
} }
} }
void TableJoin::optimizeDisjuncts() bool operator<(const TableJoin::JoinOnClause & l, const TableJoin::JoinOnClause & r)
{
return l.key_names_left < r.key_names_left || (l.key_names_left == r.key_names_left && l.key_names_right < r.key_names_right);
}
void TableJoin::optimizeClauses()
{ {
if (clauses.size() > 1) if (clauses.size() > 1)
{ {
std::sort(clauses.begin(), clauses.end(), [](const JoinOnClause & a, const JoinOnClause & b) { std::sort(std::begin(clauses), std::end(clauses));
return a.key_names_left < b.key_names_left || (a.key_names_left == b.key_names_left && a.key_names_left < b.key_names_left); });
auto to_it = clauses.begin(); auto to_it = clauses.begin();
auto from_it = to_it + 1; auto from_it = to_it + 1;
@ -203,18 +203,12 @@ void TableJoin::optimizeDisjuncts()
if (clauses.size() != new_size) if (clauses.size() != new_size)
{ {
LOG_TRACE( LOG_TRACE(
&Poco::Logger::get("TableJoin"), "optimizeDisjuncts trim clauses, new size is {}", new_size); &Poco::Logger::get("TableJoin"), "optimizeClauses trim clauses, size {} => {}", clauses.size(), new_size);
clauses.resize(new_size); clauses.resize(new_size);
} }
} }
} }
/// remember OR's children
void TableJoin::setDisjuncts(Disjuncts&& disjuncts_)
{
disjuncts = std::move(disjuncts_);
}
void TableJoin::addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast) void TableJoin::addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast)
{ {
addKey(left_table_ast->getColumnName(), right_table_ast->getAliasOrColumnName(), left_table_ast, right_table_ast); addKey(left_table_ast->getColumnName(), right_table_ast->getAliasOrColumnName(), left_table_ast, right_table_ast);

View File

@ -52,7 +52,6 @@ class TableJoin
public: public:
using NameToTypeMap = std::unordered_map<String, DataTypePtr>; using NameToTypeMap = std::unordered_map<String, DataTypePtr>;
using Disjuncts = ASTs;
/// Corresponds to one disjunct /// Corresponds to one disjunct
struct JoinOnClause struct JoinOnClause
@ -120,8 +119,6 @@ private:
ASTs key_asts_left; ASTs key_asts_left;
ASTs key_asts_right; ASTs key_asts_right;
Disjuncts disjuncts;
Clauses clauses; Clauses clauses;
ASTTableJoin table_join; ASTTableJoin table_join;
@ -223,8 +220,8 @@ public:
void resetCollected(); void resetCollected();
void addUsingKey(const ASTPtr & ast); void addUsingKey(const ASTPtr & ast);
void setDisjuncts(Disjuncts &&); void newClauseIfPopulated();
void addDisjunct(const ASTPtr &);
/// if several disjuncts have exactly the same table columns /// if several disjuncts have exactly the same table columns
/// we can eliminate redundant disjuncts ORing filter conditions /// we can eliminate redundant disjuncts ORing filter conditions
/// This is vital for queries like t1.key = t2.key AND (t1.a = 1 OR t2.bb > 2) /// This is vital for queries like t1.key = t2.key AND (t1.a = 1 OR t2.bb > 2)
@ -232,7 +229,7 @@ public:
/// because after DNFing it is (t1.key = t2.key AND t1.a = 1) OR (t1.key = t2.key AND t2.bb > 2) /// because after DNFing it is (t1.key = t2.key AND t1.a = 1) OR (t1.key = t2.key AND t2.bb > 2)
/// and we unable to proceed with mergejoin and have to do deal with extra hashmap for hashjoin. /// and we unable to proceed with mergejoin and have to do deal with extra hashmap for hashjoin.
/// Practically we revert DNFing in this case. /// Practically we revert DNFing in this case.
void optimizeDisjuncts(); void optimizeClauses();
void addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast); void addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast);
/* Conditions for left/right table from JOIN ON section. /* Conditions for left/right table from JOIN ON section.

View File

@ -23,6 +23,7 @@
1 1
1 1
1 1
1
-- --
2 2
2 2

View File

@ -36,7 +36,7 @@ SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key ==
SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND t2.id == 3; SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND t2.id == 3;
SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND t2.key2 == 'BBB'; SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND t2.key2 == 'BBB';
SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND t1.key2 == '333'; SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND t1.key2 == '333';
SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2_nullable as t2 ON t1.id == t2.id AND (t2.key == t2.key2 OR isNull(t2.key2)) AND t1.key == t1.key2 AND t1.key2 == '333'; -- { serverError 48 } SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2_nullable as t2 ON t1.id == t2.id AND (t2.key == t2.key2 OR isNull(t2.key2)) AND t1.key == t1.key2 AND t1.key2 == '333';
SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2_lc as t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND t1.key2 == '333'; SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2_lc as t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND t1.key2 == '333';
SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2_nullable as t2 ON t1.id == t2.id AND isNull(t2.key2); SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2_nullable as t2 ON t1.id == t2.id AND isNull(t2.key2);
SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2_nullable as t2 ON t1.id == t2.id AND t1.key2 like '33%'; SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2_nullable as t2 ON t1.id == t2.id AND t1.key2 like '33%';