crash fix, style fixes, ASTs moved out of TableJoin in ORs in JOIN

This commit is contained in:
Ilya Golshtein 2021-09-16 00:51:15 +03:00
parent 4c043a0157
commit 8057e052a6
7 changed files with 46 additions and 43 deletions

View File

@ -36,9 +36,17 @@ void CollectJoinOnKeysMatcher::Data::setDisjuncts(const ASTPtr & or_func_ast)
{
const auto * func = or_func_ast->as<ASTFunction>();
const auto * func_args = func->arguments->as<ASTExpressionList>();
TableJoin::Disjuncts v = func_args->children;
ASTs v = func_args->children;
analyzed_join.setDisjuncts(std::move(v));
disjuncts = std::move(v);
}
void CollectJoinOnKeysMatcher::Data::addDisjunct(const ASTPtr & ast)
{
const IAST * addr = ast.get();
if (std::find_if(disjuncts.begin(), disjuncts.end(), [addr](const ASTPtr & ast_){return ast_.get() == addr;}) != disjuncts.end())
analyzed_join.newClauseIfPopulated();
}
void CollectJoinOnKeysMatcher::Data::addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, JoinIdentifierPosPair table_pos)
@ -87,7 +95,7 @@ void CollectJoinOnKeysMatcher::Data::asofToJoinKeys()
void CollectJoinOnKeysMatcher::Data::optimize()
{
analyzed_join.optimizeDisjuncts();
analyzed_join.optimizeClauses();
}
void CollectJoinOnKeysMatcher::visit(const ASTIdentifier & ident, const ASTPtr & ast, CollectJoinOnKeysMatcher::Data & data)
@ -107,7 +115,7 @@ void CollectJoinOnKeysMatcher::visit(const ASTFunction & func, const ASTPtr & as
return;
}
data.analyzed_join.addDisjunct(ast);
data.addDisjunct(ast);
if (func.name == "and")
return; /// go into children

View File

@ -47,12 +47,17 @@ public:
const bool is_asof{false};
ASTPtr asof_left_key{};
ASTPtr asof_right_key{};
ASTs disjuncts{};
void addJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, JoinIdentifierPosPair table_pos);
void addAsofJoinKeys(const ASTPtr & left_ast, const ASTPtr & right_ast, JoinIdentifierPosPair table_pos,
const ASOF::Inequality & asof_inequality);
void setDisjuncts(const ASTPtr & or_func_ast);
void asofToJoinKeys();
/// remember OR's children
void setDisjuncts(const ASTPtr & or_func_ast);
/// create new disjunct when see a direct child of a previously discovered OR
void addDisjunct(const ASTPtr & ast);
void optimize();
};

View File

@ -3,8 +3,6 @@
#include <unordered_map>
#include <vector>
#include <common/logger_useful.h>
#include <Columns/ColumnConst.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnVector.h>

View File

@ -1,6 +1,5 @@
#include <Interpreters/TableJoin.h>
#include <Common/StringUtils/StringUtils.h>
#include <Core/Block.h>
@ -23,6 +22,7 @@
#include <Storages/StorageJoin.h>
#include <common/logger_useful.h>
#include <algorithm>
namespace DB
@ -32,6 +32,7 @@ namespace ErrorCodes
{
extern const int TYPE_MISMATCH;
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
}
namespace
@ -123,19 +124,13 @@ void TableJoin::addUsingKey(const ASTPtr & ast)
addKey(ast->getColumnName(), renamedRightColumnName(ast->getAliasOrColumnName()), ast);
}
/// create new disjunct when see a direct child of a previously discovered OR
void TableJoin::addDisjunct(const ASTPtr & ast)
void TableJoin::newClauseIfPopulated()
{
const IAST * addr = ast.get();
if (std::find_if(disjuncts.begin(), disjuncts.end(), [addr](const ASTPtr & ast_){return ast_.get() == addr;}) != disjuncts.end())
const auto & clause = clauses.back();
if (!clause.key_names_left.empty() || !clause.key_names_right.empty() ||
clause.on_filter_condition_left || clause.on_filter_condition_right)
{
const auto & clause = clauses.back();
if (!clause.key_names_left.empty() || !clause.key_names_right.empty() ||
clause.on_filter_condition_left || clause.on_filter_condition_right)
{
clauses.emplace_back();
}
clauses.emplace_back();
}
if (getStorageJoin() && clauses.size() > 1)
throw Exception("StorageJoin with ORs is not supported", ErrorCodes::NOT_IMPLEMENTED);
@ -149,11 +144,6 @@ bool operator==(const TableJoin::JoinOnClause & l, const TableJoin::JoinOnClause
return l.key_names_left == r.key_names_left && l.key_names_right == r.key_names_right;
}
bool operator!=(const TableJoin::JoinOnClause & l, const TableJoin::JoinOnClause & r)
{
return !(l == r);
}
TableJoin::JoinOnClause & operator+=(TableJoin::JoinOnClause & l, const TableJoin::JoinOnClause & r)
{
for (auto & cp :
@ -163,23 +153,33 @@ TableJoin::JoinOnClause & operator+=(TableJoin::JoinOnClause & l, const TableJoi
if (*cp.first == nullptr)
*cp.first = *cp.second;
else if (const auto * func = (*cp.first)->as<ASTFunction>(); func && func->name == "or")
{
/// already have `or` in condition, just add new argument
func->arguments->children.push_back(*cp.second);
if (*cp.second != nullptr)
func->arguments->children.push_back(*cp.second);
}
else
{
/// already have some conditions, unite it with `or`
*cp.first = makeASTFunction("or", *cp.first, *cp.second);
if (*cp.second != nullptr)
*cp.first = makeASTFunction("or", *cp.first, *cp.second);
}
}
return l;
}
}
void TableJoin::optimizeDisjuncts()
bool operator<(const TableJoin::JoinOnClause & l, const TableJoin::JoinOnClause & r)
{
return l.key_names_left < r.key_names_left || (l.key_names_left == r.key_names_left && l.key_names_right < r.key_names_right);
}
void TableJoin::optimizeClauses()
{
if (clauses.size() > 1)
{
std::sort(clauses.begin(), clauses.end(), [](const JoinOnClause & a, const JoinOnClause & b) {
return a.key_names_left < b.key_names_left || (a.key_names_left == b.key_names_left && a.key_names_left < b.key_names_left); });
std::sort(std::begin(clauses), std::end(clauses));
auto to_it = clauses.begin();
auto from_it = to_it + 1;
@ -203,18 +203,12 @@ void TableJoin::optimizeDisjuncts()
if (clauses.size() != new_size)
{
LOG_TRACE(
&Poco::Logger::get("TableJoin"), "optimizeDisjuncts trim clauses, new size is {}", new_size);
&Poco::Logger::get("TableJoin"), "optimizeClauses trim clauses, size {} => {}", clauses.size(), new_size);
clauses.resize(new_size);
}
}
}
/// remember OR's children
void TableJoin::setDisjuncts(Disjuncts&& disjuncts_)
{
disjuncts = std::move(disjuncts_);
}
void TableJoin::addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast)
{
addKey(left_table_ast->getColumnName(), right_table_ast->getAliasOrColumnName(), left_table_ast, right_table_ast);

View File

@ -52,7 +52,6 @@ class TableJoin
public:
using NameToTypeMap = std::unordered_map<String, DataTypePtr>;
using Disjuncts = ASTs;
/// Corresponds to one disjunct
struct JoinOnClause
@ -120,8 +119,6 @@ private:
ASTs key_asts_left;
ASTs key_asts_right;
Disjuncts disjuncts;
Clauses clauses;
ASTTableJoin table_join;
@ -223,8 +220,8 @@ public:
void resetCollected();
void addUsingKey(const ASTPtr & ast);
void setDisjuncts(Disjuncts &&);
void addDisjunct(const ASTPtr &);
void newClauseIfPopulated();
/// if several disjuncts have exactly the same table columns
/// we can eliminate redundant disjuncts ORing filter conditions
/// This is vital for queries like t1.key = t2.key AND (t1.a = 1 OR t2.bb > 2)
@ -232,7 +229,7 @@ public:
/// because after DNFing it is (t1.key = t2.key AND t1.a = 1) OR (t1.key = t2.key AND t2.bb > 2)
/// and we unable to proceed with mergejoin and have to do deal with extra hashmap for hashjoin.
/// Practically we revert DNFing in this case.
void optimizeDisjuncts();
void optimizeClauses();
void addOnKeys(ASTPtr & left_table_ast, ASTPtr & right_table_ast);
/* Conditions for left/right table from JOIN ON section.

View File

@ -23,6 +23,7 @@
1
1
1
1
--
2
2

View File

@ -36,7 +36,7 @@ SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key ==
SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND t2.id == 3;
SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND t2.key2 == 'BBB';
SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND t1.key2 == '333';
SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2_nullable as t2 ON t1.id == t2.id AND (t2.key == t2.key2 OR isNull(t2.key2)) AND t1.key == t1.key2 AND t1.key2 == '333'; -- { serverError 48 }
SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2_nullable as t2 ON t1.id == t2.id AND (t2.key == t2.key2 OR isNull(t2.key2)) AND t1.key == t1.key2 AND t1.key2 == '333';
SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2_lc as t2 ON t1.id == t2.id AND t2.key == t2.key2 AND t1.key == t1.key2 AND t1.key2 == '333';
SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2_nullable as t2 ON t1.id == t2.id AND isNull(t2.key2);
SELECT '333' = t1.key FROM t1 INNER ANY JOIN t2_nullable as t2 ON t1.id == t2.id AND t1.key2 like '33%';