2017-04-01 09:19:00 +00:00
|
|
|
#include <Interpreters/LogicalExpressionsOptimizer.h>
|
2022-10-25 06:44:09 +00:00
|
|
|
#include <Interpreters/IdentifierSemantic.h>
|
|
|
|
#include <DataTypes/DataTypeLowCardinality.h>
|
2019-03-22 12:08:30 +00:00
|
|
|
#include <Core/Settings.h>
|
2015-02-18 09:43:36 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Parsers/ASTFunction.h>
|
|
|
|
#include <Parsers/ASTSelectQuery.h>
|
|
|
|
#include <Parsers/ASTLiteral.h>
|
2022-10-25 06:44:09 +00:00
|
|
|
#include <Parsers/ASTIdentifier.h>
|
2015-02-18 09:43:36 +00:00
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Common/typeid_cast.h>
|
2016-11-20 12:43:20 +00:00
|
|
|
|
2015-02-18 09:43:36 +00:00
|
|
|
#include <deque>
|
2022-10-25 06:44:09 +00:00
|
|
|
#include <vector>
|
2015-02-18 09:43:36 +00:00
|
|
|
|
2022-01-30 19:49:48 +00:00
|
|
|
#include <base/sort.h>
|
|
|
|
|
2016-01-11 21:46:36 +00:00
|
|
|
|
2015-02-18 09:43:36 +00:00
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2016-01-11 21:46:36 +00:00
|
|
|
/// Error codes used in this file. The constant is only declared here (extern); its definition lives elsewhere.
/// LOGICAL_ERROR is thrown below when the optimizer's internal bookkeeping is found to be inconsistent.
namespace ErrorCodes
|
|
|
|
{
|
|
|
|
extern const int LOGICAL_ERROR;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2019-03-11 12:49:39 +00:00
|
|
|
/// Bundles an OR node with the hash of an expression compared inside it and the alias
/// that is later transferred to the generated IN function.
LogicalExpressionsOptimizer::OrWithExpression::OrWithExpression(
    const ASTFunction * or_function_,
    const IAST::Hash & expression_,
    const std::string & alias_)
    : or_function(or_function_)
    , expression(expression_)
    , alias(alias_)
{
}
|
|
|
|
|
2015-02-20 11:33:21 +00:00
|
|
|
bool LogicalExpressionsOptimizer::OrWithExpression::operator<(const OrWithExpression & rhs) const
|
2015-02-18 09:43:36 +00:00
|
|
|
{
|
2015-02-25 14:23:15 +00:00
|
|
|
return std::tie(this->or_function, this->expression) < std::tie(rhs.or_function, rhs.expression);
|
2015-02-18 09:43:36 +00:00
|
|
|
}
|
|
|
|
|
2022-10-25 06:44:09 +00:00
|
|
|
/// Stores the query to optimize, the tables used to resolve column types,
/// and the minimal chain length setting. No work is done until perform() is called.
LogicalExpressionsOptimizer::LogicalExpressionsOptimizer(
    ASTSelectQuery * select_query_,
    const TablesWithColumns & tables_with_columns_,
    UInt64 optimize_min_equality_disjunction_chain_length)
    : select_query(select_query_)
    , tables_with_columns(tables_with_columns_)
    , settings(optimize_min_equality_disjunction_chain_length)
{
}
|
|
|
|
|
2015-09-29 14:40:23 +00:00
|
|
|
void LogicalExpressionsOptimizer::perform()
|
2015-02-18 09:43:36 +00:00
|
|
|
{
|
2015-09-29 14:40:23 +00:00
|
|
|
if (select_query == nullptr)
|
|
|
|
return;
|
2022-04-18 10:18:43 +00:00
|
|
|
if (visited_nodes.contains(select_query))
|
2015-02-18 09:43:36 +00:00
|
|
|
return;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2016-03-10 14:24:04 +00:00
|
|
|
size_t position = 0;
|
2019-04-09 14:22:35 +00:00
|
|
|
for (auto & column : select_query->select()->children)
|
2016-03-10 14:24:04 +00:00
|
|
|
{
|
|
|
|
bool inserted = column_to_position.emplace(column.get(), position).second;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-01-12 00:26:39 +00:00
|
|
|
/// Do not run, if AST was already converted to DAG.
|
|
|
|
/// TODO This is temporary solution. We must completely eliminate conversion of AST to DAG.
|
|
|
|
/// (see ExpressionAnalyzer::normalizeTree)
|
2016-03-10 14:24:04 +00:00
|
|
|
if (!inserted)
|
2017-01-12 00:26:39 +00:00
|
|
|
return;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2016-03-10 14:24:04 +00:00
|
|
|
++position;
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2015-02-18 09:43:36 +00:00
|
|
|
collectDisjunctiveEqualityChains();
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2015-02-19 14:55:47 +00:00
|
|
|
for (auto & chain : disjunctive_equality_chains_map)
|
2015-02-18 09:43:36 +00:00
|
|
|
{
|
2015-02-18 13:53:19 +00:00
|
|
|
if (!mayOptimizeDisjunctiveEqualityChain(chain))
|
2015-02-18 09:43:36 +00:00
|
|
|
continue;
|
2015-02-19 14:55:47 +00:00
|
|
|
addInExpression(chain);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2015-02-19 14:55:47 +00:00
|
|
|
auto & equalities = chain.second;
|
|
|
|
equalities.is_processed = true;
|
2015-02-20 10:37:55 +00:00
|
|
|
++processed_count;
|
2015-02-18 09:43:36 +00:00
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2015-02-20 10:37:55 +00:00
|
|
|
if (processed_count > 0)
|
|
|
|
{
|
|
|
|
cleanupOrExpressions();
|
|
|
|
fixBrokenOrExpressions();
|
2016-03-10 14:24:04 +00:00
|
|
|
reorderColumns();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void LogicalExpressionsOptimizer::reorderColumns()
|
|
|
|
{
|
2019-04-09 14:22:35 +00:00
|
|
|
auto & columns = select_query->select()->children;
|
2016-03-10 14:24:04 +00:00
|
|
|
size_t cur_position = 0;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2016-03-10 14:24:04 +00:00
|
|
|
while (cur_position < columns.size())
|
|
|
|
{
|
|
|
|
size_t expected_position = column_to_position.at(columns[cur_position].get());
|
|
|
|
if (cur_position != expected_position)
|
|
|
|
std::swap(columns[cur_position], columns[expected_position]);
|
|
|
|
else
|
|
|
|
++cur_position;
|
2015-02-20 10:37:55 +00:00
|
|
|
}
|
2015-02-18 09:43:36 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void LogicalExpressionsOptimizer::collectDisjunctiveEqualityChains()
|
|
|
|
{
|
2022-04-18 10:18:43 +00:00
|
|
|
if (visited_nodes.contains(select_query))
|
2015-09-18 13:36:10 +00:00
|
|
|
return;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2015-02-18 15:11:47 +00:00
|
|
|
using Edge = std::pair<IAST *, IAST *>;
|
2015-02-18 09:43:36 +00:00
|
|
|
std::deque<Edge> to_visit;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2015-09-29 14:40:23 +00:00
|
|
|
to_visit.emplace_back(nullptr, select_query);
|
2015-02-18 09:43:36 +00:00
|
|
|
while (!to_visit.empty())
|
|
|
|
{
|
2015-02-18 22:13:54 +00:00
|
|
|
auto edge = to_visit.back();
|
2020-04-22 06:01:33 +00:00
|
|
|
auto * from_node = edge.first;
|
|
|
|
auto * to_node = edge.second;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2015-02-18 22:13:54 +00:00
|
|
|
to_visit.pop_back();
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2015-02-18 13:53:19 +00:00
|
|
|
bool found_chain = false;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2019-03-11 13:22:51 +00:00
|
|
|
auto * function = to_node->as<ASTFunction>();
|
2023-03-12 11:06:29 +00:00
|
|
|
/// Optimization does not respect aliases properly, which can lead to MULTIPLE_EXPRESSION_FOR_ALIAS error.
|
|
|
|
/// Disable it if an expression has an alias. Proper implementation is done with the new analyzer.
|
2023-03-10 20:14:35 +00:00
|
|
|
if (function && function->alias.empty() && function->name == "or" && function->children.size() == 1)
|
2015-02-18 09:43:36 +00:00
|
|
|
{
|
2019-03-11 13:22:51 +00:00
|
|
|
const auto * expression_list = function->children[0]->as<ASTExpressionList>();
|
2019-03-11 12:49:39 +00:00
|
|
|
if (expression_list)
|
2015-02-18 09:43:36 +00:00
|
|
|
{
|
2017-04-02 17:37:49 +00:00
|
|
|
/// The chain of elements of the OR expression.
|
2020-04-22 06:01:33 +00:00
|
|
|
for (const auto & child : expression_list->children)
|
2015-02-18 09:43:36 +00:00
|
|
|
{
|
2019-03-11 13:22:51 +00:00
|
|
|
auto * equals = child->as<ASTFunction>();
|
2023-03-10 20:14:35 +00:00
|
|
|
if (equals && equals->alias.empty() && equals->name == "equals" && equals->children.size() == 1)
|
2015-02-18 09:43:36 +00:00
|
|
|
{
|
2019-03-11 13:22:51 +00:00
|
|
|
const auto * equals_expression_list = equals->children[0]->as<ASTExpressionList>();
|
2019-03-11 12:49:39 +00:00
|
|
|
if (equals_expression_list && equals_expression_list->children.size() == 2)
|
2015-02-18 09:43:36 +00:00
|
|
|
{
|
2017-04-02 17:37:49 +00:00
|
|
|
/// Equality expr = xN.
|
2019-03-11 13:22:51 +00:00
|
|
|
const auto * literal = equals_expression_list->children[1]->as<ASTLiteral>();
|
2023-03-10 20:14:35 +00:00
|
|
|
if (literal && literal->alias.empty())
|
2015-02-18 09:43:36 +00:00
|
|
|
{
|
2017-01-25 01:53:29 +00:00
|
|
|
auto expr_lhs = equals_expression_list->children[0]->getTreeHash();
|
2016-03-10 14:24:04 +00:00
|
|
|
OrWithExpression or_with_expression{function, expr_lhs, function->tryGetAlias()};
|
2015-02-19 14:55:47 +00:00
|
|
|
disjunctive_equality_chains_map[or_with_expression].functions.push_back(equals);
|
2015-02-18 13:53:19 +00:00
|
|
|
found_chain = true;
|
2015-02-18 09:43:36 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-01-03 07:37:29 +00:00
|
|
|
visited_nodes.insert(to_node);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2015-02-18 22:13:54 +00:00
|
|
|
if (found_chain)
|
|
|
|
{
|
|
|
|
if (from_node != nullptr)
|
|
|
|
{
|
|
|
|
auto res = or_parent_map.insert(std::make_pair(function, ParentNodes{from_node}));
|
|
|
|
if (!res.second)
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "LogicalExpressionsOptimizer: parent node information is corrupted");
|
2015-02-18 22:13:54 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2015-02-18 09:43:36 +00:00
|
|
|
for (auto & child : to_node->children)
|
2015-02-18 22:13:54 +00:00
|
|
|
{
|
2019-03-11 13:22:51 +00:00
|
|
|
if (!child->as<ASTSelectQuery>())
|
2015-02-18 22:13:54 +00:00
|
|
|
{
|
2022-04-18 10:18:43 +00:00
|
|
|
if (!visited_nodes.contains(child.get()))
|
2015-02-18 22:13:54 +00:00
|
|
|
to_visit.push_back(Edge(to_node, &*child));
|
|
|
|
else
|
|
|
|
{
|
2017-04-02 17:37:49 +00:00
|
|
|
/// If the node is an OR function, update the information about its parents.
|
2015-02-18 22:13:54 +00:00
|
|
|
auto it = or_parent_map.find(&*child);
|
|
|
|
if (it != or_parent_map.end())
|
|
|
|
{
|
|
|
|
auto & parent_nodes = it->second;
|
|
|
|
parent_nodes.push_back(to_node);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2015-02-18 09:43:36 +00:00
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2015-02-18 18:18:37 +00:00
|
|
|
for (auto & chain : disjunctive_equality_chains_map)
|
2015-02-18 09:43:36 +00:00
|
|
|
{
|
2015-02-18 12:11:16 +00:00
|
|
|
auto & equalities = chain.second;
|
2015-02-19 14:55:47 +00:00
|
|
|
auto & equality_functions = equalities.functions;
|
2022-01-30 19:49:48 +00:00
|
|
|
::sort(equality_functions.begin(), equality_functions.end());
|
2015-02-18 09:43:36 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2015-02-20 14:22:33 +00:00
|
|
|
namespace
|
|
|
|
{
|
|
|
|
|
2019-03-11 12:49:39 +00:00
|
|
|
inline ASTs & getFunctionOperands(const ASTFunction * or_function)
|
2015-02-20 14:22:33 +00:00
|
|
|
{
|
2019-03-11 12:49:39 +00:00
|
|
|
return or_function->children[0]->children;
|
2015-02-20 14:22:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
|
2022-10-25 06:44:09 +00:00
|
|
|
/// Returns true if the chain has more than one equality and the left-hand side of its first
/// equality is an identifier that resolves to a column of LowCardinality type in one of
/// `tables_with_columns`. Returns false in every other case.
bool LogicalExpressionsOptimizer::isLowCardinalityEqualityChain(const std::vector<ASTFunction *> & functions) const
{
    if (functions.size() <= 1)
        return false;

    /// Check if identifier is LowCardinality type
    const auto * identifier = getFunctionOperands(functions[0])[0]->as<ASTIdentifier>();
    if (!identifier)
        return false;

    /// Prefer the table already bound to the identifier; otherwise try to match it by column name.
    auto table_pos = IdentifierSemantic::getMembership(*identifier);
    if (!table_pos)
        table_pos = IdentifierSemantic::chooseTableColumnMatch(*identifier, tables_with_columns, true);
    if (!table_pos)
        return false;

    const auto column = tables_with_columns[*table_pos].columns.tryGetByName(identifier->shortName());
    if (!column)
        return false;

    return typeid_cast<const DataTypeLowCardinality *>(column->type.get()) != nullptr;
}
|
|
|
|
|
2015-02-18 13:53:19 +00:00
|
|
|
bool LogicalExpressionsOptimizer::mayOptimizeDisjunctiveEqualityChain(const DisjunctiveEqualityChain & chain) const
|
2015-02-18 09:43:36 +00:00
|
|
|
{
|
2018-11-26 00:56:50 +00:00
|
|
|
const auto & equalities = chain.second;
|
2015-02-19 14:55:47 +00:00
|
|
|
const auto & equality_functions = equalities.functions;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2023-03-10 20:14:35 +00:00
|
|
|
if (settings.optimize_min_equality_disjunction_chain_length == 0)
|
|
|
|
return false;
|
|
|
|
|
2022-11-02 06:21:16 +00:00
|
|
|
/// For LowCardinality column, the dict is usually smaller and the index is relatively large.
|
|
|
|
/// In most cases, merging OR-chain as IN is better than converting each LowCardinality into full column individually.
|
|
|
|
/// For non-LowCardinality, we need to eliminate too short chains.
|
2022-10-25 06:44:09 +00:00
|
|
|
if (equality_functions.size() < settings.optimize_min_equality_disjunction_chain_length &&
|
|
|
|
!isLowCardinalityEqualityChain(equality_functions))
|
2015-02-18 09:43:36 +00:00
|
|
|
return false;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-04-02 17:37:49 +00:00
|
|
|
/// We check that the right-hand sides of all equalities have the same type.
|
2015-02-20 14:22:33 +00:00
|
|
|
auto & first_operands = getFunctionOperands(equality_functions[0]);
|
2019-03-11 13:22:51 +00:00
|
|
|
const auto * first_literal = first_operands[1]->as<ASTLiteral>();
|
2015-02-20 14:27:05 +00:00
|
|
|
for (size_t i = 1; i < equality_functions.size(); ++i)
|
2015-02-18 09:43:36 +00:00
|
|
|
{
|
2015-02-20 14:27:05 +00:00
|
|
|
auto & operands = getFunctionOperands(equality_functions[i]);
|
2019-03-11 13:22:51 +00:00
|
|
|
const auto * literal = operands[1]->as<ASTLiteral>();
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2015-12-24 18:35:01 +00:00
|
|
|
if (literal->value.getType() != first_literal->value.getType())
|
2015-02-18 09:43:36 +00:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-02-19 14:55:47 +00:00
|
|
|
void LogicalExpressionsOptimizer::addInExpression(const DisjunctiveEqualityChain & chain)
|
2015-02-18 14:55:11 +00:00
|
|
|
{
|
|
|
|
const auto & or_with_expression = chain.first;
|
|
|
|
const auto & equalities = chain.second;
|
2015-02-19 14:55:47 +00:00
|
|
|
const auto & equality_functions = equalities.functions;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-04-02 17:37:49 +00:00
|
|
|
/// 1. Create a new IN expression based on information from the OR-chain.
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2021-09-06 13:54:26 +00:00
|
|
|
/// Construct a tuple of literals `x1, ..., xN` from the string `expr = x1 OR ... OR expr = xN`
|
|
|
|
|
|
|
|
Tuple tuple;
|
|
|
|
tuple.reserve(equality_functions.size());
|
|
|
|
|
2020-04-22 06:01:33 +00:00
|
|
|
for (const auto * function : equality_functions)
|
2015-02-18 09:43:36 +00:00
|
|
|
{
|
2015-02-20 14:22:33 +00:00
|
|
|
const auto & operands = getFunctionOperands(function);
|
2021-09-06 13:54:26 +00:00
|
|
|
tuple.push_back(operands[1]->as<ASTLiteral>()->value);
|
2015-02-18 09:43:36 +00:00
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-04-02 17:37:49 +00:00
|
|
|
/// Sort the literals so that they are specified in the same order in the IN expression.
|
2022-01-30 19:49:48 +00:00
|
|
|
::sort(tuple.begin(), tuple.end());
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-04-02 17:37:49 +00:00
|
|
|
/// Get the expression `expr` from the chain `expr = x1 OR ... OR expr = xN`
|
2015-02-18 16:54:42 +00:00
|
|
|
ASTPtr equals_expr_lhs;
|
|
|
|
{
|
2020-04-22 06:01:33 +00:00
|
|
|
auto * function = equality_functions[0];
|
2015-02-20 14:22:33 +00:00
|
|
|
const auto & operands = getFunctionOperands(function);
|
|
|
|
equals_expr_lhs = operands[0];
|
2015-02-18 16:54:42 +00:00
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2021-09-06 13:54:26 +00:00
|
|
|
auto tuple_literal = std::make_shared<ASTLiteral>(std::move(tuple));
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2016-05-28 15:42:22 +00:00
|
|
|
ASTPtr expression_list = std::make_shared<ASTExpressionList>();
|
2015-02-18 16:54:42 +00:00
|
|
|
expression_list->children.push_back(equals_expr_lhs);
|
2021-09-06 13:54:26 +00:00
|
|
|
expression_list->children.push_back(tuple_literal);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-04-02 17:37:49 +00:00
|
|
|
/// Construct the expression `expr IN (x1, ..., xN)`
|
2016-05-28 15:42:22 +00:00
|
|
|
auto in_function = std::make_shared<ASTFunction>();
|
2015-02-18 09:43:36 +00:00
|
|
|
in_function->name = "in";
|
2015-02-18 16:54:42 +00:00
|
|
|
in_function->arguments = expression_list;
|
2015-02-18 09:43:36 +00:00
|
|
|
in_function->children.push_back(in_function->arguments);
|
2016-03-10 14:24:04 +00:00
|
|
|
in_function->setAlias(or_with_expression.alias);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-04-02 17:37:49 +00:00
|
|
|
/// 2. Insert the new IN expression.
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2015-02-20 14:22:33 +00:00
|
|
|
auto & operands = getFunctionOperands(or_with_expression.or_function);
|
2015-02-18 14:55:11 +00:00
|
|
|
operands.push_back(in_function);
|
2015-02-19 14:55:47 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void LogicalExpressionsOptimizer::cleanupOrExpressions()
|
|
|
|
{
|
2017-04-02 17:37:49 +00:00
|
|
|
/// Saves for each optimized OR-chain the iterator on the first element
|
|
|
|
/// list of operands to be deleted.
|
2019-03-11 12:49:39 +00:00
|
|
|
std::unordered_map<const ASTFunction *, ASTs::iterator> garbage_map;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-04-02 17:37:49 +00:00
|
|
|
/// Initialization.
|
2015-02-20 10:53:26 +00:00
|
|
|
garbage_map.reserve(processed_count);
|
2015-02-19 14:55:47 +00:00
|
|
|
for (const auto & chain : disjunctive_equality_chains_map)
|
|
|
|
{
|
|
|
|
if (!chain.second.is_processed)
|
|
|
|
continue;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2015-02-19 14:55:47 +00:00
|
|
|
const auto & or_with_expression = chain.first;
|
2015-02-20 14:22:33 +00:00
|
|
|
auto & operands = getFunctionOperands(or_with_expression.or_function);
|
2015-09-29 14:40:23 +00:00
|
|
|
garbage_map.emplace(or_with_expression.or_function, operands.end());
|
2015-02-19 14:55:47 +00:00
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-04-02 17:37:49 +00:00
|
|
|
/// Collect garbage.
|
2015-02-19 14:55:47 +00:00
|
|
|
for (const auto & chain : disjunctive_equality_chains_map)
|
2015-02-18 12:11:16 +00:00
|
|
|
{
|
2015-02-19 14:55:47 +00:00
|
|
|
const auto & equalities = chain.second;
|
|
|
|
if (!equalities.is_processed)
|
|
|
|
continue;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2015-02-19 14:55:47 +00:00
|
|
|
const auto & or_with_expression = chain.first;
|
2015-02-20 14:22:33 +00:00
|
|
|
auto & operands = getFunctionOperands(or_with_expression.or_function);
|
2015-02-19 14:55:47 +00:00
|
|
|
const auto & equality_functions = equalities.functions;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2015-02-19 14:55:47 +00:00
|
|
|
auto it = garbage_map.find(or_with_expression.or_function);
|
|
|
|
if (it == garbage_map.end())
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "LogicalExpressionsOptimizer: garbage map is corrupted");
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2015-02-20 10:53:26 +00:00
|
|
|
auto & first_erased = it->second;
|
|
|
|
first_erased = std::remove_if(operands.begin(), first_erased, [&](const ASTPtr & operand)
|
2015-02-19 14:55:47 +00:00
|
|
|
{
|
|
|
|
return std::binary_search(equality_functions.begin(), equality_functions.end(), &*operand);
|
|
|
|
});
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-04-02 17:37:49 +00:00
|
|
|
/// Delete garbage.
|
2015-02-19 14:55:47 +00:00
|
|
|
for (const auto & entry : garbage_map)
|
|
|
|
{
|
2020-04-22 06:01:33 +00:00
|
|
|
const auto * function = entry.first;
|
2022-10-18 09:40:12 +00:00
|
|
|
auto * first_erased = entry.second;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2015-02-20 14:22:33 +00:00
|
|
|
auto & operands = getFunctionOperands(function);
|
2015-02-20 10:53:26 +00:00
|
|
|
operands.erase(first_erased, operands.end());
|
2015-02-19 14:55:47 +00:00
|
|
|
}
|
2015-02-18 11:57:44 +00:00
|
|
|
}
|
|
|
|
|
2015-02-18 09:43:36 +00:00
|
|
|
void LogicalExpressionsOptimizer::fixBrokenOrExpressions()
|
|
|
|
{
|
2015-02-18 18:18:37 +00:00
|
|
|
for (const auto & chain : disjunctive_equality_chains_map)
|
2015-02-18 09:43:36 +00:00
|
|
|
{
|
2015-02-20 10:37:55 +00:00
|
|
|
const auto & equalities = chain.second;
|
|
|
|
if (!equalities.is_processed)
|
|
|
|
continue;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2015-02-18 14:55:11 +00:00
|
|
|
const auto & or_with_expression = chain.first;
|
2020-04-22 06:01:33 +00:00
|
|
|
const auto * or_function = or_with_expression.or_function;
|
2015-02-20 14:22:33 +00:00
|
|
|
auto & operands = getFunctionOperands(or_with_expression.or_function);
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2015-02-18 13:16:59 +00:00
|
|
|
if (operands.size() == 1)
|
2015-02-18 09:43:36 +00:00
|
|
|
{
|
2015-02-18 14:55:11 +00:00
|
|
|
auto it = or_parent_map.find(or_function);
|
|
|
|
if (it == or_parent_map.end())
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "LogicalExpressionsOptimizer: parent node information is corrupted");
|
2015-02-18 13:16:59 +00:00
|
|
|
auto & parents = it->second;
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2016-03-10 14:24:04 +00:00
|
|
|
auto it2 = column_to_position.find(or_function);
|
|
|
|
if (it2 != column_to_position.end())
|
|
|
|
{
|
|
|
|
size_t position = it2->second;
|
|
|
|
bool inserted = column_to_position.emplace(operands[0].get(), position).second;
|
|
|
|
if (!inserted)
|
2023-01-23 21:13:58 +00:00
|
|
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "LogicalExpressionsOptimizer: internal error");
|
2016-03-10 14:24:04 +00:00
|
|
|
column_to_position.erase(it2);
|
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2015-02-18 09:43:36 +00:00
|
|
|
for (auto & parent : parents)
|
|
|
|
{
|
2018-07-17 06:27:28 +00:00
|
|
|
// The order of children matters if or is children of some function, e.g. minus
|
|
|
|
std::replace_if(parent->children.begin(), parent->children.end(),
|
2018-11-24 01:48:06 +00:00
|
|
|
[or_function](const ASTPtr & ptr) { return ptr.get() == or_function; },
|
|
|
|
operands[0]);
|
2015-02-18 09:43:36 +00:00
|
|
|
}
|
2017-04-01 07:20:54 +00:00
|
|
|
|
2017-04-02 17:37:49 +00:00
|
|
|
/// If the OR node was the root of the WHERE, PREWHERE, or HAVING expression, then update this root.
|
|
|
|
/// Due to the fact that we are dealing with a directed acyclic graph, we must check all cases.
|
2019-04-09 14:22:35 +00:00
|
|
|
if (select_query->where() && (or_function == &*(select_query->where())))
|
|
|
|
select_query->setExpression(ASTSelectQuery::Expression::WHERE, operands[0]->clone());
|
|
|
|
if (select_query->prewhere() && (or_function == &*(select_query->prewhere())))
|
|
|
|
select_query->setExpression(ASTSelectQuery::Expression::PREWHERE, operands[0]->clone());
|
|
|
|
if (select_query->having() && (or_function == &*(select_query->having())))
|
|
|
|
select_query->setExpression(ASTSelectQuery::Expression::HAVING, operands[0]->clone());
|
2015-02-18 09:43:36 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|