// ClickHouse/dbms/src/Interpreters/LogicalExpressionsOptimizer.h

#pragma once
#include <Parsers/IAST.h>
#include <string>
#include <vector>
#include <map>
#include <unordered_map>
#include <unordered_set>
namespace DB
{

/// Forward declarations. ASTFunction and ASTSelectQuery are used below only
/// through pointers, so their full definitions are not needed in this header.
struct Settings;
class ASTFunction;
class ASTSelectQuery;

/** This class provides functions for optimizing boolean expressions within queries.
*
2017-06-02 21:37:28 +00:00
* For simplicity, we call a homogeneous OR-chain any expression having the following structure:
* expr = x1 OR ... OR expr = xN
2017-06-02 21:37:28 +00:00
* where `expr` is an arbitrary expression and x1, ..., xN are literals of the same type
*/
class LogicalExpressionsOptimizer final
{
2018-10-19 15:42:47 +00:00
struct ExtractedSettings
{
const UInt64 optimize_min_equality_disjunction_chain_length;
ExtractedSettings(UInt64 optimize_min_equality_disjunction_chain_length_)
: optimize_min_equality_disjunction_chain_length(optimize_min_equality_disjunction_chain_length_)
{}
};
public:
2017-06-02 21:37:28 +00:00
/// Constructor. Accepts the root of the query DAG.
2018-10-19 15:42:47 +00:00
LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, ExtractedSettings && settings_);
2017-06-02 21:37:28 +00:00
/** Replace all rather long homogeneous OR-chains expr = x1 OR ... OR expr = xN
* on the expressions `expr` IN (x1, ..., xN).
*/
void perform();
LogicalExpressionsOptimizer(const LogicalExpressionsOptimizer &) = delete;
LogicalExpressionsOptimizer & operator=(const LogicalExpressionsOptimizer &) = delete;
private:
2017-06-02 21:37:28 +00:00
/** The OR function with the expression.
*/
struct OrWithExpression
{
OrWithExpression(const ASTFunction * or_function_, const IAST::Hash & expression_, const std::string & alias_);
bool operator<(const OrWithExpression & rhs) const;
const ASTFunction * or_function;
const IAST::Hash expression;
const std::string alias;
};
struct Equalities
{
std::vector<ASTFunction *> functions;
bool is_processed = false;
};
using DisjunctiveEqualityChainsMap = std::map<OrWithExpression, Equalities>;
using DisjunctiveEqualityChain = DisjunctiveEqualityChainsMap::value_type;
private:
2017-06-02 21:37:28 +00:00
/** Collect information about all the equations in the OR chains (not necessarily homogeneous).
* This information is grouped by the expression that is on the left side of the equation.
*/
void collectDisjunctiveEqualityChains();
2017-06-02 21:37:28 +00:00
/** Check that the set of equalities expr = x1, ..., expr = xN fulfills the following two requirements:
* 1. It's not too small
* 2. x1, ... xN have the same type
*/
bool mayOptimizeDisjunctiveEqualityChain(const DisjunctiveEqualityChain & chain) const;
2017-06-02 21:37:28 +00:00
/// Insert the IN expression into the OR chain.
void addInExpression(const DisjunctiveEqualityChain & chain);
2017-06-02 21:37:28 +00:00
/// Delete the equalities that were replaced by the IN expressions.
void cleanupOrExpressions();
2017-06-02 21:37:28 +00:00
/// Delete OR expressions that have only one operand.
void fixBrokenOrExpressions();
2017-06-02 21:37:28 +00:00
/// Restore the original column order after optimization.
void reorderColumns();
private:
using ParentNodes = std::vector<IAST *>;
using FunctionParentMap = std::unordered_map<const IAST *, ParentNodes>;
using ColumnToPosition = std::unordered_map<const IAST *, size_t>;
private:
ASTSelectQuery * select_query;
2018-10-19 15:42:47 +00:00
const ExtractedSettings settings;
2017-06-02 21:37:28 +00:00
/// Information about the OR-chains inside the query.
DisjunctiveEqualityChainsMap disjunctive_equality_chains_map;
2017-06-02 21:37:28 +00:00
/// Number of processed OR-chains.
size_t processed_count = 0;
2017-06-02 21:37:28 +00:00
/// Parents of OR functions.
FunctionParentMap or_parent_map;
2017-06-02 21:37:28 +00:00
/// The position of each column.
ColumnToPosition column_to_position;
/// Set of nodes, that was visited.
std::unordered_set<void *> visited_nodes;
};
}