2015-02-18 09:43:36 +00:00
|
|
|
#pragma once
|
|
|
|
|
2017-04-01 09:19:00 +00:00
|
|
|
#include <Parsers/IAST.h>
|
2015-02-18 14:55:11 +00:00
|
|
|
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
#include <map>
|
|
|
|
#include <unordered_map>
|
2017-01-03 07:37:29 +00:00
|
|
|
#include <unordered_set>
|
|
|
|
|
2015-02-18 09:43:36 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2016-03-07 04:35:06 +00:00
|
|
|
struct Settings;
|
2015-02-18 14:55:11 +00:00
|
|
|
class ASTFunction;
|
2015-02-18 09:43:36 +00:00
|
|
|
class ASTSelectQuery;
|
|
|
|
|
2018-10-18 15:03:14 +00:00
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/** This class provides functions for optimizing boolean expressions within queries.
|
2015-02-18 11:57:44 +00:00
|
|
|
*
|
2017-06-02 21:37:28 +00:00
|
|
|
* For simplicity, we call a homogeneous OR-chain any expression having the following structure:
|
2015-02-18 11:57:44 +00:00
|
|
|
* expr = x1 OR ... OR expr = xN
|
2017-06-02 21:37:28 +00:00
|
|
|
* where `expr` is an arbitrary expression and x1, ..., xN are literals of the same type
|
2015-02-18 11:57:44 +00:00
|
|
|
*/
|
2015-02-18 14:55:11 +00:00
|
|
|
class LogicalExpressionsOptimizer final
|
2015-02-18 09:43:36 +00:00
|
|
|
{
|
2018-10-19 15:42:47 +00:00
|
|
|
struct ExtractedSettings
|
|
|
|
{
|
|
|
|
const UInt64 optimize_min_equality_disjunction_chain_length;
|
|
|
|
|
|
|
|
ExtractedSettings(UInt64 optimize_min_equality_disjunction_chain_length_)
|
|
|
|
: optimize_min_equality_disjunction_chain_length(optimize_min_equality_disjunction_chain_length_)
|
|
|
|
{}
|
|
|
|
};
|
|
|
|
|
2015-02-18 09:43:36 +00:00
|
|
|
public:
|
2017-06-02 21:37:28 +00:00
|
|
|
/// Constructor. Accepts the root of the query DAG.
|
2018-10-19 15:42:47 +00:00
|
|
|
LogicalExpressionsOptimizer(ASTSelectQuery * select_query_, ExtractedSettings && settings_);
|
2015-02-18 11:57:44 +00:00
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/** Replace all rather long homogeneous OR-chains expr = x1 OR ... OR expr = xN
|
|
|
|
* on the expressions `expr` IN (x1, ..., xN).
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
|
|
|
void perform();
|
2015-02-18 09:43:36 +00:00
|
|
|
|
2017-04-01 07:20:54 +00:00
|
|
|
LogicalExpressionsOptimizer(const LogicalExpressionsOptimizer &) = delete;
|
|
|
|
LogicalExpressionsOptimizer & operator=(const LogicalExpressionsOptimizer &) = delete;
|
2015-02-18 09:43:36 +00:00
|
|
|
|
|
|
|
private:
|
2017-06-02 21:37:28 +00:00
|
|
|
/** The OR function with the expression.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
|
|
|
struct OrWithExpression
|
|
|
|
{
|
2019-03-11 12:49:39 +00:00
|
|
|
OrWithExpression(const ASTFunction * or_function_, const IAST::Hash & expression_, const std::string & alias_);
|
2017-04-01 07:20:54 +00:00
|
|
|
bool operator<(const OrWithExpression & rhs) const;
|
|
|
|
|
2019-03-11 12:49:39 +00:00
|
|
|
const ASTFunction * or_function;
|
2017-04-01 07:20:54 +00:00
|
|
|
const IAST::Hash expression;
|
|
|
|
const std::string alias;
|
|
|
|
};
|
|
|
|
|
|
|
|
struct Equalities
|
|
|
|
{
|
|
|
|
std::vector<ASTFunction *> functions;
|
|
|
|
bool is_processed = false;
|
|
|
|
};
|
|
|
|
|
|
|
|
using DisjunctiveEqualityChainsMap = std::map<OrWithExpression, Equalities>;
|
|
|
|
using DisjunctiveEqualityChain = DisjunctiveEqualityChainsMap::value_type;
|
2015-02-18 09:43:36 +00:00
|
|
|
|
|
|
|
private:
|
2017-06-02 21:37:28 +00:00
|
|
|
/** Collect information about all the equations in the OR chains (not necessarily homogeneous).
|
|
|
|
* This information is grouped by the expression that is on the left side of the equation.
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
|
|
|
void collectDisjunctiveEqualityChains();
|
2015-02-18 11:57:44 +00:00
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/** Check that the set of equalities expr = x1, ..., expr = xN fulfills the following two requirements:
|
|
|
|
* 1. It's not too small
|
|
|
|
* 2. x1, ... xN have the same type
|
2017-04-01 07:20:54 +00:00
|
|
|
*/
|
|
|
|
bool mayOptimizeDisjunctiveEqualityChain(const DisjunctiveEqualityChain & chain) const;
|
2015-02-18 11:57:44 +00:00
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/// Insert the IN expression into the OR chain.
|
2017-04-01 07:20:54 +00:00
|
|
|
void addInExpression(const DisjunctiveEqualityChain & chain);
|
2015-02-19 14:55:47 +00:00
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/// Delete the equalities that were replaced by the IN expressions.
|
2017-04-01 07:20:54 +00:00
|
|
|
void cleanupOrExpressions();
|
2015-02-18 11:57:44 +00:00
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/// Delete OR expressions that have only one operand.
|
2017-04-01 07:20:54 +00:00
|
|
|
void fixBrokenOrExpressions();
|
2015-02-18 09:43:36 +00:00
|
|
|
|
2017-06-02 21:37:28 +00:00
|
|
|
/// Restore the original column order after optimization.
|
2017-04-01 07:20:54 +00:00
|
|
|
void reorderColumns();
|
2016-03-10 14:24:04 +00:00
|
|
|
|
2015-02-18 16:54:42 +00:00
|
|
|
private:
|
2017-04-01 07:20:54 +00:00
|
|
|
using ParentNodes = std::vector<IAST *>;
|
2019-03-11 12:49:39 +00:00
|
|
|
using FunctionParentMap = std::unordered_map<const IAST *, ParentNodes>;
|
|
|
|
using ColumnToPosition = std::unordered_map<const IAST *, size_t>;
|
2015-02-18 16:54:42 +00:00
|
|
|
|
2015-02-18 09:43:36 +00:00
|
|
|
private:
|
2017-04-01 07:20:54 +00:00
|
|
|
ASTSelectQuery * select_query;
|
2018-10-19 15:42:47 +00:00
|
|
|
const ExtractedSettings settings;
|
2017-06-02 21:37:28 +00:00
|
|
|
/// Information about the OR-chains inside the query.
|
2017-04-01 07:20:54 +00:00
|
|
|
DisjunctiveEqualityChainsMap disjunctive_equality_chains_map;
|
2017-06-02 21:37:28 +00:00
|
|
|
/// Number of processed OR-chains.
|
2017-04-01 07:20:54 +00:00
|
|
|
size_t processed_count = 0;
|
2017-06-02 21:37:28 +00:00
|
|
|
/// Parents of OR functions.
|
2017-04-01 07:20:54 +00:00
|
|
|
FunctionParentMap or_parent_map;
|
2017-06-02 21:37:28 +00:00
|
|
|
/// The position of each column.
|
2017-04-01 07:20:54 +00:00
|
|
|
ColumnToPosition column_to_position;
|
|
|
|
/// Set of nodes, that was visited.
|
|
|
|
std::unordered_set<void *> visited_nodes;
|
2015-02-18 09:43:36 +00:00
|
|
|
};
|
|
|
|
|
2015-02-18 17:02:51 +00:00
|
|
|
}
|