2020-11-16 14:57:56 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <Core/ColumnsWithTypeAndName.h>
|
|
|
|
#include <Core/NamesAndTypes.h>
|
|
|
|
#include <Core/Names.h>
|
|
|
|
|
|
|
|
#if !defined(ARCADIA_BUILD)
|
|
|
|
# include "config_core.h"
|
|
|
|
#endif
|
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
|
|
|
class ActionsDAG;
|
|
|
|
using ActionsDAGPtr = std::shared_ptr<ActionsDAG>;
|
|
|
|
|
|
|
|
class IExecutableFunction;
|
|
|
|
using ExecutableFunctionPtr = std::shared_ptr<IExecutableFunction>;
|
|
|
|
|
|
|
|
class IFunctionBase;
|
|
|
|
using FunctionBasePtr = std::shared_ptr<IFunctionBase>;
|
|
|
|
|
|
|
|
class IFunctionOverloadResolver;
|
|
|
|
using FunctionOverloadResolverPtr = std::shared_ptr<IFunctionOverloadResolver>;
|
|
|
|
|
|
|
|
class IDataType;
|
|
|
|
using DataTypePtr = std::shared_ptr<const IDataType>;
|
|
|
|
|
|
|
|
class Context;
|
|
|
|
class CompiledExpressionCache;
|
|
|
|
|
|
|
|
/// Directed acyclic graph of expressions.
|
|
|
|
/// This is an intermediate representation of actions which is usually built from expression list AST.
|
|
|
|
/// Each node of the DAG describes the calculation of a single column with known type, name, and constant value (if applicable).
|
|
|
|
///
|
|
|
|
/// DAG representation is useful in case we need to know explicit dependencies between actions.
|
|
|
|
/// It is helpful when it is needed to optimize actions, remove unused expressions, compile subexpressions,
|
|
|
|
/// split or merge parts of graph, calculate expressions on partial input.
|
|
|
|
///
|
|
|
|
/// Built DAG is used by ExpressionActions, which calculates expressions on block.
|
|
|
|
class ActionsDAG
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
|
|
|
|
/// Kind of action a DAG node represents.
enum class ActionType
{
    /// A column that has to be present in the input.
    INPUT,

    /// A constant column whose value is known.
    COLUMN,

    /// One more name for an existing column.
    ALIAS,

    /// The arrayJoin function. Kept as a separate kind because it changes the number of rows.
    ARRAY_JOIN,

    /// A function call (see Node::function_builder, Node::function_base and Node::function).
    FUNCTION,
};
|
|
|
|
|
|
|
|
struct Node
|
|
|
|
{
|
|
|
|
std::vector<Node *> children;
|
|
|
|
|
|
|
|
ActionType type;
|
|
|
|
|
|
|
|
std::string result_name;
|
|
|
|
DataTypePtr result_type;
|
|
|
|
|
|
|
|
FunctionOverloadResolverPtr function_builder;
|
|
|
|
/// Can be used after action was added to ExpressionActions if we want to get function signature or properties like monotonicity.
|
|
|
|
FunctionBasePtr function_base;
|
|
|
|
/// Prepared function which is used in function execution.
|
|
|
|
ExecutableFunctionPtr function;
|
|
|
|
/// If function is a compiled statement.
|
|
|
|
bool is_function_compiled = false;
|
|
|
|
|
|
|
|
/// For COLUMN node and propagated constants.
|
|
|
|
ColumnPtr column;
|
|
|
|
/// Some functions like `ignore()` always return constant but can't be replaced by constant it.
|
|
|
|
/// We calculate such constants in order to avoid unnecessary materialization, but prohibit it's folding.
|
|
|
|
bool allow_constant_folding = true;
|
|
|
|
};
|
|
|
|
|
|
|
|
/// Index is used to:
|
2021-01-13 08:38:19 +00:00
|
|
|
///   * find Node by its result_name
|
2020-11-16 14:57:56 +00:00
|
|
|
/// * specify order of columns in result
|
|
|
|
/// It represents a set of available columns.
|
|
|
|
/// Removing of column from index is equivalent to removing of column from final result.
|
|
|
|
///
|
|
|
|
/// DAG allows actions with duplicating result names. In this case index will point to last added Node.
|
|
|
|
/// It does not cause any problems as long as execution of actions does not depend on action names anymore.
|
|
|
|
///
|
|
|
|
/// Index is a list of nodes + [map: name -> list::iterator].
|
|
|
|
/// List is ordered, may contain nodes with same names, or one node several times.
|
|
|
|
class Index
|
|
|
|
{
|
|
|
|
private:
|
|
|
|
std::list<Node *> list;
|
|
|
|
/// Map key is a string_view to Node::result_name for node from value.
|
|
|
|
/// Map always point to existing node, so key always valid (nodes live longer then index).
|
|
|
|
std::unordered_map<std::string_view, std::list<Node *>::iterator> map;
|
|
|
|
|
|
|
|
public:
|
|
|
|
auto size() const { return list.size(); }
|
|
|
|
bool contains(std::string_view key) const { return map.count(key) != 0; }
|
|
|
|
|
|
|
|
std::list<Node *>::iterator begin() { return list.begin(); }
|
|
|
|
std::list<Node *>::iterator end() { return list.end(); }
|
|
|
|
std::list<Node *>::const_iterator begin() const { return list.begin(); }
|
|
|
|
std::list<Node *>::const_iterator end() const { return list.end(); }
|
2021-01-18 21:54:01 +00:00
|
|
|
std::list<Node *>::const_reverse_iterator rbegin() const { return list.rbegin(); }
|
|
|
|
std::list<Node *>::const_reverse_iterator rend() const { return list.rend(); }
|
2020-11-16 14:57:56 +00:00
|
|
|
std::list<Node *>::const_iterator find(std::string_view key) const
|
|
|
|
{
|
|
|
|
auto it = map.find(key);
|
|
|
|
if (it == map.end())
|
|
|
|
return list.end();
|
|
|
|
|
|
|
|
return it->second;
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Insert method doesn't check if map already have node with the same name.
|
|
|
|
/// If node with the same name exists, it is removed from map, but not list.
|
|
|
|
/// It is expected and used for project(), when result may have several columns with the same name.
|
|
|
|
void insert(Node * node) { map[node->result_name] = list.emplace(list.end(), node); }
|
2021-01-18 21:54:01 +00:00
|
|
|
void prepend(Node * node) { map[node->result_name] = list.emplace(list.begin(), node); }
|
2020-11-16 14:57:56 +00:00
|
|
|
|
|
|
|
/// If node with same name exists in index, replace it. Otherwise insert new node to index.
|
|
|
|
void replace(Node * node)
|
|
|
|
{
|
|
|
|
if (auto handle = map.extract(node->result_name))
|
|
|
|
{
|
|
|
|
handle.key() = node->result_name; /// Change string_view
|
|
|
|
*handle.mapped() = node;
|
|
|
|
map.insert(std::move(handle));
|
|
|
|
}
|
|
|
|
else
|
|
|
|
insert(node);
|
|
|
|
}
|
|
|
|
|
2020-11-26 16:16:44 +00:00
|
|
|
void remove(std::list<Node *>::iterator it)
|
|
|
|
{
|
|
|
|
auto map_it = map.find((*it)->result_name);
|
|
|
|
if (map_it != map.end() && map_it->second == it)
|
|
|
|
map.erase(map_it);
|
|
|
|
|
|
|
|
list.erase(it);
|
|
|
|
}
|
|
|
|
|
2020-11-16 14:57:56 +00:00
|
|
|
void swap(Index & other)
|
|
|
|
{
|
|
|
|
list.swap(other.list);
|
|
|
|
map.swap(other.map);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
2021-02-26 16:29:56 +00:00
|
|
|
/// NOTE: std::list is an implementation detail.
|
|
|
|
/// It allows to add and remove new nodes inplace without reallocation.
|
|
|
|
/// Raw pointers to nodes remain valid.
|
2020-11-16 14:57:56 +00:00
|
|
|
using Nodes = std::list<Node>;
|
2020-11-17 12:34:31 +00:00
|
|
|
using Inputs = std::vector<Node *>;
|
2020-11-16 14:57:56 +00:00
|
|
|
|
|
|
|
/// Settings stored inside an ActionsDAG. Every field defaults to zero / false.
struct ActionsSettings
{
    size_t max_temporary_columns{0};
    size_t max_temporary_non_const_columns{0};
    size_t min_count_to_compile_expression{0};

    bool compile_expressions{false};
    /// Switched on by ActionsDAG::projectInput().
    bool project_input{false};
    bool projected_output{false};
};
|
|
|
|
|
|
|
|
private:
|
|
|
|
Nodes nodes;
|
|
|
|
Index index;
|
2020-11-17 12:34:31 +00:00
|
|
|
Inputs inputs;
|
2020-11-16 14:57:56 +00:00
|
|
|
|
|
|
|
ActionsSettings settings;
|
|
|
|
|
|
|
|
#if USE_EMBEDDED_COMPILER
|
|
|
|
std::shared_ptr<CompiledExpressionCache> compilation_cache;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
public:
|
|
|
|
ActionsDAG() = default;
|
2020-11-26 16:16:44 +00:00
|
|
|
ActionsDAG(ActionsDAG &&) = default;
|
2020-11-16 14:57:56 +00:00
|
|
|
ActionsDAG(const ActionsDAG &) = delete;
|
|
|
|
ActionsDAG & operator=(const ActionsDAG &) = delete;
|
2020-11-17 12:39:41 +00:00
|
|
|
explicit ActionsDAG(const NamesAndTypesList & inputs_);
|
|
|
|
explicit ActionsDAG(const ColumnsWithTypeAndName & inputs_);
|
2020-11-16 14:57:56 +00:00
|
|
|
|
|
|
|
/// Read-only access to the list of nodes stored in this DAG.
const Nodes & getNodes() const { return nodes; }
|
|
|
|
/// Read-only access to the index (ordered set of result columns, see Index).
const Index & getIndex() const { return index; }
|
2020-11-17 12:34:31 +00:00
|
|
|
/// Read-only access to the vector of input nodes.
const Inputs & getInputs() const { return inputs; }
|
2020-11-16 14:57:56 +00:00
|
|
|
|
|
|
|
NamesAndTypesList getRequiredColumns() const;
|
|
|
|
ColumnsWithTypeAndName getResultColumns() const;
|
|
|
|
NamesAndTypesList getNamesAndTypesList() const;
|
|
|
|
|
|
|
|
Names getNames() const;
|
|
|
|
std::string dumpNames() const;
|
|
|
|
std::string dumpDAG() const;
|
|
|
|
|
2020-11-17 12:34:31 +00:00
|
|
|
const Node & addInput(std::string name, DataTypePtr type, bool can_replace = false);
|
|
|
|
const Node & addInput(ColumnWithTypeAndName column, bool can_replace = false);
|
2021-02-05 16:35:21 +00:00
|
|
|
const Node & addColumn(ColumnWithTypeAndName column, bool can_replace = false, bool materialize = false);
|
2020-11-16 14:57:56 +00:00
|
|
|
const Node & addAlias(const std::string & name, std::string alias, bool can_replace = false);
|
|
|
|
const Node & addArrayJoin(const std::string & source_name, std::string result_name);
|
|
|
|
const Node & addFunction(
|
|
|
|
const FunctionOverloadResolverPtr & function,
|
|
|
|
const Names & argument_names,
|
|
|
|
std::string result_name,
|
2021-02-05 19:22:11 +00:00
|
|
|
const Context & context,
|
|
|
|
bool can_replace = false);
|
2020-11-16 14:57:56 +00:00
|
|
|
|
|
|
|
/// Call addAlias several times.
|
|
|
|
void addAliases(const NamesWithAliases & aliases);
|
|
|
|
/// Add alias actions and remove unused columns from index. Also specify result columns order in index.
|
|
|
|
void project(const NamesWithAliases & projection);
|
|
|
|
|
|
|
|
/// If column is not in index, try to find it in nodes and insert back into index.
|
|
|
|
bool tryRestoreColumn(const std::string & column_name);
|
2021-02-04 11:44:00 +00:00
|
|
|
/// Find column in result. Remove it from index.
|
|
|
|
/// If the column is in inputs and has no dependent nodes, remove it from inputs too.
|
|
|
|
/// Return true if column was removed from inputs.
|
|
|
|
bool removeUnusedResult(const std::string & column_name);
|
2020-11-16 14:57:56 +00:00
|
|
|
|
|
|
|
/// Turn on the `project_input` flag in this DAG's settings.
void projectInput() { settings.project_input = true; }
|
|
|
|
void removeUnusedActions(const Names & required_names);
|
|
|
|
|
|
|
|
bool hasArrayJoin() const;
|
2020-11-27 09:50:56 +00:00
|
|
|
bool hasStatefulFunctions() const;
|
2021-01-28 11:00:24 +00:00
|
|
|
bool trivial() const; /// If actions has no functions or array join.
|
2020-11-16 14:57:56 +00:00
|
|
|
|
|
|
|
/// Read-only access to the settings of this DAG.
const ActionsSettings & getSettings() const { return settings; }
|
|
|
|
|
|
|
|
void compileExpressions();
|
|
|
|
|
|
|
|
ActionsDAGPtr clone() const;
|
|
|
|
|
2021-02-04 20:36:50 +00:00
|
|
|
/// Apply materialize() function to every output.
|
|
|
|
/// Also add aliases so the result names remain unchanged.
|
|
|
|
void addMaterializingOutputActions();
|
2020-11-17 14:51:05 +00:00
|
|
|
|
|
|
|
/// How columns of the source are matched to columns of the result.
enum class MatchColumnsMode
{
    /// Source and result must have the same number of columns.
    /// Columns are paired by position; their names are not taken into account.
    Position,

    /// Columns are looked up in the source by name. The source may contain extra columns.
    Name,
};
|
|
|
|
|
2020-11-20 16:52:50 +00:00
|
|
|
/// Create ActionsDAG which converts block structure from source to result.
|
|
|
|
/// It is needed to convert result from different sources to the same structure, e.g. for UNION query.
|
|
|
|
/// Conversion should be possible with only usage of CAST function and renames.
|
2020-11-17 14:51:05 +00:00
|
|
|
static ActionsDAGPtr makeConvertingActions(
|
|
|
|
const ColumnsWithTypeAndName & source,
|
|
|
|
const ColumnsWithTypeAndName & result,
|
|
|
|
MatchColumnsMode mode,
|
|
|
|
bool ignore_constant_values = false); /// Do not check that constants are same. Use value from result_header.
|
|
|
|
|
2021-02-04 14:25:11 +00:00
|
|
|
/// Create expression which add const column and then materialize it.
|
|
|
|
static ActionsDAGPtr makeAddingColumnActions(ColumnWithTypeAndName column);
|
|
|
|
|
2021-01-19 10:03:25 +00:00
|
|
|
/// Create ActionsDAG which represents expression equivalent to applying first and second actions consecutively.
|
2020-12-01 11:19:03 +00:00
|
|
|
/// Is used to replace `(first -> second)` expression chain to single `merge(first, second)` expression.
|
|
|
|
/// If first.settings.project_input is set, then outputs of `first` must include inputs of `second`.
|
|
|
|
/// Otherwise, any two actions may be combined.
|
|
|
|
static ActionsDAGPtr merge(ActionsDAG && first, ActionsDAG && second);
|
2020-11-26 16:16:44 +00:00
|
|
|
|
2021-02-04 11:44:00 +00:00
|
|
|
using SplitResult = std::pair<ActionsDAGPtr, ActionsDAGPtr>;
|
|
|
|
|
2021-01-19 10:03:25 +00:00
|
|
|
/// Split ActionsDAG into two DAGs, where first part contains all nodes from split_nodes and their children.
|
|
|
|
/// Execution of first then second parts on block is equivalent to execution of initial DAG.
|
|
|
|
/// First DAG and initial DAG have equal inputs, second DAG and initial DAG have equal index (outputs).
|
|
|
|
/// Second DAG inputs may contain fewer inputs than first DAG (but also include other columns).
|
2021-02-04 11:44:00 +00:00
|
|
|
SplitResult split(std::unordered_set<const Node *> split_nodes) const;
|
2021-01-18 14:59:59 +00:00
|
|
|
|
2021-01-19 10:03:25 +00:00
|
|
|
/// Splits actions into two parts. Returned first half may be swapped with ARRAY JOIN.
|
2021-02-04 11:44:00 +00:00
|
|
|
SplitResult splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const;
|
2021-01-19 10:03:25 +00:00
|
|
|
|
|
|
|
/// Splits actions into two parts. First part has minimal size sufficient for calculation of column_name.
|
|
|
|
/// Index of initial actions must contain column_name.
|
2021-02-04 11:44:00 +00:00
|
|
|
SplitResult splitActionsForFilter(const std::string & column_name) const;
|
2021-01-19 10:03:25 +00:00
|
|
|
|
2021-02-10 16:26:49 +00:00
|
|
|
/// Create actions which may calculate part of filter using only available_inputs.
|
|
|
|
/// If nothing may be calculated, returns nullptr.
|
|
|
|
/// Otherwise, return actions whose inputs are from available_inputs.
|
|
|
|
/// Returned actions add single column which may be used for filter.
|
|
|
|
/// Also, replace some nodes of current inputs with constant 1 in case they are filtered.
|
2021-02-10 17:47:48 +00:00
|
|
|
ActionsDAGPtr splitActionsForFilter(const std::string & filter_name, bool can_remove_filter, const Names & available_inputs);
|
2021-02-10 16:26:49 +00:00
|
|
|
|
2020-11-16 14:57:56 +00:00
|
|
|
private:
|
2021-02-05 16:35:21 +00:00
|
|
|
Node & addNode(Node node, bool can_replace = false, bool add_to_index = true);
|
2020-11-16 14:57:56 +00:00
|
|
|
Node & getNode(const std::string & name);
|
|
|
|
|
2020-11-18 09:08:51 +00:00
|
|
|
Node & addAlias(Node & child, std::string alias, bool can_replace);
|
|
|
|
Node & addFunction(
|
2020-11-17 14:51:05 +00:00
|
|
|
const FunctionOverloadResolverPtr & function,
|
|
|
|
Inputs children,
|
|
|
|
std::string result_name,
|
2021-02-05 16:35:21 +00:00
|
|
|
bool can_replace,
|
|
|
|
bool add_to_index = true);
|
2020-11-17 14:51:05 +00:00
|
|
|
|
2020-11-16 14:57:56 +00:00
|
|
|
/// Create a fresh default-constructed DAG that carries over this DAG's settings
/// (and, when built with USE_EMBEDDED_COMPILER, its compilation cache pointer).
ActionsDAGPtr cloneEmpty() const
{
    auto empty_dag = std::make_shared<ActionsDAG>();

#if USE_EMBEDDED_COMPILER
    empty_dag->compilation_cache = compilation_cache;
#endif

    empty_dag->settings = settings;
    return empty_dag;
}
|
|
|
|
|
|
|
|
void removeUnusedActions(const std::vector<Node *> & required_nodes);
|
2021-02-10 16:26:49 +00:00
|
|
|
void removeUnusedActions(bool allow_remove_inputs = true);
|
2020-11-16 14:57:56 +00:00
|
|
|
void addAliases(const NamesWithAliases & aliases, std::vector<Node *> & result_nodes);
|
|
|
|
|
|
|
|
void compileFunctions();
|
2021-02-20 16:13:36 +00:00
|
|
|
|
2021-02-20 17:42:06 +00:00
|
|
|
ActionsDAGPtr cloneActionsForConjunction(std::vector<Node *> conjunction);
|
2020-11-16 14:57:56 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
}
|