mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-17 21:24:28 +00:00
Refactor ExpressionActions.
This commit is contained in:
parent
fa8ed65c4d
commit
07c2730169
@ -156,7 +156,7 @@ private:
|
|||||||
/// This is needed to allow function execution over data.
|
/// This is needed to allow function execution over data.
|
||||||
/// It is safe because functions does not change column names, so index is unaffected.
|
/// It is safe because functions does not change column names, so index is unaffected.
|
||||||
/// It is temporary.
|
/// It is temporary.
|
||||||
friend struct ExpressionAction;
|
friend class ExpressionActions;
|
||||||
friend class ActionsDAG;
|
friend class ActionsDAG;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -523,7 +523,7 @@ ExpressionActions::ExpressionActions(const ColumnsWithTypeAndName & input_column
|
|||||||
|
|
||||||
ExpressionActions::~ExpressionActions() = default;
|
ExpressionActions::~ExpressionActions() = default;
|
||||||
|
|
||||||
void ExpressionActions::checkLimits(Block & block) const
|
void ExpressionActions::checkLimits(ExecutionContext & execution_context) const
|
||||||
{
|
{
|
||||||
if (settings.max_temporary_columns && block.columns() > settings.max_temporary_columns)
|
if (settings.max_temporary_columns && block.columns() > settings.max_temporary_columns)
|
||||||
throw Exception("Too many temporary columns: " + block.dumpNames()
|
throw Exception("Too many temporary columns: " + block.dumpNames()
|
||||||
@ -551,62 +551,6 @@ void ExpressionActions::checkLimits(Block & block) const
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ExpressionActions::addInput(const ColumnWithTypeAndName & column)
|
|
||||||
{
|
|
||||||
input_columns.emplace_back(column.name, column.type);
|
|
||||||
sample_block.insert(column);
|
|
||||||
}
|
|
||||||
|
|
||||||
void ExpressionActions::addInput(const NameAndTypePair & column)
|
|
||||||
{
|
|
||||||
addInput(ColumnWithTypeAndName(nullptr, column.type, column.name));
|
|
||||||
}
|
|
||||||
|
|
||||||
void ExpressionActions::add(const ExpressionAction & action, Names & out_new_columns)
|
|
||||||
{
|
|
||||||
addImpl(action, out_new_columns);
|
|
||||||
}
|
|
||||||
|
|
||||||
void ExpressionActions::add(const ExpressionAction & action)
|
|
||||||
{
|
|
||||||
Names new_names;
|
|
||||||
addImpl(action, new_names);
|
|
||||||
}
|
|
||||||
|
|
||||||
void ExpressionActions::addImpl(ExpressionAction action, Names & new_names)
|
|
||||||
{
|
|
||||||
if (!action.result_name.empty())
|
|
||||||
new_names.push_back(action.result_name);
|
|
||||||
|
|
||||||
/// Compiled functions are custom functions and they don't need building
|
|
||||||
if (action.type == ExpressionAction::APPLY_FUNCTION && !action.is_function_compiled)
|
|
||||||
{
|
|
||||||
if (sample_block.has(action.result_name))
|
|
||||||
throw Exception("Column '" + action.result_name + "' already exists", ErrorCodes::DUPLICATE_COLUMN);
|
|
||||||
|
|
||||||
ColumnsWithTypeAndName arguments(action.argument_names.size());
|
|
||||||
for (size_t i = 0; i < action.argument_names.size(); ++i)
|
|
||||||
{
|
|
||||||
if (!sample_block.has(action.argument_names[i]))
|
|
||||||
throw Exception("Unknown identifier: '" + action.argument_names[i] + "'", ErrorCodes::UNKNOWN_IDENTIFIER);
|
|
||||||
arguments[i] = sample_block.getByName(action.argument_names[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!action.function_base)
|
|
||||||
{
|
|
||||||
action.function_base = action.function_builder->build(arguments);
|
|
||||||
action.result_type = action.function_base->getReturnType();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (action.type == ExpressionAction::ADD_ALIASES)
|
|
||||||
for (const auto & name_with_alias : action.projection)
|
|
||||||
new_names.emplace_back(name_with_alias.second);
|
|
||||||
|
|
||||||
action.prepare(sample_block, settings, names_not_for_constant_folding);
|
|
||||||
actions.push_back(action);
|
|
||||||
}
|
|
||||||
|
|
||||||
void ExpressionActions::prependProjectInput()
|
void ExpressionActions::prependProjectInput()
|
||||||
{
|
{
|
||||||
actions.insert(actions.begin(), ExpressionAction::project(getRequiredColumns()));
|
actions.insert(actions.begin(), ExpressionAction::project(getRequiredColumns()));
|
||||||
@ -614,12 +558,33 @@ void ExpressionActions::prependProjectInput()
|
|||||||
|
|
||||||
void ExpressionActions::execute(Block & block, bool dry_run) const
|
void ExpressionActions::execute(Block & block, bool dry_run) const
|
||||||
{
|
{
|
||||||
|
ExecutionContext execution_context
|
||||||
|
{
|
||||||
|
.input_columns = block.data,
|
||||||
|
.num_rows = block.rows(),
|
||||||
|
};
|
||||||
|
|
||||||
|
execution_context.columns.reserve(num_columns);
|
||||||
|
|
||||||
|
ColumnNumbers inputs_to_remove;
|
||||||
|
inputs_to_remove.reserve(required_columns.size());
|
||||||
|
for (const auto & column : required_columns)
|
||||||
|
{
|
||||||
|
size_t pos = block.getPositionByName(column.name);
|
||||||
|
execution_context.columns.emplace_back(std::move(block.getByPosition(pos)));
|
||||||
|
|
||||||
|
if (!sample_block.has(column.name))
|
||||||
|
inputs_to_remove.emplace_back(pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
execution_context.columns.resize(num_columns);
|
||||||
|
|
||||||
for (const auto & action : actions)
|
for (const auto & action : actions)
|
||||||
{
|
{
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
action.execute(block, dry_run);
|
executeAction(action, execution_context, dry_run);
|
||||||
checkLimits(block);
|
checkLimits(execution_context);
|
||||||
}
|
}
|
||||||
catch (Exception & e)
|
catch (Exception & e)
|
||||||
{
|
{
|
||||||
@ -627,113 +592,112 @@ void ExpressionActions::execute(Block & block, bool dry_run) const
|
|||||||
throw;
|
throw;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::sort(inputs_to_remove.rbegin(), inputs_to_remove.rend());
|
||||||
|
for (auto input : inputs_to_remove)
|
||||||
|
block.erase(input);
|
||||||
|
|
||||||
|
for (const auto & action : actions)
|
||||||
|
{
|
||||||
|
if (!action.is_used_in_result)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
auto & column = execution_context.columns[action.result_position];
|
||||||
|
column.name = action.node->result_name;
|
||||||
|
|
||||||
|
if (block.has(action.node->result_name))
|
||||||
|
block.getByName(action.node->result_name) = std::move(column);
|
||||||
|
else
|
||||||
|
block.insert(std::move(column));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ExpressionActions::executeAction(const Action & action, ExecutionContext & execution_context, bool dry_run)
|
void ExpressionActions::executeAction(const Action & action, ExecutionContext & execution_context, bool dry_run)
|
||||||
{
|
{
|
||||||
|
auto & columns = execution_context.columns;
|
||||||
|
auto & num_rows = execution_context.num_rows;
|
||||||
|
|
||||||
switch (action.node->type)
|
switch (action.node->type)
|
||||||
{
|
{
|
||||||
case ActionsDAG::Type::FUNCTION:
|
case ActionsDAG::Type::FUNCTION:
|
||||||
{
|
{
|
||||||
ColumnNumbers arguments(argument_names.size());
|
auto & res_column = columns[action.result_position];
|
||||||
for (size_t i = 0; i < argument_names.size(); ++i)
|
if (res_column.type || res_column.column)
|
||||||
arguments[i] = block.getPositionByName(argument_names[i]);
|
throw Exception("Result column is not empty", ErrorCodes::LOGICAL_ERROR);
|
||||||
|
|
||||||
size_t num_columns_without_result = block.columns();
|
res_column.type = action.node->result_type;
|
||||||
block.insert({ nullptr, result_type, result_name});
|
/// Columns names are not used, avoid extra copy.
|
||||||
|
/// res_column.name = action.node->result_name;
|
||||||
|
|
||||||
ProfileEvents::increment(ProfileEvents::FunctionExecute);
|
ProfileEvents::increment(ProfileEvents::FunctionExecute);
|
||||||
if (is_function_compiled)
|
if (action.node->is_function_compiled)
|
||||||
ProfileEvents::increment(ProfileEvents::CompiledFunctionExecute);
|
ProfileEvents::increment(ProfileEvents::CompiledFunctionExecute);
|
||||||
function->execute(block, arguments, num_columns_without_result, input_rows_count, dry_run);
|
action.node->function->execute(columns, action.arguments, action.result_position, num_rows, dry_run);
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case ARRAY_JOIN:
|
case ActionsDAG::Type::ARRAY_JOIN:
|
||||||
{
|
{
|
||||||
auto source = block.getByName(source_name);
|
size_t array_join_key_pos = action.arguments.front();
|
||||||
block.erase(source_name);
|
auto array_join_key = columns[array_join_key_pos];
|
||||||
source.column = source.column->convertToFullColumnIfConst();
|
|
||||||
|
|
||||||
const ColumnArray * array = typeid_cast<const ColumnArray *>(source.column.get());
|
/// Remove array join argument in advance if it is not needed.
|
||||||
|
if (!action.to_remove.empty())
|
||||||
|
columns[array_join_key_pos] = {};
|
||||||
|
|
||||||
|
array_join_key.column = array_join_key.column->convertToFullColumnIfConst();
|
||||||
|
|
||||||
|
const ColumnArray * array = typeid_cast<const ColumnArray *>(array_join_key.column.get());
|
||||||
if (!array)
|
if (!array)
|
||||||
throw Exception("ARRAY JOIN of not array: " + source_name, ErrorCodes::TYPE_MISMATCH);
|
throw Exception("ARRAY JOIN of not array: " + action.node->result_name, ErrorCodes::TYPE_MISMATCH);
|
||||||
|
|
||||||
for (auto & column : block)
|
for (auto & column : columns)
|
||||||
|
if (column.column)
|
||||||
column.column = column.column->replicate(array->getOffsets());
|
column.column = column.column->replicate(array->getOffsets());
|
||||||
|
|
||||||
source.column = array->getDataPtr();
|
for (auto & column : execution_context.input_columns)
|
||||||
source.type = assert_cast<const DataTypeArray &>(*source.type).getNestedType();
|
if (column.column)
|
||||||
source.name = result_name;
|
column.column = column.column->replicate(array->getOffsets());
|
||||||
|
|
||||||
block.insert(std::move(source));
|
auto & res_column = columns[action.result_position];
|
||||||
|
|
||||||
|
res_column.column = array->getDataPtr();
|
||||||
|
res_column.type = assert_cast<const DataTypeArray &>(*array_join_key.type).getNestedType();
|
||||||
|
|
||||||
|
num_rows = res_column.column->size();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case PROJECT:
|
case ActionsDAG::Type::COLUMN:
|
||||||
{
|
{
|
||||||
Block new_block;
|
auto & res_column = columns[action.result_position];
|
||||||
|
res_column.column = action.node->column->cloneResized(num_rows);
|
||||||
for (const auto & elem : projection)
|
res_column.type = action.node->result_type;
|
||||||
{
|
|
||||||
const std::string & name = elem.first;
|
|
||||||
const std::string & alias = elem.second;
|
|
||||||
ColumnWithTypeAndName column = block.getByName(name);
|
|
||||||
if (!alias.empty())
|
|
||||||
column.name = alias;
|
|
||||||
new_block.insert(std::move(column));
|
|
||||||
}
|
|
||||||
|
|
||||||
block.swap(new_block);
|
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case ADD_ALIASES:
|
case ActionsDAG::Type::ALIAS:
|
||||||
{
|
{
|
||||||
for (const auto & elem : projection)
|
/// Do not care about names, they are empty.
|
||||||
{
|
columns[action.result_position] = columns[action.arguments.front()];
|
||||||
const std::string & name = elem.first;
|
|
||||||
const std::string & alias = elem.second;
|
|
||||||
const ColumnWithTypeAndName & column = block.getByName(name);
|
|
||||||
if (!alias.empty() && !block.has(alias))
|
|
||||||
block.insert({column.column, column.type, alias});
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
case REMOVE_COLUMN:
|
case ActionsDAG::Type::INPUT:
|
||||||
block.erase(source_name);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case ADD_COLUMN:
|
|
||||||
block.insert({ added_column->cloneResized(input_rows_count), result_type, result_name });
|
|
||||||
break;
|
|
||||||
|
|
||||||
case COPY_COLUMN:
|
|
||||||
if (can_replace && block.has(result_name))
|
|
||||||
{
|
{
|
||||||
auto & result = block.getByName(result_name);
|
throw Exception("Cannot execute INPUT action", ErrorCodes::LOGICAL_ERROR);
|
||||||
const auto & source = block.getByName(source_name);
|
|
||||||
result.type = source.type;
|
|
||||||
result.column = source.column;
|
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
const auto & source_column = block.getByName(source_name);
|
|
||||||
block.insert({source_column.column, source_column.type, result_name});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
break;
|
for (auto to_remove : action.to_remove)
|
||||||
}
|
columns[to_remove] = {};
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ExpressionActions::hasArrayJoin() const
|
bool ExpressionActions::hasArrayJoin() const
|
||||||
{
|
{
|
||||||
for (const auto & action : actions)
|
for (const auto & action : actions)
|
||||||
if (action.type == ExpressionAction::ARRAY_JOIN)
|
if (action.node->type == ActionsDAG::Type::ARRAY_JOIN)
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
@ -1013,86 +977,6 @@ std::string ExpressionActions::dumpActions() const
|
|||||||
return ss.str();
|
return ss.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
void ExpressionActions::optimizeArrayJoin()
|
|
||||||
{
|
|
||||||
const size_t none = actions.size();
|
|
||||||
size_t first_array_join = none;
|
|
||||||
|
|
||||||
/// Columns that need to be evaluated for arrayJoin.
|
|
||||||
/// Actions for adding them can not be moved to the left of the arrayJoin.
|
|
||||||
NameSet array_joined_columns;
|
|
||||||
|
|
||||||
/// Columns needed to evaluate arrayJoin or those that depend on it.
|
|
||||||
/// Actions to delete them can not be moved to the left of the arrayJoin.
|
|
||||||
NameSet array_join_dependencies;
|
|
||||||
|
|
||||||
for (size_t i = 0; i < actions.size(); ++i)
|
|
||||||
{
|
|
||||||
/// Do not move the action to the right of the projection (the more that they are not usually there).
|
|
||||||
if (actions[i].type == ExpressionAction::PROJECT)
|
|
||||||
break;
|
|
||||||
|
|
||||||
bool depends_on_array_join = false;
|
|
||||||
Names needed;
|
|
||||||
|
|
||||||
if (actions[i].type == ExpressionAction::ARRAY_JOIN)
|
|
||||||
{
|
|
||||||
depends_on_array_join = true;
|
|
||||||
needed = actions[i].getNeededColumns();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (first_array_join == none)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
needed = actions[i].getNeededColumns();
|
|
||||||
|
|
||||||
for (const auto & elem : needed)
|
|
||||||
{
|
|
||||||
if (array_joined_columns.count(elem))
|
|
||||||
{
|
|
||||||
depends_on_array_join = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (depends_on_array_join)
|
|
||||||
{
|
|
||||||
if (first_array_join == none)
|
|
||||||
first_array_join = i;
|
|
||||||
|
|
||||||
if (!actions[i].result_name.empty())
|
|
||||||
array_joined_columns.insert(actions[i].result_name);
|
|
||||||
|
|
||||||
array_join_dependencies.insert(needed.begin(), needed.end());
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
bool can_move = false;
|
|
||||||
|
|
||||||
if (actions[i].type == ExpressionAction::REMOVE_COLUMN)
|
|
||||||
{
|
|
||||||
/// If you delete a column that is not needed for arrayJoin (and those who depend on it), you can delete it before arrayJoin.
|
|
||||||
can_move = !array_join_dependencies.count(actions[i].source_name);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/// If the action does not delete the columns and does not depend on the result of arrayJoin, you can make it until arrayJoin.
|
|
||||||
can_move = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Move the current action to the position just before the first arrayJoin.
|
|
||||||
if (can_move)
|
|
||||||
{
|
|
||||||
/// Move the i-th element to the position `first_array_join`.
|
|
||||||
std::rotate(actions.begin() + first_array_join, actions.begin() + i, actions.begin() + i + 1);
|
|
||||||
++first_array_join;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ExpressionActionsPtr ExpressionActions::splitActionsBeforeArrayJoin(const NameSet & array_joined_columns)
|
ExpressionActionsPtr ExpressionActions::splitActionsBeforeArrayJoin(const NameSet & array_joined_columns)
|
||||||
{
|
{
|
||||||
/// Create new actions.
|
/// Create new actions.
|
||||||
|
@ -175,6 +175,8 @@ public:
|
|||||||
FunctionBasePtr function_base;
|
FunctionBasePtr function_base;
|
||||||
/// Prepared function which is used in function execution.
|
/// Prepared function which is used in function execution.
|
||||||
ExecutableFunctionPtr function;
|
ExecutableFunctionPtr function;
|
||||||
|
/// If function is a compiled statement.
|
||||||
|
bool is_function_compiled = false;
|
||||||
|
|
||||||
/// For COLUMN node and propagated constants.
|
/// For COLUMN node and propagated constants.
|
||||||
ColumnPtr column;
|
ColumnPtr column;
|
||||||
@ -231,35 +233,32 @@ private:
|
|||||||
using Node = ActionsDAG::Node;
|
using Node = ActionsDAG::Node;
|
||||||
using Index = ActionsDAG::Index;
|
using Index = ActionsDAG::Index;
|
||||||
|
|
||||||
struct Argument
|
|
||||||
{
|
|
||||||
size_t position;
|
|
||||||
bool can_remove;
|
|
||||||
};
|
|
||||||
|
|
||||||
using Arguments = std::vector<Argument>;
|
|
||||||
|
|
||||||
struct Action
|
struct Action
|
||||||
{
|
{
|
||||||
Node * node;
|
Node * node;
|
||||||
Arguments arguments;
|
ColumnNumbers arguments;
|
||||||
|
/// Columns which will be removed after actions is executed.
|
||||||
|
/// It is always a subset of arguments.
|
||||||
|
ColumnNumbers to_remove;
|
||||||
size_t result_position;
|
size_t result_position;
|
||||||
|
bool is_used_in_result;
|
||||||
};
|
};
|
||||||
|
|
||||||
using Actions = std::vector<Action>;
|
using Actions = std::vector<Action>;
|
||||||
|
|
||||||
struct ExecutionContext
|
struct ExecutionContext
|
||||||
{
|
{
|
||||||
ColumnsWithTypeAndName input_columns;
|
ColumnsWithTypeAndName & input_columns;
|
||||||
ColumnsWithTypeAndName columns;
|
ColumnsWithTypeAndName columns;
|
||||||
size_t num_rows;
|
size_t num_rows;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::list<Node> nodes;
|
std::list<Node> nodes;
|
||||||
Index index;
|
|
||||||
Actions actions;
|
Actions actions;
|
||||||
|
size_t num_columns;
|
||||||
|
|
||||||
NamesAndTypesList required_columns;
|
NamesAndTypesList required_columns;
|
||||||
|
Block sample_block;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
~ExpressionActions();
|
~ExpressionActions();
|
||||||
@ -283,7 +282,7 @@ public:
|
|||||||
|
|
||||||
/// Get a list of input columns.
|
/// Get a list of input columns.
|
||||||
Names getRequiredColumns() const;
|
Names getRequiredColumns() const;
|
||||||
const NamesAndTypesList & getRequiredColumnsWithTypes() const;
|
const NamesAndTypesList & getRequiredColumnsWithTypes() const { return required_columns; }
|
||||||
|
|
||||||
/// Execute the expression on the block. The block must contain all the columns returned by getRequiredColumns.
|
/// Execute the expression on the block. The block must contain all the columns returned by getRequiredColumns.
|
||||||
void execute(Block & block, bool dry_run = false) const;
|
void execute(Block & block, bool dry_run = false) const;
|
||||||
@ -291,7 +290,7 @@ public:
|
|||||||
bool hasArrayJoin() const;
|
bool hasArrayJoin() const;
|
||||||
|
|
||||||
/// Obtain a sample block that contains the names and types of result columns.
|
/// Obtain a sample block that contains the names and types of result columns.
|
||||||
const Block & getSampleBlock() const;
|
const Block & getSampleBlock() const { return sample_block; }
|
||||||
|
|
||||||
std::string dumpActions() const;
|
std::string dumpActions() const;
|
||||||
|
|
||||||
@ -310,9 +309,9 @@ private:
|
|||||||
std::shared_ptr<CompiledExpressionCache> compilation_cache;
|
std::shared_ptr<CompiledExpressionCache> compilation_cache;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void checkLimits(Block & block) const;
|
void checkLimits(ExecutionContext & execution_context) const;
|
||||||
|
|
||||||
void executeAction(const Action & action, ExecutionContext & execution_context, bool dry_run);
|
static void executeAction(const Action & action, ExecutionContext & execution_context, bool dry_run);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user