mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 15:42:02 +00:00
Add comments. Change projection restoring function.
This commit is contained in:
parent
12c8014e5c
commit
2c96b1c684
@ -130,29 +130,28 @@ String FunctionRestoreProjection::getName() const
|
||||
return name;
|
||||
}
|
||||
|
||||
bool FunctionRestoreProjection::isVariadic() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t FunctionRestoreProjection::getNumberOfArguments() const
|
||||
{
|
||||
return 3;
|
||||
return 0;
|
||||
}
|
||||
|
||||
DataTypePtr FunctionRestoreProjection::getReturnTypeImpl(const DataTypes & arguments) const
|
||||
{
|
||||
return arguments[0];
|
||||
return arguments[1];
|
||||
}
|
||||
|
||||
void FunctionRestoreProjection::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/)
|
||||
{
|
||||
const auto & projection_column = block.getByPosition(arguments[1]).column;
|
||||
const auto & initial_values_column = block.getByPosition(arguments[0]).column;
|
||||
const auto & override_values_column = block.getByPosition(arguments[2]).column;
|
||||
auto col_res = initial_values_column->cloneEmpty();
|
||||
size_t override_index = 0;
|
||||
for (size_t i = 0; i < initial_values_column->size(); ++i) {
|
||||
if (projection_column->getUInt8(i)) {
|
||||
col_res->insertFrom(*override_values_column, override_index++);
|
||||
} else {
|
||||
col_res->insertFrom(*initial_values_column, i);
|
||||
}
|
||||
const auto & projection_column = block.getByPosition(arguments[0]).column;
|
||||
auto col_res = block.getByPosition(arguments[1]).column->cloneEmpty();
|
||||
std::vector<size_t> override_indices(arguments.size() - 1, 0);
|
||||
for (size_t i = 0; i < projection_column->size(); ++i) {
|
||||
size_t argument_index = projection_column->getUInt8(i) + 1;
|
||||
col_res->insertFrom(*block.getByPosition(arguments[argument_index]).column, ++override_indices[argument_index]);
|
||||
}
|
||||
block.getByPosition(result).column = std::move(col_res);
|
||||
}
|
||||
|
@ -5,6 +5,10 @@
|
||||
|
||||
namespace DB {
|
||||
|
||||
/*
|
||||
* This function accepts one column and converts it to UInt8, replacing values, which evaluate to true, with 1, and values,
|
||||
* which evaluate to false with 0
|
||||
*/
|
||||
class FunctionOneOrZero final : public IFunction {
|
||||
public:
|
||||
static constexpr auto name = "one_or_zero";
|
||||
@ -15,6 +19,12 @@ public:
|
||||
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override;
|
||||
};
|
||||
|
||||
/*
|
||||
* FunctionProject accepts two columns: data column and projection column.
|
||||
* Projection column is a column of UInt8 values 0 and 1, which indicate the binary mask of rows, where to project.
|
||||
* This function builds a column of a smaller size, which contains values of the data column at the positions where
|
||||
* the projection column contained 1. The size of result column equals the count of ones in the projection column.
|
||||
*/
|
||||
class FunctionProject final : public IFunction {
|
||||
public:
|
||||
static constexpr auto name = "__inner_project__";
|
||||
@ -25,6 +35,10 @@ public:
|
||||
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override;
|
||||
};
|
||||
|
||||
/*
|
||||
* FunctionBuildProjectionComposition constructs the composition of two projection columns. The size of
|
||||
* second projection column should equal the count of ones in the first input projection column.
|
||||
*/
|
||||
class FunctionBuildProjectionComposition final : public IFunction {
|
||||
public:
|
||||
static constexpr auto name = "__inner_build_projection_composition__";
|
||||
@ -35,11 +49,16 @@ public:
|
||||
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override;
|
||||
};
|
||||
|
||||
/*
|
||||
* Accepts mapping column with values from range [0, N) and N more columns as arguments.
|
||||
* Forms a column by taking each value from the column whose number is given in the mapping column.
|
||||
*/
|
||||
class FunctionRestoreProjection final : public IFunction {
|
||||
public:
|
||||
static constexpr auto name = "__inner_restore_projection__";
|
||||
static FunctionPtr create(const Context &);
|
||||
String getName() const override;
|
||||
bool isVariadic() const override;
|
||||
size_t getNumberOfArguments() const override;
|
||||
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override;
|
||||
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override;
|
||||
|
@ -51,7 +51,7 @@ Names ExpressionAction::getNeededColumns() const
|
||||
ExpressionAction ExpressionAction::applyFunction(const FunctionBuilderPtr & function_,
|
||||
const std::vector<std::string> & argument_names_,
|
||||
std::string result_name_,
|
||||
const std::string & input_projection_expression)
|
||||
const std::string & input_row_projection_expression)
|
||||
{
|
||||
if (result_name_ == "")
|
||||
{
|
||||
@ -70,19 +70,21 @@ ExpressionAction ExpressionAction::applyFunction(const FunctionBuilderPtr & func
|
||||
a.result_name = result_name_;
|
||||
a.function_builder = function_;
|
||||
a.argument_names = argument_names_;
|
||||
a.input_projection_expression = input_projection_expression;
|
||||
a.input_row_projection_expression = input_row_projection_expression;
|
||||
return a;
|
||||
}
|
||||
|
||||
ExpressionAction ExpressionAction::addColumn(const ColumnWithTypeAndName & added_column_,
|
||||
const std::string & input_projection_expression)
|
||||
const std::string & input_row_projection_expression,
|
||||
const is_row_projection_complementary)
|
||||
{
|
||||
ExpressionAction a;
|
||||
a.type = ADD_COLUMN;
|
||||
a.result_name = added_column_.name;
|
||||
a.result_type = added_column_.type;
|
||||
a.added_column = added_column_.column;
|
||||
a.input_projection_expression = input_projection_expression;
|
||||
a.input_row_projection_expression = input_row_projection_expression;
|
||||
a.is_row_projection_complementary = is_row_projection_complementary;
|
||||
return a;
|
||||
}
|
||||
|
||||
@ -122,12 +124,12 @@ ExpressionAction ExpressionAction::project(const Names & projected_columns_)
|
||||
}
|
||||
|
||||
ExpressionAction ExpressionAction::measureInputRowsCount(const std::string & source_name,
|
||||
const std::string & output_projection_expression)
|
||||
const std::string & output_row_projection_expression)
|
||||
{
|
||||
ExpressionAction a;
|
||||
a.type = MEASURE_INPUT_ROWS_COUNT;
|
||||
a.source_name = source_name;
|
||||
a.output_projection_expression = output_projection_expression;
|
||||
a.output_row_projection_expression = output_row_projection_expression;
|
||||
return a;
|
||||
}
|
||||
|
||||
@ -300,7 +302,10 @@ void ExpressionAction::execute(Block & block, std::unordered_map<std::string, si
|
||||
{
|
||||
// std::cerr << "executing: " << toString() << std::endl;
|
||||
|
||||
size_t input_rows_count = input_rows_counts[input_projection_expression];
|
||||
size_t input_rows_count = input_rows_counts[input_row_projection_expression];
|
||||
if (is_row_projection_complementary) {
|
||||
input_rows_count = input_rows_counts[""] - input_rows_count;
|
||||
}
|
||||
|
||||
if (type == REMOVE_COLUMN || type == COPY_COLUMN)
|
||||
if (!block.has(source_name))
|
||||
@ -342,7 +347,7 @@ void ExpressionAction::execute(Block & block, std::unordered_map<std::string, si
|
||||
}
|
||||
}
|
||||
|
||||
input_rows_counts[output_projection_expression] = projection_size;
|
||||
input_rows_counts[output_row_projection_expression] = projection_size;
|
||||
|
||||
break;
|
||||
}
|
||||
|
@ -69,9 +69,10 @@ public:
|
||||
std::string result_name;
|
||||
DataTypePtr result_type;
|
||||
|
||||
/// For projections
|
||||
std::string input_projection_expression;
|
||||
std::string output_projection_expression;
|
||||
/// For conditional projections (projections on subset of rows)
|
||||
std::string input_row_projection_expression;
|
||||
bool is_row_projection_complementary = false;
|
||||
std::string output_row_projection_expression;
|
||||
|
||||
/// For ADD_COLUMN.
|
||||
ColumnPtr added_column;
|
||||
@ -95,16 +96,17 @@ public:
|
||||
/// If result_name_ == "", as name "function_name(arguments separated by commas) is used".
|
||||
static ExpressionAction applyFunction(
|
||||
const FunctionBuilderPtr & function_, const std::vector<std::string> & argument_names_, std::string result_name_ = "",
|
||||
const std::string & input_projection_expression = "");
|
||||
const std::string & input_row_projection_expression = "");
|
||||
|
||||
static ExpressionAction addColumn(const ColumnWithTypeAndName & added_column_,
|
||||
const std::string & input_projection_expression);
|
||||
const std::string & input_row_projection_expression,
|
||||
bool is_row_projection_complementary);
|
||||
static ExpressionAction removeColumn(const std::string & removed_name);
|
||||
static ExpressionAction copyColumn(const std::string & from_name, const std::string & to_name);
|
||||
static ExpressionAction project(const NamesWithAliases & projected_columns_);
|
||||
static ExpressionAction project(const Names & projected_columns_);
|
||||
static ExpressionAction measureInputRowsCount(const std::string & source_name,
|
||||
const std::string & output_projection_expression);
|
||||
const std::string & output_row_projection_expression);
|
||||
static ExpressionAction arrayJoin(const NameSet & array_joined_columns, bool array_join_is_left, const Context & context);
|
||||
static ExpressionAction ordinaryJoin(std::shared_ptr<const Join> join_, const NamesAndTypesList & columns_added_by_join_);
|
||||
|
||||
|
@ -2055,7 +2055,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries,
|
||||
ColumnWithTypeAndName fake_column;
|
||||
fake_column.name = projection_manipulator->getColumnName(node->getColumnName());
|
||||
fake_column.type = std::make_shared<DataTypeUInt8>();
|
||||
actions_stack.addAction(ExpressionAction::addColumn(fake_column, projection_manipulator->getProjectionExpression()));
|
||||
actions_stack.addAction(ExpressionAction::addColumn(fake_column, projection_manipulator->getProjectionExpression(), false));
|
||||
getActionsImpl(node->arguments->children.at(0), no_subqueries, only_consts, actions_stack,
|
||||
projection_manipulator);
|
||||
}
|
||||
@ -2069,7 +2069,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries,
|
||||
{
|
||||
actions_stack.addAction(ExpressionAction::addColumn(ColumnWithTypeAndName(
|
||||
ColumnConst::create(ColumnUInt8::create(1, 1), 1), std::make_shared<DataTypeUInt8>(),
|
||||
projection_manipulator->getColumnName(node->getColumnName())), projection_manipulator->getProjectionExpression()));
|
||||
projection_manipulator->getColumnName(node->getColumnName())), projection_manipulator->getProjectionExpression(), false));
|
||||
return;
|
||||
}
|
||||
|
||||
@ -2125,7 +2125,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries,
|
||||
{
|
||||
column.column = ColumnSet::create(1, set);
|
||||
|
||||
actions_stack.addAction(ExpressionAction::addColumn(column, projection_manipulator->getProjectionExpression()));
|
||||
actions_stack.addAction(ExpressionAction::addColumn(column, projection_manipulator->getProjectionExpression(), false));
|
||||
}
|
||||
|
||||
argument_types.push_back(column.type);
|
||||
@ -2255,7 +2255,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries,
|
||||
column.type = type;
|
||||
column.name = node->getColumnName();
|
||||
|
||||
actions_stack.addAction(ExpressionAction::addColumn(column, ""));
|
||||
actions_stack.addAction(ExpressionAction::addColumn(column, "", false));
|
||||
projection_manipulator->isAlreadyComputed(column.name);
|
||||
}
|
||||
else
|
||||
|
@ -152,7 +152,7 @@ void ConditionalTree::goToProjection(const std::string & field_name)
|
||||
}
|
||||
|
||||
void ConditionalTree::restoreColumn(
|
||||
const std::string & inital_values_name,
|
||||
const std::string & default_values_name,
|
||||
const std::string & new_values_name,
|
||||
const size_t levels_up,
|
||||
const std::string & result_name
|
||||
@ -168,8 +168,8 @@ void ConditionalTree::restoreColumn(
|
||||
scopes.addAction(ExpressionAction::applyFunction(
|
||||
function_builder,
|
||||
{
|
||||
getColumnNameByIndex(inital_values_name, target_node),
|
||||
getProjectionColumnName(target_node, current_node),
|
||||
getColumnNameByIndex(default_values_name, target_node),
|
||||
getColumnNameByIndex(new_values_name, current_node)
|
||||
},
|
||||
getColumnNameByIndex(result_name, target_node), getProjectionExpression()));
|
||||
@ -257,18 +257,13 @@ void AndOperatorProjectionAction::createZerosColumn()
|
||||
{
|
||||
scopes.addAction(ExpressionAction::addColumn(ColumnWithTypeAndName(
|
||||
ColumnUInt8::create(0, 1), std::make_shared<DataTypeUInt8>(), zeros_column_name),
|
||||
projection_manipulator->getProjectionExpression()));
|
||||
projection_manipulator->getProjectionExpression(), true));
|
||||
}
|
||||
}
|
||||
|
||||
void AndOperatorProjectionAction::preArgumentAction()
|
||||
{
|
||||
if (previous_argument_name.empty())
|
||||
{
|
||||
// Before processing first argument
|
||||
createZerosColumn();
|
||||
}
|
||||
else
|
||||
if (!previous_argument_name.empty())
|
||||
{
|
||||
// Before processing arguments starting from second to last
|
||||
if (auto * conditional_tree = typeid_cast<ConditionalTree *>(projection_manipulator.get())) {
|
||||
@ -297,6 +292,7 @@ void AndOperatorProjectionAction::preCalculation()
|
||||
},
|
||||
projection_manipulator->getColumnName(final_column),
|
||||
projection_manipulator->getProjectionExpression()));
|
||||
createZerosColumn();
|
||||
conditional_tree->restoreColumn(getZerosColumnName(), final_column,
|
||||
projection_levels_count, expression_name);
|
||||
conditional_tree->goUp(projection_levels_count);
|
||||
|
@ -14,6 +14,9 @@ extern const int CONDITIONAL_TREE_PARENT_NOT_FOUND;
|
||||
extern const int ILLEGAL_PROJECTION_MANIPULATOR;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is a base class for the ConditionalTree. Look at the description of ConditionalTree.
|
||||
*/
|
||||
struct ProjectionManipulatorBase {
|
||||
public:
|
||||
virtual bool isAlreadyComputed(const std::string & column_name) = 0;
|
||||
@ -27,6 +30,11 @@ public:
|
||||
|
||||
using ProjectionManipulatorPtr = std::shared_ptr<ProjectionManipulatorBase>;
|
||||
|
||||
/*
|
||||
* This is the default ProjectionManipulator. It is needed for backwards compatibility.
|
||||
* For the better understanding of what ProjectionManipulator does,
|
||||
* look at the description of ConditionalTree.
|
||||
*/
|
||||
struct DefaultProjectionManipulator : public ProjectionManipulatorBase {
|
||||
private:
|
||||
ScopeStack & scopes;
|
||||
@ -40,6 +48,40 @@ public:
|
||||
std::string getProjectionExpression() final;
|
||||
};
|
||||
|
||||
/*
|
||||
* ConditionalTree is a projection manipulator. It is used in ExpressionAnalyzer::getActionsImpl.
|
||||
* It is a helper class, which helps to build sequence of ExpressionAction instances -- actions, needed for
|
||||
* computation of expression. It represents the current state of a projection layer. That is, if we have an expression
|
||||
* f and g, we need to calculate f, afterwards we need to calculate g on the projection layer <f != 0>.
|
||||
* This projection layer is stored in the ConditionalTree. Also, it stores the tree of all projection layers, which
|
||||
* were seen before. If we have seen the projection layers <f != 0> and <f != 0 and g != 0>, the conditional tree will put
|
||||
* the second layer as a child to the first one.
|
||||
*
|
||||
* The description of what methods do:
|
||||
* 1) getColumnName -- constructs the name of the expression, which contains the information of the projection layer.
|
||||
* It is needed to make computed column name unique. That is, if we have an expression g and conditional layer
|
||||
* <f != 0>, it forms the name g<f != 0>
|
||||
*
|
||||
* 2) goToProjection -- accepts field name f and builds child projection layer with the additional condition
|
||||
* <f>. For instance, if we are on the projection layer a != 0 and the function accepts the expression b != 0,
|
||||
* it will build a projection layer <a != 0 and b != 0>, and remember that this layer is a child to a previous one.
|
||||
* Moreover, the function will store the actions to build the projection between these two layers in the corresponding
|
||||
* ScopeStack
|
||||
*
|
||||
* 3) restoreColumn(default_values_name, new_values_name, levels, result_name) -- stores action to restore calculated
|
||||
* 'new_values_name' column, to insert its values to the projection layer, which is 'levels' number of levels higher.
|
||||
*
|
||||
* 4) goUp -- goes several levels up in the conditional tree, raises the exception if we hit the root of the tree and
|
||||
* there are still some levels left to go.
|
||||
*
|
||||
* 5) isAlreadyComputed -- goes up to the root projection level and checks whether the expression is
|
||||
* already calculated somewhere in the higher projection level. If it is, we may just project it to the current
|
||||
* layer to have it computed in the current layer. In this case, the function stores all actions needed to compute
|
||||
* the projection: computes composition of projections and uses it to project the column. In the other case, if
|
||||
* the column is not computed on the higher level, the function returns false. It is used in getActionsImpl to
|
||||
* understand whether we need to scan the expression deeply, or can it be easily computed just with the projection
|
||||
* from one of the higher projection layers.
|
||||
*/
|
||||
struct ConditionalTree : public ProjectionManipulatorBase {
|
||||
private:
|
||||
struct Node {
|
||||
@ -79,9 +121,9 @@ public:
|
||||
void goToProjection(const std::string & field_name);
|
||||
|
||||
void restoreColumn(
|
||||
const std::string & inital_values_name,
|
||||
const std::string & default_values_name,
|
||||
const std::string & new_values_name,
|
||||
size_t levels_up,
|
||||
const size_t levels_up,
|
||||
const std::string & result_name
|
||||
);
|
||||
|
||||
@ -94,14 +136,31 @@ public:
|
||||
|
||||
using ConditionalTreePtr = std::shared_ptr<ConditionalTree>;
|
||||
|
||||
/*
|
||||
* ProjectionAction describes in what way should some specific function use the projection manipulator.
|
||||
* This class has two inherited classes: DefaultProjectionAction, which does nothing, and AndOperatorProjectionAction,
|
||||
* which represents how function "and" uses projection manipulator.
|
||||
*/
|
||||
class ProjectionActionBase {
|
||||
public:
|
||||
/*
|
||||
* What to do before scanning each function argument
|
||||
*/
|
||||
virtual void preArgumentAction() = 0;
|
||||
|
||||
/*
|
||||
* What to do after scanning each argument
|
||||
*/
|
||||
virtual void postArgumentAction(const std::string & argument_name) = 0;
|
||||
|
||||
/*
|
||||
* What to do after scanning all the arguments, before the computation
|
||||
*/
|
||||
virtual void preCalculation() = 0;
|
||||
|
||||
/*
|
||||
* Should default computation procedure be run or not
|
||||
*/
|
||||
virtual bool isCalculationRequired() = 0;
|
||||
|
||||
virtual ~ProjectionActionBase();
|
||||
@ -120,6 +179,9 @@ public:
|
||||
bool isCalculationRequired() final;
|
||||
};
|
||||
|
||||
/*
|
||||
* This is a specification of ProjectionAction specifically for the 'and' operation
|
||||
*/
|
||||
class AndOperatorProjectionAction : public ProjectionActionBase {
|
||||
private:
|
||||
ScopeStack & scopes;
|
||||
@ -140,15 +202,35 @@ public:
|
||||
const std::string & expression_name,
|
||||
const Context& context);
|
||||
|
||||
/*
|
||||
* Before scanning each argument, we should go to the next projection layer. For example, if the expression is
|
||||
* f and g and h, then before computing g we should project to <f != 0> and before computing h we should project to
|
||||
* <f != 0 and g != 0>
|
||||
*/
|
||||
void preArgumentAction() final;
|
||||
|
||||
/*
|
||||
* Stores the previous argument name
|
||||
*/
|
||||
void postArgumentAction(const std::string & argument_name) final;
|
||||
|
||||
/*
|
||||
* Restores the result column to the uppermost projection level. For example, if the expression is f and g and h,
|
||||
* we should restore h<f,g> to the main projection layer
|
||||
*/
|
||||
void preCalculation() final;
|
||||
|
||||
/*
|
||||
* After what is done in preCalculation, we do not need to run default calculation of 'and' operator. So, the
|
||||
* function returns false.
|
||||
*/
|
||||
bool isCalculationRequired() final;
|
||||
};
|
||||
|
||||
/*
|
||||
* This function accepts the operator name and returns its projection action. For example, for 'and' operator,
|
||||
* it returns the pointer to AndOperatorProjectionAction.
|
||||
*/
|
||||
ProjectionActionPtr getProjectionAction(const std::string & node_name,
|
||||
ScopeStack & scopes,
|
||||
ProjectionManipulatorPtr projection_manipulator,
|
||||
|
@ -248,7 +248,7 @@ struct Settings
|
||||
M(SettingUInt64, max_network_bytes, 0, "The maximum number of bytes (compressed) to receive or transmit over the network for execution of the query.") \
|
||||
M(SettingUInt64, max_network_bandwidth_for_user, 0, "The maximum speed of data exchange over the network in bytes per second for all concurrently running user queries. Zero means unlimited.")\
|
||||
M(SettingUInt64, max_network_bandwidth_for_all_users, 0, "The maximum speed of data exchange over the network in bytes per second for all concurrently running queries. Zero means unlimited.") \
|
||||
M(SettingUInt64, enable_conditional_computation, 0, "Enable conditional computations")
|
||||
M(SettingUInt64, enable_conditional_computation, 0, "Enable conditional computations") \
|
||||
|
||||
#define DECLARE(TYPE, NAME, DEFAULT, DESCRIPTION) \
|
||||
TYPE NAME {DEFAULT};
|
||||
|
@ -1008,7 +1008,7 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name
|
||||
/// This is temporary name for expression. TODO Invent the name more safely.
|
||||
const String new_type_name_column = '#' + new_type_name + "_column";
|
||||
out_expression->add(ExpressionAction::addColumn(
|
||||
{ DataTypeString().createColumnConst(1, new_type_name), std::make_shared<DataTypeString>(), new_type_name_column }, ""));
|
||||
{ DataTypeString().createColumnConst(1, new_type_name), std::make_shared<DataTypeString>(), new_type_name_column }, "", false));
|
||||
|
||||
const auto & function = FunctionFactory::instance().get("CAST", context);
|
||||
out_expression->add(ExpressionAction::applyFunction(
|
||||
|
Loading…
Reference in New Issue
Block a user