Add comments. Change projection restoring function.

This commit is contained in:
Tsarkova Anastasia 2018-04-29 15:49:58 +02:00
parent 12c8014e5c
commit 2c96b1c684
9 changed files with 147 additions and 44 deletions

View File

@ -130,29 +130,28 @@ String FunctionRestoreProjection::getName() const
return name;
}
/// The function accepts a mapping column followed by N data columns,
/// so the number of arguments is not fixed and it is declared variadic.
bool FunctionRestoreProjection::isVariadic() const
{
    return true;
}
/// Reports 0 as the argument count: the function is variadic (see isVariadic()),
/// so there is no fixed number of arguments to report.
/// NOTE(review): presumably the framework ignores this value for variadic functions -- confirm against IFunction.
size_t FunctionRestoreProjection::getNumberOfArguments() const
{
    return 0;
}
/// The result has the type of the first data column (arguments[1]);
/// arguments[0] is the mapping column and does not define the result type.
/// NOTE(review): assumes at least two arguments are passed and that all data columns
/// share one type -- neither is checked here.
DataTypePtr FunctionRestoreProjection::getReturnTypeImpl(const DataTypes & arguments) const
{
    return arguments[1];
}
/// Builds the result column row by row from the mapping column and the data columns.
///
/// arguments[0] is the mapping column; arguments[1], ..., arguments[N] are the N data columns.
/// For every row of the mapping column, its value k selects the k-th data column (arguments[k + 1]),
/// and the next not-yet-consumed value of that data column is appended to the result.
/// Every data column is therefore read sequentially with its own cursor.
///
/// NOTE(review): assumes at least two arguments and that every mapping value is less than N;
/// neither is validated here.
void FunctionRestoreProjection::executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t /*input_rows_count*/)
{
    const auto & projection_column = block.getByPosition(arguments[0]).column;
    auto col_res = block.getByPosition(arguments[1]).column->cloneEmpty();

    /// override_indices[k] is the position of the next value to take from the k-th data column.
    /// It is indexed by the 0-based data column number, not by the position in `arguments`.
    std::vector<size_t> override_indices(arguments.size() - 1, 0);

    const size_t rows = projection_column->size();
    for (size_t i = 0; i < rows; ++i)
    {
        const size_t data_column_index = projection_column->getUInt8(i);
        const auto & source_column = block.getByPosition(arguments[data_column_index + 1]).column;

        /// Post-increment: use the current cursor first, then advance it, so that row 0 is not skipped.
        col_res->insertFrom(*source_column, override_indices[data_column_index]++);
    }

    block.getByPosition(result).column = std::move(col_res);
}

View File

@ -5,6 +5,10 @@
namespace DB {
/*
* This function accepts one column and converts it to UInt8, replacing values, which evaluate to true, with 1, and values,
* which evaluate to false with 0
*/
class FunctionOneOrZero final : public IFunction {
public:
static constexpr auto name = "one_or_zero";
@ -15,6 +19,12 @@ public:
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override;
};
/*
* FunctionProject accepts two columns: data column and projection column.
* Projection column is a column of UInt8 values 0 and 1, which indicate the binary mask of rows, where to project.
* This function builds a column of a smaller size, which contains values of the data column at the positions where
* the projection column contained 1. The size of result column equals the count of ones in the projection column.
*/
class FunctionProject final : public IFunction {
public:
static constexpr auto name = "__inner_project__";
@ -25,6 +35,10 @@ public:
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override;
};
/*
* FunctionBuildProjectionComposition constructs the composition of two projection columns. The size of
* second projection column should equal the count of ones in the first input projection column.
*/
class FunctionBuildProjectionComposition final : public IFunction {
public:
static constexpr auto name = "__inner_build_projection_composition__";
@ -35,11 +49,16 @@ public:
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override;
};
/*
* Accepts mapping column with values from range [0, N) and N more columns as arguments.
* Forms a column by taking each value from the column whose number is given in the mapping column.
*/
class FunctionRestoreProjection final : public IFunction {
public:
static constexpr auto name = "__inner_restore_projection__";
static FunctionPtr create(const Context &);
String getName() const override;
bool isVariadic() const override;
size_t getNumberOfArguments() const override;
DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override;
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result, size_t input_rows_count) override;

View File

@ -51,7 +51,7 @@ Names ExpressionAction::getNeededColumns() const
ExpressionAction ExpressionAction::applyFunction(const FunctionBuilderPtr & function_,
const std::vector<std::string> & argument_names_,
std::string result_name_,
const std::string & input_projection_expression)
const std::string & input_row_projection_expression)
{
if (result_name_ == "")
{
@ -70,19 +70,21 @@ ExpressionAction ExpressionAction::applyFunction(const FunctionBuilderPtr & func
a.result_name = result_name_;
a.function_builder = function_;
a.argument_names = argument_names_;
a.input_projection_expression = input_projection_expression;
a.input_row_projection_expression = input_row_projection_expression;
return a;
}
/// Creates an ADD_COLUMN action that adds `added_column_` (its name, type and column) to the block.
///
/// `input_row_projection_expression` names the row projection whose row count is used as the
/// input row count when the action is executed.
/// If `is_row_projection_complementary` is true, the complement of that projection is used instead:
/// execute() takes the total row count (stored under the empty projection name) minus the projection's count.
ExpressionAction ExpressionAction::addColumn(const ColumnWithTypeAndName & added_column_,
    const std::string & input_row_projection_expression,
    bool is_row_projection_complementary)
{
    ExpressionAction a;
    a.type = ADD_COLUMN;
    a.result_name = added_column_.name;
    a.result_type = added_column_.type;
    a.added_column = added_column_.column;
    a.input_row_projection_expression = input_row_projection_expression;
    a.is_row_projection_complementary = is_row_projection_complementary;
    return a;
}
@ -122,12 +124,12 @@ ExpressionAction ExpressionAction::project(const Names & projected_columns_)
}
/// Creates a MEASURE_INPUT_ROWS_COUNT action.
///
/// `source_name` is the column the action reads; `output_row_projection_expression` is the key
/// under which execute() stores the measured projection size in the per-projection row-count map.
ExpressionAction ExpressionAction::measureInputRowsCount(const std::string & source_name,
    const std::string & output_row_projection_expression)
{
    ExpressionAction a;
    a.type = MEASURE_INPUT_ROWS_COUNT;
    a.source_name = source_name;
    a.output_row_projection_expression = output_row_projection_expression;
    return a;
}
@ -300,7 +302,10 @@ void ExpressionAction::execute(Block & block, std::unordered_map<std::string, si
{
// std::cerr << "executing: " << toString() << std::endl;
size_t input_rows_count = input_rows_counts[input_projection_expression];
size_t input_rows_count = input_rows_counts[input_row_projection_expression];
if (is_row_projection_complementary) {
input_rows_count = input_rows_counts[""] - input_rows_count;
}
if (type == REMOVE_COLUMN || type == COPY_COLUMN)
if (!block.has(source_name))
@ -342,7 +347,7 @@ void ExpressionAction::execute(Block & block, std::unordered_map<std::string, si
}
}
input_rows_counts[output_projection_expression] = projection_size;
input_rows_counts[output_row_projection_expression] = projection_size;
break;
}

View File

@ -69,9 +69,10 @@ public:
std::string result_name;
DataTypePtr result_type;
/// For projections
std::string input_projection_expression;
std::string output_projection_expression;
/// For conditional projections (projections on subset of rows)
std::string input_row_projection_expression;
bool is_row_projection_complementary = false;
std::string output_row_projection_expression;
/// For ADD_COLUMN.
ColumnPtr added_column;
@ -95,16 +96,17 @@ public:
/// If result_name_ == "", as name "function_name(arguments separated by commas) is used".
static ExpressionAction applyFunction(
const FunctionBuilderPtr & function_, const std::vector<std::string> & argument_names_, std::string result_name_ = "",
const std::string & input_projection_expression = "");
const std::string & input_row_projection_expression = "");
static ExpressionAction addColumn(const ColumnWithTypeAndName & added_column_,
const std::string & input_projection_expression);
const std::string & input_row_projection_expression,
bool is_row_projection_complementary);
static ExpressionAction removeColumn(const std::string & removed_name);
static ExpressionAction copyColumn(const std::string & from_name, const std::string & to_name);
static ExpressionAction project(const NamesWithAliases & projected_columns_);
static ExpressionAction project(const Names & projected_columns_);
static ExpressionAction measureInputRowsCount(const std::string & source_name,
const std::string & output_projection_expression);
const std::string & output_row_projection_expression);
static ExpressionAction arrayJoin(const NameSet & array_joined_columns, bool array_join_is_left, const Context & context);
static ExpressionAction ordinaryJoin(std::shared_ptr<const Join> join_, const NamesAndTypesList & columns_added_by_join_);

View File

@ -2055,7 +2055,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries,
ColumnWithTypeAndName fake_column;
fake_column.name = projection_manipulator->getColumnName(node->getColumnName());
fake_column.type = std::make_shared<DataTypeUInt8>();
actions_stack.addAction(ExpressionAction::addColumn(fake_column, projection_manipulator->getProjectionExpression()));
actions_stack.addAction(ExpressionAction::addColumn(fake_column, projection_manipulator->getProjectionExpression(), false));
getActionsImpl(node->arguments->children.at(0), no_subqueries, only_consts, actions_stack,
projection_manipulator);
}
@ -2069,7 +2069,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries,
{
actions_stack.addAction(ExpressionAction::addColumn(ColumnWithTypeAndName(
ColumnConst::create(ColumnUInt8::create(1, 1), 1), std::make_shared<DataTypeUInt8>(),
projection_manipulator->getColumnName(node->getColumnName())), projection_manipulator->getProjectionExpression()));
projection_manipulator->getColumnName(node->getColumnName())), projection_manipulator->getProjectionExpression(), false));
return;
}
@ -2125,7 +2125,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries,
{
column.column = ColumnSet::create(1, set);
actions_stack.addAction(ExpressionAction::addColumn(column, projection_manipulator->getProjectionExpression()));
actions_stack.addAction(ExpressionAction::addColumn(column, projection_manipulator->getProjectionExpression(), false));
}
argument_types.push_back(column.type);
@ -2255,7 +2255,7 @@ void ExpressionAnalyzer::getActionsImpl(const ASTPtr & ast, bool no_subqueries,
column.type = type;
column.name = node->getColumnName();
actions_stack.addAction(ExpressionAction::addColumn(column, ""));
actions_stack.addAction(ExpressionAction::addColumn(column, "", false));
projection_manipulator->isAlreadyComputed(column.name);
}
else

View File

@ -152,7 +152,7 @@ void ConditionalTree::goToProjection(const std::string & field_name)
}
void ConditionalTree::restoreColumn(
const std::string & inital_values_name,
const std::string & default_values_name,
const std::string & new_values_name,
const size_t levels_up,
const std::string & result_name
@ -168,8 +168,8 @@ void ConditionalTree::restoreColumn(
scopes.addAction(ExpressionAction::applyFunction(
function_builder,
{
getColumnNameByIndex(inital_values_name, target_node),
getProjectionColumnName(target_node, current_node),
getColumnNameByIndex(default_values_name, target_node),
getColumnNameByIndex(new_values_name, current_node)
},
getColumnNameByIndex(result_name, target_node), getProjectionExpression()));
@ -257,18 +257,13 @@ void AndOperatorProjectionAction::createZerosColumn()
{
scopes.addAction(ExpressionAction::addColumn(ColumnWithTypeAndName(
ColumnUInt8::create(0, 1), std::make_shared<DataTypeUInt8>(), zeros_column_name),
projection_manipulator->getProjectionExpression()));
projection_manipulator->getProjectionExpression(), true));
}
}
void AndOperatorProjectionAction::preArgumentAction()
{
if (previous_argument_name.empty())
{
// Before processing first argument
createZerosColumn();
}
else
if (!previous_argument_name.empty())
{
// Before processing arguments starting from second to last
if (auto * conditional_tree = typeid_cast<ConditionalTree *>(projection_manipulator.get())) {
@ -297,6 +292,7 @@ void AndOperatorProjectionAction::preCalculation()
},
projection_manipulator->getColumnName(final_column),
projection_manipulator->getProjectionExpression()));
createZerosColumn();
conditional_tree->restoreColumn(getZerosColumnName(), final_column,
projection_levels_count, expression_name);
conditional_tree->goUp(projection_levels_count);

View File

@ -14,6 +14,9 @@ extern const int CONDITIONAL_TREE_PARENT_NOT_FOUND;
extern const int ILLEGAL_PROJECTION_MANIPULATOR;
}
/*
* This is a base class for the ConditionalTree. Look at the description of ConditionalTree.
*/
struct ProjectionManipulatorBase {
public:
virtual bool isAlreadyComputed(const std::string & column_name) = 0;
@ -27,6 +30,11 @@ public:
using ProjectionManipulatorPtr = std::shared_ptr<ProjectionManipulatorBase>;
/*
* This is the default ProjectionManipulator. It is needed for backwards compatibility.
* For the better understanding of what ProjectionManipulator does,
* look at the description of ConditionalTree.
*/
struct DefaultProjectionManipulator : public ProjectionManipulatorBase {
private:
ScopeStack & scopes;
@ -40,6 +48,40 @@ public:
std::string getProjectionExpression() final;
};
/*
* ConditionalTree is a projection manipulator. It is used in ExpressionAnalyzer::getActionsImpl.
* It is a helper class, which helps to build sequence of ExpressionAction instances -- actions, needed for
* computation of expression. It represents the current state of a projection layer. That is, if we have an expression
* f and g, we need to calculate f, afterwards we need to calculate g on the projection layer <f != 0>.
* This projection layer is stored in the ConditionalTree. Also, it stores the tree of all projection layers which
* were seen before. If we have seen the projection layers <f != 0> and <f != 0 and g != 0>, the conditional tree will put
* the second layer as a child of the first one.
*
* The description of what methods do:
* 1) getColumnName -- constructs the name of the expression, which contains the information about the projection layer.
* It is needed to make computed column name unique. That is, if we have an expression g and conditional layer
* <f != 0>, it forms the name g<f != 0>
*
* 2) goToProjection -- accepts field name f and builds child projection layer with the additional condition
* <f>. For instance, if we are on the projection layer a != 0 and the function accepts the expression b != 0,
* it will build a projection layer <a != 0 and b != 0>, and remember that this layer is a child of the previous one.
* Moreover, the function will store the actions to build the projection between these two layers in the corresponding
* ScopeStack.
*
* 3) restoreColumn(default_values_name, new_values_name, levels, result_name) -- stores action to restore calculated
* 'new_values_name' column, to insert its values to the projection layer, which is 'levels' number of levels higher.
*
* 4) goUp -- goes several levels up in the conditional tree, raises an exception if we hit the root of the tree and
* there are still some levels left to go up.
*
* 5) isAlreadyComputed -- goes up to the root projection level and checks whether the expression is
* already calculated somewhere in the higher projection level. If it is, we may just project it to the current
* layer to have it computed in the current layer. In this case, the function stores all actions needed to compute
* the projection: computes composition of projections and uses it to project the column. In the other case, if
* the column is not computed on the higher level, the function returns false. It is used in getActionsImpl to
* understand whether we need to scan the expression deeply, or whether it can be easily computed just with the projection
* from one of the higher projection layers.
*/
struct ConditionalTree : public ProjectionManipulatorBase {
private:
struct Node {
@ -79,9 +121,9 @@ public:
void goToProjection(const std::string & field_name);
void restoreColumn(
const std::string & inital_values_name,
const std::string & default_values_name,
const std::string & new_values_name,
size_t levels_up,
const size_t levels_up,
const std::string & result_name
);
@ -94,14 +136,31 @@ public:
using ConditionalTreePtr = std::shared_ptr<ConditionalTree>;
/*
* ProjectionAction describes in what way should some specific function use the projection manipulator.
* This class has two inherited classes: DefaultProjectionAction, which does nothing, and AndOperatorProjectionAction,
* which represents how function "and" uses projection manipulator.
*/
class ProjectionActionBase {
public:
/*
* What to do before scanning each function argument
*/
virtual void preArgumentAction() = 0;
/*
* What to do after scanning each argument
*/
virtual void postArgumentAction(const std::string & argument_name) = 0;
/*
* What to do after scanning all the arguments, before the computation
*/
virtual void preCalculation() = 0;
/*
* Should default computation procedure be run or not
*/
virtual bool isCalculationRequired() = 0;
virtual ~ProjectionActionBase();
@ -120,6 +179,9 @@ public:
bool isCalculationRequired() final;
};
/*
* This is a specification of ProjectionAction specifically for the 'and' operation
*/
class AndOperatorProjectionAction : public ProjectionActionBase {
private:
ScopeStack & scopes;
@ -140,15 +202,35 @@ public:
const std::string & expression_name,
const Context& context);
/*
* Before scanning each argument, we should go to the next projection layer. For example, if the expression is
* f and g and h, then before computing g we should project to <f != 0> and before computing h we should project to
* <f != 0 and g != 0>
*/
void preArgumentAction() final;
/*
* Stores the previous argument name
*/
void postArgumentAction(const std::string & argument_name) final;
/*
* Restores the result column to the uppermost projection level. For example, if the expression is f and g and h,
* we should restore h<f,g> to the main projection layer
*/
void preCalculation() final;
/*
* After what is done in preCalculation, we do not need to run default calculation of 'and' operator. So, the
* function returns false.
*/
bool isCalculationRequired() final;
};
/*
* This function accepts the operator name and returns its projection action. For example, for 'and' operator,
* it returns the pointer to AndOperatorProjectionAction.
*/
ProjectionActionPtr getProjectionAction(const std::string & node_name,
ScopeStack & scopes,
ProjectionManipulatorPtr projection_manipulator,

View File

@ -248,7 +248,7 @@ struct Settings
M(SettingUInt64, max_network_bytes, 0, "The maximum number of bytes (compressed) to receive or transmit over the network for execution of the query.") \
M(SettingUInt64, max_network_bandwidth_for_user, 0, "The maximum speed of data exchange over the network in bytes per second for all concurrently running user queries. Zero means unlimited.")\
M(SettingUInt64, max_network_bandwidth_for_all_users, 0, "The maximum speed of data exchange over the network in bytes per second for all concurrently running queries. Zero means unlimited.") \
M(SettingUInt64, enable_conditional_computation, 0, "Enable conditional computations")
M(SettingUInt64, enable_conditional_computation, 0, "Enable conditional computations") \
#define DECLARE(TYPE, NAME, DEFAULT, DESCRIPTION) \
TYPE NAME {DEFAULT};

View File

@ -1008,7 +1008,7 @@ void MergeTreeData::createConvertExpression(const DataPartPtr & part, const Name
/// This is temporary name for expression. TODO Invent the name more safely.
const String new_type_name_column = '#' + new_type_name + "_column";
out_expression->add(ExpressionAction::addColumn(
{ DataTypeString().createColumnConst(1, new_type_name), std::make_shared<DataTypeString>(), new_type_name_column }, ""));
{ DataTypeString().createColumnConst(1, new_type_name), std::make_shared<DataTypeString>(), new_type_name_column }, "", false));
const auto & function = FunctionFactory::instance().get("CAST", context);
out_expression->add(ExpressionAction::applyFunction(