Remove filter column from HAVING when it is not needed.

This commit is contained in:
Nikolai Kochetov 2021-09-28 13:01:47 +03:00
parent 09cd955aa6
commit 1a4ee1f77f
8 changed files with 75 additions and 26 deletions

View File

@ -1481,18 +1481,15 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
const Settings & settings = context->getSettingsRef();
const ConstStoragePtr & storage = query_analyzer.storage();
bool finalized = false;
size_t where_step_num = 0;
ssize_t prewhere_step_num = -1;
ssize_t where_step_num = -1;
ssize_t having_step_num = -1;
auto finalize_chain = [&](ExpressionActionsChain & chain)
{
chain.finalize();
if (!finalized)
{
finalize(chain, where_step_num, query);
finalized = true;
}
finalize(chain, prewhere_step_num, where_step_num, having_step_num, query);
chain.clear();
};
@ -1523,6 +1520,8 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
if (auto actions = query_analyzer.appendPrewhere(chain, !first_stage, additional_required_columns_after_prewhere))
{
/// Prewhere is always the first one.
prewhere_step_num = 0;
prewhere_info = std::make_shared<PrewhereInfo>(actions, query.prewhere()->getColumnName());
if (allowEarlyConstantFolding(*prewhere_info->prewhere_actions, settings))
@ -1591,6 +1590,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
if (query_analyzer.appendHaving(chain, only_types || !second_stage))
{
having_step_num = chain.steps.size() - 1;
before_having = chain.getLastActions();
chain.addStep();
}
@ -1691,13 +1691,16 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
checkActions();
}
void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, size_t where_step_num, const ASTSelectQuery & query)
void ExpressionAnalysisResult::finalize(
const ExpressionActionsChain & chain,
ssize_t & prewhere_step_num,
ssize_t & where_step_num,
ssize_t & having_step_num,
const ASTSelectQuery & query)
{
size_t next_step_i = 0;
if (hasPrewhere())
if (prewhere_step_num >= 0)
{
const ExpressionActionsChain::Step & step = *chain.steps.at(next_step_i++);
const ExpressionActionsChain::Step & step = *chain.steps.at(prewhere_step_num);
prewhere_info->prewhere_actions->projectInput(false);
NameSet columns_to_remove;
@ -1710,12 +1713,21 @@ void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, si
}
columns_to_remove_after_prewhere = std::move(columns_to_remove);
prewhere_step_num = -1;
}
if (hasWhere())
if (where_step_num >= 0)
{
where_column_name = query.where()->getColumnName();
remove_where_filter = chain.steps.at(where_step_num)->required_output.find(where_column_name)->second;
where_step_num = -1;
}
if (having_step_num >= 0)
{
having_column_name = query.having()->getColumnName();
remove_having_filter = chain.steps.at(having_step_num)->required_output.find(having_column_name)->second;
having_step_num = -1;
}
}

View File

@ -229,6 +229,8 @@ struct ExpressionAnalysisResult
ActionsDAGPtr before_where;
ActionsDAGPtr before_aggregation;
ActionsDAGPtr before_having;
String having_column_name;
bool remove_having_filter = false;
ActionsDAGPtr before_window;
ActionsDAGPtr before_order_by;
ActionsDAGPtr before_limit_by;
@ -274,7 +276,12 @@ struct ExpressionAnalysisResult
void removeExtraColumns() const;
void checkActions() const;
void finalize(const ExpressionActionsChain & chain, size_t where_step_num, const ASTSelectQuery & query);
void finalize(
const ExpressionActionsChain & chain,
ssize_t & prewhere_step_num,
ssize_t & where_step_num,
ssize_t & having_step_num,
const ASTSelectQuery & query);
};
/// SelectQuery specific ExpressionAnalyzer part.

View File

@ -1248,7 +1248,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
{
bool final = !query.group_by_with_rollup && !query.group_by_with_cube;
executeTotalsAndHaving(
query_plan, expressions.hasHaving(), expressions.before_having, aggregate_overflow_row, final);
query_plan, expressions.hasHaving(), expressions.before_having, expressions.remove_having_filter, aggregate_overflow_row, final);
}
if (query.group_by_with_rollup)
@ -1262,11 +1262,11 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
throw Exception(
"WITH TOTALS and WITH ROLLUP or CUBE are not supported together in presence of HAVING",
ErrorCodes::NOT_IMPLEMENTED);
executeHaving(query_plan, expressions.before_having);
executeHaving(query_plan, expressions.before_having, expressions.remove_having_filter);
}
}
else if (expressions.hasHaving())
executeHaving(query_plan, expressions.before_having);
executeHaving(query_plan, expressions.before_having, expressions.remove_having_filter);
}
else if (query.group_by_with_totals || query.group_by_with_rollup || query.group_by_with_cube)
throw Exception("WITH TOTALS, ROLLUP or CUBE are not supported without aggregation", ErrorCodes::NOT_IMPLEMENTED);
@ -2133,10 +2133,10 @@ void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool
}
void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression)
void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter)
{
auto having_step
= std::make_unique<FilterStep>(query_plan.getCurrentDataStream(), expression, getSelectQuery().having()->getColumnName(), false);
= std::make_unique<FilterStep>(query_plan.getCurrentDataStream(), expression, getSelectQuery().having()->getColumnName(), remove_filter);
having_step->setStepDescription("HAVING");
query_plan.addStep(std::move(having_step));
@ -2144,7 +2144,7 @@ void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const Actions
void InterpreterSelectQuery::executeTotalsAndHaving(
QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool overflow_row, bool final)
QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool remove_filter, bool overflow_row, bool final)
{
const Settings & settings = context->getSettingsRef();
@ -2153,6 +2153,7 @@ void InterpreterSelectQuery::executeTotalsAndHaving(
overflow_row,
expression,
has_having ? getSelectQuery().having()->getColumnName() : "",
remove_filter,
settings.totals_mode,
settings.totals_auto_threshold,
final);

View File

@ -131,8 +131,8 @@ private:
void executeAggregation(
QueryPlan & query_plan, const ActionsDAGPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info);
void executeMergeAggregated(QueryPlan & query_plan, bool overflow_row, bool final);
void executeTotalsAndHaving(QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool overflow_row, bool final);
void executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression);
void executeTotalsAndHaving(QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool remove_filter, bool overflow_row, bool final);
void executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter);
static void executeExpression(QueryPlan & query_plan, const ActionsDAGPtr & expression, const std::string & description);
/// FIXME should go through ActionsDAG to behave as a proper function
void executeWindow(QueryPlan & query_plan);

View File

@ -30,6 +30,7 @@ TotalsHavingStep::TotalsHavingStep(
bool overflow_row_,
const ActionsDAGPtr & actions_dag_,
const std::string & filter_column_,
bool remove_filter_,
TotalsMode totals_mode_,
double auto_include_threshold_,
bool final_)
@ -38,11 +39,14 @@ TotalsHavingStep::TotalsHavingStep(
TotalsHavingTransform::transformHeader(
input_stream_.header,
actions_dag_.get(),
filter_column_,
remove_filter_,
final_),
getTraits(!filter_column_.empty()))
, overflow_row(overflow_row_)
, actions_dag(actions_dag_)
, filter_column_name(filter_column_)
, remove_filter(remove_filter_)
, totals_mode(totals_mode_)
, auto_include_threshold(auto_include_threshold_)
, final(final_)
@ -58,6 +62,7 @@ void TotalsHavingStep::transformPipeline(QueryPipeline & pipeline, const BuildQu
overflow_row,
expression_actions,
filter_column_name,
remove_filter,
totals_mode,
auto_include_threshold,
final);
@ -85,7 +90,10 @@ static String totalsModeToString(TotalsMode totals_mode, double auto_include_thr
void TotalsHavingStep::describeActions(FormatSettings & settings) const
{
String prefix(settings.offset, ' ');
settings.out << prefix << "Filter column: " << filter_column_name << '\n';
settings.out << prefix << "Filter column: " << filter_column_name;
if (remove_filter)
settings.out << " (removed)";
settings.out << '\n';
settings.out << prefix << "Mode: " << totalsModeToString(totals_mode, auto_include_threshold) << '\n';
if (actions_dag)

View File

@ -18,6 +18,7 @@ public:
bool overflow_row_,
const ActionsDAGPtr & actions_dag_,
const std::string & filter_column_,
bool remove_filter_,
TotalsMode totals_mode_,
double auto_include_threshold_,
bool final_);
@ -35,6 +36,7 @@ private:
bool overflow_row;
ActionsDAGPtr actions_dag;
String filter_column_name;
bool remove_filter;
TotalsMode totals_mode;
double auto_include_threshold;
bool final;

View File

@ -28,13 +28,22 @@ void finalizeChunk(Chunk & chunk)
chunk.setColumns(std::move(columns), num_rows);
}
Block TotalsHavingTransform::transformHeader(Block block, const ActionsDAG * expression, bool final)
Block TotalsHavingTransform::transformHeader(
Block block,
const ActionsDAG * expression,
const std::string & filter_column_name,
bool remove_filter,
bool final)
{
if (final)
finalizeBlock(block);
if (expression)
{
block = expression->updateHeader(std::move(block));
if (remove_filter)
block.erase(filter_column_name);
}
return block;
}
@ -44,13 +53,15 @@ TotalsHavingTransform::TotalsHavingTransform(
bool overflow_row_,
const ExpressionActionsPtr & expression_,
const std::string & filter_column_,
bool remove_filter_,
TotalsMode totals_mode_,
double auto_include_threshold_,
bool final_)
: ISimpleTransform(header, transformHeader(header, expression_ ? &expression_->getActionsDAG() : nullptr, final_), true)
: ISimpleTransform(header, transformHeader(header, expression_ ? &expression_->getActionsDAG() : nullptr, filter_column_, remove_filter_, final_), true)
, overflow_row(overflow_row_)
, expression(expression_)
, filter_column_name(filter_column_)
, remove_filter(remove_filter_)
, totals_mode(totals_mode_)
, auto_include_threshold(auto_include_threshold_)
, final(final_)
@ -67,6 +78,8 @@ TotalsHavingTransform::TotalsHavingTransform(
auto totals_header = finalized_header;
size_t num_rows = totals_header.rows();
expression->execute(totals_header, num_rows);
if (remove_filter)
totals_header.erase(filter_column_name);
outputs.emplace_back(totals_header, this);
}
else
@ -167,6 +180,8 @@ void TotalsHavingTransform::transform(Chunk & chunk)
}
expression->execute(finalized_block, num_rows);
if (remove_filter)
finalized_block.erase(filter_column_name);
auto columns = finalized_block.getColumns();
ColumnPtr filter_column_ptr = columns[filter_column_pos];
@ -270,6 +285,8 @@ void TotalsHavingTransform::prepareTotals()
size_t num_rows = totals.getNumRows();
auto block = finalized_header.cloneWithColumns(totals.detachColumns());
expression->execute(block, num_rows);
if (remove_filter)
block.erase(filter_column_name);
/// Note: after expression totals may have several rows if `arrayJoin` was used in expression.
totals = Chunk(block.getColumns(), num_rows);
}

View File

@ -28,6 +28,7 @@ public:
bool overflow_row_,
const ExpressionActionsPtr & expression_,
const std::string & filter_column_,
bool remove_filter_,
TotalsMode totals_mode_,
double auto_include_threshold_,
bool final_);
@ -39,7 +40,7 @@ public:
Status prepare() override;
void work() override;
static Block transformHeader(Block block, const ActionsDAG * expression, bool final);
static Block transformHeader(Block block, const ActionsDAG * expression, const std::string & filter_column_name, bool remove_filter, bool final);
protected:
void transform(Chunk & chunk) override;
@ -55,6 +56,7 @@ private:
bool overflow_row;
ExpressionActionsPtr expression;
String filter_column_name;
bool remove_filter;
TotalsMode totals_mode;
double auto_include_threshold;
bool final;