Merge pull request #29475 from ClickHouse/fix-29010

Remove filter column from HAVING when it is not needed.
This commit is contained in:
Kruglov Pavel 2021-09-29 13:03:27 +03:00 committed by GitHub
commit 14be2f31f5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 121 additions and 30 deletions

View File

@ -1482,18 +1482,15 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
const Settings & settings = context->getSettingsRef();
const ConstStoragePtr & storage = query_analyzer.storage();
bool finalized = false;
size_t where_step_num = 0;
ssize_t prewhere_step_num = -1;
ssize_t where_step_num = -1;
ssize_t having_step_num = -1;
auto finalize_chain = [&](ExpressionActionsChain & chain)
{
chain.finalize();
if (!finalized)
{
finalize(chain, where_step_num, query);
finalized = true;
}
finalize(chain, prewhere_step_num, where_step_num, having_step_num, query);
chain.clear();
};
@ -1524,6 +1521,8 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
if (auto actions = query_analyzer.appendPrewhere(chain, !first_stage, additional_required_columns_after_prewhere))
{
/// Prewhere is always the first one.
prewhere_step_num = 0;
prewhere_info = std::make_shared<PrewhereInfo>(actions, query.prewhere()->getColumnName());
if (allowEarlyConstantFolding(*prewhere_info->prewhere_actions, settings))
@ -1592,6 +1591,7 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
if (query_analyzer.appendHaving(chain, only_types || !second_stage))
{
having_step_num = chain.steps.size() - 1;
before_having = chain.getLastActions();
chain.addStep();
}
@ -1692,13 +1692,16 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
checkActions();
}
void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, size_t where_step_num, const ASTSelectQuery & query)
void ExpressionAnalysisResult::finalize(
const ExpressionActionsChain & chain,
ssize_t & prewhere_step_num,
ssize_t & where_step_num,
ssize_t & having_step_num,
const ASTSelectQuery & query)
{
size_t next_step_i = 0;
if (hasPrewhere())
if (prewhere_step_num >= 0)
{
const ExpressionActionsChain::Step & step = *chain.steps.at(next_step_i++);
const ExpressionActionsChain::Step & step = *chain.steps.at(prewhere_step_num);
prewhere_info->prewhere_actions->projectInput(false);
NameSet columns_to_remove;
@ -1711,12 +1714,21 @@ void ExpressionAnalysisResult::finalize(const ExpressionActionsChain & chain, si
}
columns_to_remove_after_prewhere = std::move(columns_to_remove);
prewhere_step_num = -1;
}
if (hasWhere())
if (where_step_num >= 0)
{
where_column_name = query.where()->getColumnName();
remove_where_filter = chain.steps.at(where_step_num)->required_output.find(where_column_name)->second;
where_step_num = -1;
}
if (having_step_num >= 0)
{
having_column_name = query.having()->getColumnName();
remove_having_filter = chain.steps.at(having_step_num)->required_output.find(having_column_name)->second;
having_step_num = -1;
}
}

View File

@ -229,6 +229,8 @@ struct ExpressionAnalysisResult
ActionsDAGPtr before_where;
ActionsDAGPtr before_aggregation;
ActionsDAGPtr before_having;
String having_column_name;
bool remove_having_filter = false;
ActionsDAGPtr before_window;
ActionsDAGPtr before_order_by;
ActionsDAGPtr before_limit_by;
@ -274,7 +276,12 @@ struct ExpressionAnalysisResult
void removeExtraColumns() const;
void checkActions() const;
void finalize(const ExpressionActionsChain & chain, size_t where_step_num, const ASTSelectQuery & query);
void finalize(
const ExpressionActionsChain & chain,
ssize_t & prewhere_step_num,
ssize_t & where_step_num,
ssize_t & having_step_num,
const ASTSelectQuery & query);
};
/// SelectQuery specific ExpressionAnalyzer part.

View File

@ -1248,7 +1248,7 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
{
bool final = !query.group_by_with_rollup && !query.group_by_with_cube;
executeTotalsAndHaving(
query_plan, expressions.hasHaving(), expressions.before_having, aggregate_overflow_row, final);
query_plan, expressions.hasHaving(), expressions.before_having, expressions.remove_having_filter, aggregate_overflow_row, final);
}
if (query.group_by_with_rollup)
@ -1262,11 +1262,11 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
throw Exception(
"WITH TOTALS and WITH ROLLUP or CUBE are not supported together in presence of HAVING",
ErrorCodes::NOT_IMPLEMENTED);
executeHaving(query_plan, expressions.before_having);
executeHaving(query_plan, expressions.before_having, expressions.remove_having_filter);
}
}
else if (expressions.hasHaving())
executeHaving(query_plan, expressions.before_having);
executeHaving(query_plan, expressions.before_having, expressions.remove_having_filter);
}
else if (query.group_by_with_totals || query.group_by_with_rollup || query.group_by_with_cube)
throw Exception("WITH TOTALS, ROLLUP or CUBE are not supported without aggregation", ErrorCodes::NOT_IMPLEMENTED);
@ -2133,10 +2133,10 @@ void InterpreterSelectQuery::executeMergeAggregated(QueryPlan & query_plan, bool
}
void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression)
void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter)
{
auto having_step
= std::make_unique<FilterStep>(query_plan.getCurrentDataStream(), expression, getSelectQuery().having()->getColumnName(), false);
= std::make_unique<FilterStep>(query_plan.getCurrentDataStream(), expression, getSelectQuery().having()->getColumnName(), remove_filter);
having_step->setStepDescription("HAVING");
query_plan.addStep(std::move(having_step));
@ -2144,7 +2144,7 @@ void InterpreterSelectQuery::executeHaving(QueryPlan & query_plan, const Actions
void InterpreterSelectQuery::executeTotalsAndHaving(
QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool overflow_row, bool final)
QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool remove_filter, bool overflow_row, bool final)
{
const Settings & settings = context->getSettingsRef();
@ -2153,6 +2153,7 @@ void InterpreterSelectQuery::executeTotalsAndHaving(
overflow_row,
expression,
has_having ? getSelectQuery().having()->getColumnName() : "",
remove_filter,
settings.totals_mode,
settings.totals_auto_threshold,
final);

View File

@ -131,8 +131,8 @@ private:
void executeAggregation(
QueryPlan & query_plan, const ActionsDAGPtr & expression, bool overflow_row, bool final, InputOrderInfoPtr group_by_info);
void executeMergeAggregated(QueryPlan & query_plan, bool overflow_row, bool final);
void executeTotalsAndHaving(QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool overflow_row, bool final);
void executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression);
void executeTotalsAndHaving(QueryPlan & query_plan, bool has_having, const ActionsDAGPtr & expression, bool remove_filter, bool overflow_row, bool final);
void executeHaving(QueryPlan & query_plan, const ActionsDAGPtr & expression, bool remove_filter);
static void executeExpression(QueryPlan & query_plan, const ActionsDAGPtr & expression, const std::string & description);
/// FIXME should go through ActionsDAG to behave as a proper function
void executeWindow(QueryPlan & query_plan);

View File

@ -30,6 +30,7 @@ TotalsHavingStep::TotalsHavingStep(
bool overflow_row_,
const ActionsDAGPtr & actions_dag_,
const std::string & filter_column_,
bool remove_filter_,
TotalsMode totals_mode_,
double auto_include_threshold_,
bool final_)
@ -38,11 +39,14 @@ TotalsHavingStep::TotalsHavingStep(
TotalsHavingTransform::transformHeader(
input_stream_.header,
actions_dag_.get(),
filter_column_,
remove_filter_,
final_),
getTraits(!filter_column_.empty()))
, overflow_row(overflow_row_)
, actions_dag(actions_dag_)
, filter_column_name(filter_column_)
, remove_filter(remove_filter_)
, totals_mode(totals_mode_)
, auto_include_threshold(auto_include_threshold_)
, final(final_)
@ -58,6 +62,7 @@ void TotalsHavingStep::transformPipeline(QueryPipelineBuilder & pipeline, const
overflow_row,
expression_actions,
filter_column_name,
remove_filter,
totals_mode,
auto_include_threshold,
final);
@ -85,7 +90,10 @@ static String totalsModeToString(TotalsMode totals_mode, double auto_include_thr
void TotalsHavingStep::describeActions(FormatSettings & settings) const
{
String prefix(settings.offset, ' ');
settings.out << prefix << "Filter column: " << filter_column_name << '\n';
settings.out << prefix << "Filter column: " << filter_column_name;
if (remove_filter)
settings.out << " (removed)";
settings.out << '\n';
settings.out << prefix << "Mode: " << totalsModeToString(totals_mode, auto_include_threshold) << '\n';
if (actions_dag)

View File

@ -18,6 +18,7 @@ public:
bool overflow_row_,
const ActionsDAGPtr & actions_dag_,
const std::string & filter_column_,
bool remove_filter_,
TotalsMode totals_mode_,
double auto_include_threshold_,
bool final_);
@ -35,6 +36,7 @@ private:
bool overflow_row;
ActionsDAGPtr actions_dag;
String filter_column_name;
bool remove_filter;
TotalsMode totals_mode;
double auto_include_threshold;
bool final;

View File

@ -28,13 +28,22 @@ void finalizeChunk(Chunk & chunk)
chunk.setColumns(std::move(columns), num_rows);
}
Block TotalsHavingTransform::transformHeader(Block block, const ActionsDAG * expression, bool final)
Block TotalsHavingTransform::transformHeader(
Block block,
const ActionsDAG * expression,
const std::string & filter_column_name,
bool remove_filter,
bool final)
{
if (final)
finalizeBlock(block);
if (expression)
{
block = expression->updateHeader(std::move(block));
if (remove_filter)
block.erase(filter_column_name);
}
return block;
}
@ -44,20 +53,19 @@ TotalsHavingTransform::TotalsHavingTransform(
bool overflow_row_,
const ExpressionActionsPtr & expression_,
const std::string & filter_column_,
bool remove_filter_,
TotalsMode totals_mode_,
double auto_include_threshold_,
bool final_)
: ISimpleTransform(header, transformHeader(header, expression_ ? &expression_->getActionsDAG() : nullptr, final_), true)
: ISimpleTransform(header, transformHeader(header, expression_ ? &expression_->getActionsDAG() : nullptr, filter_column_, remove_filter_, final_), true)
, overflow_row(overflow_row_)
, expression(expression_)
, filter_column_name(filter_column_)
, remove_filter(remove_filter_)
, totals_mode(totals_mode_)
, auto_include_threshold(auto_include_threshold_)
, final(final_)
{
if (!filter_column_name.empty())
filter_column_pos = outputs.front().getHeader().getPositionByName(filter_column_name);
finalized_header = getInputPort().getHeader();
finalizeBlock(finalized_header);
@ -67,10 +75,17 @@ TotalsHavingTransform::TotalsHavingTransform(
auto totals_header = finalized_header;
size_t num_rows = totals_header.rows();
expression->execute(totals_header, num_rows);
filter_column_pos = totals_header.getPositionByName(filter_column_name);
if (remove_filter)
totals_header.erase(filter_column_name);
outputs.emplace_back(totals_header, this);
}
else
{
if (!filter_column_name.empty())
filter_column_pos = finalized_header.getPositionByName(filter_column_name);
outputs.emplace_back(finalized_header, this);
}
/// Initialize current totals with initial state.
current_totals.reserve(header.columns());
@ -167,9 +182,11 @@ void TotalsHavingTransform::transform(Chunk & chunk)
}
expression->execute(finalized_block, num_rows);
ColumnPtr filter_column_ptr = finalized_block.getByPosition(filter_column_pos).column;
if (remove_filter)
finalized_block.erase(filter_column_name);
auto columns = finalized_block.getColumns();
ColumnPtr filter_column_ptr = columns[filter_column_pos];
ConstantFilterDescription const_filter_description(*filter_column_ptr);
if (const_filter_description.always_true)
@ -270,6 +287,8 @@ void TotalsHavingTransform::prepareTotals()
size_t num_rows = totals.getNumRows();
auto block = finalized_header.cloneWithColumns(totals.detachColumns());
expression->execute(block, num_rows);
if (remove_filter)
block.erase(filter_column_name);
/// Note: after expression totals may have several rows if `arrayJoin` was used in expression.
totals = Chunk(block.getColumns(), num_rows);
}

View File

@ -28,6 +28,7 @@ public:
bool overflow_row_,
const ExpressionActionsPtr & expression_,
const std::string & filter_column_,
bool remove_filter_,
TotalsMode totals_mode_,
double auto_include_threshold_,
bool final_);
@ -39,7 +40,7 @@ public:
Status prepare() override;
void work() override;
static Block transformHeader(Block block, const ActionsDAG * expression, bool final);
static Block transformHeader(Block block, const ActionsDAG * expression, const std::string & filter_column_name, bool remove_filter, bool final);
protected:
void transform(Chunk & chunk) override;
@ -55,6 +56,7 @@ private:
bool overflow_row;
ExpressionActionsPtr expression;
String filter_column_name;
bool remove_filter;
TotalsMode totals_mode;
double auto_include_threshold;
bool final;

View File

@ -0,0 +1,40 @@
drop table if exists test;
-- #29010
CREATE TABLE test
(
d DateTime,
a String,
b UInt64
)
ENGINE = MergeTree
PARTITION BY toDate(d)
ORDER BY d;
SELECT *
FROM (
SELECT
a,
max((d, b)).2 AS value
FROM test
GROUP BY rollup(a)
)
WHERE a <> '';
-- the same query, but after syntax optimization
SELECT
a,
value
FROM
(
SELECT
a,
max((d, b)).2 AS value
FROM test
GROUP BY a
WITH ROLLUP
HAVING a != ''
)
WHERE a != '';
drop table if exists test;