mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
Avoid removing columns used in sample expression in prewhere actions. [#CLICKHOUSE-3969]
This commit is contained in:
parent
8200827723
commit
6e41be3476
@ -2668,13 +2668,17 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ExpressionAnalyzer::appendPrewhere(ExpressionActionsChain & chain, bool only_types)
|
||||
bool ExpressionAnalyzer::appendPrewhere(ExpressionActionsChain & chain, bool only_types, const ASTPtr & sampling_expression)
|
||||
{
|
||||
assertSelect();
|
||||
|
||||
if (!select_query->prewhere_expression)
|
||||
return false;
|
||||
|
||||
Names required_sample_columns;
|
||||
if (sampling_expression)
|
||||
required_sample_columns = ExpressionAnalyzer(sampling_expression, context, nullptr, source_columns).getRequiredSourceColumns();
|
||||
|
||||
initChain(chain, source_columns);
|
||||
auto & step = chain.getLastStep();
|
||||
getRootActions(select_query->prewhere_expression, only_types, false, step.actions);
|
||||
@ -2682,6 +2686,15 @@ bool ExpressionAnalyzer::appendPrewhere(ExpressionActionsChain & chain, bool onl
|
||||
step.required_output.push_back(prewhere_column_name);
|
||||
step.can_remove_required_output.push_back(true);
|
||||
|
||||
/// Add required columns for sample expression to required output in order not to remove them after
|
||||
/// prewhere execution because sampling is executed after prewhere.
|
||||
/// TODO: add sampling execution to common chain.
|
||||
for (const auto & column : required_sample_columns)
|
||||
{
|
||||
step.required_output.push_back(column);
|
||||
step.can_remove_required_output.push_back(true);
|
||||
}
|
||||
|
||||
{
|
||||
/// Remove unused source_columns from prewhere actions.
|
||||
auto tmp_actions = std::make_shared<ExpressionActions>(source_columns, context);
|
||||
|
@ -142,7 +142,8 @@ public:
|
||||
bool appendArrayJoin(ExpressionActionsChain & chain, bool only_types);
|
||||
bool appendJoin(ExpressionActionsChain & chain, bool only_types);
|
||||
/// remove_filter is set in ExpressionActionsChain::finalize();
|
||||
bool appendPrewhere(ExpressionActionsChain & chain, bool only_types);
|
||||
/// sampling_expression is needed if sampling is used, in order not to remove the columns that are used in it.
|
||||
bool appendPrewhere(ExpressionActionsChain & chain, bool only_types, const ASTPtr & sampling_expression);
|
||||
bool appendWhere(ExpressionActionsChain & chain, bool only_types);
|
||||
bool appendGroupBy(ExpressionActionsChain & chain, bool only_types);
|
||||
void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types);
|
||||
|
@ -47,6 +47,7 @@
|
||||
#include <Common/typeid_cast.h>
|
||||
#include <Parsers/queryToString.h>
|
||||
#include <ext/map.h>
|
||||
#include <memory>
|
||||
|
||||
|
||||
namespace DB
|
||||
@ -63,6 +64,7 @@ namespace ErrorCodes
|
||||
extern const int LOGICAL_ERROR;
|
||||
extern const int NOT_IMPLEMENTED;
|
||||
extern const int PARAMETER_OUT_OF_BOUND;
|
||||
extern const int ARGUMENT_OUT_OF_BOUND;
|
||||
}
|
||||
|
||||
InterpreterSelectQuery::InterpreterSelectQuery(
|
||||
@ -279,7 +281,6 @@ BlockInputStreams InterpreterSelectQuery::executeWithMultipleStreams()
|
||||
return pipeline.streams;
|
||||
}
|
||||
|
||||
|
||||
InterpreterSelectQuery::AnalysisResult InterpreterSelectQuery::analyzeExpressions(QueryProcessingStage::Enum from_stage, bool dry_run)
|
||||
{
|
||||
AnalysisResult res;
|
||||
@ -305,7 +306,27 @@ InterpreterSelectQuery::AnalysisResult InterpreterSelectQuery::analyzeExpression
|
||||
chain.finalize();
|
||||
|
||||
if (has_prewhere)
|
||||
res.prewhere_info->remove_prewhere_column = chain.steps.at(0).can_remove_required_output.at(0);
|
||||
{
|
||||
const ExpressionActionsChain::Step & step = chain.steps.at(0);
|
||||
res.prewhere_info->remove_prewhere_column = step.can_remove_required_output.at(0);
|
||||
|
||||
Names columns_to_remove_after_sampling;
|
||||
for (size_t i = 1; i < step.required_output.size(); ++i)
|
||||
{
|
||||
if (step.can_remove_required_output[i])
|
||||
columns_to_remove_after_sampling.push_back(step.required_output[i]);
|
||||
}
|
||||
|
||||
if (!columns_to_remove_after_sampling.empty())
|
||||
{
|
||||
auto columns = res.prewhere_info->prewhere_actions->getSampleBlock().getNamesAndTypesList();
|
||||
ExpressionActionsPtr actions = std::make_shared<ExpressionActions>(columns, context);
|
||||
for (const auto & column : columns_to_remove_after_sampling)
|
||||
actions->add(ExpressionAction::removeColumn(column));
|
||||
|
||||
res.prewhere_info->after_sampling_actions = std::move(actions);
|
||||
}
|
||||
}
|
||||
if (has_where)
|
||||
res.remove_where_filter = chain.steps.at(where_step_num).can_remove_required_output.at(0);
|
||||
|
||||
@ -317,7 +338,8 @@ InterpreterSelectQuery::AnalysisResult InterpreterSelectQuery::analyzeExpression
|
||||
{
|
||||
ExpressionActionsChain chain(context);
|
||||
|
||||
if (query_analyzer->appendPrewhere(chain, !res.first_stage))
|
||||
ASTPtr sampling_expression = storage ? storage->getSamplingExpression() : nullptr;
|
||||
if (query_analyzer->appendPrewhere(chain, !res.first_stage, sampling_expression))
|
||||
{
|
||||
has_prewhere = true;
|
||||
|
||||
|
@ -343,6 +343,9 @@ public:
|
||||
/// Returns data path if storage supports it, empty string otherwise.
|
||||
virtual String getDataPath() const { return {}; }
|
||||
|
||||
/// Returns the sampling expression for the storage, or nullptr if there is none.
|
||||
virtual ASTPtr getSamplingExpression() const { return nullptr; }
|
||||
|
||||
protected:
|
||||
using ITableDeclaration::ITableDeclaration;
|
||||
using std::enable_shared_from_this<IStorage>::shared_from_this;
|
||||
|
@ -593,6 +593,10 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts(
|
||||
stream = std::make_shared<AddingConstColumnBlockInputStream<Float64>>(
|
||||
stream, std::make_shared<DataTypeFloat64>(), used_sample_factor, "_sample_factor");
|
||||
|
||||
if (query_info.prewhere_info && query_info.prewhere_info->after_sampling_actions)
|
||||
for (auto & stream : res)
|
||||
stream = std::make_shared<ExpressionBlockInputStream>(stream, query_info.prewhere_info->after_sampling_actions);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
|
@ -21,10 +21,12 @@ using PreparedSets = std::unordered_map<StringRange, SetPtr, StringRangePointers
|
||||
|
||||
struct PrewhereInfo
|
||||
{
|
||||
/// Ections which are executed in order to alias columns are used for prewhere actions.
|
||||
/// Actions which are executed in order to alias the columns that are used for prewhere actions.
|
||||
ExpressionActionsPtr alias_actions;
|
||||
/// Actions which are executed on block in order to get filter column for prewhere step.
|
||||
ExpressionActionsPtr prewhere_actions;
|
||||
/// Actions which are executed after sampling in order to remove unused columns.
|
||||
ExpressionActionsPtr after_sampling_actions;
|
||||
String prewhere_column_name;
|
||||
bool remove_prewhere_column = false;
|
||||
|
||||
|
@ -94,6 +94,8 @@ public:
|
||||
|
||||
String getDataPath() const override { return full_path; }
|
||||
|
||||
ASTPtr getSamplingExpression() const override { return data.sampling_expression; }
|
||||
|
||||
private:
|
||||
String path;
|
||||
String database_name;
|
||||
|
@ -193,6 +193,8 @@ public:
|
||||
|
||||
String getDataPath() const override { return full_path; }
|
||||
|
||||
ASTPtr getSamplingExpression() const override { return data.sampling_expression; }
|
||||
|
||||
private:
|
||||
/// Delete old parts from disk and from ZooKeeper.
|
||||
void clearOldPartsAndRemoveFromZK();
|
||||
|
Loading…
Reference in New Issue
Block a user