Avoid removing columns used in sample expression in prewhere actions. [#CLICKHOUSE-3969]

This commit is contained in:
Nikolai Kochetov 2018-09-07 18:13:08 +03:00
parent 8200827723
commit 6e41be3476
8 changed files with 55 additions and 6 deletions

View File

@ -2668,13 +2668,17 @@ bool ExpressionAnalyzer::appendJoin(ExpressionActionsChain & chain, bool only_ty
return true;
}
bool ExpressionAnalyzer::appendPrewhere(ExpressionActionsChain & chain, bool only_types)
bool ExpressionAnalyzer::appendPrewhere(ExpressionActionsChain & chain, bool only_types, const ASTPtr & sampling_expression)
{
assertSelect();
if (!select_query->prewhere_expression)
return false;
Names required_sample_columns;
if (sampling_expression)
required_sample_columns = ExpressionAnalyzer(sampling_expression, context, nullptr, source_columns).getRequiredSourceColumns();
initChain(chain, source_columns);
auto & step = chain.getLastStep();
getRootActions(select_query->prewhere_expression, only_types, false, step.actions);
@ -2682,6 +2686,15 @@ bool ExpressionAnalyzer::appendPrewhere(ExpressionActionsChain & chain, bool onl
step.required_output.push_back(prewhere_column_name);
step.can_remove_required_output.push_back(true);
/// Add required columns for sample expression to required output in order not to remove them after
/// prewhere execution because sampling is executed after prewhere.
/// TODO: add sampling execution to common chain.
for (const auto & column : required_sample_columns)
{
step.required_output.push_back(column);
step.can_remove_required_output.push_back(true);
}
{
/// Remove unused source_columns from prewhere actions.
auto tmp_actions = std::make_shared<ExpressionActions>(source_columns, context);

View File

@ -142,7 +142,8 @@ public:
bool appendArrayJoin(ExpressionActionsChain & chain, bool only_types);
bool appendJoin(ExpressionActionsChain & chain, bool only_types);
/// remove_filter is set in ExpressionActionsChain::finalize();
bool appendPrewhere(ExpressionActionsChain & chain, bool only_types);
/// sampling_expression is needed when sampling is used, so that the columns it uses are not removed.
bool appendPrewhere(ExpressionActionsChain & chain, bool only_types, const ASTPtr & sampling_expression);
bool appendWhere(ExpressionActionsChain & chain, bool only_types);
bool appendGroupBy(ExpressionActionsChain & chain, bool only_types);
void appendAggregateFunctionsArguments(ExpressionActionsChain & chain, bool only_types);

View File

@ -47,6 +47,7 @@
#include <Common/typeid_cast.h>
#include <Parsers/queryToString.h>
#include <ext/map.h>
#include <memory>
namespace DB
@ -63,6 +64,7 @@ namespace ErrorCodes
extern const int LOGICAL_ERROR;
extern const int NOT_IMPLEMENTED;
extern const int PARAMETER_OUT_OF_BOUND;
extern const int ARGUMENT_OUT_OF_BOUND;
}
InterpreterSelectQuery::InterpreterSelectQuery(
@ -279,7 +281,6 @@ BlockInputStreams InterpreterSelectQuery::executeWithMultipleStreams()
return pipeline.streams;
}
InterpreterSelectQuery::AnalysisResult InterpreterSelectQuery::analyzeExpressions(QueryProcessingStage::Enum from_stage, bool dry_run)
{
AnalysisResult res;
@ -305,7 +306,27 @@ InterpreterSelectQuery::AnalysisResult InterpreterSelectQuery::analyzeExpression
chain.finalize();
if (has_prewhere)
res.prewhere_info->remove_prewhere_column = chain.steps.at(0).can_remove_required_output.at(0);
{
const ExpressionActionsChain::Step & step = chain.steps.at(0);
res.prewhere_info->remove_prewhere_column = step.can_remove_required_output.at(0);
Names columns_to_remove_after_sampling;
for (size_t i = 1; i < step.required_output.size(); ++i)
{
if (step.can_remove_required_output[i])
columns_to_remove_after_sampling.push_back(step.required_output[i]);
}
if (!columns_to_remove_after_sampling.empty())
{
auto columns = res.prewhere_info->prewhere_actions->getSampleBlock().getNamesAndTypesList();
ExpressionActionsPtr actions = std::make_shared<ExpressionActions>(columns, context);
for (const auto & column : columns_to_remove_after_sampling)
actions->add(ExpressionAction::removeColumn(column));
res.prewhere_info->after_sampling_actions = std::move(actions);
}
}
if (has_where)
res.remove_where_filter = chain.steps.at(where_step_num).can_remove_required_output.at(0);
@ -317,7 +338,8 @@ InterpreterSelectQuery::AnalysisResult InterpreterSelectQuery::analyzeExpression
{
ExpressionActionsChain chain(context);
if (query_analyzer->appendPrewhere(chain, !res.first_stage))
ASTPtr sampling_expression = storage ? storage->getSamplingExpression() : nullptr;
if (query_analyzer->appendPrewhere(chain, !res.first_stage, sampling_expression))
{
has_prewhere = true;

View File

@ -343,6 +343,9 @@ public:
/// Returns data path if storage supports it, empty string otherwise.
virtual String getDataPath() const { return {}; }
/// Returns the sampling expression for the storage, or nullptr if there is none.
virtual ASTPtr getSamplingExpression() const { return nullptr; }
protected:
using ITableDeclaration::ITableDeclaration;
using std::enable_shared_from_this<IStorage>::shared_from_this;

View File

@ -593,6 +593,10 @@ BlockInputStreams MergeTreeDataSelectExecutor::readFromParts(
stream = std::make_shared<AddingConstColumnBlockInputStream<Float64>>(
stream, std::make_shared<DataTypeFloat64>(), used_sample_factor, "_sample_factor");
if (query_info.prewhere_info && query_info.prewhere_info->after_sampling_actions)
for (auto & stream : res)
stream = std::make_shared<ExpressionBlockInputStream>(stream, query_info.prewhere_info->after_sampling_actions);
return res;
}

View File

@ -21,10 +21,12 @@ using PreparedSets = std::unordered_map<StringRange, SetPtr, StringRangePointers
struct PrewhereInfo
{
/// Ections which are executed in order to alias columns are used for prewhere actions.
/// Actions which are executed in order to alias columns that are used for prewhere actions.
ExpressionActionsPtr alias_actions;
/// Actions which are executed on block in order to get filter column for prewhere step.
ExpressionActionsPtr prewhere_actions;
/// Actions which are executed after sampling in order to remove unused columns.
ExpressionActionsPtr after_sampling_actions;
String prewhere_column_name;
bool remove_prewhere_column = false;

View File

@ -94,6 +94,8 @@ public:
String getDataPath() const override { return full_path; }
ASTPtr getSamplingExpression() const override { return data.sampling_expression; }
private:
String path;
String database_name;

View File

@ -193,6 +193,8 @@ public:
String getDataPath() const override { return full_path; }
ASTPtr getSamplingExpression() const override { return data.sampling_expression; }
private:
/// Delete old parts from disk and from ZooKeeper.
void clearOldPartsAndRemoveFromZK();