Try to use expression for KeyCondition from query plan.

This commit is contained in:
Nikolai Kochetov 2021-06-22 16:54:00 +03:00
parent 21e39e10ea
commit 47f130d39c
7 changed files with 104 additions and 10 deletions

View File

@ -473,6 +473,7 @@ class IColumn;
M(Bool, query_plan_enable_optimizations, true, "Apply optimizations to query plan", 0) \
M(UInt64, query_plan_max_optimizations_to_apply, 10000, "Limit the total number of optimizations applied to query plan. If zero, ignored. If limit reached, throw exception", 0) \
M(Bool, query_plan_filter_push_down, true, "Allow to push down filter by predicate query plan step", 0) \
M(Bool, query_plan_optimize_primary_key, true, "Analyze primary key using query plan (instead of AST)", 0) \
\
M(UInt64, limit, 0, "Limit on read rows from the most 'end' result for select query, default 0 means no limit length", 0) \
M(UInt64, offset, 0, "Offset on read rows from the most 'end' result for select query", 0) \

View File

@ -12,6 +12,8 @@ namespace QueryPlanOptimizations
/// This is the main function which optimizes the whole QueryPlan tree.
void optimizeTree(const QueryPlanOptimizationSettings & settings, QueryPlan::Node & root, QueryPlan::Nodes & nodes);
void optimizePrimaryKeyCondition(QueryPlan::Node & root);
/// Optimization is a function applied to QueryPlan::Node.
/// It can read and update subtree of specified node.
/// It returns the number of updated layers of the subtree if some change happened.

View File

@ -0,0 +1,53 @@
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
#include <Processors/QueryPlan/FilterStep.h>
#include <Processors/QueryPlan/ReadFromMergeTree.h>
#include <Processors/QueryPlan/SettingQuotaAndLimitsStep.h>
#include <Interpreters/ActionsDAG.h>
#include <stack>
namespace DB::QueryPlanOptimizations
{
/// Walks the whole plan and, for every FilterStep, passes the filter expression and the
/// filter column name to the ReadFromMergeTree step found directly below it
/// (optionally separated from the filter by a single SettingQuotaAndLimitsStep).
/// Nodes of the plan are not added, removed or reordered here.
void optimizePrimaryKeyCondition(QueryPlan::Node & root)
{
    /// Traversal state: a node and the index of the next child to descend into.
    struct Frame
    {
        QueryPlan::Node * node = nullptr;
        size_t next_child = 0;
    };

    std::stack<Frame> stack;
    stack.push({.node = &root});

    while (!stack.empty())
    {
        auto & frame = stack.top();

        /// Children are visited before the node itself.
        if (frame.next_child < frame.node->children.size())
        {
            auto * next_node = frame.node->children[frame.next_child];
            ++frame.next_child;
            stack.push({.node = next_node});
            continue;
        }

        if (auto * filter_step = typeid_cast<FilterStep *>(frame.node->step.get()))
        {
            /// Start from the filter's input; look through one SettingQuotaAndLimitsStep if it is there.
            auto * source_node = frame.node->children.at(0);
            if (typeid_cast<SettingQuotaAndLimitsStep *>(source_node->step.get()))
                source_node = source_node->children.at(0);

            if (auto * read_from_merge_tree = typeid_cast<ReadFromMergeTree *>(source_node->step.get()))
                read_from_merge_tree->addFilter(filter_step->getExpression(), filter_step->getFilterColumnName());
        }

        stack.pop();
    }
}
}

View File

@ -432,6 +432,7 @@ void QueryPlan::explainPipeline(WriteBuffer & buffer, const ExplainPipelineOptio
/// Runs the query plan optimizations on this plan.
/// The generic tree optimizations go first; the primary key condition pass runs
/// afterwards on the resulting tree.
void QueryPlan::optimize(const QueryPlanOptimizationSettings & optimization_settings)
{
    auto & plan_root = *root;

    QueryPlanOptimizations::optimizeTree(optimization_settings, plan_root, nodes);
    QueryPlanOptimizations::optimizePrimaryKeyCondition(plan_root);
}
}

View File

@ -788,16 +788,38 @@ ReadFromMergeTree::AnalysisResult ReadFromMergeTree::selectRangesToRead(MergeTre
// Build and check if primary key is used when necessary
const auto & primary_key = metadata_snapshot->getPrimaryKey();
Names primary_key_columns = primary_key.column_names;
KeyCondition key_condition(query_info.query, query_info.syntax_analyzer_result, query_info.sets, context, primary_key_columns, primary_key.expression);
std::optional<KeyCondition> key_condition;
if (settings.force_primary_key && key_condition.alwaysUnknownOrTrue())
if (settings.query_plan_optimize_primary_key)
{
ActionDAGNodes nodes;
if (prewhere_info)
{
const auto & node = prewhere_info->prewhere_actions->getActionsDAG().findInIndex(prewhere_info->prewhere_column_name);
nodes.nodes.push_back(&node);
}
if (added_filter)
{
const auto & node = added_filter->findInIndex(added_filter_column_name);
nodes.nodes.push_back(&node);
}
key_condition.emplace(std::move(nodes), query_info.sets, context, primary_key_columns, primary_key.expression);
}
else
{
key_condition.emplace(query_info.query, query_info.syntax_analyzer_result, query_info.sets, context, primary_key_columns, primary_key.expression);
}
if (settings.force_primary_key && key_condition->alwaysUnknownOrTrue())
{
throw Exception(
ErrorCodes::INDEX_NOT_USED,
"Primary key ({}) is not used and setting 'force_primary_key' is set.",
fmt::join(primary_key_columns, ", "));
}
LOG_DEBUG(log, "Key condition: {}", key_condition.toString());
LOG_DEBUG(log, "Key condition: {}", key_condition->toString());
const auto & select = query_info.query->as<ASTSelectQuery &>();
@ -806,7 +828,7 @@ ReadFromMergeTree::AnalysisResult ReadFromMergeTree::selectRangesToRead(MergeTre
max_block_numbers_to_read.get(), log, result.index_stats);
result.sampling = MergeTreeDataSelectExecutor::getSampling(
select, metadata_snapshot->getColumns().getAllPhysical(), parts, key_condition,
select, metadata_snapshot->getColumns().getAllPhysical(), parts, *key_condition,
data, metadata_snapshot, context, sample_factor_column_queried, log);
if (result.sampling.read_nothing)
@ -823,7 +845,7 @@ ReadFromMergeTree::AnalysisResult ReadFromMergeTree::selectRangesToRead(MergeTre
metadata_snapshot,
query_info,
context,
key_condition,
*key_condition,
reader_settings,
log,
requested_num_streams,

View File

@ -80,6 +80,12 @@ public:
void describeActions(JSONBuilder::JSONMap & map) const override;
void describeIndexes(JSONBuilder::JSONMap & map) const override;
/// Stores an additional filter for this step: the actions DAG and the name of its filter column.
/// A later call replaces whatever was stored by a previous one.
void addFilter(ActionsDAGPtr expression, std::string column_name)
{
    added_filter_column_name = std::move(column_name);
    added_filter = std::move(expression);
}
private:
const MergeTreeReaderSettings reader_settings;
@ -91,6 +97,9 @@ private:
SelectQueryInfo query_info;
PrewhereInfoPtr prewhere_info;
ActionsDAGPtr added_filter;
std::string added_filter_column_name;
StorageMetadataPtr metadata_snapshot;
StorageMetadataPtr metadata_snapshot_base;

View File

@ -201,7 +201,7 @@ public:
if (ast)
return typeid_cast<const ASTLiteral *>(ast);
else
return dag->type == ActionsDAG::ActionType::COLUMN;
return dag->column && isColumnConst(*dag->column);
}
ColumnWithTypeAndName getConstant() const
@ -310,10 +310,16 @@ public:
{
if (dag->column)
{
const auto * col_set = typeid_cast<const ColumnSet *>(dag->column.get());
auto set = col_set->getData();
if (set->isCreated())
return set;
const IColumn * col = dag->column.get();
if (const auto * col_const = typeid_cast<const ColumnConst *>(col))
col = &col_const->getDataColumn();
if (const auto * col_set = typeid_cast<const ColumnSet *>(col))
{
auto set = col_set->getData();
if (set->isCreated())
return set;
}
}
}