Merge pull request #58554 from ClickHouse/try-to-always-push-down-prewhere-from-query-plan

Simplify optimize-push-to-prewhere from query plan
This commit is contained in:
Nikolai Kochetov 2024-02-12 12:52:39 +01:00 committed by GitHub
commit 09d3b3c0d0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
21 changed files with 488 additions and 496 deletions

View File

@ -1645,7 +1645,7 @@ void ActionsDAG::mergeNodes(ActionsDAG && second)
}
}
ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split_nodes) const
ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split_nodes, bool create_split_nodes_mapping) const
{
/// Split DAG into two parts.
/// (first_nodes, first_outputs) is a part which will have split_list in result.
@ -1779,13 +1779,13 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
}
/// Input from second DAG should also be in the first.
if (copy.type == ActionType::INPUT)
{
auto & input_copy = first_nodes.emplace_back(*cur.node);
assert(cur_data.to_first == nullptr);
cur_data.to_first = &input_copy;
new_inputs.push_back(cur.node);
}
// if (copy.type == ActionType::INPUT)
// {
// auto & input_copy = first_nodes.emplace_back(*cur.node);
// assert(cur_data.to_first == nullptr);
// cur_data.to_first = &input_copy;
// new_inputs.push_back(cur.node);
// }
}
else
{
@ -1804,11 +1804,12 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
/// If this node is needed in result, add it as input.
Node input_node;
input_node.type = ActionType::INPUT;
input_node.result_type = node.result_type;
input_node.result_name = node.result_name;
input_node.result_type = cur.node->result_type;
input_node.result_name = cur.node->result_name;
cur_data.to_second = &second_nodes.emplace_back(std::move(input_node));
new_inputs.push_back(cur.node);
if (cur.node->type != ActionType::INPUT)
new_inputs.push_back(cur.node);
}
}
}
@ -1824,14 +1825,29 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
for (const auto * input_node : inputs)
{
const auto & cur = data[input_node];
first_inputs.push_back(cur.to_first);
if (cur.to_first)
{
first_inputs.push_back(cur.to_first);
if (cur.to_second)
first_outputs.push_back(cur.to_first);
}
}
for (const auto * input : new_inputs)
{
const auto & cur = data[input];
second_inputs.push_back(cur.to_second);
first_outputs.push_back(cur.to_first);
if (cur.to_second)
second_inputs.push_back(cur.to_second);
if (cur.to_first)
first_outputs.push_back(cur.to_first);
}
for (const auto * input_node : inputs)
{
const auto & cur = data[input_node];
if (cur.to_second)
second_inputs.push_back(cur.to_second);
}
auto first_actions = std::make_shared<ActionsDAG>();
@ -1844,7 +1860,14 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
second_actions->outputs.swap(second_outputs);
second_actions->inputs.swap(second_inputs);
return {std::move(first_actions), std::move(second_actions)};
std::unordered_map<const Node *, const Node *> split_nodes_mapping;
if (create_split_nodes_mapping)
{
for (const auto * node : split_nodes)
split_nodes_mapping[node] = data[node].to_first;
}
return {std::move(first_actions), std::move(second_actions), std::move(split_nodes_mapping)};
}
ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const

View File

@ -327,13 +327,18 @@ public:
/// Merge current nodes with specified dag nodes
void mergeNodes(ActionsDAG && second);
using SplitResult = std::pair<ActionsDAGPtr, ActionsDAGPtr>;
/// Result of splitting an ActionsDAG into two consecutive parts.
/// Field order matters: callers unpack this struct with structured bindings
/// and build it with positional brace-initialisation.
struct SplitResult
{
/// Part that is executed first.
ActionsDAGPtr first;
/// Part that is executed on the result of `first`.
ActionsDAGPtr second;
/// Maps each requested split node of the original DAG to its copy in `first`.
/// Filled by split() only when `create_split_nodes_mapping` is true; empty otherwise.
std::unordered_map<const Node *, const Node *> split_nodes_mapping;
};
/// Split ActionsDAG into two DAGs, where first part contains all nodes from split_nodes and their children.
/// Execution of first then second parts on block is equivalent to execution of initial DAG.
/// First DAG and initial DAG have equal inputs, second DAG and initial DAG have equal outputs.
/// Second DAG inputs may contain fewer inputs than first DAG (but also include other columns).
SplitResult split(std::unordered_set<const Node *> split_nodes) const;
SplitResult split(std::unordered_set<const Node *> split_nodes, bool create_split_nodes_mapping = false) const;
/// Splits actions into two parts. Returned first half may be swapped with ARRAY JOIN.
SplitResult splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const;

View File

@ -66,7 +66,7 @@ size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan:
NameSet sort_columns;
for (const auto & col : sorting_step->getSortDescription())
sort_columns.insert(col.column_name);
auto [needed_for_sorting, unneeded_for_sorting] = expression_step->getExpression()->splitActionsBySortingDescription(sort_columns);
auto [needed_for_sorting, unneeded_for_sorting, _] = expression_step->getExpression()->splitActionsBySortingDescription(sort_columns);
// No calculations can be postponed.
if (unneeded_for_sorting->trivial())

View File

@ -5,68 +5,35 @@
#include <Storages/MergeTree/MergeTreeWhereOptimizer.h>
#include <Interpreters/ActionsDAG.h>
#include <Planner/ActionsChain.h>
#include <deque>
#include "Functions/FunctionsLogical.h"
#include "Functions/IFunctionAdaptors.h"
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
namespace
{
/// Reorders the outputs of `actions_dag` so that they start with the columns of
/// `expected_header`, in header order. Outputs that were not picked for any header
/// column are kept and appended afterwards in their original relative order.
/// Throws LOGICAL_ERROR if a header column has no output with the same name.
void matchDAGOutputNodesOrderWithHeader(ActionsDAGPtr & actions_dag, const Block & expected_header)
{
    auto & dag_outputs = actions_dag->getOutputs();

    /// For a repeated name the first output wins: emplace does not overwrite.
    std::unordered_map<std::string, const ActionsDAG::Node *> node_by_name;
    for (const auto * node : dag_outputs)
        node_by_name.emplace(node->result_name, node);

    ActionsDAG::NodeRawConstPtrs reordered;
    reordered.reserve(expected_header.columns());

    std::unordered_set<const ActionsDAG::Node *> placed;

    for (const auto & column : expected_header)
    {
        const auto found = node_by_name.find(column.name);
        if (found == node_by_name.end())
            throw Exception(ErrorCodes::LOGICAL_ERROR,
                "Invalid move to PREWHERE optimization. Cannot find column {} in output",
                column.name);

        reordered.push_back(found->second);
        placed.insert(found->second);
    }

    /// Keep the outputs the header did not ask for, after the header columns.
    for (const auto * node : dag_outputs)
    {
        if (!placed.contains(node))
            reordered.push_back(node);
    }

    dag_outputs = std::move(reordered);
}
}
namespace QueryPlanOptimizations
{
void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes)
/// Removes one entry from the outputs of `dag`: the node returned by
/// `dag.findInOutputs(name)`. Only the first occurrence of that node pointer is
/// erased; the node itself stays in the DAG.
///
/// `name` is taken by const reference so the string is not copied on every call.
static void removeFromOutput(ActionsDAG & dag, const std::string & name)
{
    const auto * node = &dag.findInOutputs(name);
    auto & outputs = dag.getOutputs();
    for (auto it = outputs.begin(); it != outputs.end(); ++it)
    {
        if (*it == node)
        {
            /// Stop right after erasing: the iterator is invalidated and one removal is all we want.
            outputs.erase(it);
            return;
        }
    }
}
void optimizePrewhere(Stack & stack, QueryPlan::Nodes &)
{
if (stack.size() < 3)
return;
const auto & frame = stack.back();
auto & frame = stack.back();
/** Assume that on stack there are at least 3 nodes:
*
@ -82,60 +49,26 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes)
if (storage_prewhere_info && storage_prewhere_info->prewhere_actions)
return;
const QueryPlan::Node * filter_node = (stack.rbegin() + 1)->node;
QueryPlan::Node * filter_node = (stack.rbegin() + 1)->node;
const auto * filter_step = typeid_cast<FilterStep *>(filter_node->step.get());
if (!filter_step)
return;
/** Collect required filter output columns.
* Collect output nodes that are mapped to input nodes.
* Collect input node to output nodes mapping.
*/
ColumnsWithTypeAndName required_columns_after_filter;
std::unordered_set<std::string> output_nodes_mapped_to_input;
std::unordered_map<std::string, std::vector<std::string>> input_node_to_output_names;
for (const auto * output_node : filter_step->getExpression()->getOutputs())
{
const auto * node_without_alias = output_node;
while (node_without_alias->type == ActionsDAG::ActionType::ALIAS)
node_without_alias = node_without_alias->children[0];
if (node_without_alias->type == ActionsDAG::ActionType::INPUT)
{
output_nodes_mapped_to_input.emplace(output_node->result_name);
auto output_names_it = input_node_to_output_names.find(node_without_alias->result_name);
if (output_names_it == input_node_to_output_names.end())
{
auto [insert_it, _] = input_node_to_output_names.emplace(node_without_alias->result_name, std::vector<std::string>());
output_names_it = insert_it;
}
output_names_it->second.push_back(output_node->result_name);
}
if (output_node->result_name == filter_step->getFilterColumnName() && filter_step->removesFilterColumn())
continue;
required_columns_after_filter.push_back(ColumnWithTypeAndName(output_node->result_type, output_node->result_name));
}
const auto & context = read_from_merge_tree->getContext();
const auto & settings = context->getSettingsRef();
if (!settings.allow_experimental_analyzer)
return;
const auto & table_expression_modifiers = read_from_merge_tree->getQueryInfo().table_expression_modifiers;
bool is_final = table_expression_modifiers && table_expression_modifiers->hasFinal();
bool is_final = read_from_merge_tree->isQueryWithFinal();
bool optimize_move_to_prewhere = settings.optimize_move_to_prewhere && (!is_final || settings.optimize_move_to_prewhere_if_final);
if (!optimize_move_to_prewhere)
return;
const auto & storage_snapshot = read_from_merge_tree->getStorageSnapshot();
if (table_expression_modifiers && table_expression_modifiers->hasSampleSizeRatio())
ColumnsWithTypeAndName required_columns_after_filter;
if (read_from_merge_tree->isQueryWithSampling())
{
const auto & sampling_key = storage_snapshot->getMetadataForQuery()->getSamplingKey();
const auto & sampling_source_columns = sampling_key.expression->getRequiredColumnsWithTypes();
@ -170,7 +103,8 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes)
filter_step->getFilterColumnName(),
read_from_merge_tree->getContext(),
is_final);
if (!optimize_result.has_value())
if (optimize_result.prewhere_nodes.empty())
return;
PrewhereInfoPtr prewhere_info;
@ -181,201 +115,85 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes)
prewhere_info->need_filter = true;
auto & prewhere_filter_actions = optimize_result->prewhere_filter_actions;
auto filter_expression = filter_step->getExpression();
const auto & filter_column_name = filter_step->getFilterColumnName();
ActionsChain actions_chain;
std::string prewere_filter_node_name = prewhere_filter_actions->getOutputs().at(0)->result_name;
actions_chain.addStep(std::make_unique<ActionsChainStep>(prewhere_filter_actions));
auto & filter_actions = optimize_result->filter_actions;
/** Merge tree where optimizer splits conjunctions in filter expression into 2 parts:
* 1. Filter expressions.
* 2. Prewhere filter expressions.
*
* There can be cases when all expressions are moved to PREWHERE, but it is not
* enough to produce required filter output columns.
*
* Example: SELECT (a AND b) AS cond FROM test_table WHERE cond AND c;
* In this example condition expressions `a`, `b`, `c` can move to PREWHERE, but PREWHERE will not contain expression `and(a, b)`.
* It will contain only `a`, `b`, `c`, `and(a, b, c)` expressions.
*
* In such scenario we need to create additional step to calculate `and(a, b)` expression after PREWHERE.
*/
bool need_additional_filter_after_prewhere = false;
if (!filter_actions)
if (optimize_result.fully_moved_to_prewhere && filter_step->removesFilterColumn())
{
/// Any node from PREWHERE filter actions can be used as possible output node
std::unordered_set<std::string> possible_prewhere_output_nodes;
for (const auto & node : prewhere_filter_actions->getNodes())
possible_prewhere_output_nodes.insert(node.result_name);
removeFromOutput(*filter_expression, filter_column_name);
auto & outputs = filter_expression->getOutputs();
size_t size = outputs.size();
outputs.insert(outputs.end(), optimize_result.prewhere_nodes.begin(), optimize_result.prewhere_nodes.end());
filter_expression->removeUnusedActions(false);
outputs.resize(size);
}
for (auto & required_column : required_columns_after_filter)
auto split_result = filter_step->getExpression()->split(optimize_result.prewhere_nodes, true);
/// This is a leak of abstraction.
/// Split actions may have inputs which are needed only for PREWHERE.
/// This is fine for ActionsDAG to have such a split, but it breaks defaults calculation.
///
/// See 00950_default_prewhere for example.
/// Table has structure `APIKey UInt8, SessionType UInt8` and default `OperatingSystem = SessionType+1`
/// For a query with `SELECT OperatingSystem WHERE APIKey = 42 AND SessionType = 42` we push everything to PREWHERE
/// and columns APIKey, SessionType are removed from inputs (cause only OperatingSystem is needed).
/// However, column OperatingSystem is calculated after PREWHERE stage, based on SessionType value.
/// If column SessionType is removed by PREWHERE actions, we use zero as default, and get a wrong result.
///
/// So, here we restore removed inputs for PREWHERE actions
{
std::unordered_set<const ActionsDAG::Node *> first_outputs(split_result.first->getOutputs().begin(), split_result.first->getOutputs().end());
for (const auto * input : split_result.first->getInputs())
{
if (!possible_prewhere_output_nodes.contains(required_column.name) &&
!output_nodes_mapped_to_input.contains(required_column.name))
if (!first_outputs.contains(input))
{
need_additional_filter_after_prewhere = true;
break;
split_result.first->getOutputs().push_back(input);
/// Add column to second actions as input.
/// Do not add it to result, so it would be removed.
split_result.second->addInput(input->result_name, input->result_type);
}
}
}
/** If there are additional filter actions after PREWHERE filter actions, we create filter actions dag using PREWHERE filter
* actions output columns as filter actions dag input columns.
* Then we merge this filter actions dag nodes with old filter step actions dag nodes, to reuse some expressions from
* PREWHERE filter actions.
*/
if (need_additional_filter_after_prewhere || filter_actions)
ActionsDAG::NodeRawConstPtrs conditions;
conditions.reserve(split_result.split_nodes_mapping.size());
for (const auto * condition : optimize_result.prewhere_nodes)
conditions.push_back(split_result.split_nodes_mapping.at(condition));
prewhere_info->prewhere_actions = std::move(split_result.first);
prewhere_info->remove_prewhere_column = optimize_result.fully_moved_to_prewhere && filter_step->removesFilterColumn();
if (conditions.size() == 1)
{
auto merged_filter_actions = std::make_shared<ActionsDAG>(actions_chain.getLastStepAvailableOutputColumns());
merged_filter_actions->getOutputs().clear();
merged_filter_actions->mergeNodes(std::move(*filter_step->getExpression()->clone()));
/// Add old filter step filter column to outputs
for (const auto & node : merged_filter_actions->getNodes())
{
if (node.result_name == filter_step->getFilterColumnName())
{
merged_filter_actions->getOutputs().push_back(&node);
break;
}
}
filter_actions = std::move(merged_filter_actions);
/// If there is filter after PREWHERE, we can ignore filtering during PREWHERE stage
prewhere_info->need_filter = false;
actions_chain.addStep(std::make_unique<ActionsChainStep>(filter_actions));
}
auto required_output_actions = std::make_shared<ActionsDAG>(required_columns_after_filter);
actions_chain.addStep(std::make_unique<ActionsChainStep>(required_output_actions));
actions_chain.finalize();
prewhere_filter_actions->projectInput(false);
auto & prewhere_actions_chain_node = actions_chain[0];
prewhere_info->prewhere_actions = std::move(prewhere_filter_actions);
prewhere_info->prewhere_column_name = prewere_filter_node_name;
prewhere_info->remove_prewhere_column = !prewhere_actions_chain_node->getChildRequiredOutputColumnsNames().contains(prewere_filter_node_name);
read_from_merge_tree->updatePrewhereInfo(prewhere_info);
QueryPlan::Node * replace_old_filter_node = nullptr;
bool remove_filter_node = false;
if (filter_actions)
{
filter_actions->projectInput(false);
/// Match dag output nodes with old filter step header
matchDAGOutputNodesOrderWithHeader(filter_actions, filter_step->getOutputStream().header);
auto & filter_actions_chain_node = actions_chain[1];
bool remove_filter_column = !filter_actions_chain_node->getChildRequiredOutputColumnsNames().contains(filter_step->getFilterColumnName());
auto after_prewhere_filter_step = std::make_unique<FilterStep>(read_from_merge_tree->getOutputStream(),
filter_actions,
filter_step->getFilterColumnName(),
remove_filter_column);
auto & node = nodes.emplace_back();
node.children.emplace_back(frame.node);
node.step = std::move(after_prewhere_filter_step);
replace_old_filter_node = &node;
prewhere_info->prewhere_column_name = conditions.front()->result_name;
prewhere_info->prewhere_actions->getOutputs().push_back(conditions.front());
}
else
{
auto rename_actions_dag = std::make_shared<ActionsDAG>(read_from_merge_tree->getOutputStream().header.getColumnsWithTypeAndName());
bool apply_rename_step = false;
prewhere_info->remove_prewhere_column = true;
ActionsDAG::NodeRawConstPtrs updated_outputs;
/** If in output after read from merge tree there are column names without aliases,
* apply old filter step aliases to them.
*/
for (const auto * output_node : rename_actions_dag->getOutputs())
{
const auto alias_it = input_node_to_output_names.find(output_node->result_name);
if (alias_it == input_node_to_output_names.end())
{
updated_outputs.push_back(output_node);
continue;
}
for (auto & output_name : alias_it->second)
{
if (output_name == output_node->result_name)
{
updated_outputs.push_back(output_node);
continue;
}
updated_outputs.push_back(&rename_actions_dag->addAlias(*output_node, output_name));
apply_rename_step = true;
}
}
rename_actions_dag->getOutputs() = std::move(updated_outputs);
bool apply_match_step = false;
/// If column order does not match old filter step column order, match dag output nodes with header
if (!blocksHaveEqualStructure(read_from_merge_tree->getOutputStream().header, filter_step->getOutputStream().header))
{
apply_match_step = true;
matchDAGOutputNodesOrderWithHeader(rename_actions_dag, filter_step->getOutputStream().header);
}
if (apply_rename_step || apply_match_step)
{
auto rename_step = std::make_unique<ExpressionStep>(read_from_merge_tree->getOutputStream(), rename_actions_dag);
if (apply_rename_step)
rename_step->setStepDescription("Change column names to column identifiers");
auto & node = nodes.emplace_back();
node.children.emplace_back(frame.node);
node.step = std::move(rename_step);
replace_old_filter_node = &node;
}
else
{
replace_old_filter_node = frame.node;
remove_filter_node = true;
}
FunctionOverloadResolverPtr func_builder_and = std::make_unique<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionAnd>());
const auto * node = &prewhere_info->prewhere_actions->addFunction(func_builder_and, std::move(conditions), {});
prewhere_info->prewhere_column_name = node->result_name;
prewhere_info->prewhere_actions->getOutputs().push_back(node);
}
QueryPlan::Node * filter_parent_node = (stack.rbegin() + 2)->node;
read_from_merge_tree->updatePrewhereInfo(prewhere_info);
for (auto & filter_parent_child : filter_parent_node->children)
if (!optimize_result.fully_moved_to_prewhere)
{
if (filter_parent_child == filter_node)
{
filter_parent_child = replace_old_filter_node;
size_t stack_size = stack.size();
/** If filter step is completely replaced with PREWHERE filter actions, remove it from stack.
* Otherwise replace old filter step with new filter step after PREWHERE.
*/
if (remove_filter_node)
{
std::swap(stack[stack_size - 1], stack[stack_size - 2]);
stack.pop_back();
}
else
{
stack[stack_size - 2] = Frame{.node = replace_old_filter_node, .next_child = 1};
}
break;
}
filter_node->step = std::make_unique<FilterStep>(
read_from_merge_tree->getOutputStream(),
std::move(split_result.second),
filter_step->getFilterColumnName(),
filter_step->removesFilterColumn());
}
else
{
filter_node->step = std::make_unique<ExpressionStep>(
read_from_merge_tree->getOutputStream(),
std::move(split_result.second));
}
}

View File

@ -118,6 +118,34 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s
optimizePrewhere(stack, nodes);
optimizePrimaryKeyCondition(stack);
auto & frame = stack.back();
if (frame.next_child == 0)
{
if (optimization_settings.read_in_order)
optimizeReadInOrder(*frame.node, nodes);
if (optimization_settings.distinct_in_order)
tryDistinctReadInOrder(frame.node);
}
/// Traverse all children first.
if (frame.next_child < frame.node->children.size())
{
auto next_frame = Frame{.node = frame.node->children[frame.next_child]};
++frame.next_child;
stack.push_back(next_frame);
continue;
}
stack.pop_back();
}
stack.push_back({.node = &root});
while (!stack.empty())
{
{
/// NOTE: frame cannot be safely used after stack was modified.
auto & frame = stack.back();
@ -126,19 +154,14 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s
{
has_reading_from_mt |= typeid_cast<const ReadFromMergeTree *>(frame.node->step.get()) != nullptr;
if (optimization_settings.read_in_order)
optimizeReadInOrder(*frame.node, nodes);
/// Projection optimization relies on PK optimization
if (optimization_settings.optimize_projection)
num_applied_projection
+= optimizeUseAggregateProjections(*frame.node, nodes, optimization_settings.optimize_use_implicit_projections);
if (optimization_settings.aggregation_in_order)
optimizeAggregationInOrder(*frame.node, nodes);
if (optimization_settings.distinct_in_order)
tryDistinctReadInOrder(frame.node);
}
/// Traverse all children first.

View File

@ -14,19 +14,33 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
return 0;
const auto & expr = filter_step->getExpression();
const std::string & filter_column_name = filter_step->getFilterColumnName();
/// Do not split if there are function like runningDifference.
if (expr->hasStatefulFunctions())
return 0;
auto split = expr->splitActionsForFilter(filter_step->getFilterColumnName());
bool filter_name_clashs_with_input = false;
if (filter_step->removesFilterColumn())
{
for (const auto * input : expr->getInputs())
{
if (input->result_name == filter_column_name)
{
filter_name_clashs_with_input = true;
break;
}
}
}
auto split = expr->splitActionsForFilter(filter_column_name);
if (split.second->trivial())
return 0;
bool remove_filter = false;
if (filter_step->removesFilterColumn())
remove_filter = split.second->removeUnusedResult(filter_step->getFilterColumnName());
remove_filter = split.second->removeUnusedResult(filter_column_name);
auto description = filter_step->getStepDescription();
@ -34,10 +48,25 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
node->children.swap(filter_node.children);
node->children.push_back(&filter_node);
std::string split_filter_name = filter_column_name;
if (filter_name_clashs_with_input)
{
split_filter_name = "__split_filter";
for (auto & filter_output : split.first->getOutputs())
{
if (filter_output->result_name == filter_column_name)
{
filter_output = &split.first->addAlias(*filter_output, split_filter_name);
break;
}
}
}
filter_node.step = std::make_unique<FilterStep>(
filter_node.children.at(0)->step->getOutputStream(),
std::move(split.first),
filter_step->getFilterColumnName(),
std::move(split_filter_name),
remove_filter);
node->step = std::make_unique<ExpressionStep>(filter_node.step->getOutputStream(), std::move(split.second));

View File

@ -89,6 +89,34 @@ size_t countPartitions(const MergeTreeData::DataPartsVector & prepared_parts)
return countPartitions(prepared_parts, get_partition_id);
}
/// Makes sure every input of `dag` whose name is listed in `inputs` is also an output
/// of `dag`, appending the missing ones to the outputs.
/// Returns true if at least one input was appended.
bool restoreDAGInputs(ActionsDAG & dag, const NameSet & inputs)
{
    auto & dag_outputs = dag.getOutputs();

    /// Snapshot of the outputs as they were before any input is appended.
    const std::unordered_set<const ActionsDAG::Node *> known_outputs(dag_outputs.begin(), dag_outputs.end());

    bool restored_any = false;
    for (const auto * input_node : dag.getInputs())
    {
        if (!inputs.contains(input_node->result_name))
            continue;

        if (known_outputs.contains(input_node))
            continue;

        dag_outputs.push_back(input_node);
        restored_any = true;
    }

    return restored_any;
}
/// Restores the inputs named in `inputs` as outputs of every DAG stored in `info`
/// (the row-level filter and the PREWHERE actions, whichever are set).
/// Returns true if any of the DAGs got at least one output appended.
bool restorePrewhereInputs(PrewhereInfo & info, const NameSet & inputs)
{
    bool added = false;

    if (info.row_level_filter)
    {
        const bool restored = restoreDAGInputs(*info.row_level_filter, inputs);
        added = added || restored;
    }

    if (info.prewhere_actions)
    {
        /// Call restoreDAGInputs unconditionally. Writing `added || restoreDAGInputs(...)`
        /// would short-circuit when the row-level filter already restored something,
        /// leaving the PREWHERE actions untouched so they would still drop the columns.
        const bool restored = restoreDAGInputs(*info.prewhere_actions, inputs);
        added = added || restored;
    }

    return added;
}
}
namespace ProfileEvents
@ -786,18 +814,13 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder(
/// To fix this, we prohibit removing any input in prewhere actions. Instead, projection actions will be added after sorting.
/// See 02354_read_in_order_prewhere.sql as an example.
bool have_input_columns_removed_after_prewhere = false;
if (prewhere_info && prewhere_info->prewhere_actions)
if (prewhere_info)
{
auto & outputs = prewhere_info->prewhere_actions->getOutputs();
std::unordered_set<const ActionsDAG::Node *> outputs_set(outputs.begin(), outputs.end());
for (const auto * input : prewhere_info->prewhere_actions->getInputs())
{
if (!outputs_set.contains(input))
{
outputs.push_back(input);
have_input_columns_removed_after_prewhere = true;
}
}
NameSet sorting_columns;
for (const auto & column : metadata_for_reading->getSortingKey().expression->getRequiredColumnsWithTypes())
sorting_columns.insert(column.name);
have_input_columns_removed_after_prewhere = restorePrewhereInputs(*prewhere_info, sorting_columns);
}
/// Let's split ranges to avoid reading much data.
@ -984,7 +1007,6 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder(
/// Thus we need to merge all partition parts into a single sorted stream.
Pipe pipe = Pipe::unitePipes(std::move(pipes));
merge_streams(pipe);
out_projection = createProjection(pipe_header);
return pipe;
}
@ -1133,6 +1155,14 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal(
auto sorting_expr = std::make_shared<ExpressionActions>(metadata_for_reading->getSortingKey().expression->getActionsDAG().clone());
if (prewhere_info)
{
NameSet sorting_columns;
for (const auto & column : metadata_for_reading->getSortingKey().expression->getRequiredColumnsWithTypes())
sorting_columns.insert(column.name);
restorePrewhereInputs(*prewhere_info, sorting_columns);
}
for (size_t range_index = 0; range_index < parts_to_merge_ranges.size() - 1; ++range_index)
{
/// If do_not_merge_across_partitions_select_final is true and there is only one part in partition
@ -1804,13 +1834,20 @@ Pipe ReadFromMergeTree::spreadMarkRanges(
if (!final && result.sampling.use_sampling)
{
NameSet sampling_columns;
/// Add columns needed for `sample_by_ast` to `column_names_to_read`.
/// Skip this if final was used, because such columns were already added from PK.
for (const auto & column : result.sampling.filter_expression->getRequiredColumns().getNames())
{
if (!names.contains(column))
column_names_to_read.push_back(column);
sampling_columns.insert(column);
}
if (prewhere_info)
restorePrewhereInputs(*prewhere_info, sampling_columns);
}
if (final)
@ -2004,6 +2041,24 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons
});
}
/// Some extra columns could be added by sample/final/in-order/etc
/// Remove them from header if not needed.
if (!blocksHaveEqualStructure(pipe.getHeader(), getOutputStream().header))
{
auto convert_actions_dag = ActionsDAG::makeConvertingActions(
pipe.getHeader().getColumnsWithTypeAndName(),
getOutputStream().header.getColumnsWithTypeAndName(),
ActionsDAG::MatchColumnsMode::Name,
true);
auto converting_dag_expr = std::make_shared<ExpressionActions>(convert_actions_dag);
pipe.addSimpleTransform([&](const Block & header)
{
return std::make_shared<ExpressionTransform>(header, converting_dag_expr);
});
}
for (const auto & processor : pipe.getProcessors())
processors.emplace_back(processor);

View File

@ -449,8 +449,8 @@ Block MergeTreeSelectProcessor::applyPrewhereActions(Block block, const Prewhere
Block MergeTreeSelectProcessor::transformHeader(
Block block, const PrewhereInfoPtr & prewhere_info, const DataTypePtr & partition_value_type, const Names & virtual_columns)
{
injectVirtualColumns(block, 0, nullptr, partition_value_type, virtual_columns);
auto transformed = applyPrewhereActions(std::move(block), prewhere_info);
injectVirtualColumns(transformed, 0, nullptr, partition_value_type, virtual_columns);
return transformed;
}

View File

@ -112,7 +112,7 @@ void MergeTreeWhereOptimizer::optimize(SelectQueryInfo & select_query_info, cons
LOG_DEBUG(log, "MergeTreeWhereOptimizer: condition \"{}\" moved to PREWHERE", select.prewhere()->formatForLogging(log_queries_cut_to_length));
}
std::optional<MergeTreeWhereOptimizer::FilterActionsOptimizeResult> MergeTreeWhereOptimizer::optimize(const ActionsDAGPtr & filter_dag,
MergeTreeWhereOptimizer::FilterActionsOptimizeResult MergeTreeWhereOptimizer::optimize(const ActionsDAGPtr & filter_dag,
const std::string & filter_column_name,
const ContextPtr & context,
bool is_final)
@ -132,11 +132,14 @@ std::optional<MergeTreeWhereOptimizer::FilterActionsOptimizeResult> MergeTreeWhe
if (!optimize_result)
return {};
auto filter_actions = reconstructDAG(optimize_result->where_conditions);
auto prewhere_filter_actions = reconstructDAG(optimize_result->prewhere_conditions);
// if (optimize_result->where_conditions.empty())
// return {.prewhere_nodes = {}, .fully_moved_to_prewhere = true};
FilterActionsOptimizeResult result = { std::move(filter_actions), std::move(prewhere_filter_actions) };
return result;
std::unordered_set<const ActionsDAG::Node *> prewhere_conditions;
for (const auto & condition : optimize_result->prewhere_conditions)
prewhere_conditions.insert(condition.node.getDAGNode());
return {.prewhere_nodes = std::move(prewhere_conditions), .fully_moved_to_prewhere = optimize_result->where_conditions.empty()};
}
static void collectColumns(const RPNBuilderTreeNode & node, const NameSet & columns_names, NameSet & result_set, bool & has_invalid_column)
@ -343,20 +346,6 @@ ASTPtr MergeTreeWhereOptimizer::reconstructAST(const Conditions & conditions)
return function;
}
/// Builds a filter ActionsDAG from the DAG nodes of the given conditions.
/// Returns an empty pointer when there are no conditions.
ActionsDAGPtr MergeTreeWhereOptimizer::reconstructDAG(const Conditions & conditions)
{
    ActionsDAGPtr result;

    if (!conditions.empty())
    {
        ActionsDAG::NodeRawConstPtrs condition_nodes;
        condition_nodes.reserve(conditions.size());

        for (const auto & cond : conditions)
            condition_nodes.push_back(cond.node.getDAGNode());

        result = ActionsDAG::buildFilterActionsDAG(condition_nodes);
    }

    return result;
}
std::optional<MergeTreeWhereOptimizer::OptimizeResult> MergeTreeWhereOptimizer::optimizeImpl(const RPNBuilderTreeNode & node,
const WhereOptimizerContext & where_optimizer_context) const
{

View File

@ -47,11 +47,11 @@ public:
struct FilterActionsOptimizeResult
{
ActionsDAGPtr filter_actions;
ActionsDAGPtr prewhere_filter_actions;
std::unordered_set<const ActionsDAG::Node *> prewhere_nodes;
bool fully_moved_to_prewhere = false;
};
std::optional<FilterActionsOptimizeResult> optimize(const ActionsDAGPtr & filter_dag,
FilterActionsOptimizeResult optimize(const ActionsDAGPtr & filter_dag,
const std::string & filter_column_name,
const ContextPtr & context,
bool is_final);
@ -122,9 +122,6 @@ private:
/// Reconstruct AST from conditions
static ASTPtr reconstructAST(const Conditions & conditions);
/// Reconstruct DAG from conditions
static ActionsDAGPtr reconstructDAG(const Conditions & conditions);
void optimizeArbitrary(ASTSelectQuery & select) const;
UInt64 getColumnsSize(const NameSet & columns) const;

View File

@ -28,7 +28,7 @@ Expression ((Projection + Before ORDER BY))
Expression ((Project names + Projection))
Filter ((WHERE + DROP unused columns after JOIN))
Join (JOIN FillRightFirst)
Expression (Change column names to column identifiers)
Expression
ReadFromMergeTree (default.t1)
Indexes:
PrimaryKey

View File

@ -3,21 +3,18 @@
MinMax
Keys:
y
Condition: (y in [1, +Inf))
Parts: 4/5
Granules: 11/12
Partition
Keys:
y
bitAnd(z, 3)
Condition: and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1]))
Parts: 3/4
Granules: 10/11
PrimaryKey
Keys:
x
y
Condition: and((x in [11, +Inf)), (y in [1, +Inf)))
Parts: 2/3
Granules: 6/10
Skip
@ -37,7 +34,6 @@
{
"Type": "MinMax",
"Keys": ["y"],
"Condition": "(y in [1, +Inf))",
"Initial Parts": 5,
"Selected Parts": 4,
"Initial Granules": 12,
@ -46,7 +42,6 @@
{
"Type": "Partition",
"Keys": ["y", "bitAnd(z, 3)"],
"Condition": "and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1]))",
"Initial Parts": 4,
"Selected Parts": 3,
"Initial Granules": 11,
@ -55,7 +50,6 @@
{
"Type": "PrimaryKey",
"Keys": ["x", "y"],
"Condition": "and((x in [11, +Inf)), (y in [1, +Inf)))",
"Initial Parts": 3,
"Selected Parts": 2,
"Initial Granules": 10,
@ -109,21 +103,18 @@
MinMax
Keys:
y
Condition: (y in [1, +Inf))
Parts: 4/5
Granules: 11/12
Partition
Keys:
y
bitAnd(z, 3)
Condition: and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1]))
Parts: 3/4
Granules: 10/11
PrimaryKey
Keys:
x
y
Condition: and((x in [11, +Inf)), (y in [1, +Inf)))
Parts: 2/3
Granules: 6/10
Skip
@ -138,7 +129,6 @@
{
"Type": "MinMax",
"Keys": ["y"],
"Condition": "(y in [1, +Inf))",
"Initial Parts": 5,
"Selected Parts": 4,
"Initial Granules": 12,
@ -147,7 +137,6 @@
{
"Type": "Partition",
"Keys": ["y", "bitAnd(z, 3)"],
"Condition": "and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1]))",
"Initial Parts": 4,
"Selected Parts": 3,
"Initial Granules": 11,
@ -156,7 +145,6 @@
{
"Type": "PrimaryKey",
"Keys": ["x", "y"],
"Condition": "and((x in [11, +Inf)), (y in [1, +Inf)))",
"Initial Parts": 3,
"Selected Parts": 2,
"Initial Granules": 10,

View File

@ -17,13 +17,13 @@ do
$CH_CLIENT -q "
explain indexes = 1 select *, _part from test_index where t % 19 = 16 and y > 0 and bitAnd(z, 3) != 1 and x > 10 and t % 20 > 14;
" | grep -A 100 "ReadFromMergeTree" # | grep -v "Description"
" | grep -A 100 "ReadFromMergeTree" | grep -v "Condition"
echo "-----------------"
$CH_CLIENT -q "
explain indexes = 1, json = 1 select *, _part from test_index where t % 19 = 16 and y > 0 and bitAnd(z, 3) != 1 and x > 10 and t % 20 > 14 format TSVRaw;
" | grep -A 100 "ReadFromMergeTree" # | grep -v "Description"
" | grep -A 100 "ReadFromMergeTree" | grep -v "Condition"
echo "-----------------"

View File

@ -76,8 +76,7 @@ ExpressionTransform
(Expression)
ExpressionTransform
(ReadFromMergeTree)
ExpressionTransform
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1
2020-10-11 0 0
2020-10-11 0 10
2020-10-11 0 20
@ -106,8 +105,7 @@ ExpressionTransform
(Expression)
ExpressionTransform
(ReadFromMergeTree)
ExpressionTransform
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1
2020-10-12 0
2020-10-12 1
2020-10-12 2
@ -140,9 +138,8 @@ ExpressionTransform
(Expression)
ExpressionTransform
(ReadFromMergeTree)
ExpressionTransform
ReverseTransform
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InReverseOrder) 0 → 1
ReverseTransform
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InReverseOrder) 0 → 1
2020-10-12 99999
2020-10-12 99998
2020-10-12 99997

View File

@ -52,7 +52,7 @@ SELECT _part_offset, foo FROM t_1 where granule == 0 AND _part_offset >= 100000
SELECT 'PREWHERE';
SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere granule == 0 where _part_offset >= 100000;
SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part != '' where granule == 0; -- { serverError 10 }
SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part_offset > 100000 where granule == 0; -- { serverError 10 }
SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part != '' where granule == 0; -- { serverError 10, 16 }
SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part_offset > 100000 where granule == 0; -- { serverError 10, 16 }
SELECT _part_offset FROM t_1 PREWHERE order_0 % 10000 == 42 ORDER BY order_0 LIMIT 3;
SELECT _part_offset, foo FROM t_1 PREWHERE order_0 % 10000 == 42 ORDER BY order_0 LIMIT 3;

View File

@ -1,3 +1,5 @@
-- { echoOn }
explain pipeline select a from t1 group by a;
(Expression)
ExpressionTransform × 16
(Aggregating)
@ -15,6 +17,8 @@ ExpressionTransform × 16
Resize 3 → 1
MergeTreeSelect(pool: ReadPool, algorithm: Thread) × 3 0 → 1
1000000
-- { echoOn }
explain pipeline select a from t2 group by a;
(Expression)
ExpressionTransform × 16
(Aggregating)
@ -40,6 +44,8 @@ ExpressionTransform × 16
Resize 2 → 1
MergeTreeSelect(pool: ReadPool, algorithm: Thread) × 2 0 → 1
1000000
-- { echoOn }
explain pipeline select a from t3 group by a;
(Expression)
ExpressionTransform × 16
(Aggregating)
@ -82,6 +88,8 @@ ExpressionTransform × 16
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
1000000
1000000
-- { echoOn }
explain pipeline select a from t4 group by a settings read_in_order_two_level_merge_threshold = 1e12;
(Expression)
ExpressionTransform × 16
(Aggregating)
@ -91,20 +99,21 @@ ExpressionTransform × 16
(Expression)
ExpressionTransform × 4
(ReadFromMergeTree)
ExpressionTransform × 4
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
1000000
-- { echoOn }
explain pipeline select a from t5 group by a settings read_in_order_two_level_merge_threshold = 1e12;
(Expression)
ExpressionTransform × 16
(Aggregating)
@ -114,41 +123,6 @@ ExpressionTransform × 16
(Expression)
ExpressionTransform × 8
(ReadFromMergeTree)
ExpressionTransform × 8
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
1000000
(Expression)
ExpressionTransform × 16
(Aggregating)
FinalizeAggregatedTransform × 16
AggregatingInOrderTransform × 16
(Expression)
ExpressionTransform × 16
(ReadFromMergeTree)
ExpressionTransform × 16
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
@ -173,30 +147,65 @@ ExpressionTransform × 16
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
1000000
-- { echoOn }
explain pipeline select a from t6 group by a settings read_in_order_two_level_merge_threshold = 1e12;
(Expression)
ExpressionTransform × 16
(Aggregating)
FinalizeAggregatedTransform × 16
AggregatingInOrderTransform × 16
(Expression)
ExpressionTransform × 16
(ReadFromMergeTree)
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
MergingSortedTransform 2 → 1
ExpressionTransform × 2
MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
1000000
Skip merging: 1
Skip merging: 1

View File

@ -15,7 +15,9 @@ system stop merges t1;
insert into t1 select number from numbers_mt(1e6);
insert into t1 select number from numbers_mt(1e6);
-- { echoOn }
explain pipeline select a from t1 group by a;
-- { echoOff }
select count() from (select throwIf(count() != 2) from t1 group by a);
@ -28,7 +30,9 @@ system stop merges t2;
insert into t2 select number from numbers_mt(1e6);
insert into t2 select number from numbers_mt(1e6);
-- { echoOn }
explain pipeline select a from t2 group by a;
-- { echoOff }
select count() from (select throwIf(count() != 2) from t2 group by a);
@ -41,7 +45,9 @@ system stop merges t3;
insert into t3 select number from numbers_mt(1e6);
insert into t3 select number from numbers_mt(1e6);
-- { echoOn }
explain pipeline select a from t3 group by a;
-- { echoOff }
select count() from (select throwIf(count() != 2) from t3 group by a);
@ -63,7 +69,9 @@ system stop merges t4;
insert into t4 select number from numbers_mt(1e6);
insert into t4 select number from numbers_mt(1e6);
-- { echoOn }
explain pipeline select a from t4 group by a settings read_in_order_two_level_merge_threshold = 1e12;
-- { echoOff }
select count() from (select throwIf(count() != 2) from t4 group by a);
@ -76,7 +84,9 @@ system stop merges t5;
insert into t5 select number from numbers_mt(1e6);
insert into t5 select number from numbers_mt(1e6);
-- { echoOn }
explain pipeline select a from t5 group by a settings read_in_order_two_level_merge_threshold = 1e12;
-- { echoOff }
select count() from (select throwIf(count() != 2) from t5 group by a);
@ -89,7 +99,9 @@ system stop merges t6;
insert into t6 select number from numbers_mt(1e6);
insert into t6 select number from numbers_mt(1e6);
-- { echoOn }
explain pipeline select a from t6 group by a settings read_in_order_two_level_merge_threshold = 1e12;
-- { echoOff }
select count() from (select throwIf(count() != 2) from t6 group by a);

View File

@ -38,3 +38,40 @@
Description: minmax GRANULARITY 1
Parts: 0/0
Granules: 0/0
ReadFromMergeTree (default.data_02771)
Indexes:
PrimaryKey
Condition: true
Parts: 1/1
Granules: 1/1
Skip
Name: x_idx
Description: minmax GRANULARITY 1
Parts: 0/1
Granules: 0/1
Skip
Name: y_idx
Description: minmax GRANULARITY 1
Parts: 0/0
Granules: 0/0
Skip
Name: xy_idx
Description: minmax GRANULARITY 1
Parts: 0/0
Granules: 0/0
ReadFromMergeTree (default.data_02771)
Indexes:
PrimaryKey
Condition: true
Parts: 1/1
Granules: 1/1
Skip
Name: x_idx
Description: minmax GRANULARITY 1
Parts: 0/1
Granules: 0/1
Skip
Name: y_idx
Description: minmax GRANULARITY 1
Parts: 0/0
Granules: 0/0

View File

@ -1,5 +1,3 @@
SET allow_experimental_analyzer = 0;
DROP TABLE IF EXISTS data_02771;
@ -24,6 +22,14 @@ SELECT * FROM data_02771 SETTINGS ignore_data_skipping_indices='na_idx';
SELECT * FROM data_02771 WHERE x = 1 AND y = 1 SETTINGS ignore_data_skipping_indices='xy_idx',force_data_skipping_indices='xy_idx' ; -- { serverError 277 }
SELECT * FROM data_02771 WHERE x = 1 AND y = 2 SETTINGS ignore_data_skipping_indices='xy_idx';
SET allow_experimental_analyzer = 0;
SELECT * from ( EXPLAIN indexes = 1 SELECT * FROM data_02771 WHERE x = 1 AND y = 2 ) WHERE explain NOT LIKE '%Expression%' AND explain NOT LIKE '%Filter%';
SELECT * from ( EXPLAIN indexes = 1 SELECT * FROM data_02771 WHERE x = 1 AND y = 2 SETTINGS ignore_data_skipping_indices='xy_idx' ) WHERE explain NOT LIKE '%Expression%' AND explain NOT LIKE '%Filter%';
SET allow_experimental_analyzer = 1;
SELECT * from ( EXPLAIN indexes = 1 SELECT * FROM data_02771 WHERE x = 1 AND y = 2 ) WHERE explain NOT LIKE '%Expression%' AND explain NOT LIKE '%Filter%';
SELECT * from ( EXPLAIN indexes = 1 SELECT * FROM data_02771 WHERE x = 1 AND y = 2 SETTINGS ignore_data_skipping_indices='xy_idx' ) WHERE explain NOT LIKE '%Expression%' AND explain NOT LIKE '%Filter%';

View File

@ -1,6 +1,6 @@
CreatingSets (Create sets before main query execution)
Expression ((Projection + Before ORDER BY))
ReadFromMergeTree (default.test_table)
CreatingSets
Expression
ReadFromMergeTree
Indexes:
PrimaryKey
Keys:
@ -9,9 +9,9 @@ CreatingSets (Create sets before main query execution)
Condition: and((id in (-Inf, 10]), (value in 1-element set))
Parts: 1/1
Granules: 1/1
CreatingSets (Create sets before main query execution)
Expression ((Projection + Before ORDER BY))
ReadFromMergeTree (default.test_table)
CreatingSets
Expression
ReadFromMergeTree
Indexes:
PrimaryKey
Keys:
@ -20,9 +20,9 @@ CreatingSets (Create sets before main query execution)
Condition: and((id in (-Inf, 10]), (value in 1-element set))
Parts: 1/1
Granules: 1/1
CreatingSets (Create sets before main query execution)
Expression ((Projection + Before ORDER BY))
ReadFromMergeTree (default.test_table)
CreatingSets
Expression
ReadFromMergeTree
Indexes:
PrimaryKey
Keys:
@ -31,53 +31,9 @@ CreatingSets (Create sets before main query execution)
Condition: and((id in (-Inf, 10]), (value in 5-element set))
Parts: 1/1
Granules: 1/1
CreatingSets (Create sets before main query execution)
Expression ((Projection + Before ORDER BY))
ReadFromMergeTree (default.test_table)
Indexes:
PrimaryKey
Keys:
id
value
Condition: and((id in (-Inf, 10]), (value in 5-element set))
Parts: 1/1
Granules: 1/1
CreatingSets (Create sets before main query execution)
Expression ((Project names + Projection))
ReadFromMergeTree (default.test_table)
Indexes:
PrimaryKey
Keys:
id
value
Condition: and((id in (-Inf, 10]), (value in 1-element set))
Parts: 1/1
Granules: 1/1
CreatingSets (Create sets before main query execution)
Expression ((Project names + Projection))
ReadFromMergeTree (default.test_table)
Indexes:
PrimaryKey
Keys:
id
value
Condition: and((id in (-Inf, 10]), (value in 1-element set))
Parts: 1/1
Granules: 1/1
CreatingSets (Create sets before main query execution)
Expression ((Project names + Projection))
ReadFromMergeTree (default.test_table)
Indexes:
PrimaryKey
Keys:
id
value
Condition: and((id in (-Inf, 10]), (value in 5-element set))
Parts: 1/1
Granules: 1/1
CreatingSets (Create sets before main query execution)
Expression ((Project names + Projection))
ReadFromMergeTree (default.test_table)
CreatingSets
Expression
ReadFromMergeTree
Indexes:
PrimaryKey
Keys:
@ -86,3 +42,51 @@ CreatingSets (Create sets before main query execution)
Condition: and((id in (-Inf, 10]), (value in 5-element set))
Parts: 1/1
Granules: 1/1
CreatingSets
Expression
Expression
ReadFromMergeTree
Indexes:
PrimaryKey
Keys:
id
value
Condition: and((value in 1-element set), (id in (-Inf, 10]))
Parts: 1/1
Granules: 1/1
CreatingSets
Expression
Expression
ReadFromMergeTree
Indexes:
PrimaryKey
Keys:
id
value
Condition: and((value in 1-element set), (id in (-Inf, 10]))
Parts: 1/1
Granules: 1/1
CreatingSets
Expression
Expression
ReadFromMergeTree
Indexes:
PrimaryKey
Keys:
id
value
Condition: and((value in 5-element set), (id in (-Inf, 10]))
Parts: 1/1
Granules: 1/1
CreatingSets
Expression
Expression
ReadFromMergeTree
Indexes:
PrimaryKey
Keys:
id
value
Condition: and((value in 5-element set), (id in (-Inf, 10]))
Parts: 1/1
Granules: 1/1

View File

@ -7,18 +7,18 @@ CREATE TABLE test_table
INSERT INTO test_table SELECT number, number FROM numbers(10);
SET allow_experimental_analyzer = 0;
set allow_experimental_analyzer = 0;
EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT 5);
EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT '5');
EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toUInt8(number) FROM numbers(5));
EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toString(number) FROM numbers(5));
EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT 5);
EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT '5');
EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toUInt8(number) FROM numbers(5));
EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toString(number) FROM numbers(5));
SET allow_experimental_analyzer = 1;
set allow_experimental_analyzer = 1;
EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT 5);
EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT '5');
EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toUInt8(number) FROM numbers(5));
EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toString(number) FROM numbers(5));
EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT 5);
EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT '5');
EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toUInt8(number) FROM numbers(5));
EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toString(number) FROM numbers(5));
DROP TABLE test_table;