Merge pull request #58554 from ClickHouse/try-to-always-push-down-prewhere-from-query-plan

Simplify optimize-push-to-prewhere from query plan

Commit: 09d3b3c0d0
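This change replaces the ActionsChain-based PREWHERE push-down with a direct split of the filter step's ActionsDAG. As a rough illustration of the optimization itself (not part of the diff, and assuming optimize_move_to_prewhere is enabled):

    -- Illustrative SQL only: a plan-level filter such as
    SELECT a, b FROM test_table WHERE a AND b AND c;
    -- is executed approximately as if it had been written
    SELECT a, b FROM test_table PREWHERE a AND b AND c;
    -- i.e. the condition columns are read and evaluated first, and the remaining
    -- columns are read only for the granules/rows that pass the condition.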
@@ -1645,7 +1645,7 @@ void ActionsDAG::mergeNodes(ActionsDAG && second)
     }
 }
 
-ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split_nodes) const
+ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split_nodes, bool create_split_nodes_mapping) const
 {
     /// Split DAG into two parts.
     /// (first_nodes, first_outputs) is a part which will have split_list in result.
@@ -1779,13 +1779,13 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
                 }
 
                 /// Input from second DAG should also be in the first.
-                if (copy.type == ActionType::INPUT)
-                {
-                    auto & input_copy = first_nodes.emplace_back(*cur.node);
-                    assert(cur_data.to_first == nullptr);
-                    cur_data.to_first = &input_copy;
-                    new_inputs.push_back(cur.node);
-                }
+                // if (copy.type == ActionType::INPUT)
+                // {
+                //     auto & input_copy = first_nodes.emplace_back(*cur.node);
+                //     assert(cur_data.to_first == nullptr);
+                //     cur_data.to_first = &input_copy;
+                //     new_inputs.push_back(cur.node);
+                // }
             }
             else
             {
@@ -1804,10 +1804,11 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
                 /// If this node is needed in result, add it as input.
                 Node input_node;
                 input_node.type = ActionType::INPUT;
-                input_node.result_type = node.result_type;
-                input_node.result_name = node.result_name;
+                input_node.result_type = cur.node->result_type;
+                input_node.result_name = cur.node->result_name;
                 cur_data.to_second = &second_nodes.emplace_back(std::move(input_node));
 
+                if (cur.node->type != ActionType::INPUT)
                     new_inputs.push_back(cur.node);
             }
         }
@@ -1824,16 +1825,31 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
     for (const auto * input_node : inputs)
     {
         const auto & cur = data[input_node];
-        first_inputs.push_back(cur.to_first);
+        if (cur.to_first)
+        {
+            first_inputs.push_back(cur.to_first);
 
-        if (cur.to_second)
-            first_outputs.push_back(cur.to_first);
+            if (cur.to_second)
+                first_outputs.push_back(cur.to_first);
+        }
     }
 
     for (const auto * input : new_inputs)
     {
         const auto & cur = data[input];
-        second_inputs.push_back(cur.to_second);
-        first_outputs.push_back(cur.to_first);
+        if (cur.to_second)
+            second_inputs.push_back(cur.to_second);
+        if (cur.to_first)
+            first_outputs.push_back(cur.to_first);
+    }
+
+    for (const auto * input_node : inputs)
+    {
+        const auto & cur = data[input_node];
+        if (cur.to_second)
+            second_inputs.push_back(cur.to_second);
     }
 
     auto first_actions = std::make_shared<ActionsDAG>();
     first_actions->nodes.swap(first_nodes);
     first_actions->outputs.swap(first_outputs);
@@ -1844,7 +1860,14 @@ ActionsDAG::SplitResult ActionsDAG::split(std::unordered_set<const Node *> split
     second_actions->outputs.swap(second_outputs);
     second_actions->inputs.swap(second_inputs);
 
-    return {std::move(first_actions), std::move(second_actions)};
+    std::unordered_map<const Node *, const Node *> split_nodes_mapping;
+    if (create_split_nodes_mapping)
+    {
+        for (const auto * node : split_nodes)
+            split_nodes_mapping[node] = data[node].to_first;
+    }
+
+    return {std::move(first_actions), std::move(second_actions), std::move(split_nodes_mapping)};
 }
 
 ActionsDAG::SplitResult ActionsDAG::splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const
@@ -327,13 +327,18 @@ public:
     /// Merge current nodes with specified dag nodes
     void mergeNodes(ActionsDAG && second);
 
-    using SplitResult = std::pair<ActionsDAGPtr, ActionsDAGPtr>;
+    struct SplitResult
+    {
+        ActionsDAGPtr first;
+        ActionsDAGPtr second;
+        std::unordered_map<const Node *, const Node *> split_nodes_mapping;
+    };
 
     /// Split ActionsDAG into two DAGs, where first part contains all nodes from split_nodes and their children.
     /// Execution of first then second parts on block is equivalent to execution of initial DAG.
     /// First DAG and initial DAG have equal inputs, second DAG and initial DAG has equal outputs.
     /// Second DAG inputs may contain less inputs then first DAG (but also include other columns).
-    SplitResult split(std::unordered_set<const Node *> split_nodes) const;
+    SplitResult split(std::unordered_set<const Node *> split_nodes, bool create_split_nodes_mapping = false) const;
 
     /// Splits actions into two parts. Returned first half may be swapped with ARRAY JOIN.
     SplitResult splitActionsBeforeArrayJoin(const NameSet & array_joined_columns) const;
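A condensed sketch of how this new interface is consumed by optimizePrewhere later in this diff (assuming `filter_dag` is the filter step's ActionsDAGPtr and `prewhere_nodes` is the node set chosen by MergeTreeWhereOptimizer):

    // Sketch assembled from the optimizePrewhere changes below.
    auto split_result = filter_dag->split(prewhere_nodes, /*create_split_nodes_mapping=*/ true);

    // split_result.first evaluates everything needed for the PREWHERE conditions;
    // split_result.second computes the rest of the original filter expression.
    ActionsDAG::NodeRawConstPtrs conditions;
    conditions.reserve(split_result.split_nodes_mapping.size());
    for (const auto * condition : prewhere_nodes)
        conditions.push_back(split_result.split_nodes_mapping.at(condition));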
@@ -66,7 +66,7 @@ size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan:
     NameSet sort_columns;
     for (const auto & col : sorting_step->getSortDescription())
         sort_columns.insert(col.column_name);
-    auto [needed_for_sorting, unneeded_for_sorting] = expression_step->getExpression()->splitActionsBySortingDescription(sort_columns);
+    auto [needed_for_sorting, unneeded_for_sorting, _] = expression_step->getExpression()->splitActionsBySortingDescription(sort_columns);
 
     // No calculations can be postponed.
     if (unneeded_for_sorting->trivial())
@@ -5,68 +5,35 @@
 #include <Storages/MergeTree/MergeTreeWhereOptimizer.h>
 #include <Interpreters/ActionsDAG.h>
-#include <Planner/ActionsChain.h>
-#include <deque>
+#include "Functions/FunctionsLogical.h"
+#include "Functions/IFunctionAdaptors.h"
 
 namespace DB
 {
 
-namespace ErrorCodes
-{
-    extern const int LOGICAL_ERROR;
-}
-
-namespace
-{
-
-void matchDAGOutputNodesOrderWithHeader(ActionsDAGPtr & actions_dag, const Block & expected_header)
-{
-    std::unordered_map<std::string, const ActionsDAG::Node *> output_name_to_node;
-    for (const auto * output_node : actions_dag->getOutputs())
-        output_name_to_node.emplace(output_node->result_name, output_node);
-
-    std::unordered_set<const ActionsDAG::Node *> used_output_nodes;
-
-    ActionsDAG::NodeRawConstPtrs updated_outputs;
-    updated_outputs.reserve(expected_header.columns());
-
-    for (const auto & column : expected_header)
-    {
-        auto output_node_it = output_name_to_node.find(column.name);
-        if (output_node_it == output_name_to_node.end())
-            throw Exception(ErrorCodes::LOGICAL_ERROR,
-                "Invalid move to PREWHERE optimization. Cannot find column {} in output",
-                column.name);
-
-        updated_outputs.push_back(output_node_it->second);
-        used_output_nodes.insert(output_node_it->second);
-    }
-
-    ActionsDAG::NodeRawConstPtrs unused_outputs;
-    for (const auto * output_node : actions_dag->getOutputs())
-    {
-        if (used_output_nodes.contains(output_node))
-            continue;
-
-        unused_outputs.push_back(output_node);
-    }
-
-    auto & actions_dag_outputs = actions_dag->getOutputs();
-    actions_dag_outputs = std::move(updated_outputs);
-    actions_dag_outputs.insert(actions_dag_outputs.end(), unused_outputs.begin(), unused_outputs.end());
-}
-
-}
-
 
 namespace QueryPlanOptimizations
 {
 
-void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes)
+static void removeFromOutput(ActionsDAG & dag, const std::string name)
+{
+    const auto * node = &dag.findInOutputs(name);
+    auto & outputs = dag.getOutputs();
+    for (size_t i = 0; i < outputs.size(); ++i)
+    {
+        if (node == outputs[i])
+        {
+            outputs.erase(outputs.begin() + i);
+            return;
+        }
+    }
+}
+
+void optimizePrewhere(Stack & stack, QueryPlan::Nodes &)
 {
     if (stack.size() < 3)
         return;
 
-    const auto & frame = stack.back();
+    auto & frame = stack.back();
 
     /** Assume that on stack there are at least 3 nodes:
      *
@@ -82,60 +49,26 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes)
     if (storage_prewhere_info && storage_prewhere_info->prewhere_actions)
         return;
 
-    const QueryPlan::Node * filter_node = (stack.rbegin() + 1)->node;
+    QueryPlan::Node * filter_node = (stack.rbegin() + 1)->node;
     const auto * filter_step = typeid_cast<FilterStep *>(filter_node->step.get());
     if (!filter_step)
         return;
 
-    /** Collect required filter output columns.
-      * Collect output nodes that are mapped to input nodes.
-      * Collect input node to output nodes mapping.
-      */
-    ColumnsWithTypeAndName required_columns_after_filter;
-    std::unordered_set<std::string> output_nodes_mapped_to_input;
-    std::unordered_map<std::string, std::vector<std::string>> input_node_to_output_names;
-
-    for (const auto * output_node : filter_step->getExpression()->getOutputs())
-    {
-        const auto * node_without_alias = output_node;
-        while (node_without_alias->type == ActionsDAG::ActionType::ALIAS)
-            node_without_alias = node_without_alias->children[0];
-
-        if (node_without_alias->type == ActionsDAG::ActionType::INPUT)
-        {
-            output_nodes_mapped_to_input.emplace(output_node->result_name);
-
-            auto output_names_it = input_node_to_output_names.find(node_without_alias->result_name);
-            if (output_names_it == input_node_to_output_names.end())
-            {
-                auto [insert_it, _] = input_node_to_output_names.emplace(node_without_alias->result_name, std::vector<std::string>());
-                output_names_it = insert_it;
-            }
-
-            output_names_it->second.push_back(output_node->result_name);
-        }
-
-        if (output_node->result_name == filter_step->getFilterColumnName() && filter_step->removesFilterColumn())
-            continue;
-
-        required_columns_after_filter.push_back(ColumnWithTypeAndName(output_node->result_type, output_node->result_name));
-    }
-
     const auto & context = read_from_merge_tree->getContext();
     const auto & settings = context->getSettingsRef();
 
-    if (!settings.allow_experimental_analyzer)
-        return;
-
-    const auto & table_expression_modifiers = read_from_merge_tree->getQueryInfo().table_expression_modifiers;
-    bool is_final = table_expression_modifiers && table_expression_modifiers->hasFinal();
+    bool is_final = read_from_merge_tree->isQueryWithFinal();
     bool optimize_move_to_prewhere = settings.optimize_move_to_prewhere && (!is_final || settings.optimize_move_to_prewhere_if_final);
     if (!optimize_move_to_prewhere)
         return;
 
     const auto & storage_snapshot = read_from_merge_tree->getStorageSnapshot();
 
-    if (table_expression_modifiers && table_expression_modifiers->hasSampleSizeRatio())
+    ColumnsWithTypeAndName required_columns_after_filter;
+    if (read_from_merge_tree->isQueryWithSampling())
     {
         const auto & sampling_key = storage_snapshot->getMetadataForQuery()->getSamplingKey();
         const auto & sampling_source_columns = sampling_key.expression->getRequiredColumnsWithTypes();
@@ -170,7 +103,8 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes)
         filter_step->getFilterColumnName(),
         read_from_merge_tree->getContext(),
         is_final);
-    if (!optimize_result.has_value())
+
+    if (optimize_result.prewhere_nodes.empty())
         return;
 
     PrewhereInfoPtr prewhere_info;
@@ -181,201 +115,85 @@ void optimizePrewhere(Stack & stack, QueryPlan::Nodes & nodes)
 
     prewhere_info->need_filter = true;
 
-    auto & prewhere_filter_actions = optimize_result->prewhere_filter_actions;
-
-    ActionsChain actions_chain;
-
-    std::string prewere_filter_node_name = prewhere_filter_actions->getOutputs().at(0)->result_name;
-    actions_chain.addStep(std::make_unique<ActionsChainStep>(prewhere_filter_actions));
-
-    auto & filter_actions = optimize_result->filter_actions;
-
-    /** Merge tree where optimizer splits conjunctions in filter expression into 2 parts:
-      * 1. Filter expressions.
-      * 2. Prewhere filter expressions.
-      *
-      * There can be cases when all expressions are moved to PREWHERE, but it is not
-      * enough to produce required filter output columns.
-      *
-      * Example: SELECT (a AND b) AS cond FROM test_table WHERE cond AND c;
-      * In this example condition expressions `a`, `b`, `c` can move to PREWHERE, but PREWHERE will not contain expression `and(a, b)`.
-      * It will contain only `a`, `b`, `c`, `and(a, b, c)` expressions.
-      *
-      * In such scenario we need to create additional step to calculate `and(a, b)` expression after PREWHERE.
-      */
-    bool need_additional_filter_after_prewhere = false;
-
-    if (!filter_actions)
+    auto filter_expression = filter_step->getExpression();
+    const auto & filter_column_name = filter_step->getFilterColumnName();
+
+    if (optimize_result.fully_moved_to_prewhere && filter_step->removesFilterColumn())
     {
-        /// Any node from PREWHERE filter actions can be used as possible output node
-        std::unordered_set<std::string> possible_prewhere_output_nodes;
-        for (const auto & node : prewhere_filter_actions->getNodes())
-            possible_prewhere_output_nodes.insert(node.result_name);
-
-        for (auto & required_column : required_columns_after_filter)
-        {
-            if (!possible_prewhere_output_nodes.contains(required_column.name) &&
-                !output_nodes_mapped_to_input.contains(required_column.name))
-            {
-                need_additional_filter_after_prewhere = true;
-                break;
-            }
-        }
-    }
-
-    /** If there are additional filter actions after PREWHERE filter actions, we create filter actions dag using PREWHERE filter
-      * actions output columns as filter actions dag input columns.
-      * Then we merge this filter actions dag nodes with old filter step actions dag nodes, to reuse some expressions from
-      * PREWHERE filter actions.
-      */
-    if (need_additional_filter_after_prewhere || filter_actions)
-    {
-        auto merged_filter_actions = std::make_shared<ActionsDAG>(actions_chain.getLastStepAvailableOutputColumns());
-        merged_filter_actions->getOutputs().clear();
-        merged_filter_actions->mergeNodes(std::move(*filter_step->getExpression()->clone()));
-
-        /// Add old filter step filter column to outputs
-        for (const auto & node : merged_filter_actions->getNodes())
-        {
-            if (node.result_name == filter_step->getFilterColumnName())
-            {
-                merged_filter_actions->getOutputs().push_back(&node);
-                break;
-            }
-        }
-
-        filter_actions = std::move(merged_filter_actions);
-
-        /// If there is filter after PREWHERE, we can ignore filtering during PREWHERE stage
-        prewhere_info->need_filter = false;
-
-        actions_chain.addStep(std::make_unique<ActionsChainStep>(filter_actions));
-    }
-
-    auto required_output_actions = std::make_shared<ActionsDAG>(required_columns_after_filter);
-    actions_chain.addStep(std::make_unique<ActionsChainStep>(required_output_actions));
-
-    actions_chain.finalize();
-
-    prewhere_filter_actions->projectInput(false);
-
-    auto & prewhere_actions_chain_node = actions_chain[0];
-    prewhere_info->prewhere_actions = std::move(prewhere_filter_actions);
-    prewhere_info->prewhere_column_name = prewere_filter_node_name;
-    prewhere_info->remove_prewhere_column = !prewhere_actions_chain_node->getChildRequiredOutputColumnsNames().contains(prewere_filter_node_name);
+        removeFromOutput(*filter_expression, filter_column_name);
+        auto & outputs = filter_expression->getOutputs();
+        size_t size = outputs.size();
+        outputs.insert(outputs.end(), optimize_result.prewhere_nodes.begin(), optimize_result.prewhere_nodes.end());
+        filter_expression->removeUnusedActions(false);
+        outputs.resize(size);
+    }
+
+    auto split_result = filter_step->getExpression()->split(optimize_result.prewhere_nodes, true);
+
+    /// This is the leak of abstraction.
+    /// Splited actions may have inputs which are needed only for PREWHERE.
+    /// This is fine for ActionsDAG to have such a split, but it breaks defaults calculation.
+    ///
+    /// See 00950_default_prewhere for example.
+    /// Table has structure `APIKey UInt8, SessionType UInt8` and default `OperatingSystem = SessionType+1`
+    /// For a query with `SELECT OperatingSystem WHERE APIKey = 42 AND SessionType = 42` we push everything to PREWHERE
+    /// and columns APIKey, SessionType are removed from inputs (cause only OperatingSystem is needed).
+    /// However, column OperatingSystem is calculated after PREWHERE stage, based on SessionType value.
+    /// If column SessionType is removed by PREWHERE actions, we use zero as default, and get a wrong result.
+    ///
+    /// So, here we restore removed inputs for PREWHERE actions
+    {
+        std::unordered_set<const ActionsDAG::Node *> first_outputs(split_result.first->getOutputs().begin(), split_result.first->getOutputs().end());
+        for (const auto * input : split_result.first->getInputs())
+        {
+            if (!first_outputs.contains(input))
+            {
+                split_result.first->getOutputs().push_back(input);
+                /// Add column to second actions as input.
+                /// Do not add it to result, so it would be removed.
+                split_result.second->addInput(input->result_name, input->result_type);
+            }
+        }
+    }
+
+    ActionsDAG::NodeRawConstPtrs conditions;
+    conditions.reserve(split_result.split_nodes_mapping.size());
+    for (const auto * condition : optimize_result.prewhere_nodes)
+        conditions.push_back(split_result.split_nodes_mapping.at(condition));
+
+    prewhere_info->prewhere_actions = std::move(split_result.first);
+    prewhere_info->remove_prewhere_column = optimize_result.fully_moved_to_prewhere && filter_step->removesFilterColumn();
+
+    if (conditions.size() == 1)
+    {
+        prewhere_info->prewhere_column_name = conditions.front()->result_name;
+        prewhere_info->prewhere_actions->getOutputs().push_back(conditions.front());
+    }
+    else
+    {
+        prewhere_info->remove_prewhere_column = true;
+
+        FunctionOverloadResolverPtr func_builder_and = std::make_unique<FunctionToOverloadResolverAdaptor>(std::make_shared<FunctionAnd>());
+        const auto * node = &prewhere_info->prewhere_actions->addFunction(func_builder_and, std::move(conditions), {});
+        prewhere_info->prewhere_column_name = node->result_name;
+        prewhere_info->prewhere_actions->getOutputs().push_back(node);
+    }
 
     read_from_merge_tree->updatePrewhereInfo(prewhere_info);
 
-    QueryPlan::Node * replace_old_filter_node = nullptr;
-    bool remove_filter_node = false;
-
-    if (filter_actions)
+    if (!optimize_result.fully_moved_to_prewhere)
     {
-        filter_actions->projectInput(false);
-
-        /// Match dag output nodes with old filter step header
-        matchDAGOutputNodesOrderWithHeader(filter_actions, filter_step->getOutputStream().header);
-
-        auto & filter_actions_chain_node = actions_chain[1];
-        bool remove_filter_column = !filter_actions_chain_node->getChildRequiredOutputColumnsNames().contains(filter_step->getFilterColumnName());
-        auto after_prewhere_filter_step = std::make_unique<FilterStep>(read_from_merge_tree->getOutputStream(),
-            filter_actions,
+        filter_node->step = std::make_unique<FilterStep>(
+            read_from_merge_tree->getOutputStream(),
+            std::move(split_result.second),
             filter_step->getFilterColumnName(),
-            remove_filter_column);
-
-        auto & node = nodes.emplace_back();
-        node.children.emplace_back(frame.node);
-        node.step = std::move(after_prewhere_filter_step);
-
-        replace_old_filter_node = &node;
+            filter_step->removesFilterColumn());
     }
     else
     {
-        auto rename_actions_dag = std::make_shared<ActionsDAG>(read_from_merge_tree->getOutputStream().header.getColumnsWithTypeAndName());
-        bool apply_rename_step = false;
-
-        ActionsDAG::NodeRawConstPtrs updated_outputs;
-
-        /** If in output after read from merge tree there are column names without aliases,
-          * apply old filter step aliases to them.
-          */
-        for (const auto * output_node : rename_actions_dag->getOutputs())
-        {
-            const auto alias_it = input_node_to_output_names.find(output_node->result_name);
-            if (alias_it == input_node_to_output_names.end())
-            {
-                updated_outputs.push_back(output_node);
-                continue;
-            }
-
-            for (auto & output_name : alias_it->second)
-            {
-                if (output_name == output_node->result_name)
-                {
-                    updated_outputs.push_back(output_node);
-                    continue;
-                }
-
-                updated_outputs.push_back(&rename_actions_dag->addAlias(*output_node, output_name));
-                apply_rename_step = true;
-            }
-        }
-
-        rename_actions_dag->getOutputs() = std::move(updated_outputs);
-
-        bool apply_match_step = false;
-
-        /// If column order does not match old filter step column order, match dag output nodes with header
-        if (!blocksHaveEqualStructure(read_from_merge_tree->getOutputStream().header, filter_step->getOutputStream().header))
-        {
-            apply_match_step = true;
-            matchDAGOutputNodesOrderWithHeader(rename_actions_dag, filter_step->getOutputStream().header);
-        }
-
-        if (apply_rename_step || apply_match_step)
-        {
-            auto rename_step = std::make_unique<ExpressionStep>(read_from_merge_tree->getOutputStream(), rename_actions_dag);
-            if (apply_rename_step)
-                rename_step->setStepDescription("Change column names to column identifiers");
-
-            auto & node = nodes.emplace_back();
-            node.children.emplace_back(frame.node);
-            node.step = std::move(rename_step);
-
-            replace_old_filter_node = &node;
-        }
-        else
-        {
-            replace_old_filter_node = frame.node;
-            remove_filter_node = true;
-        }
-    }
-
-    QueryPlan::Node * filter_parent_node = (stack.rbegin() + 2)->node;
-
-    for (auto & filter_parent_child : filter_parent_node->children)
-    {
-        if (filter_parent_child == filter_node)
-        {
-            filter_parent_child = replace_old_filter_node;
-
-            size_t stack_size = stack.size();
-
-            /** If filter step is completely replaced with PREWHERE filter actions, remove it from stack.
-              * Otherwise replace old filter step with new filter step after PREWHERE.
-              */
-            if (remove_filter_node)
-            {
-                std::swap(stack[stack_size - 1], stack[stack_size - 2]);
-                stack.pop_back();
-            }
-            else
-            {
-                stack[stack_size - 2] = Frame{.node = replace_old_filter_node, .next_child = 1};
-            }
-
-            break;
-        }
-    }
+        filter_node->step = std::make_unique<ExpressionStep>(
+            read_from_merge_tree->getOutputStream(),
+            std::move(split_result.second));
+    }
 }
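The "leak of abstraction" comment in the new code above is easiest to see on the test it cites. A sketch of the 00950_default_prewhere scenario, reconstructed from that comment (the exact test DDL may differ):

    CREATE TABLE test_table
    (
        APIKey UInt8,
        SessionType UInt8,
        OperatingSystem UInt8 DEFAULT SessionType + 1
    ) ENGINE = MergeTree ORDER BY APIKey;

    -- Both conditions move to PREWHERE and only OperatingSystem is selected.
    -- OperatingSystem is materialized from its DEFAULT after the PREWHERE stage,
    -- so SessionType must remain among the PREWHERE outputs; otherwise the
    -- default would be computed from a zeroed column and give a wrong result.
    SELECT OperatingSystem FROM test_table WHERE APIKey = 42 AND SessionType = 42;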
@@ -118,6 +118,34 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s
         optimizePrewhere(stack, nodes);
         optimizePrimaryKeyCondition(stack);
 
+        auto & frame = stack.back();
+
+        if (frame.next_child == 0)
+        {
+
+            if (optimization_settings.read_in_order)
+                optimizeReadInOrder(*frame.node, nodes);
+
+            if (optimization_settings.distinct_in_order)
+                tryDistinctReadInOrder(frame.node);
+        }
+
+        /// Traverse all children first.
+        if (frame.next_child < frame.node->children.size())
+        {
+            auto next_frame = Frame{.node = frame.node->children[frame.next_child]};
+            ++frame.next_child;
+            stack.push_back(next_frame);
+            continue;
+        }
+
+        stack.pop_back();
+    }
+
+    stack.push_back({.node = &root});
+
+    while (!stack.empty())
+    {
         {
             /// NOTE: frame cannot be safely used after stack was modified.
             auto & frame = stack.back();
@@ -126,19 +154,14 @@ void optimizeTreeSecondPass(const QueryPlanOptimizationSettings & optimization_s
         {
             has_reading_from_mt |= typeid_cast<const ReadFromMergeTree *>(frame.node->step.get()) != nullptr;
 
-            if (optimization_settings.read_in_order)
-                optimizeReadInOrder(*frame.node, nodes);
-
             /// Projection optimization relies on PK optimization
             if (optimization_settings.optimize_projection)
                 num_applied_projection
                     += optimizeUseAggregateProjections(*frame.node, nodes, optimization_settings.optimize_use_implicit_projections);
 
-
             if (optimization_settings.aggregation_in_order)
                 optimizeAggregationInOrder(*frame.node, nodes);
 
-            if (optimization_settings.distinct_in_order)
-                tryDistinctReadInOrder(frame.node);
         }
 
         /// Traverse all children first.
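After this hunk the second pass walks the plan twice: the first traversal applies the PREWHERE, primary-key, read-in-order and distinct-in-order rewrites, then the root is pushed again for the projection/aggregation-in-order pass. A minimal generic sketch of the Frame/next_child traversal idiom used here (simplified types, not ClickHouse code):

    #include <cstddef>
    #include <vector>

    struct Node { std::vector<Node *> children; };
    struct Frame { Node * node = nullptr; size_t next_child = 0; };

    template <typename Visit>
    void traverseDepthFirst(Node & root, Visit && visit)
    {
        std::vector<Frame> stack;
        stack.push_back({.node = &root});
        while (!stack.empty())
        {
            auto & frame = stack.back();
            if (frame.next_child == 0)
                visit(*frame.node); /// pre-order work, e.g. optimizeReadInOrder

            /// Traverse all children first.
            if (frame.next_child < frame.node->children.size())
            {
                auto * child = frame.node->children[frame.next_child];
                ++frame.next_child;
                stack.push_back({.node = child}); /// may invalidate `frame`, hence `continue`
                continue;
            }

            stack.pop_back();
        }
    }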
@@ -14,19 +14,33 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
         return 0;
 
     const auto & expr = filter_step->getExpression();
+    const std::string & filter_column_name = filter_step->getFilterColumnName();
 
     /// Do not split if there are function like runningDifference.
     if (expr->hasStatefulFunctions())
         return 0;
 
-    auto split = expr->splitActionsForFilter(filter_step->getFilterColumnName());
+    bool filter_name_clashs_with_input = false;
+    if (filter_step->removesFilterColumn())
+    {
+        for (const auto * input : expr->getInputs())
+        {
+            if (input->result_name == filter_column_name)
+            {
+                filter_name_clashs_with_input = true;
+                break;
+            }
+        }
+    }
+
+    auto split = expr->splitActionsForFilter(filter_column_name);
 
     if (split.second->trivial())
         return 0;
 
     bool remove_filter = false;
     if (filter_step->removesFilterColumn())
-        remove_filter = split.second->removeUnusedResult(filter_step->getFilterColumnName());
+        remove_filter = split.second->removeUnusedResult(filter_column_name);
 
     auto description = filter_step->getStepDescription();
 
@@ -34,10 +48,25 @@ size_t trySplitFilter(QueryPlan::Node * node, QueryPlan::Nodes & nodes)
     node->children.swap(filter_node.children);
     node->children.push_back(&filter_node);
 
+    std::string split_filter_name = filter_column_name;
+    if (filter_name_clashs_with_input)
+    {
+        split_filter_name = "__split_filter";
+
+        for (auto & filter_output : split.first->getOutputs())
+        {
+            if (filter_output->result_name == filter_column_name)
+            {
+                filter_output = &split.first->addAlias(*filter_output, split_filter_name);
+                break;
+            }
+        }
+    }
+
     filter_node.step = std::make_unique<FilterStep>(
             filter_node.children.at(0)->step->getOutputStream(),
             std::move(split.first),
-            filter_step->getFilterColumnName(),
+            std::move(split_filter_name),
             remove_filter);
 
     node->step = std::make_unique<ExpressionStep>(filter_node.step->getOutputStream(), std::move(split.second));
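The clash handled above: if the filter column shares its name with one of the expression's inputs and the filter column is removed afterwards, the first half of the split would expose two different nodes under one name. A hypothetical illustration (table and column names invented):

    -- `flag` is both a stored column and the filter column of the step.
    -- After the split, the first half's filter output is renamed to
    -- __split_filter, so it cannot collide with the input column `flag`
    -- that the second half still has to read.
    SELECT value FROM events WHERE flag;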
@@ -89,6 +89,34 @@ size_t countPartitions(const MergeTreeData::DataPartsVector & prepared_parts)
     return countPartitions(prepared_parts, get_partition_id);
 }
 
+bool restoreDAGInputs(ActionsDAG & dag, const NameSet & inputs)
+{
+    std::unordered_set<const ActionsDAG::Node *> outputs(dag.getOutputs().begin(), dag.getOutputs().end());
+    bool added = false;
+    for (const auto * input : dag.getInputs())
+    {
+        if (inputs.contains(input->result_name) && !outputs.contains(input))
+        {
+            dag.getOutputs().push_back(input);
+            added = true;
+        }
+    }
+
+    return added;
+}
+
+bool restorePrewhereInputs(PrewhereInfo & info, const NameSet & inputs)
+{
+    bool added = false;
+    if (info.row_level_filter)
+        added = added || restoreDAGInputs(*info.row_level_filter, inputs);
+
+    if (info.prewhere_actions)
+        added = added || restoreDAGInputs(*info.prewhere_actions, inputs);
+
+    return added;
+}
+
 }
 
 namespace ProfileEvents
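These helpers are used by later hunks to re-add sorting and sampling key columns to the PREWHERE outputs. One subtlety worth noting: `added = added || restoreDAGInputs(...)` short-circuits, so once the row-level filter reports an addition, the prewhere_actions DAG on the next line is not visited. An accumulation that always processes both DAGs would look like this (sketch, not part of the diff):

    bool added = false;
    if (info.row_level_filter)
        added |= restoreDAGInputs(*info.row_level_filter, inputs);
    if (info.prewhere_actions)
        added |= restoreDAGInputs(*info.prewhere_actions, inputs);
    return added;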
@@ -786,18 +814,13 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder(
     /// To fix this, we prohibit removing any input in prewhere actions. Instead, projection actions will be added after sorting.
     /// See 02354_read_in_order_prewhere.sql as an example.
     bool have_input_columns_removed_after_prewhere = false;
-    if (prewhere_info && prewhere_info->prewhere_actions)
+    if (prewhere_info)
     {
-        auto & outputs = prewhere_info->prewhere_actions->getOutputs();
-        std::unordered_set<const ActionsDAG::Node *> outputs_set(outputs.begin(), outputs.end());
-        for (const auto * input : prewhere_info->prewhere_actions->getInputs())
-        {
-            if (!outputs_set.contains(input))
-            {
-                outputs.push_back(input);
-                have_input_columns_removed_after_prewhere = true;
-            }
-        }
+        NameSet sorting_columns;
+        for (const auto & column : metadata_for_reading->getSortingKey().expression->getRequiredColumnsWithTypes())
+            sorting_columns.insert(column.name);
+
+        have_input_columns_removed_after_prewhere = restorePrewhereInputs(*prewhere_info, sorting_columns);
     }
 
     /// Let's split ranges to avoid reading much data.
@@ -984,7 +1007,6 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsWithOrder(
         /// Thus we need to merge all partition parts into a single sorted stream.
         Pipe pipe = Pipe::unitePipes(std::move(pipes));
         merge_streams(pipe);
-        out_projection = createProjection(pipe_header);
         return pipe;
     }
 
@@ -1133,6 +1155,14 @@ Pipe ReadFromMergeTree::spreadMarkRangesAmongStreamsFinal(
 
     auto sorting_expr = std::make_shared<ExpressionActions>(metadata_for_reading->getSortingKey().expression->getActionsDAG().clone());
 
+    if (prewhere_info)
+    {
+        NameSet sorting_columns;
+        for (const auto & column : metadata_for_reading->getSortingKey().expression->getRequiredColumnsWithTypes())
+            sorting_columns.insert(column.name);
+        restorePrewhereInputs(*prewhere_info, sorting_columns);
+    }
+
     for (size_t range_index = 0; range_index < parts_to_merge_ranges.size() - 1; ++range_index)
     {
         /// If do_not_merge_across_partitions_select_final is true and there is only one part in partition
@@ -1804,13 +1834,20 @@ Pipe ReadFromMergeTree::spreadMarkRanges(
 
     if (!final && result.sampling.use_sampling)
     {
+        NameSet sampling_columns;
+
         /// Add columns needed for `sample_by_ast` to `column_names_to_read`.
         /// Skip this if final was used, because such columns were already added from PK.
         for (const auto & column : result.sampling.filter_expression->getRequiredColumns().getNames())
+        {
             if (!names.contains(column))
                 column_names_to_read.push_back(column);
+
+            sampling_columns.insert(column);
+        }
+
+        if (prewhere_info)
+            restorePrewhereInputs(*prewhere_info, sampling_columns);
     }
 
     if (final)
@@ -2004,6 +2041,24 @@ void ReadFromMergeTree::initializePipeline(QueryPipelineBuilder & pipeline, cons
         });
     }
 
+    /// Some extra columns could be added by sample/final/in-order/etc
+    /// Remove them from header if not needed.
+    if (!blocksHaveEqualStructure(pipe.getHeader(), getOutputStream().header))
+    {
+        auto convert_actions_dag = ActionsDAG::makeConvertingActions(
+            pipe.getHeader().getColumnsWithTypeAndName(),
+            getOutputStream().header.getColumnsWithTypeAndName(),
+            ActionsDAG::MatchColumnsMode::Name,
+            true);
+
+        auto converting_dag_expr = std::make_shared<ExpressionActions>(convert_actions_dag);
+
+        pipe.addSimpleTransform([&](const Block & header)
+        {
+            return std::make_shared<ExpressionTransform>(header, converting_dag_expr);
+        });
+    }
+
     for (const auto & processor : pipe.getProcessors())
         processors.emplace_back(processor);
@@ -449,8 +449,8 @@ Block MergeTreeSelectProcessor::applyPrewhereActions(Block block, const Prewhere
 Block MergeTreeSelectProcessor::transformHeader(
     Block block, const PrewhereInfoPtr & prewhere_info, const DataTypePtr & partition_value_type, const Names & virtual_columns)
 {
-    injectVirtualColumns(block, 0, nullptr, partition_value_type, virtual_columns);
     auto transformed = applyPrewhereActions(std::move(block), prewhere_info);
+    injectVirtualColumns(transformed, 0, nullptr, partition_value_type, virtual_columns);
     return transformed;
 }
@@ -112,7 +112,7 @@ void MergeTreeWhereOptimizer::optimize(SelectQueryInfo & select_query_info, cons
         LOG_DEBUG(log, "MergeTreeWhereOptimizer: condition \"{}\" moved to PREWHERE", select.prewhere()->formatForLogging(log_queries_cut_to_length));
 }
 
-std::optional<MergeTreeWhereOptimizer::FilterActionsOptimizeResult> MergeTreeWhereOptimizer::optimize(const ActionsDAGPtr & filter_dag,
+MergeTreeWhereOptimizer::FilterActionsOptimizeResult MergeTreeWhereOptimizer::optimize(const ActionsDAGPtr & filter_dag,
     const std::string & filter_column_name,
     const ContextPtr & context,
     bool is_final)
@@ -132,11 +132,14 @@ std::optional<MergeTreeWhereOptimizer::FilterActionsOptimizeResult> MergeTreeWhe
     if (!optimize_result)
         return {};
 
-    auto filter_actions = reconstructDAG(optimize_result->where_conditions);
-    auto prewhere_filter_actions = reconstructDAG(optimize_result->prewhere_conditions);
+    // if (optimize_result->where_conditions.empty())
+    //     return {.prewhere_nodes = {}, .fully_moved_to_prewhere = true};
 
-    FilterActionsOptimizeResult result = { std::move(filter_actions), std::move(prewhere_filter_actions) };
-    return result;
+    std::unordered_set<const ActionsDAG::Node *> prewhere_conditions;
+    for (const auto & condition : optimize_result->prewhere_conditions)
+        prewhere_conditions.insert(condition.node.getDAGNode());
+
+    return {.prewhere_nodes = std::move(prewhere_conditions), .fully_moved_to_prewhere = optimize_result->where_conditions.empty()};
 }
 
 static void collectColumns(const RPNBuilderTreeNode & node, const NameSet & columns_names, NameSet & result_set, bool & has_invalid_column)
@@ -343,20 +346,6 @@ ASTPtr MergeTreeWhereOptimizer::reconstructAST(const Conditions & conditions)
     return function;
 }
 
-ActionsDAGPtr MergeTreeWhereOptimizer::reconstructDAG(const Conditions & conditions)
-{
-    if (conditions.empty())
-        return {};
-
-    ActionsDAG::NodeRawConstPtrs filter_nodes;
-    filter_nodes.reserve(conditions.size());
-
-    for (const auto & condition : conditions)
-        filter_nodes.push_back(condition.node.getDAGNode());
-
-    return ActionsDAG::buildFilterActionsDAG(filter_nodes);
-}
-
 std::optional<MergeTreeWhereOptimizer::OptimizeResult> MergeTreeWhereOptimizer::optimizeImpl(const RPNBuilderTreeNode & node,
     const WhereOptimizerContext & where_optimizer_context) const
 {
@@ -47,11 +47,11 @@ public:
 
     struct FilterActionsOptimizeResult
     {
-        ActionsDAGPtr filter_actions;
-        ActionsDAGPtr prewhere_filter_actions;
+        std::unordered_set<const ActionsDAG::Node *> prewhere_nodes;
+        bool fully_moved_to_prewhere = false;
     };
 
-    std::optional<FilterActionsOptimizeResult> optimize(const ActionsDAGPtr & filter_dag,
+    FilterActionsOptimizeResult optimize(const ActionsDAGPtr & filter_dag,
         const std::string & filter_column_name,
         const ContextPtr & context,
         bool is_final);
@@ -122,9 +122,6 @@ private:
     /// Reconstruct AST from conditions
     static ASTPtr reconstructAST(const Conditions & conditions);
 
-    /// Reconstruct DAG from conditions
-    static ActionsDAGPtr reconstructDAG(const Conditions & conditions);
-
     void optimizeArbitrary(ASTSelectQuery & select) const;
 
     UInt64 getColumnsSize(const NameSet & columns) const;
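The reworked result type drives the caller logic in optimizePrewhere above; a condensed sketch of the contract (where_optimizer and the arguments are assumed to be set up as in that file):

    MergeTreeWhereOptimizer::FilterActionsOptimizeResult optimize_result
        = where_optimizer.optimize(filter_dag, filter_column_name, context, is_final);

    if (optimize_result.prewhere_nodes.empty())
        return; /// nothing could be moved to PREWHERE

    /// Every node in prewhere_nodes is then split out of the filter DAG;
    /// fully_moved_to_prewhere == false means a FilterStep must remain after
    /// PREWHERE, while true means an ExpressionStep is enough.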
@@ -28,7 +28,7 @@ Expression ((Projection + Before ORDER BY))
 Expression ((Project names + Projection))
   Filter ((WHERE + DROP unused columns after JOIN))
     Join (JOIN FillRightFirst)
-      Expression (Change column names to column identifiers)
+      Expression
         ReadFromMergeTree (default.t1)
         Indexes:
           PrimaryKey
@@ -3,21 +3,18 @@
       MinMax
         Keys:
           y
-        Condition: (y in [1, +Inf))
         Parts: 4/5
         Granules: 11/12
       Partition
         Keys:
           y
           bitAnd(z, 3)
-        Condition: and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1]))
         Parts: 3/4
         Granules: 10/11
       PrimaryKey
         Keys:
           x
           y
-        Condition: and((x in [11, +Inf)), (y in [1, +Inf)))
         Parts: 2/3
         Granules: 6/10
       Skip
@@ -37,7 +34,6 @@
           {
             "Type": "MinMax",
             "Keys": ["y"],
-            "Condition": "(y in [1, +Inf))",
             "Initial Parts": 5,
             "Selected Parts": 4,
             "Initial Granules": 12,
@@ -46,7 +42,6 @@
           {
             "Type": "Partition",
             "Keys": ["y", "bitAnd(z, 3)"],
-            "Condition": "and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1]))",
             "Initial Parts": 4,
             "Selected Parts": 3,
             "Initial Granules": 11,
@@ -55,7 +50,6 @@
           {
             "Type": "PrimaryKey",
             "Keys": ["x", "y"],
-            "Condition": "and((x in [11, +Inf)), (y in [1, +Inf)))",
             "Initial Parts": 3,
             "Selected Parts": 2,
             "Initial Granules": 10,
@@ -109,21 +103,18 @@
       MinMax
         Keys:
           y
-        Condition: (y in [1, +Inf))
         Parts: 4/5
         Granules: 11/12
       Partition
         Keys:
           y
           bitAnd(z, 3)
-        Condition: and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1]))
         Parts: 3/4
         Granules: 10/11
       PrimaryKey
         Keys:
           x
           y
-        Condition: and((x in [11, +Inf)), (y in [1, +Inf)))
         Parts: 2/3
         Granules: 6/10
       Skip
@@ -138,7 +129,6 @@
           {
             "Type": "MinMax",
             "Keys": ["y"],
-            "Condition": "(y in [1, +Inf))",
             "Initial Parts": 5,
             "Selected Parts": 4,
             "Initial Granules": 12,
@@ -147,7 +137,6 @@
           {
             "Type": "Partition",
             "Keys": ["y", "bitAnd(z, 3)"],
-            "Condition": "and((y in [1, +Inf)), (bitAnd(z, 3) not in [1, 1]))",
             "Initial Parts": 4,
             "Selected Parts": 3,
             "Initial Granules": 11,
@@ -156,7 +145,6 @@
           {
             "Type": "PrimaryKey",
             "Keys": ["x", "y"],
-            "Condition": "and((x in [11, +Inf)), (y in [1, +Inf)))",
             "Initial Parts": 3,
             "Selected Parts": 2,
             "Initial Granules": 10,
@@ -17,13 +17,13 @@ do
 
     $CH_CLIENT -q "
       explain indexes = 1 select *, _part from test_index where t % 19 = 16 and y > 0 and bitAnd(z, 3) != 1 and x > 10 and t % 20 > 14;
-    " | grep -A 100 "ReadFromMergeTree" # | grep -v "Description"
+    " | grep -A 100 "ReadFromMergeTree" | grep -v "Condition"
 
     echo "-----------------"
 
     $CH_CLIENT -q "
       explain indexes = 1, json = 1 select *, _part from test_index where t % 19 = 16 and y > 0 and bitAnd(z, 3) != 1 and x > 10 and t % 20 > 14 format TSVRaw;
-    " | grep -A 100 "ReadFromMergeTree" # | grep -v "Description"
+    " | grep -A 100 "ReadFromMergeTree" | grep -v "Condition"
 
     echo "-----------------"
@@ -76,7 +76,6 @@ ExpressionTransform
 (Expression)
 ExpressionTransform
   (ReadFromMergeTree)
-  ExpressionTransform
     MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1
 2020-10-11 0 0
 2020-10-11 0 10
@@ -106,7 +105,6 @@ ExpressionTransform
 (Expression)
 ExpressionTransform
   (ReadFromMergeTree)
-  ExpressionTransform
     MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) 0 → 1
 2020-10-12 0
 2020-10-12 1
@@ -140,7 +138,6 @@ ExpressionTransform
 (Expression)
 ExpressionTransform
   (ReadFromMergeTree)
-  ExpressionTransform
     ReverseTransform
       MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InReverseOrder) 0 → 1
 2020-10-12 99999
@@ -52,7 +52,7 @@ SELECT _part_offset, foo FROM t_1 where granule == 0 AND _part_offset >= 100000
 
 SELECT 'PREWHERE';
 SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere granule == 0 where _part_offset >= 100000;
-SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part != '' where granule == 0; -- { serverError 10 }
-SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part_offset > 100000 where granule == 0; -- { serverError 10 }
+SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part != '' where granule == 0; -- { serverError 10, 16 }
+SELECT count(*), sum(_part_offset), sum(order_0) from t_1 prewhere _part_offset > 100000 where granule == 0; -- { serverError 10, 16 }
 SELECT _part_offset FROM t_1 PREWHERE order_0 % 10000 == 42 ORDER BY order_0 LIMIT 3;
 SELECT _part_offset, foo FROM t_1 PREWHERE order_0 % 10000 == 42 ORDER BY order_0 LIMIT 3;
@@ -1,3 +1,5 @@
+-- { echoOn }
+explain pipeline select a from t1 group by a;
 (Expression)
 ExpressionTransform × 16
   (Aggregating)
@@ -15,6 +17,8 @@ ExpressionTransform × 16
           Resize 3 → 1
             MergeTreeSelect(pool: ReadPool, algorithm: Thread) × 3 0 → 1
 1000000
+-- { echoOn }
+explain pipeline select a from t2 group by a;
 (Expression)
 ExpressionTransform × 16
   (Aggregating)
@@ -40,6 +44,8 @@ ExpressionTransform × 16
           Resize 2 → 1
             MergeTreeSelect(pool: ReadPool, algorithm: Thread) × 2 0 → 1
 1000000
+-- { echoOn }
+explain pipeline select a from t3 group by a;
 (Expression)
 ExpressionTransform × 16
   (Aggregating)
@@ -82,6 +88,8 @@ ExpressionTransform × 16
             MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
 1000000
 1000000
+-- { echoOn }
+explain pipeline select a from t4 group by a settings read_in_order_two_level_merge_threshold = 1e12;
 (Expression)
 ExpressionTransform × 16
   (Aggregating)
@@ -91,7 +99,6 @@ ExpressionTransform × 16
   (Expression)
   ExpressionTransform × 4
     (ReadFromMergeTree)
-    ExpressionTransform × 4
       MergingSortedTransform 2 → 1
         ExpressionTransform × 2
           MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
@@ -105,6 +112,8 @@ ExpressionTransform × 16
         ExpressionTransform × 2
           MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
 1000000
+-- { echoOn }
+explain pipeline select a from t5 group by a settings read_in_order_two_level_merge_threshold = 1e12;
 (Expression)
 ExpressionTransform × 16
   (Aggregating)
@@ -114,7 +123,6 @@ ExpressionTransform × 16
   (Expression)
   ExpressionTransform × 8
     (ReadFromMergeTree)
-    ExpressionTransform × 8
       MergingSortedTransform 2 → 1
         ExpressionTransform × 2
           MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
@@ -140,6 +148,8 @@ ExpressionTransform × 16
         ExpressionTransform × 2
           MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
 1000000
+-- { echoOn }
+explain pipeline select a from t6 group by a settings read_in_order_two_level_merge_threshold = 1e12;
 (Expression)
 ExpressionTransform × 16
   (Aggregating)
@@ -148,7 +158,6 @@ ExpressionTransform × 16
   (Expression)
   ExpressionTransform × 16
     (ReadFromMergeTree)
-    ExpressionTransform × 16
       MergingSortedTransform 2 → 1
         ExpressionTransform × 2
           MergeTreeSelect(pool: ReadPoolInOrder, algorithm: InOrder) × 2 0 → 1
@@ -15,7 +15,9 @@ system stop merges t1;
 insert into t1 select number from numbers_mt(1e6);
 insert into t1 select number from numbers_mt(1e6);
 
+-- { echoOn }
 explain pipeline select a from t1 group by a;
+-- { echoOff }
 
 select count() from (select throwIf(count() != 2) from t1 group by a);
 
@@ -28,7 +30,9 @@ system stop merges t2;
 insert into t2 select number from numbers_mt(1e6);
 insert into t2 select number from numbers_mt(1e6);
 
+-- { echoOn }
 explain pipeline select a from t2 group by a;
+-- { echoOff }
 
 select count() from (select throwIf(count() != 2) from t2 group by a);
 
@@ -41,7 +45,9 @@ system stop merges t3;
 insert into t3 select number from numbers_mt(1e6);
 insert into t3 select number from numbers_mt(1e6);
 
+-- { echoOn }
 explain pipeline select a from t3 group by a;
+-- { echoOff }
 
 select count() from (select throwIf(count() != 2) from t3 group by a);
 
@@ -63,7 +69,9 @@ system stop merges t4;
 insert into t4 select number from numbers_mt(1e6);
 insert into t4 select number from numbers_mt(1e6);
 
+-- { echoOn }
 explain pipeline select a from t4 group by a settings read_in_order_two_level_merge_threshold = 1e12;
+-- { echoOff }
 
 select count() from (select throwIf(count() != 2) from t4 group by a);
 
@@ -76,7 +84,9 @@ system stop merges t5;
 insert into t5 select number from numbers_mt(1e6);
 insert into t5 select number from numbers_mt(1e6);
 
+-- { echoOn }
 explain pipeline select a from t5 group by a settings read_in_order_two_level_merge_threshold = 1e12;
+-- { echoOff }
 
 select count() from (select throwIf(count() != 2) from t5 group by a);
 
@@ -89,7 +99,9 @@ system stop merges t6;
 insert into t6 select number from numbers_mt(1e6);
 insert into t6 select number from numbers_mt(1e6);
 
+-- { echoOn }
 explain pipeline select a from t6 group by a settings read_in_order_two_level_merge_threshold = 1e12;
+-- { echoOff }
 
 select count() from (select throwIf(count() != 2) from t6 group by a);
@@ -38,3 +38,40 @@
         Description: minmax GRANULARITY 1
         Parts: 0/0
         Granules: 0/0
+ReadFromMergeTree (default.data_02771)
+  Indexes:
+    PrimaryKey
+      Condition: true
+      Parts: 1/1
+      Granules: 1/1
+    Skip
+      Name: x_idx
+      Description: minmax GRANULARITY 1
+      Parts: 0/1
+      Granules: 0/1
+    Skip
+      Name: y_idx
+      Description: minmax GRANULARITY 1
+      Parts: 0/0
+      Granules: 0/0
+    Skip
+      Name: xy_idx
+      Description: minmax GRANULARITY 1
+      Parts: 0/0
+      Granules: 0/0
+ReadFromMergeTree (default.data_02771)
+  Indexes:
+    PrimaryKey
+      Condition: true
+      Parts: 1/1
+      Granules: 1/1
+    Skip
+      Name: x_idx
+      Description: minmax GRANULARITY 1
+      Parts: 0/1
+      Granules: 0/1
+    Skip
+      Name: y_idx
+      Description: minmax GRANULARITY 1
+      Parts: 0/0
+      Granules: 0/0
@@ -1,5 +1,3 @@
-SET allow_experimental_analyzer = 0;
-
 DROP TABLE IF EXISTS data_02771;
 
 
@@ -24,6 +22,14 @@ SELECT * FROM data_02771 SETTINGS ignore_data_skipping_indices='na_idx';
 
 SELECT * FROM data_02771 WHERE x = 1 AND y = 1 SETTINGS ignore_data_skipping_indices='xy_idx',force_data_skipping_indices='xy_idx' ; -- { serverError 277 }
 SELECT * FROM data_02771 WHERE x = 1 AND y = 2 SETTINGS ignore_data_skipping_indices='xy_idx';
 
+SET allow_experimental_analyzer = 0;
+
+SELECT * from ( EXPLAIN indexes = 1 SELECT * FROM data_02771 WHERE x = 1 AND y = 2 ) WHERE explain NOT LIKE '%Expression%' AND explain NOT LIKE '%Filter%';
+SELECT * from ( EXPLAIN indexes = 1 SELECT * FROM data_02771 WHERE x = 1 AND y = 2 SETTINGS ignore_data_skipping_indices='xy_idx' ) WHERE explain NOT LIKE '%Expression%' AND explain NOT LIKE '%Filter%';
+
+SET allow_experimental_analyzer = 1;
+
+SELECT * from ( EXPLAIN indexes = 1 SELECT * FROM data_02771 WHERE x = 1 AND y = 2 ) WHERE explain NOT LIKE '%Expression%' AND explain NOT LIKE '%Filter%';
+SELECT * from ( EXPLAIN indexes = 1 SELECT * FROM data_02771 WHERE x = 1 AND y = 2 SETTINGS ignore_data_skipping_indices='xy_idx' ) WHERE explain NOT LIKE '%Expression%' AND explain NOT LIKE '%Filter%';
@@ -1,6 +1,6 @@
-CreatingSets (Create sets before main query execution)
-  Expression ((Projection + Before ORDER BY))
-    ReadFromMergeTree (default.test_table)
+CreatingSets
+  Expression
+    ReadFromMergeTree
     Indexes:
       PrimaryKey
         Keys:
@@ -9,9 +9,9 @@ CreatingSets (Create sets before main query execution)
         Condition: and((id in (-Inf, 10]), (value in 1-element set))
         Parts: 1/1
         Granules: 1/1
-CreatingSets (Create sets before main query execution)
-  Expression ((Projection + Before ORDER BY))
-    ReadFromMergeTree (default.test_table)
+CreatingSets
+  Expression
+    ReadFromMergeTree
     Indexes:
       PrimaryKey
         Keys:
@@ -20,9 +20,9 @@ CreatingSets (Create sets before main query execution)
         Condition: and((id in (-Inf, 10]), (value in 1-element set))
         Parts: 1/1
         Granules: 1/1
-CreatingSets (Create sets before main query execution)
-  Expression ((Projection + Before ORDER BY))
-    ReadFromMergeTree (default.test_table)
+CreatingSets
+  Expression
+    ReadFromMergeTree
     Indexes:
      PrimaryKey
        Keys:
@@ -31,9 +31,9 @@ CreatingSets (Create sets before main query execution)
         Condition: and((id in (-Inf, 10]), (value in 5-element set))
         Parts: 1/1
         Granules: 1/1
-CreatingSets (Create sets before main query execution)
-  Expression ((Projection + Before ORDER BY))
-    ReadFromMergeTree (default.test_table)
+CreatingSets
+  Expression
+    ReadFromMergeTree
     Indexes:
       PrimaryKey
         Keys:
@@ -42,47 +42,51 @@ CreatingSets (Create sets before main query execution)
         Condition: and((id in (-Inf, 10]), (value in 5-element set))
         Parts: 1/1
         Granules: 1/1
-CreatingSets (Create sets before main query execution)
-  Expression ((Project names + Projection))
-    ReadFromMergeTree (default.test_table)
+CreatingSets
+  Expression
+    Expression
+      ReadFromMergeTree
       Indexes:
         PrimaryKey
           Keys:
            id
            value
-          Condition: and((id in (-Inf, 10]), (value in 1-element set))
+          Condition: and((value in 1-element set), (id in (-Inf, 10]))
          Parts: 1/1
          Granules: 1/1
-CreatingSets (Create sets before main query execution)
-  Expression ((Project names + Projection))
-    ReadFromMergeTree (default.test_table)
+CreatingSets
+  Expression
+    Expression
+      ReadFromMergeTree
       Indexes:
         PrimaryKey
          Keys:
            id
            value
-          Condition: and((id in (-Inf, 10]), (value in 1-element set))
+          Condition: and((value in 1-element set), (id in (-Inf, 10]))
          Parts: 1/1
          Granules: 1/1
-CreatingSets (Create sets before main query execution)
-  Expression ((Project names + Projection))
-    ReadFromMergeTree (default.test_table)
+CreatingSets
+  Expression
+    Expression
+      ReadFromMergeTree
      Indexes:
        PrimaryKey
          Keys:
            id
            value
-          Condition: and((id in (-Inf, 10]), (value in 5-element set))
+          Condition: and((value in 5-element set), (id in (-Inf, 10]))
          Parts: 1/1
          Granules: 1/1
-CreatingSets (Create sets before main query execution)
-  Expression ((Project names + Projection))
-    ReadFromMergeTree (default.test_table)
+CreatingSets
+  Expression
+    Expression
+      ReadFromMergeTree
      Indexes:
        PrimaryKey
          Keys:
            id
            value
-          Condition: and((id in (-Inf, 10]), (value in 5-element set))
+          Condition: and((value in 5-element set), (id in (-Inf, 10]))
          Parts: 1/1
          Granules: 1/1
@@ -7,18 +7,18 @@ CREATE TABLE test_table
 
 INSERT INTO test_table SELECT number, number FROM numbers(10);
 
-SET allow_experimental_analyzer = 0;
+set allow_experimental_analyzer = 0;
 
-EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT 5);
-EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT '5');
-EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toUInt8(number) FROM numbers(5));
-EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toString(number) FROM numbers(5));
+EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT 5);
+EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT '5');
+EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toUInt8(number) FROM numbers(5));
+EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toString(number) FROM numbers(5));
 
-SET allow_experimental_analyzer = 1;
+set allow_experimental_analyzer = 1;
 
-EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT 5);
-EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT '5');
-EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toUInt8(number) FROM numbers(5));
-EXPLAIN indexes = 1 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toString(number) FROM numbers(5));
+EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT 5);
+EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT '5');
+EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toUInt8(number) FROM numbers(5));
+EXPLAIN indexes = 1, description=0 SELECT id FROM test_table WHERE id <= 10 AND value IN (SELECT toString(number) FROM numbers(5));
 
 DROP TABLE test_table;