mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-25 03:00:49 +00:00
support prewhere, row_filter, read_in_order and decent projection selection
TODO set index analysis in projection
This commit is contained in:
parent
f7f949c1f9
commit
9c069ebdbf
@ -477,6 +477,11 @@ InterpreterSelectQuery::InterpreterSelectQuery(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
addPrewhereAliasActions();
|
||||||
|
|
||||||
|
query_info.syntax_analyzer_result = syntax_analyzer_result;
|
||||||
|
query_info.required_columns = required_columns;
|
||||||
|
|
||||||
source_header = metadata_snapshot->getSampleBlockForColumns(required_columns, storage->getVirtuals(), storage->getStorageID());
|
source_header = metadata_snapshot->getSampleBlockForColumns(required_columns, storage->getVirtuals(), storage->getStorageID());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -589,6 +594,7 @@ Block InterpreterSelectQuery::getSampleBlockImpl()
|
|||||||
{
|
{
|
||||||
from_stage = storage->getQueryProcessingStage(context, options.to_stage, metadata_snapshot, query_info);
|
from_stage = storage->getQueryProcessingStage(context, options.to_stage, metadata_snapshot, query_info);
|
||||||
|
|
||||||
|
/// TODO how can we make IN index work if we cache parts before selecting a projection?
|
||||||
/// XXX Used for IN set index analysis. Is this a proper way?
|
/// XXX Used for IN set index analysis. Is this a proper way?
|
||||||
if (query_info.projection)
|
if (query_info.projection)
|
||||||
metadata_snapshot->selected_projection = query_info.projection->desc;
|
metadata_snapshot->selected_projection = query_info.projection->desc;
|
||||||
@ -1043,7 +1049,10 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
|
|||||||
&& !expressions.has_window)
|
&& !expressions.has_window)
|
||||||
{
|
{
|
||||||
if (expressions.has_order_by)
|
if (expressions.has_order_by)
|
||||||
executeOrder(query_plan, query_info.input_order_info);
|
executeOrder(
|
||||||
|
query_plan,
|
||||||
|
query_info.input_order_info ? query_info.input_order_info
|
||||||
|
: (query_info.projection ? query_info.projection->input_order_info : nullptr));
|
||||||
|
|
||||||
if (expressions.has_order_by && query.limitLength())
|
if (expressions.has_order_by && query.limitLength())
|
||||||
executeDistinct(query_plan, false, expressions.selected_columns, true);
|
executeDistinct(query_plan, false, expressions.selected_columns, true);
|
||||||
@ -1169,10 +1178,25 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
|
|||||||
|
|
||||||
if (expressions.need_aggregate)
|
if (expressions.need_aggregate)
|
||||||
{
|
{
|
||||||
executeAggregation(query_plan, expressions.before_aggregation, aggregate_overflow_row, aggregate_final, query_info.input_order_info);
|
if (query_info.projection)
|
||||||
|
{
|
||||||
|
executeAggregation(
|
||||||
|
query_plan,
|
||||||
|
expressions.before_aggregation,
|
||||||
|
aggregate_overflow_row,
|
||||||
|
aggregate_final,
|
||||||
|
query_info.projection->input_order_info);
|
||||||
|
/// We need to reset input order info, so that executeOrder can't use it
|
||||||
|
query_info.projection->input_order_info.reset();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
executeAggregation(
|
||||||
|
query_plan, expressions.before_aggregation, aggregate_overflow_row, aggregate_final, query_info.input_order_info);
|
||||||
/// We need to reset input order info, so that executeOrder can't use it
|
/// We need to reset input order info, so that executeOrder can't use it
|
||||||
query_info.input_order_info.reset();
|
query_info.input_order_info.reset();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Now we must execute:
|
// Now we must execute:
|
||||||
// 1) expressions before window functions,
|
// 1) expressions before window functions,
|
||||||
@ -1301,7 +1325,10 @@ void InterpreterSelectQuery::executeImpl(QueryPlan & query_plan, const BlockInpu
|
|||||||
else if (!expressions.first_stage && !expressions.need_aggregate && !(query.group_by_with_totals && !aggregate_final))
|
else if (!expressions.first_stage && !expressions.need_aggregate && !(query.group_by_with_totals && !aggregate_final))
|
||||||
executeMergeSorted(query_plan, "for ORDER BY");
|
executeMergeSorted(query_plan, "for ORDER BY");
|
||||||
else /// Otherwise, just sort.
|
else /// Otherwise, just sort.
|
||||||
executeOrder(query_plan, query_info.input_order_info);
|
executeOrder(
|
||||||
|
query_plan,
|
||||||
|
query_info.input_order_info ? query_info.input_order_info
|
||||||
|
: (query_info.projection ? query_info.projection->input_order_info : nullptr));
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Optimization - if there are several sources and there is LIMIT, then first apply the preliminary LIMIT,
|
/** Optimization - if there are several sources and there is LIMIT, then first apply the preliminary LIMIT,
|
||||||
@ -1485,13 +1512,168 @@ void InterpreterSelectQuery::addEmptySourceToQueryPlan(
|
|||||||
query_plan.addStep(std::move(read_from_pipe));
|
query_plan.addStep(std::move(read_from_pipe));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void InterpreterSelectQuery::addPrewhereAliasActions()
|
||||||
|
{
|
||||||
|
auto & prewhere_info = analysis_result.prewhere_info;
|
||||||
|
auto & columns_to_remove_after_prewhere = analysis_result.columns_to_remove_after_prewhere;
|
||||||
|
|
||||||
|
/// Detect, if ALIAS columns are required for query execution
|
||||||
|
auto alias_columns_required = false;
|
||||||
|
const ColumnsDescription & storage_columns = metadata_snapshot->getColumns();
|
||||||
|
for (const auto & column_name : required_columns)
|
||||||
|
{
|
||||||
|
auto column_default = storage_columns.getDefault(column_name);
|
||||||
|
if (column_default && column_default->kind == ColumnDefaultKind::Alias)
|
||||||
|
{
|
||||||
|
alias_columns_required = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// There are multiple sources of required columns:
|
||||||
|
/// - raw required columns,
|
||||||
|
/// - columns deduced from ALIAS columns,
|
||||||
|
/// - raw required columns from PREWHERE,
|
||||||
|
/// - columns deduced from ALIAS columns from PREWHERE.
|
||||||
|
/// PREWHERE is a special case, since we need to resolve it and pass directly to `IStorage::read()`
|
||||||
|
/// before any other executions.
|
||||||
|
if (alias_columns_required)
|
||||||
|
{
|
||||||
|
NameSet required_columns_from_prewhere; /// Set of all (including ALIAS) required columns for PREWHERE
|
||||||
|
NameSet required_aliases_from_prewhere; /// Set of ALIAS required columns for PREWHERE
|
||||||
|
|
||||||
|
if (prewhere_info)
|
||||||
|
{
|
||||||
|
/// Get some columns directly from PREWHERE expression actions
|
||||||
|
auto prewhere_required_columns = prewhere_info->prewhere_actions->getRequiredColumns().getNames();
|
||||||
|
required_columns_from_prewhere.insert(prewhere_required_columns.begin(), prewhere_required_columns.end());
|
||||||
|
|
||||||
|
if (prewhere_info->row_level_filter_actions)
|
||||||
|
{
|
||||||
|
auto row_level_required_columns = prewhere_info->row_level_filter_actions->getRequiredColumns().getNames();
|
||||||
|
required_columns_from_prewhere.insert(row_level_required_columns.begin(), row_level_required_columns.end());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Expression, that contains all raw required columns
|
||||||
|
ASTPtr required_columns_all_expr = std::make_shared<ASTExpressionList>();
|
||||||
|
|
||||||
|
/// Expression, that contains raw required columns for PREWHERE
|
||||||
|
ASTPtr required_columns_from_prewhere_expr = std::make_shared<ASTExpressionList>();
|
||||||
|
|
||||||
|
/// Sort out already known required columns between expressions,
|
||||||
|
/// also populate `required_aliases_from_prewhere`.
|
||||||
|
for (const auto & column : required_columns)
|
||||||
|
{
|
||||||
|
ASTPtr column_expr;
|
||||||
|
const auto column_default = storage_columns.getDefault(column);
|
||||||
|
bool is_alias = column_default && column_default->kind == ColumnDefaultKind::Alias;
|
||||||
|
if (is_alias)
|
||||||
|
{
|
||||||
|
auto column_decl = storage_columns.get(column);
|
||||||
|
column_expr = column_default->expression->clone();
|
||||||
|
// recursive visit for alias to alias
|
||||||
|
replaceAliasColumnsInQuery(
|
||||||
|
column_expr, metadata_snapshot->getColumns(), syntax_analyzer_result->getArrayJoinSourceNameSet(), context);
|
||||||
|
|
||||||
|
column_expr = addTypeConversionToAST(
|
||||||
|
std::move(column_expr), column_decl.type->getName(), metadata_snapshot->getColumns().getAll(), context);
|
||||||
|
column_expr = setAlias(column_expr, column);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
column_expr = std::make_shared<ASTIdentifier>(column);
|
||||||
|
|
||||||
|
if (required_columns_from_prewhere.count(column))
|
||||||
|
{
|
||||||
|
required_columns_from_prewhere_expr->children.emplace_back(std::move(column_expr));
|
||||||
|
|
||||||
|
if (is_alias)
|
||||||
|
required_aliases_from_prewhere.insert(column);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
required_columns_all_expr->children.emplace_back(std::move(column_expr));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Columns, which we will get after prewhere and filter executions.
|
||||||
|
NamesAndTypesList required_columns_after_prewhere;
|
||||||
|
NameSet required_columns_after_prewhere_set;
|
||||||
|
|
||||||
|
/// Collect required columns from prewhere expression actions.
|
||||||
|
if (prewhere_info)
|
||||||
|
{
|
||||||
|
NameSet columns_to_remove(columns_to_remove_after_prewhere.begin(), columns_to_remove_after_prewhere.end());
|
||||||
|
Block prewhere_actions_result = prewhere_info->prewhere_actions->getResultColumns();
|
||||||
|
|
||||||
|
/// Populate required columns with the columns, added by PREWHERE actions and not removed afterwards.
|
||||||
|
/// XXX: looks hacky that we already know which columns after PREWHERE we won't need for sure.
|
||||||
|
for (const auto & column : prewhere_actions_result)
|
||||||
|
{
|
||||||
|
if (prewhere_info->remove_prewhere_column && column.name == prewhere_info->prewhere_column_name)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (columns_to_remove.count(column.name))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
required_columns_all_expr->children.emplace_back(std::make_shared<ASTIdentifier>(column.name));
|
||||||
|
required_columns_after_prewhere.emplace_back(column.name, column.type);
|
||||||
|
}
|
||||||
|
|
||||||
|
required_columns_after_prewhere_set
|
||||||
|
= ext::map<NameSet>(required_columns_after_prewhere, [](const auto & it) { return it.name; });
|
||||||
|
}
|
||||||
|
|
||||||
|
auto syntax_result
|
||||||
|
= TreeRewriter(context).analyze(required_columns_all_expr, required_columns_after_prewhere, storage, metadata_snapshot);
|
||||||
|
alias_actions = ExpressionAnalyzer(required_columns_all_expr, syntax_result, context).getActionsDAG(true);
|
||||||
|
|
||||||
|
/// The set of required columns could be added as a result of adding an action to calculate ALIAS.
|
||||||
|
required_columns = alias_actions->getRequiredColumns().getNames();
|
||||||
|
|
||||||
|
/// Do not remove prewhere filter if it is a column which is used as alias.
|
||||||
|
if (prewhere_info && prewhere_info->remove_prewhere_column)
|
||||||
|
if (required_columns.end() != std::find(required_columns.begin(), required_columns.end(), prewhere_info->prewhere_column_name))
|
||||||
|
prewhere_info->remove_prewhere_column = false;
|
||||||
|
|
||||||
|
/// Remove columns which will be added by prewhere.
|
||||||
|
required_columns.erase(
|
||||||
|
std::remove_if(
|
||||||
|
required_columns.begin(),
|
||||||
|
required_columns.end(),
|
||||||
|
[&](const String & name) { return required_columns_after_prewhere_set.count(name) != 0; }),
|
||||||
|
required_columns.end());
|
||||||
|
|
||||||
|
if (prewhere_info)
|
||||||
|
{
|
||||||
|
/// Don't remove columns which are needed to be aliased.
|
||||||
|
for (const auto & name : required_columns)
|
||||||
|
prewhere_info->prewhere_actions->tryRestoreColumn(name);
|
||||||
|
|
||||||
|
auto analyzed_result
|
||||||
|
= TreeRewriter(context).analyze(required_columns_from_prewhere_expr, metadata_snapshot->getColumns().getAllPhysical());
|
||||||
|
prewhere_info->alias_actions
|
||||||
|
= ExpressionAnalyzer(required_columns_from_prewhere_expr, analyzed_result, context).getActionsDAG(true, false);
|
||||||
|
|
||||||
|
/// Add (physical?) columns required by alias actions.
|
||||||
|
auto required_columns_from_alias = prewhere_info->alias_actions->getRequiredColumns();
|
||||||
|
Block prewhere_actions_result = prewhere_info->prewhere_actions->getResultColumns();
|
||||||
|
for (auto & column : required_columns_from_alias)
|
||||||
|
if (!prewhere_actions_result.has(column.name))
|
||||||
|
if (required_columns.end() == std::find(required_columns.begin(), required_columns.end(), column.name))
|
||||||
|
required_columns.push_back(column.name);
|
||||||
|
|
||||||
|
/// Add physical columns required by prewhere actions.
|
||||||
|
for (const auto & column : required_columns_from_prewhere)
|
||||||
|
if (required_aliases_from_prewhere.count(column) == 0)
|
||||||
|
if (required_columns.end() == std::find(required_columns.begin(), required_columns.end(), column))
|
||||||
|
required_columns.push_back(column);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum processing_stage, QueryPlan & query_plan)
|
void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum processing_stage, QueryPlan & query_plan)
|
||||||
{
|
{
|
||||||
auto & query = getSelectQuery();
|
auto & query = getSelectQuery();
|
||||||
const Settings & settings = context->getSettingsRef();
|
const Settings & settings = context->getSettingsRef();
|
||||||
auto & expressions = analysis_result;
|
|
||||||
auto & prewhere_info = expressions.prewhere_info;
|
|
||||||
auto & columns_to_remove_after_prewhere = expressions.columns_to_remove_after_prewhere;
|
|
||||||
|
|
||||||
/// Optimization for trivial query like SELECT count() FROM table.
|
/// Optimization for trivial query like SELECT count() FROM table.
|
||||||
bool optimize_trivial_count =
|
bool optimize_trivial_count =
|
||||||
@ -1560,160 +1742,6 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Actions to calculate ALIAS if required.
|
|
||||||
ActionsDAGPtr alias_actions;
|
|
||||||
|
|
||||||
if (storage)
|
|
||||||
{
|
|
||||||
/// Detect, if ALIAS columns are required for query execution
|
|
||||||
auto alias_columns_required = false;
|
|
||||||
const ColumnsDescription & storage_columns = metadata_snapshot->getColumns();
|
|
||||||
for (const auto & column_name : required_columns)
|
|
||||||
{
|
|
||||||
auto column_default = storage_columns.getDefault(column_name);
|
|
||||||
if (column_default && column_default->kind == ColumnDefaultKind::Alias)
|
|
||||||
{
|
|
||||||
alias_columns_required = true;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// There are multiple sources of required columns:
|
|
||||||
/// - raw required columns,
|
|
||||||
/// - columns deduced from ALIAS columns,
|
|
||||||
/// - raw required columns from PREWHERE,
|
|
||||||
/// - columns deduced from ALIAS columns from PREWHERE.
|
|
||||||
/// PREWHERE is a special case, since we need to resolve it and pass directly to `IStorage::read()`
|
|
||||||
/// before any other executions.
|
|
||||||
if (alias_columns_required)
|
|
||||||
{
|
|
||||||
NameSet required_columns_from_prewhere; /// Set of all (including ALIAS) required columns for PREWHERE
|
|
||||||
NameSet required_aliases_from_prewhere; /// Set of ALIAS required columns for PREWHERE
|
|
||||||
|
|
||||||
if (prewhere_info)
|
|
||||||
{
|
|
||||||
/// Get some columns directly from PREWHERE expression actions
|
|
||||||
auto prewhere_required_columns = prewhere_info->prewhere_actions->getRequiredColumns().getNames();
|
|
||||||
required_columns_from_prewhere.insert(prewhere_required_columns.begin(), prewhere_required_columns.end());
|
|
||||||
|
|
||||||
if (prewhere_info->row_level_filter_actions)
|
|
||||||
{
|
|
||||||
auto row_level_required_columns = prewhere_info->row_level_filter_actions->getRequiredColumns().getNames();
|
|
||||||
required_columns_from_prewhere.insert(row_level_required_columns.begin(), row_level_required_columns.end());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Expression, that contains all raw required columns
|
|
||||||
ASTPtr required_columns_all_expr = std::make_shared<ASTExpressionList>();
|
|
||||||
|
|
||||||
/// Expression, that contains raw required columns for PREWHERE
|
|
||||||
ASTPtr required_columns_from_prewhere_expr = std::make_shared<ASTExpressionList>();
|
|
||||||
|
|
||||||
/// Sort out already known required columns between expressions,
|
|
||||||
/// also populate `required_aliases_from_prewhere`.
|
|
||||||
for (const auto & column : required_columns)
|
|
||||||
{
|
|
||||||
ASTPtr column_expr;
|
|
||||||
const auto column_default = storage_columns.getDefault(column);
|
|
||||||
bool is_alias = column_default && column_default->kind == ColumnDefaultKind::Alias;
|
|
||||||
if (is_alias)
|
|
||||||
{
|
|
||||||
auto column_decl = storage_columns.get(column);
|
|
||||||
column_expr = column_default->expression->clone();
|
|
||||||
// recursive visit for alias to alias
|
|
||||||
replaceAliasColumnsInQuery(column_expr, metadata_snapshot->getColumns(), syntax_analyzer_result->getArrayJoinSourceNameSet(), context);
|
|
||||||
|
|
||||||
column_expr = addTypeConversionToAST(std::move(column_expr), column_decl.type->getName(), metadata_snapshot->getColumns().getAll(), context);
|
|
||||||
column_expr = setAlias(column_expr, column);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
column_expr = std::make_shared<ASTIdentifier>(column);
|
|
||||||
|
|
||||||
if (required_columns_from_prewhere.count(column))
|
|
||||||
{
|
|
||||||
required_columns_from_prewhere_expr->children.emplace_back(std::move(column_expr));
|
|
||||||
|
|
||||||
if (is_alias)
|
|
||||||
required_aliases_from_prewhere.insert(column);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
required_columns_all_expr->children.emplace_back(std::move(column_expr));
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Columns, which we will get after prewhere and filter executions.
|
|
||||||
NamesAndTypesList required_columns_after_prewhere;
|
|
||||||
NameSet required_columns_after_prewhere_set;
|
|
||||||
|
|
||||||
/// Collect required columns from prewhere expression actions.
|
|
||||||
if (prewhere_info)
|
|
||||||
{
|
|
||||||
NameSet columns_to_remove(columns_to_remove_after_prewhere.begin(), columns_to_remove_after_prewhere.end());
|
|
||||||
Block prewhere_actions_result = prewhere_info->prewhere_actions->getResultColumns();
|
|
||||||
|
|
||||||
/// Populate required columns with the columns, added by PREWHERE actions and not removed afterwards.
|
|
||||||
/// XXX: looks hacky that we already know which columns after PREWHERE we won't need for sure.
|
|
||||||
for (const auto & column : prewhere_actions_result)
|
|
||||||
{
|
|
||||||
if (prewhere_info->remove_prewhere_column && column.name == prewhere_info->prewhere_column_name)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
if (columns_to_remove.count(column.name))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
required_columns_all_expr->children.emplace_back(std::make_shared<ASTIdentifier>(column.name));
|
|
||||||
required_columns_after_prewhere.emplace_back(column.name, column.type);
|
|
||||||
}
|
|
||||||
|
|
||||||
required_columns_after_prewhere_set
|
|
||||||
= ext::map<NameSet>(required_columns_after_prewhere, [](const auto & it) { return it.name; });
|
|
||||||
}
|
|
||||||
|
|
||||||
auto syntax_result = TreeRewriter(context).analyze(required_columns_all_expr, required_columns_after_prewhere, storage, metadata_snapshot);
|
|
||||||
alias_actions = ExpressionAnalyzer(required_columns_all_expr, syntax_result, context).getActionsDAG(true);
|
|
||||||
|
|
||||||
/// The set of required columns could be added as a result of adding an action to calculate ALIAS.
|
|
||||||
required_columns = alias_actions->getRequiredColumns().getNames();
|
|
||||||
|
|
||||||
/// Do not remove prewhere filter if it is a column which is used as alias.
|
|
||||||
if (prewhere_info && prewhere_info->remove_prewhere_column)
|
|
||||||
if (required_columns.end()
|
|
||||||
!= std::find(required_columns.begin(), required_columns.end(), prewhere_info->prewhere_column_name))
|
|
||||||
prewhere_info->remove_prewhere_column = false;
|
|
||||||
|
|
||||||
/// Remove columns which will be added by prewhere.
|
|
||||||
required_columns.erase(std::remove_if(required_columns.begin(), required_columns.end(), [&](const String & name)
|
|
||||||
{
|
|
||||||
return required_columns_after_prewhere_set.count(name) != 0;
|
|
||||||
}), required_columns.end());
|
|
||||||
|
|
||||||
if (prewhere_info)
|
|
||||||
{
|
|
||||||
/// Don't remove columns which are needed to be aliased.
|
|
||||||
for (const auto & name : required_columns)
|
|
||||||
prewhere_info->prewhere_actions->tryRestoreColumn(name);
|
|
||||||
|
|
||||||
auto analyzed_result
|
|
||||||
= TreeRewriter(context).analyze(required_columns_from_prewhere_expr, metadata_snapshot->getColumns().getAllPhysical());
|
|
||||||
prewhere_info->alias_actions
|
|
||||||
= ExpressionAnalyzer(required_columns_from_prewhere_expr, analyzed_result, context).getActionsDAG(true, false);
|
|
||||||
|
|
||||||
/// Add (physical?) columns required by alias actions.
|
|
||||||
auto required_columns_from_alias = prewhere_info->alias_actions->getRequiredColumns();
|
|
||||||
Block prewhere_actions_result = prewhere_info->prewhere_actions->getResultColumns();
|
|
||||||
for (auto & column : required_columns_from_alias)
|
|
||||||
if (!prewhere_actions_result.has(column.name))
|
|
||||||
if (required_columns.end() == std::find(required_columns.begin(), required_columns.end(), column.name))
|
|
||||||
required_columns.push_back(column.name);
|
|
||||||
|
|
||||||
/// Add physical columns required by prewhere actions.
|
|
||||||
for (const auto & column : required_columns_from_prewhere)
|
|
||||||
if (required_aliases_from_prewhere.count(column) == 0)
|
|
||||||
if (required_columns.end() == std::find(required_columns.begin(), required_columns.end(), column))
|
|
||||||
required_columns.push_back(column);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Limitation on the number of columns to read.
|
/// Limitation on the number of columns to read.
|
||||||
/// It's not applied in 'only_analyze' mode, because the query could be analyzed without removal of unnecessary columns.
|
/// It's not applied in 'only_analyze' mode, because the query could be analyzed without removal of unnecessary columns.
|
||||||
if (!options.only_analyze && settings.max_columns_to_read && required_columns.size() > settings.max_columns_to_read)
|
if (!options.only_analyze && settings.max_columns_to_read && required_columns.size() > settings.max_columns_to_read)
|
||||||
@ -1804,9 +1832,10 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
|
|||||||
if (max_streams > 1 && !is_remote)
|
if (max_streams > 1 && !is_remote)
|
||||||
max_streams *= settings.max_streams_to_max_threads_ratio;
|
max_streams *= settings.max_streams_to_max_threads_ratio;
|
||||||
|
|
||||||
query_info.syntax_analyzer_result = syntax_analyzer_result;
|
// TODO figure out how to make set for projections
|
||||||
query_info.sets = query_analyzer->getPreparedSets();
|
query_info.sets = query_analyzer->getPreparedSets();
|
||||||
auto actions_settings = ExpressionActionsSettings::fromContext(context);
|
auto actions_settings = ExpressionActionsSettings::fromContext(context);
|
||||||
|
auto & prewhere_info = analysis_result.prewhere_info;
|
||||||
|
|
||||||
if (prewhere_info)
|
if (prewhere_info)
|
||||||
{
|
{
|
||||||
@ -1828,19 +1857,45 @@ void InterpreterSelectQuery::executeFetchColumns(QueryProcessingStage::Enum proc
|
|||||||
|
|
||||||
/// Create optimizer with prepared actions.
|
/// Create optimizer with prepared actions.
|
||||||
/// Maybe we will need to calc input_order_info later, e.g. while reading from StorageMerge.
|
/// Maybe we will need to calc input_order_info later, e.g. while reading from StorageMerge.
|
||||||
if ((analysis_result.optimize_read_in_order || analysis_result.optimize_aggregation_in_order) && !query_info.projection)
|
if ((analysis_result.optimize_read_in_order || analysis_result.optimize_aggregation_in_order)
|
||||||
|
&& (!query_info.projection || query_info.projection->complete))
|
||||||
{
|
{
|
||||||
if (analysis_result.optimize_read_in_order)
|
if (analysis_result.optimize_read_in_order)
|
||||||
query_info.order_optimizer = std::make_shared<ReadInOrderOptimizer>(
|
{
|
||||||
|
if (query_info.projection)
|
||||||
|
{
|
||||||
|
query_info.projection->order_optimizer = std::make_shared<ReadInOrderOptimizer>(
|
||||||
|
// TODO Do we need a projection variant for this field?
|
||||||
analysis_result.order_by_elements_actions,
|
analysis_result.order_by_elements_actions,
|
||||||
getSortDescription(query, context),
|
getSortDescription(query, context),
|
||||||
query_info.syntax_analyzer_result);
|
query_info.syntax_analyzer_result);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
query_info.order_optimizer = std::make_shared<ReadInOrderOptimizer>(
|
query_info.order_optimizer = std::make_shared<ReadInOrderOptimizer>(
|
||||||
analysis_result.group_by_elements_actions,
|
analysis_result.order_by_elements_actions, getSortDescription(query, context), query_info.syntax_analyzer_result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (query_info.projection)
|
||||||
|
{
|
||||||
|
query_info.projection->order_optimizer = std::make_shared<ReadInOrderOptimizer>(
|
||||||
|
query_info.projection->group_by_elements_actions,
|
||||||
getSortDescriptionFromGroupBy(query),
|
getSortDescriptionFromGroupBy(query),
|
||||||
query_info.syntax_analyzer_result);
|
query_info.syntax_analyzer_result);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
query_info.order_optimizer = std::make_shared<ReadInOrderOptimizer>(
|
||||||
|
analysis_result.group_by_elements_actions, getSortDescriptionFromGroupBy(query), query_info.syntax_analyzer_result);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (query_info.projection)
|
||||||
|
query_info.projection->input_order_info
|
||||||
|
= query_info.projection->order_optimizer->getInputOrder(query_info.projection->desc->metadata, context);
|
||||||
|
else
|
||||||
query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, context);
|
query_info.input_order_info = query_info.order_optimizer->getInputOrder(metadata_snapshot, context);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2504,8 +2559,11 @@ void InterpreterSelectQuery::executeExtremes(QueryPlan & query_plan)
|
|||||||
|
|
||||||
void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(QueryPlan & query_plan, SubqueriesForSets & subqueries_for_sets)
|
void InterpreterSelectQuery::executeSubqueriesInSetsAndJoins(QueryPlan & query_plan, SubqueriesForSets & subqueries_for_sets)
|
||||||
{
|
{
|
||||||
if (query_info.input_order_info)
|
const auto & input_order_info = query_info.input_order_info
|
||||||
executeMergeSorted(query_plan, query_info.input_order_info->order_key_prefix_descr, 0, "before creating sets for subqueries and joins");
|
? query_info.input_order_info
|
||||||
|
: (query_info.projection ? query_info.projection->input_order_info : nullptr);
|
||||||
|
if (input_order_info)
|
||||||
|
executeMergeSorted(query_plan, input_order_info->order_key_prefix_descr, 0, "before creating sets for subqueries and joins");
|
||||||
|
|
||||||
const Settings & settings = context->getSettingsRef();
|
const Settings & settings = context->getSettingsRef();
|
||||||
|
|
||||||
|
@ -117,6 +117,8 @@ private:
|
|||||||
|
|
||||||
ASTSelectQuery & getSelectQuery() { return query_ptr->as<ASTSelectQuery &>(); }
|
ASTSelectQuery & getSelectQuery() { return query_ptr->as<ASTSelectQuery &>(); }
|
||||||
|
|
||||||
|
void addPrewhereAliasActions();
|
||||||
|
|
||||||
Block getSampleBlockImpl();
|
Block getSampleBlockImpl();
|
||||||
|
|
||||||
void executeImpl(QueryPlan & query_plan, const BlockInputStreamPtr & prepared_input, std::optional<Pipe> prepared_pipe);
|
void executeImpl(QueryPlan & query_plan, const BlockInputStreamPtr & prepared_input, std::optional<Pipe> prepared_pipe);
|
||||||
@ -183,6 +185,9 @@ private:
|
|||||||
/// Structure of query source (table, subquery, etc).
|
/// Structure of query source (table, subquery, etc).
|
||||||
Block source_header;
|
Block source_header;
|
||||||
|
|
||||||
|
/// Actions to calculate ALIAS if required.
|
||||||
|
ActionsDAGPtr alias_actions;
|
||||||
|
|
||||||
/// The subquery interpreter, if the subquery
|
/// The subquery interpreter, if the subquery
|
||||||
std::unique_ptr<InterpreterSelectWithUnionQuery> interpreter_subquery;
|
std::unique_ptr<InterpreterSelectWithUnionQuery> interpreter_subquery;
|
||||||
|
|
||||||
|
@ -33,7 +33,6 @@
|
|||||||
#include <Processors/Formats/InputStreamFromInputFormat.h>
|
#include <Processors/Formats/InputStreamFromInputFormat.h>
|
||||||
#include <Storages/AlterCommands.h>
|
#include <Storages/AlterCommands.h>
|
||||||
#include <Storages/MergeTree/MergeTreeData.h>
|
#include <Storages/MergeTree/MergeTreeData.h>
|
||||||
#include <Storages/MergeTree/MergeTreeDataUtils.h>
|
|
||||||
#include <Storages/MergeTree/MergeTreeDataPartCompact.h>
|
#include <Storages/MergeTree/MergeTreeDataPartCompact.h>
|
||||||
#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
|
#include <Storages/MergeTree/MergeTreeDataPartInMemory.h>
|
||||||
#include <Storages/MergeTree/MergeTreeDataPartWide.h>
|
#include <Storages/MergeTree/MergeTreeDataPartWide.h>
|
||||||
@ -3800,6 +3799,358 @@ bool MergeTreeData::mayBenefitFromIndexForIn(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
using PartitionIdToMaxBlock = std::unordered_map<String, Int64>;
|
||||||
|
|
||||||
|
static void selectBestProjection(
|
||||||
|
const MergeTreeDataSelectExecutor & reader,
|
||||||
|
const StorageMetadataPtr & metadata_snapshot,
|
||||||
|
const SelectQueryInfo & query_info,
|
||||||
|
ProjectionCandidate & candidate,
|
||||||
|
ContextPtr query_context,
|
||||||
|
const PartitionIdToMaxBlock * max_added_blocks,
|
||||||
|
const Settings & settings,
|
||||||
|
const MergeTreeData::DataPartsVector & parts,
|
||||||
|
ProjectionCandidate *& selected_candidate,
|
||||||
|
size_t & min_sum_marks)
|
||||||
|
{
|
||||||
|
MergeTreeData::DataPartsVector projection_parts;
|
||||||
|
MergeTreeData::DataPartsVector normal_parts;
|
||||||
|
for (const auto & part : parts)
|
||||||
|
{
|
||||||
|
const auto & projections = part->getProjectionParts();
|
||||||
|
auto it = projections.find(candidate.desc->name);
|
||||||
|
if (it != projections.end())
|
||||||
|
projection_parts.push_back(it->second);
|
||||||
|
else
|
||||||
|
normal_parts.push_back(part);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (projection_parts.empty())
|
||||||
|
return;
|
||||||
|
|
||||||
|
candidate.merge_tree_data_select_base_cache = std::make_unique<MergeTreeDataSelectCache>();
|
||||||
|
candidate.merge_tree_data_select_projection_cache = std::make_unique<MergeTreeDataSelectCache>();
|
||||||
|
reader.readFromParts(
|
||||||
|
projection_parts,
|
||||||
|
candidate.required_columns,
|
||||||
|
metadata_snapshot,
|
||||||
|
candidate.desc->metadata,
|
||||||
|
query_info, // TODO syntax_analysis_result set in index
|
||||||
|
query_context,
|
||||||
|
0, // max_block_size is unused when getting cache
|
||||||
|
settings.max_threads,
|
||||||
|
max_added_blocks,
|
||||||
|
candidate.merge_tree_data_select_projection_cache.get());
|
||||||
|
|
||||||
|
size_t sum_marks = candidate.merge_tree_data_select_projection_cache->sum_marks;
|
||||||
|
if (normal_parts.empty())
|
||||||
|
{
|
||||||
|
// All parts are projection parts which allows us to use in_order_optimization.
|
||||||
|
// TODO It might be better to use a complete projection even with more marks to read.
|
||||||
|
candidate.complete = true;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
reader.readFromParts(
|
||||||
|
normal_parts,
|
||||||
|
query_info.required_columns,
|
||||||
|
metadata_snapshot,
|
||||||
|
metadata_snapshot,
|
||||||
|
query_info, // TODO syntax_analysis_result set in index
|
||||||
|
query_context,
|
||||||
|
0, // max_block_size is unused when getting cache
|
||||||
|
settings.max_threads,
|
||||||
|
max_added_blocks,
|
||||||
|
candidate.merge_tree_data_select_base_cache.get());
|
||||||
|
sum_marks += candidate.merge_tree_data_select_base_cache->sum_marks;
|
||||||
|
}
|
||||||
|
|
||||||
|
// We choose the projection with least sum_marks to read.
|
||||||
|
if (sum_marks < min_sum_marks)
|
||||||
|
{
|
||||||
|
selected_candidate = &candidate;
|
||||||
|
min_sum_marks = sum_marks;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/// Tries to find a projection (aggregate or normal) that can serve the given query and needs fewer
/// marks than reading the base parts.
///
/// Returns false when projection optimization is disabled or ignored for this query, when no
/// projection matches the query, or when no matching projection reads fewer marks than the base
/// table. Otherwise the winning candidate is moved into `query_info.projection` and true is returned.
///
/// Side effect: as soon as at least one projection matches, `query_info.merge_tree_data_select_cache`
/// is allocated and passed to `readFromParts` for the base parts, even if false is returned afterwards.
bool MergeTreeData::getQueryProcessingStageWithAggregateProjection(
    ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot, SelectQueryInfo & query_info) const
{
    const auto & settings = query_context->getSettingsRef();
    if (!settings.allow_experimental_projection_optimization || query_info.ignore_projections)
        return false;

    const auto & query_ptr = query_info.query;

    InterpreterSelectQuery select(
        query_ptr, query_context, SelectQueryOptions{QueryProcessingStage::WithMergeableState}.ignoreProjections().ignoreAlias());
    const auto & analysis_result = select.getAnalysisResult();

    /// If the first stage of the query pipeline is more complex than
    /// Aggregating - Expression - Filter - ReadFromStorage, an aggregate projection cannot be used.
    const bool can_use_aggregate_projection = analysis_result.join == nullptr && analysis_result.array_join == nullptr;

    /// Below we check whether all needed columns can be provided by some projection, also trying to
    /// match expressions. For example, if an aggregate projection contains a column named sum(x)
    /// and the query has an expression called sum(x), it is a match. That is why all aliases are
    /// ignored during projection creation and in the query planning above. Note that sqrt(sum(x))
    /// works as well, because sum(x) can be treated as a required column.

    /// ProjectionDescription objects are owned by metadata_snapshot, which lives along with
    /// InterpreterSelect, so candidates may keep raw pointers to them.
    std::vector<ProjectionCandidate> candidates;

    NameSet keys;
    std::unordered_map<std::string_view, size_t> key_name_pos_map;
    size_t next_key_pos = 0;
    for (const auto & key_desc : select.getQueryAnalyzer()->aggregationKeys())
    {
        keys.insert(key_desc.name);
        key_name_pos_map.insert({key_desc.name, next_key_pos++});
    }

    auto actions_settings = ExpressionActionsSettings::fromSettings(settings);

    /// All required columns should be provided by either the current projection or previous actions.
    /// The WHERE / PREWHERE related actions are folded by the projection, walking backward, and the
    /// resulting set of required columns is checked against `source_block`.
    /// Returns true when every required column is available.
    /// TODO what if there is a column with name sum(x) and an aggregate sum(x)?
    auto fold_filters_by_projection =
        [&](ProjectionCandidate & candidate, const ProjectionDescription & projection,
            NameSet & required_columns, const Block & source_block, const Block & aggregates)
    {
        if (analysis_result.before_where)
        {
            candidate.before_where = analysis_result.before_where->clone();

            const auto where_column_name = query_ptr->as<const ASTSelectQuery &>().where()->getColumnName();
            required_columns = candidate.before_where->foldActionsByProjection(
                required_columns, projection.sample_block_for_keys, where_column_name);

            if (required_columns.empty())
                return false;

            candidate.before_where->addAggregatesViaProjection(aggregates);
        }

        if (analysis_result.prewhere_info)
        {
            auto & source_prewhere = analysis_result.prewhere_info;

            candidate.prewhere_info = std::make_shared<PrewhereInfo>();
            candidate.prewhere_info->prewhere_column_name = source_prewhere->prewhere_column_name;
            candidate.prewhere_info->remove_prewhere_column = source_prewhere->remove_prewhere_column;
            candidate.prewhere_info->row_level_column_name = source_prewhere->row_level_column_name;
            candidate.prewhere_info->need_filter = source_prewhere->need_filter;

            /// Columns needed by PREWHERE are accumulated step by step:
            /// prewhere actions -> row level filter -> alias actions.
            NameSet columns_for_prewhere;

            auto folded_prewhere = source_prewhere->prewhere_actions->clone();
            columns_for_prewhere = folded_prewhere->foldActionsByProjection(
                columns_for_prewhere, projection.sample_block_for_keys, source_prewhere->prewhere_column_name);
            if (columns_for_prewhere.empty())
                return false;
            candidate.prewhere_info->prewhere_actions = std::make_shared<ExpressionActions>(folded_prewhere, actions_settings);

            if (source_prewhere->row_level_filter_actions)
            {
                auto folded_row_filter = source_prewhere->row_level_filter_actions->clone();
                columns_for_prewhere = folded_row_filter->foldActionsByProjection(
                    columns_for_prewhere, projection.sample_block_for_keys, source_prewhere->row_level_column_name);
                if (columns_for_prewhere.empty())
                    return false;
                candidate.prewhere_info->row_level_filter
                    = std::make_shared<ExpressionActions>(folded_row_filter, actions_settings);
            }

            // TODO wait for alias analysis to be moved into expression analyzer
            if (source_prewhere->alias_actions)
            {
                auto folded_aliases = source_prewhere->alias_actions->clone();
                columns_for_prewhere
                    = folded_aliases->foldActionsByProjection(columns_for_prewhere, projection.sample_block_for_keys);
                if (columns_for_prewhere.empty())
                    return false;
                candidate.prewhere_info->alias_actions = std::make_shared<ExpressionActions>(folded_aliases, actions_settings);
            }

            required_columns.insert(columns_for_prewhere.begin(), columns_for_prewhere.end());
        }

        /// Any column still missing from the source block means the projection does not match.
        for (const auto & column : required_columns)
            if (!source_block.has(column))
                return false;

        return true;
    };

    for (const auto & projection : metadata_snapshot->projections)
    {
        ProjectionCandidate candidate{};
        candidate.desc = &projection;

        if (projection.type == ProjectionDescription::Type::Aggregate && analysis_result.need_aggregate && can_use_aggregate_projection)
        {
            /// First check that every aggregate of the query is provided by this projection.
            Block aggregates;
            bool all_aggregates_provided = true;
            for (const auto & aggregate : select.getQueryAnalyzer()->aggregates())
            {
                const auto * column = projection.sample_block.findByName(aggregate.column_name);
                if (!column)
                {
                    all_aggregates_provided = false;
                    break;
                }
                aggregates.insert(*column);
            }

            if (!all_aggregates_provided)
                continue;

            /// Check that all aggregation keys can be provided either by some action or by the
            /// projection directly. The `before_aggregation` DAG is reshaped so that it only has to
            /// provide aggregation keys; some of its sub-DAGs may be substituted by projection keys.
            candidate.before_aggregation = analysis_result.before_aggregation->clone();
            auto required_columns = candidate.before_aggregation->foldActionsByProjection(keys, projection.sample_block_for_keys);

            if (required_columns.empty())
                continue;

            if (analysis_result.optimize_aggregation_in_order)
            {
                /// One folded copy of the DAG per aggregation key.
                for (const auto & key : keys)
                {
                    auto key_actions_dag = analysis_result.before_aggregation->clone();
                    key_actions_dag->foldActionsByProjection({key}, projection.sample_block_for_keys);
                    candidate.group_by_elements_actions.emplace_back(
                        std::make_shared<ExpressionActions>(key_actions_dag, actions_settings));
                }
            }

            /// Reorder aggregation keys and attach aggregates.
            candidate.before_aggregation->reorderAggregationKeysForProjection(key_name_pos_map);
            candidate.before_aggregation->addAggregatesViaProjection(aggregates);

            if (fold_filters_by_projection(candidate, projection, required_columns, projection.sample_block_for_keys, aggregates))
            {
                candidate.required_columns = {required_columns.begin(), required_columns.end()};
                for (const auto & aggregate : aggregates)
                    candidate.required_columns.push_back(aggregate.name);
                candidates.push_back(std::move(candidate));
            }
        }
        else if (projection.type == ProjectionDescription::Type::Normal && (analysis_result.hasWhere() || analysis_result.hasPrewhere()))
        {
            // TODO is it possible?
            if (!analysis_result.before_order_by)
                continue;

            NameSet required_columns;
            for (const auto & column : analysis_result.before_order_by->getRequiredColumns())
                required_columns.insert(column.name);

            if (fold_filters_by_projection(candidate, projection, required_columns, projection.sample_block, {}))
            {
                candidate.required_columns = {required_columns.begin(), required_columns.end()};
                candidates.push_back(std::move(candidate));
            }
        }
    }

    if (candidates.empty())
        return false;

    /// Select the best projection to execute the query.
    /// First build a MergeTreeDataSelectCache for the base parts, to check whether a projection is
    /// indeed better than reading the base table.
    query_info.merge_tree_data_select_cache = std::make_unique<MergeTreeDataSelectCache>();

    std::unique_ptr<PartitionIdToMaxBlock> max_added_blocks;
    if (settings.select_sequential_consistency)
    {
        if (const StorageReplicatedMergeTree * replicated = dynamic_cast<const StorageReplicatedMergeTree *>(this))
            max_added_blocks = std::make_unique<PartitionIdToMaxBlock>(replicated->getMaxAddedBlocks());
    }

    auto parts = getDataPartsVector();
    MergeTreeDataSelectExecutor reader(*this);
    reader.readFromParts(
        parts,
        query_info.required_columns,
        metadata_snapshot,
        metadata_snapshot,
        query_info, // TODO syntax_analysis_result set in index
        query_context,
        0, // max_block_size is unused when getting cache
        settings.max_threads,
        max_added_blocks.get(),
        query_info.merge_tree_data_select_cache.get());

    size_t min_sum_marks = query_info.merge_tree_data_select_cache->sum_marks;
    ProjectionCandidate * selected_candidate = nullptr;

    /// Runs selectBestProjection over every candidate of the given projection type.
    auto evaluate_candidates_of_type = [&](ProjectionDescription::Type type)
    {
        for (auto & candidate : candidates)
        {
            if (candidate.desc->type != type)
                continue;

            selectBestProjection(
                reader,
                metadata_snapshot,
                query_info,
                candidate,
                query_context,
                max_added_blocks.get(),
                settings,
                parts,
                selected_candidate,
                min_sum_marks);
        }
    };

    /// Favor aggregate projections.
    evaluate_candidates_of_type(ProjectionDescription::Type::Aggregate);

    /// Select the best normal projection if no aggregate projection is available.
    if (!selected_candidate)
        evaluate_candidates_of_type(ProjectionDescription::Type::Normal);

    if (!selected_candidate)
        return false;

    if (selected_candidate->desc->type == ProjectionDescription::Type::Aggregate)
    {
        selected_candidate->aggregation_keys = select.getQueryAnalyzer()->aggregationKeys();
        selected_candidate->aggregate_descriptions = select.getQueryAnalyzer()->aggregates();
    }

    query_info.projection = std::move(*selected_candidate);
    return true;
}
|
||||||
|
|
||||||
|
|
||||||
QueryProcessingStage::Enum MergeTreeData::getQueryProcessingStage(
|
QueryProcessingStage::Enum MergeTreeData::getQueryProcessingStage(
|
||||||
ContextPtr query_context,
|
ContextPtr query_context,
|
||||||
|
@ -358,6 +358,9 @@ public:
|
|||||||
bool attach,
|
bool attach,
|
||||||
BrokenPartCallback broken_part_callback_ = [](const String &){});
|
BrokenPartCallback broken_part_callback_ = [](const String &){});
|
||||||
|
|
||||||
|
bool getQueryProcessingStageWithAggregateProjection(
|
||||||
|
ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot, SelectQueryInfo & query_info) const;
|
||||||
|
|
||||||
QueryProcessingStage::Enum getQueryProcessingStage(
|
QueryProcessingStage::Enum getQueryProcessingStage(
|
||||||
ContextPtr query_context,
|
ContextPtr query_context,
|
||||||
QueryProcessingStage::Enum to_stage,
|
QueryProcessingStage::Enum to_stage,
|
||||||
|
@ -47,6 +47,7 @@
|
|||||||
#include <Processors/Transforms/FilterTransform.h>
|
#include <Processors/Transforms/FilterTransform.h>
|
||||||
#include <Processors/Transforms/ProjectionPartTransform.h>
|
#include <Processors/Transforms/ProjectionPartTransform.h>
|
||||||
#include <Storages/MergeTree/StorageFromMergeTreeDataPart.h>
|
#include <Storages/MergeTree/StorageFromMergeTreeDataPart.h>
|
||||||
|
#include <Storages/MergeTree/StorageFromBasePartsOfProjection.h>
|
||||||
#include <Storages/MergeTree/ProjectionCondition.h>
|
#include <Storages/MergeTree/ProjectionCondition.h>
|
||||||
#include <IO/WriteBufferFromOStream.h>
|
#include <IO/WriteBufferFromOStream.h>
|
||||||
|
|
||||||
@ -152,77 +153,40 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read(
|
|||||||
data.getDataPartsVector(),
|
data.getDataPartsVector(),
|
||||||
column_names_to_return,
|
column_names_to_return,
|
||||||
metadata_snapshot,
|
metadata_snapshot,
|
||||||
query_info,
|
|
||||||
context,
|
|
||||||
max_block_size,
|
|
||||||
num_streams,
|
|
||||||
max_block_numbers_to_read);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// For normal projection, read anyway.
|
|
||||||
/// We will chose those which read less granules.
|
|
||||||
QueryPlanPtr plan_no_projections;
|
|
||||||
size_t no_projection_granules = 0;
|
|
||||||
size_t with_projection_granules = 0;
|
|
||||||
|
|
||||||
if (query_info.projection->desc->type == ProjectionDescription::Type::Normal)
|
|
||||||
plan_no_projections = readFromParts(
|
|
||||||
data.getDataPartsVector(),
|
|
||||||
column_names_to_return,
|
|
||||||
metadata_snapshot,
|
metadata_snapshot,
|
||||||
query_info,
|
query_info,
|
||||||
context,
|
context,
|
||||||
max_block_size,
|
max_block_size,
|
||||||
num_streams,
|
num_streams,
|
||||||
max_block_numbers_to_read,
|
max_block_numbers_to_read,
|
||||||
&no_projection_granules);
|
query_info.merge_tree_data_select_cache.get());
|
||||||
|
}
|
||||||
|
|
||||||
LOG_DEBUG(log, "Choose projection {}", query_info.projection->desc->name);
|
LOG_DEBUG(log, "Choose projection {}", query_info.projection->desc->name);
|
||||||
|
|
||||||
|
if (query_info.projection->merge_tree_data_select_base_cache->sum_marks
|
||||||
|
+ query_info.projection->merge_tree_data_select_projection_cache->sum_marks
|
||||||
|
== 0)
|
||||||
|
return std::make_unique<QueryPlan>();
|
||||||
|
|
||||||
Pipes pipes;
|
Pipes pipes;
|
||||||
auto parts = data.getDataPartsVector();
|
|
||||||
MergeTreeData::DataPartsVector projection_parts;
|
|
||||||
MergeTreeData::DataPartsVector parent_parts;
|
|
||||||
MergeTreeData::DataPartsVector normal_parts;
|
|
||||||
for (auto & part : parts)
|
|
||||||
{
|
|
||||||
const auto & projections = part->getProjectionParts();
|
|
||||||
auto it = projections.find(query_info.projection->desc->name);
|
|
||||||
if (it != projections.end())
|
|
||||||
{
|
|
||||||
projection_parts.push_back(it->second);
|
|
||||||
parent_parts.push_back(part);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
normal_parts.push_back(part);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t rows_with_projection = 0;
|
|
||||||
size_t rows_without_projection = 0;
|
|
||||||
|
|
||||||
for (auto & part : projection_parts)
|
|
||||||
rows_with_projection += part->getParentPart()->rows_count;
|
|
||||||
|
|
||||||
for (auto & part : normal_parts)
|
|
||||||
rows_without_projection += part->rows_count;
|
|
||||||
|
|
||||||
Pipe projection_pipe;
|
Pipe projection_pipe;
|
||||||
Pipe ordinary_pipe;
|
Pipe ordinary_pipe;
|
||||||
|
|
||||||
const auto & given_select = query_info.query->as<const ASTSelectQuery &>();
|
const auto & given_select = query_info.query->as<const ASTSelectQuery &>();
|
||||||
if (!projection_parts.empty())
|
if (query_info.projection->merge_tree_data_select_projection_cache->sum_marks > 0)
|
||||||
{
|
{
|
||||||
auto plan = readFromParts(
|
auto plan = readFromParts(
|
||||||
std::move(projection_parts),
|
{},
|
||||||
query_info.projection->required_columns, // raw columns without key transformation
|
query_info.projection->required_columns, // raw columns without key transformation
|
||||||
|
metadata_snapshot,
|
||||||
query_info.projection->desc->metadata,
|
query_info.projection->desc->metadata,
|
||||||
query_info,
|
query_info,
|
||||||
context,
|
context,
|
||||||
max_block_size,
|
max_block_size,
|
||||||
num_streams,
|
num_streams,
|
||||||
max_block_numbers_to_read,
|
max_block_numbers_to_read,
|
||||||
&with_projection_granules,
|
query_info.projection->merge_tree_data_select_projection_cache.get());
|
||||||
true);
|
|
||||||
|
|
||||||
if (plan)
|
if (plan)
|
||||||
projection_pipe = plan->convertToPipe(
|
projection_pipe = plan->convertToPipe(
|
||||||
@ -252,29 +216,25 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!normal_parts.empty())
|
if (query_info.projection->merge_tree_data_select_base_cache->sum_marks > 0)
|
||||||
{
|
{
|
||||||
auto storage_from_source_part = StorageFromMergeTreeDataPart::create(std::move(normal_parts));
|
auto storage_from_base_parts_of_projection = StorageFromBasePartsOfProjection::create(data, metadata_snapshot);
|
||||||
auto ast = query_info.projection->desc->query_ast->clone();
|
auto ast = query_info.projection->desc->query_ast->clone();
|
||||||
auto & select = ast->as<ASTSelectQuery &>();
|
auto & select = ast->as<ASTSelectQuery &>();
|
||||||
if (given_select.where())
|
if (given_select.where())
|
||||||
select.setExpression(ASTSelectQuery::Expression::WHERE, given_select.where()->clone());
|
select.setExpression(ASTSelectQuery::Expression::WHERE, given_select.where()->clone());
|
||||||
|
if (given_select.prewhere())
|
||||||
|
select.setExpression(ASTSelectQuery::Expression::WHERE, given_select.prewhere()->clone());
|
||||||
|
// TODO will row policy filter work?
|
||||||
|
|
||||||
// After overriding the group by clause, we finish the possible aggregations directly
|
// After overriding the group by clause, we finish the possible aggregations directly
|
||||||
if (processed_stage >= QueryProcessingStage::Enum::WithMergeableState && given_select.groupBy())
|
if (processed_stage >= QueryProcessingStage::Enum::WithMergeableState && given_select.groupBy())
|
||||||
select.setExpression(ASTSelectQuery::Expression::GROUP_BY, given_select.groupBy()->clone());
|
select.setExpression(ASTSelectQuery::Expression::GROUP_BY, given_select.groupBy()->clone());
|
||||||
auto interpreter = InterpreterSelectQuery(
|
auto interpreter = InterpreterSelectQuery(
|
||||||
ast, context, storage_from_source_part, nullptr, SelectQueryOptions{processed_stage}.ignoreAggregation());
|
ast, context, storage_from_base_parts_of_projection, nullptr, SelectQueryOptions{processed_stage}.ignoreAggregation());
|
||||||
ordinary_pipe = QueryPipeline::getPipe(interpreter.execute().pipeline);
|
ordinary_pipe = QueryPipeline::getPipe(interpreter.execute().pipeline);
|
||||||
|
|
||||||
with_projection_granules += storage_from_source_part->getNumGranulesFromLastRead();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Use normal projection only if we read less granules then without it.
|
|
||||||
/// TODO: check if read-in-order optimization possible for normal projection.
|
|
||||||
if (query_info.projection->desc->type == ProjectionDescription::Type::Normal && with_projection_granules > no_projection_granules)
|
|
||||||
return plan_no_projections;
|
|
||||||
|
|
||||||
if (query_info.projection->desc->type == ProjectionDescription::Type::Aggregate)
|
if (query_info.projection->desc->type == ProjectionDescription::Type::Aggregate)
|
||||||
{
|
{
|
||||||
/// Here we create shared ManyAggregatedData for both projection and ordinary data.
|
/// Here we create shared ManyAggregatedData for both projection and ordinary data.
|
||||||
@ -405,6 +365,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read(
|
|||||||
pipes.emplace_back(std::move(projection_pipe));
|
pipes.emplace_back(std::move(projection_pipe));
|
||||||
pipes.emplace_back(std::move(ordinary_pipe));
|
pipes.emplace_back(std::move(ordinary_pipe));
|
||||||
auto pipe = Pipe::unitePipes(std::move(pipes));
|
auto pipe = Pipe::unitePipes(std::move(pipes));
|
||||||
|
// TODO what if pipe is empty?
|
||||||
pipe.resize(1);
|
pipe.resize(1);
|
||||||
|
|
||||||
auto step = std::make_unique<ReadFromStorageStep>(std::move(pipe), "MergeTree(with projection)");
|
auto step = std::make_unique<ReadFromStorageStep>(std::move(pipe), "MergeTree(with projection)");
|
||||||
@ -417,16 +378,15 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
MergeTreeData::DataPartsVector parts,
|
MergeTreeData::DataPartsVector parts,
|
||||||
const Names & column_names_to_return,
|
const Names & column_names_to_return,
|
||||||
const StorageMetadataPtr & metadata_snapshot_base,
|
const StorageMetadataPtr & metadata_snapshot_base,
|
||||||
|
const StorageMetadataPtr & metadata_snapshot,
|
||||||
const SelectQueryInfo & query_info,
|
const SelectQueryInfo & query_info,
|
||||||
ContextPtr context,
|
ContextPtr context,
|
||||||
const UInt64 max_block_size,
|
const UInt64 max_block_size,
|
||||||
const unsigned num_streams,
|
const unsigned num_streams,
|
||||||
const PartitionIdToMaxBlock * max_block_numbers_to_read,
|
const PartitionIdToMaxBlock * max_block_numbers_to_read,
|
||||||
size_t * num_granules_to_read,
|
MergeTreeDataSelectCache * cache) const
|
||||||
bool use_projection_metadata) const
|
|
||||||
{
|
{
|
||||||
const StorageMetadataPtr & metadata_snapshot
|
bool use_cache = cache && cache->use_cache;
|
||||||
= (query_info.projection && use_projection_metadata) ? query_info.projection->desc->metadata : metadata_snapshot_base;
|
|
||||||
|
|
||||||
/// If query contains restrictions on the virtual column `_part` or `_part_index`, select only parts suitable for it.
|
/// If query contains restrictions on the virtual column `_part` or `_part_index`, select only parts suitable for it.
|
||||||
/// The virtual column `_sample_factor` (which is equal to 1 / used sample rate) can be requested in the query.
|
/// The virtual column `_sample_factor` (which is equal to 1 / used sample rate) can be requested in the query.
|
||||||
@ -481,13 +441,19 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
metadata_snapshot->check(real_column_names, data.getVirtuals(), data.getStorageID());
|
||||||
|
|
||||||
|
const Settings & settings = context->getSettingsRef();
|
||||||
NamesAndTypesList available_real_columns = metadata_snapshot->getColumns().getAllPhysical();
|
NamesAndTypesList available_real_columns = metadata_snapshot->getColumns().getAllPhysical();
|
||||||
|
|
||||||
/// If there are only virtual columns in the query, you must request at least one non-virtual one.
|
/// If there are only virtual columns in the query, you must request at least one non-virtual one.
|
||||||
if (real_column_names.empty())
|
if (real_column_names.empty())
|
||||||
real_column_names.push_back(ExpressionActions::getSmallestColumn(available_real_columns));
|
real_column_names.push_back(ExpressionActions::getSmallestColumn(available_real_columns));
|
||||||
|
|
||||||
|
// Filter parts by virtual columns.
|
||||||
std::unordered_set<String> part_values;
|
std::unordered_set<String> part_values;
|
||||||
|
if (!use_cache)
|
||||||
|
{
|
||||||
ASTPtr expression_ast;
|
ASTPtr expression_ast;
|
||||||
auto virtual_columns_block = data.getBlockWithVirtualPartColumns(parts, true /* one_part */);
|
auto virtual_columns_block = data.getBlockWithVirtualPartColumns(parts, true /* one_part */);
|
||||||
|
|
||||||
@ -503,24 +469,36 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
if (part_values.empty())
|
if (part_values.empty())
|
||||||
return std::make_unique<QueryPlan>();
|
return std::make_unique<QueryPlan>();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
// At this point, empty `part_values` means all parts.
|
// At this point, empty `part_values` means all parts.
|
||||||
|
|
||||||
metadata_snapshot->check(real_column_names, data.getVirtuals(), data.getStorageID());
|
// Build and check if primary key is used when necessary
|
||||||
|
std::optional<KeyCondition> key_condition;
|
||||||
const Settings & settings = context->getSettingsRef();
|
if (!use_cache)
|
||||||
|
{
|
||||||
const auto & primary_key = metadata_snapshot->getPrimaryKey();
|
const auto & primary_key = metadata_snapshot->getPrimaryKey();
|
||||||
Names primary_key_columns = primary_key.column_names;
|
Names primary_key_columns = primary_key.column_names;
|
||||||
|
key_condition.emplace(query_info, context, primary_key_columns, primary_key.expression);
|
||||||
|
|
||||||
KeyCondition key_condition(query_info, context, primary_key_columns, primary_key.expression);
|
if (settings.force_primary_key && key_condition->alwaysUnknownOrTrue())
|
||||||
|
|
||||||
if (settings.force_primary_key && key_condition.alwaysUnknownOrTrue())
|
|
||||||
{
|
{
|
||||||
throw Exception(ErrorCodes::INDEX_NOT_USED, "Primary key ({}) is not used and setting 'force_primary_key' is set.",
|
throw Exception(
|
||||||
boost::algorithm::join(primary_key_columns, ", "));
|
ErrorCodes::INDEX_NOT_USED,
|
||||||
|
"Primary key ({}) is not used and setting 'force_primary_key' is set.",
|
||||||
|
fmt::join(primary_key_columns, ", "));
|
||||||
|
}
|
||||||
|
LOG_DEBUG(log, "Key condition: {}", key_condition->toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<KeyCondition> minmax_idx_condition;
|
const auto & select = query_info.query->as<ASTSelectQuery &>();
|
||||||
|
auto query_context = context->hasQueryContext() ? context->getQueryContext() : context;
|
||||||
|
auto index_stats = use_cache ? std::move(cache->index_stats) : std::make_unique<ReadFromMergeTree::IndexStats>();
|
||||||
|
|
||||||
|
// Select parts to read and do partition pruning via partition value and minmax indices
|
||||||
|
if (!use_cache)
|
||||||
|
{
|
||||||
std::optional<PartitionPruner> partition_pruner;
|
std::optional<PartitionPruner> partition_pruner;
|
||||||
|
std::optional<KeyCondition> minmax_idx_condition;
|
||||||
DataTypes minmax_columns_types;
|
DataTypes minmax_columns_types;
|
||||||
if (metadata_snapshot_base->hasPartitionKey())
|
if (metadata_snapshot_base->hasPartitionKey())
|
||||||
{
|
{
|
||||||
@ -550,15 +528,26 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
auto query_context = context->hasQueryContext() ? context->getQueryContext() : context;
|
|
||||||
|
|
||||||
PartFilterCounters part_filter_counters;
|
PartFilterCounters part_filter_counters;
|
||||||
auto index_stats = std::make_unique<ReadFromMergeTree::IndexStats>();
|
|
||||||
|
|
||||||
if (query_context->getSettingsRef().allow_experimental_query_deduplication)
|
if (query_context->getSettingsRef().allow_experimental_query_deduplication)
|
||||||
selectPartsToReadWithUUIDFilter(parts, part_values, minmax_idx_condition, minmax_columns_types, partition_pruner, max_block_numbers_to_read, query_context, part_filter_counters);
|
selectPartsToReadWithUUIDFilter(
|
||||||
|
parts,
|
||||||
|
part_values,
|
||||||
|
minmax_idx_condition,
|
||||||
|
minmax_columns_types,
|
||||||
|
partition_pruner,
|
||||||
|
max_block_numbers_to_read,
|
||||||
|
query_context,
|
||||||
|
part_filter_counters);
|
||||||
else
|
else
|
||||||
selectPartsToRead(parts, part_values, minmax_idx_condition, minmax_columns_types, partition_pruner, max_block_numbers_to_read, part_filter_counters);
|
selectPartsToRead(
|
||||||
|
parts,
|
||||||
|
part_values,
|
||||||
|
minmax_idx_condition,
|
||||||
|
minmax_columns_types,
|
||||||
|
partition_pruner,
|
||||||
|
max_block_numbers_to_read,
|
||||||
|
part_filter_counters);
|
||||||
|
|
||||||
index_stats->emplace_back(ReadFromMergeTree::IndexStat{
|
index_stats->emplace_back(ReadFromMergeTree::IndexStat{
|
||||||
.type = ReadFromMergeTree::IndexType::None,
|
.type = ReadFromMergeTree::IndexType::None,
|
||||||
@ -574,6 +563,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
.used_keys = std::move(description.used_keys),
|
.used_keys = std::move(description.used_keys),
|
||||||
.num_parts_after = part_filter_counters.num_parts_after_minmax,
|
.num_parts_after = part_filter_counters.num_parts_after_minmax,
|
||||||
.num_granules_after = part_filter_counters.num_granules_after_minmax});
|
.num_granules_after = part_filter_counters.num_granules_after_minmax});
|
||||||
|
LOG_DEBUG(log, "MinMax index condition: {}", minmax_idx_condition->toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (partition_pruner)
|
if (partition_pruner)
|
||||||
@ -586,17 +576,15 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
.num_parts_after = part_filter_counters.num_parts_after_partition_pruner,
|
.num_parts_after = part_filter_counters.num_parts_after_partition_pruner,
|
||||||
.num_granules_after = part_filter_counters.num_granules_after_partition_pruner});
|
.num_granules_after = part_filter_counters.num_granules_after_partition_pruner});
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Sampling.
|
/// Sampling.
|
||||||
Names column_names_to_read = real_column_names;
|
MergeTreeDataSelectSamplingData sampling = use_cache ? std::move(cache->sampling) : MergeTreeDataSelectSamplingData{};
|
||||||
std::shared_ptr<ASTFunction> filter_function;
|
if (!use_cache)
|
||||||
ActionsDAGPtr filter_expression;
|
{
|
||||||
|
|
||||||
RelativeSize relative_sample_size = 0;
|
RelativeSize relative_sample_size = 0;
|
||||||
RelativeSize relative_sample_offset = 0;
|
RelativeSize relative_sample_offset = 0;
|
||||||
|
|
||||||
const auto & select = query_info.query->as<ASTSelectQuery &>();
|
|
||||||
|
|
||||||
auto select_sample_size = select.sampleSize();
|
auto select_sample_size = select.sampleSize();
|
||||||
auto select_sample_offset = select.sampleOffset();
|
auto select_sample_offset = select.sampleOffset();
|
||||||
|
|
||||||
@ -618,10 +606,11 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
if (relative_sample_offset < 0)
|
if (relative_sample_offset < 0)
|
||||||
throw Exception("Negative sample offset", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
throw Exception("Negative sample offset", ErrorCodes::ARGUMENT_OUT_OF_BOUND);
|
||||||
|
|
||||||
/// Convert absolute value of the sampling (in form `SAMPLE 1000000` - how many rows to read) into the relative `SAMPLE 0.1` (how much data to read).
|
/// Convert absolute value of the sampling (in form `SAMPLE 1000000` - how many rows to
|
||||||
|
/// read) into the relative `SAMPLE 0.1` (how much data to read).
|
||||||
size_t approx_total_rows = 0;
|
size_t approx_total_rows = 0;
|
||||||
if (relative_sample_size > 1 || relative_sample_offset > 1)
|
if (relative_sample_size > 1 || relative_sample_offset > 1)
|
||||||
approx_total_rows = getApproximateTotalRowsToRead(parts, metadata_snapshot, key_condition, settings);
|
approx_total_rows = getApproximateTotalRowsToRead(parts, metadata_snapshot, *key_condition, settings);
|
||||||
|
|
||||||
if (relative_sample_size > 1)
|
if (relative_sample_size > 1)
|
||||||
{
|
{
|
||||||
@ -685,10 +674,10 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
return std::make_unique<QueryPlan>();
|
return std::make_unique<QueryPlan>();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool use_sampling = relative_sample_size > 0 || (settings.parallel_replicas_count > 1 && data.supportsSampling());
|
sampling.use_sampling = relative_sample_size > 0 || (settings.parallel_replicas_count > 1 && data.supportsSampling());
|
||||||
bool no_data = false; /// There is nothing left after sampling.
|
bool no_data = false; /// There is nothing left after sampling.
|
||||||
|
|
||||||
if (use_sampling)
|
if (sampling.use_sampling)
|
||||||
{
|
{
|
||||||
if (sample_factor_column_queried && relative_sample_size != RelativeSize(0))
|
if (sample_factor_column_queried && relative_sample_size != RelativeSize(0))
|
||||||
used_sample_factor = 1.0 / boost::rational_cast<Float64>(relative_sample_size);
|
used_sample_factor = 1.0 / boost::rational_cast<Float64>(relative_sample_size);
|
||||||
@ -757,7 +746,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
|
|
||||||
if (no_data || (!has_lower_limit && !has_upper_limit))
|
if (no_data || (!has_lower_limit && !has_upper_limit))
|
||||||
{
|
{
|
||||||
use_sampling = false;
|
sampling.use_sampling = false;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -780,7 +769,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
|
|
||||||
if (has_lower_limit)
|
if (has_lower_limit)
|
||||||
{
|
{
|
||||||
if (!key_condition.addCondition(sampling_key.column_names[0], Range::createLeftBounded(lower, true)))
|
if (!key_condition->addCondition(sampling_key.column_names[0], Range::createLeftBounded(lower, true)))
|
||||||
throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN);
|
throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN);
|
||||||
|
|
||||||
ASTPtr args = std::make_shared<ASTExpressionList>();
|
ASTPtr args = std::make_shared<ASTExpressionList>();
|
||||||
@ -792,12 +781,12 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
lower_function->arguments = args;
|
lower_function->arguments = args;
|
||||||
lower_function->children.push_back(lower_function->arguments);
|
lower_function->children.push_back(lower_function->arguments);
|
||||||
|
|
||||||
filter_function = lower_function;
|
sampling.filter_function = lower_function;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (has_upper_limit)
|
if (has_upper_limit)
|
||||||
{
|
{
|
||||||
if (!key_condition.addCondition(sampling_key.column_names[0], Range::createRightBounded(upper, false)))
|
if (!key_condition->addCondition(sampling_key.column_names[0], Range::createRightBounded(upper, false)))
|
||||||
throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN);
|
throw Exception("Sampling column not in primary key", ErrorCodes::ILLEGAL_COLUMN);
|
||||||
|
|
||||||
ASTPtr args = std::make_shared<ASTExpressionList>();
|
ASTPtr args = std::make_shared<ASTExpressionList>();
|
||||||
@ -809,7 +798,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
upper_function->arguments = args;
|
upper_function->arguments = args;
|
||||||
upper_function->children.push_back(upper_function->arguments);
|
upper_function->children.push_back(upper_function->arguments);
|
||||||
|
|
||||||
filter_function = upper_function;
|
sampling.filter_function = upper_function;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (has_lower_limit && has_upper_limit)
|
if (has_lower_limit && has_upper_limit)
|
||||||
@ -818,26 +807,15 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
args->children.push_back(lower_function);
|
args->children.push_back(lower_function);
|
||||||
args->children.push_back(upper_function);
|
args->children.push_back(upper_function);
|
||||||
|
|
||||||
filter_function = std::make_shared<ASTFunction>();
|
sampling.filter_function = std::make_shared<ASTFunction>();
|
||||||
filter_function->name = "and";
|
sampling.filter_function->name = "and";
|
||||||
filter_function->arguments = args;
|
sampling.filter_function->arguments = args;
|
||||||
filter_function->children.push_back(filter_function->arguments);
|
sampling.filter_function->children.push_back(sampling.filter_function->arguments);
|
||||||
}
|
}
|
||||||
|
|
||||||
ASTPtr query = filter_function;
|
ASTPtr query = sampling.filter_function;
|
||||||
auto syntax_result = TreeRewriter(context).analyze(query, available_real_columns);
|
auto syntax_result = TreeRewriter(context).analyze(query, available_real_columns);
|
||||||
filter_expression = ExpressionAnalyzer(filter_function, syntax_result, context).getActionsDAG(false);
|
sampling.filter_expression = ExpressionAnalyzer(sampling.filter_function, syntax_result, context).getActionsDAG(false);
|
||||||
|
|
||||||
if (!select.final())
|
|
||||||
{
|
|
||||||
/// Add columns needed for `sample_by_ast` to `column_names_to_read`.
|
|
||||||
/// Skip this if final was used, because such columns were already added from PK.
|
|
||||||
std::vector<String> add_columns = filter_expression->getRequiredColumns().getNames();
|
|
||||||
column_names_to_read.insert(column_names_to_read.end(), add_columns.begin(), add_columns.end());
|
|
||||||
std::sort(column_names_to_read.begin(), column_names_to_read.end());
|
|
||||||
column_names_to_read.erase(std::unique(column_names_to_read.begin(), column_names_to_read.end()),
|
|
||||||
column_names_to_read.end());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -846,10 +824,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
LOG_DEBUG(log, "Sampling yields no data.");
|
LOG_DEBUG(log, "Sampling yields no data.");
|
||||||
return std::make_unique<QueryPlan>();
|
return std::make_unique<QueryPlan>();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
LOG_DEBUG(log, "Key condition: {}", key_condition.toString());
|
|
||||||
if (minmax_idx_condition)
|
|
||||||
LOG_DEBUG(log, "MinMax index condition: {}", minmax_idx_condition->toString());
|
|
||||||
|
|
||||||
MergeTreeReaderSettings reader_settings =
|
MergeTreeReaderSettings reader_settings =
|
||||||
{
|
{
|
||||||
@ -861,6 +836,13 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
.checksum_on_read = settings.checksum_on_read,
|
.checksum_on_read = settings.checksum_on_read,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
RangesInDataParts parts_with_ranges(parts.size());
|
||||||
|
size_t sum_marks = 0;
|
||||||
|
size_t sum_ranges = 0;
|
||||||
|
|
||||||
|
/// Let's start analyzing all useful indices
|
||||||
|
if (!use_cache)
|
||||||
|
{
|
||||||
struct DataSkippingIndexAndCondition
|
struct DataSkippingIndexAndCondition
|
||||||
{
|
{
|
||||||
MergeTreeIndexPtr index;
|
MergeTreeIndexPtr index;
|
||||||
@ -871,8 +853,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
std::atomic<size_t> parts_dropped{0};
|
std::atomic<size_t> parts_dropped{0};
|
||||||
|
|
||||||
DataSkippingIndexAndCondition(MergeTreeIndexPtr index_, MergeTreeIndexConditionPtr condition_)
|
DataSkippingIndexAndCondition(MergeTreeIndexPtr index_, MergeTreeIndexConditionPtr condition_)
|
||||||
: index(index_)
|
: index(index_), condition(condition_)
|
||||||
, condition(condition_)
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -896,8 +877,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
IParser::Pos pos(tokens, settings.max_parser_depth);
|
IParser::Pos pos(tokens, settings.max_parser_depth);
|
||||||
Expected expected;
|
Expected expected;
|
||||||
if (!parseIdentifiersOrStringLiterals(pos, expected, forced_indices))
|
if (!parseIdentifiersOrStringLiterals(pos, expected, forced_indices))
|
||||||
throw Exception(ErrorCodes::CANNOT_PARSE_TEXT,
|
throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse force_data_skipping_indices ('{}')", indices);
|
||||||
"Cannot parse force_data_skipping_indices ('{}')", indices);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (forced_indices.empty())
|
if (forced_indices.empty())
|
||||||
@ -911,26 +891,21 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
{
|
{
|
||||||
if (!useful_indices_names.count(index_name))
|
if (!useful_indices_names.count(index_name))
|
||||||
{
|
{
|
||||||
throw Exception(ErrorCodes::INDEX_NOT_USED,
|
throw Exception(
|
||||||
|
ErrorCodes::INDEX_NOT_USED,
|
||||||
"Index {} is not used and setting 'force_data_skipping_indices' contains it",
|
"Index {} is not used and setting 'force_data_skipping_indices' contains it",
|
||||||
backQuote(index_name));
|
backQuote(index_name));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO We can use normal projections with better primary keys
|
|
||||||
|
|
||||||
RangesInDataParts parts_with_ranges(parts.size());
|
|
||||||
size_t sum_marks = 0;
|
|
||||||
std::atomic<size_t> sum_marks_pk = 0;
|
std::atomic<size_t> sum_marks_pk = 0;
|
||||||
std::atomic<size_t> sum_parts_pk = 0;
|
std::atomic<size_t> sum_parts_pk = 0;
|
||||||
std::atomic<size_t> total_marks_pk = 0;
|
std::atomic<size_t> total_marks_pk = 0;
|
||||||
|
|
||||||
size_t sum_ranges = 0;
|
|
||||||
|
|
||||||
/// Let's find what range to read from each part.
|
/// Let's find what range to read from each part.
|
||||||
{
|
{
|
||||||
std::atomic<size_t> total_rows {0};
|
std::atomic<size_t> total_rows{0};
|
||||||
|
|
||||||
SizeLimits limits;
|
SizeLimits limits;
|
||||||
if (settings.read_overflow_mode == OverflowMode::THROW && settings.max_rows_to_read)
|
if (settings.read_overflow_mode == OverflowMode::THROW && settings.max_rows_to_read)
|
||||||
@ -953,7 +928,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
total_marks_pk.fetch_add(total_marks_count, std::memory_order_relaxed);
|
total_marks_pk.fetch_add(total_marks_count, std::memory_order_relaxed);
|
||||||
|
|
||||||
if (metadata_snapshot->hasPrimaryKey())
|
if (metadata_snapshot->hasPrimaryKey())
|
||||||
ranges.ranges = markRangesFromPKRange(part, metadata_snapshot, key_condition, settings, log);
|
ranges.ranges = markRangesFromPKRange(part, metadata_snapshot, *key_condition, settings, log);
|
||||||
else if (total_marks_count)
|
else if (total_marks_count)
|
||||||
ranges.ranges = MarkRanges{MarkRange{0, total_marks_count}};
|
ranges.ranges = MarkRanges{MarkRange{0, total_marks_count}};
|
||||||
|
|
||||||
@ -972,10 +947,14 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
size_t total_granules = 0;
|
size_t total_granules = 0;
|
||||||
size_t granules_dropped = 0;
|
size_t granules_dropped = 0;
|
||||||
ranges.ranges = filterMarksUsingIndex(
|
ranges.ranges = filterMarksUsingIndex(
|
||||||
index_and_condition.index, index_and_condition.condition,
|
index_and_condition.index,
|
||||||
part, ranges.ranges,
|
index_and_condition.condition,
|
||||||
settings, reader_settings,
|
part,
|
||||||
total_granules, granules_dropped,
|
ranges.ranges,
|
||||||
|
settings,
|
||||||
|
reader_settings,
|
||||||
|
total_granules,
|
||||||
|
granules_dropped,
|
||||||
log);
|
log);
|
||||||
|
|
||||||
index_and_condition.total_granules.fetch_add(total_granules, std::memory_order_relaxed);
|
index_and_condition.total_granules.fetch_add(total_granules, std::memory_order_relaxed);
|
||||||
@ -994,7 +973,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
size_t prev_total_rows_estimate = total_rows.fetch_add(current_rows_estimate);
|
size_t prev_total_rows_estimate = total_rows.fetch_add(current_rows_estimate);
|
||||||
size_t total_rows_estimate = current_rows_estimate + prev_total_rows_estimate;
|
size_t total_rows_estimate = current_rows_estimate + prev_total_rows_estimate;
|
||||||
limits.check(total_rows_estimate, 0, "rows (controlled by 'max_rows_to_read' setting)", ErrorCodes::TOO_MANY_ROWS);
|
limits.check(total_rows_estimate, 0, "rows (controlled by 'max_rows_to_read' setting)", ErrorCodes::TOO_MANY_ROWS);
|
||||||
leaf_limits.check(total_rows_estimate, 0, "rows (controlled by 'max_rows_to_read_leaf' setting)", ErrorCodes::TOO_MANY_ROWS);
|
leaf_limits.check(
|
||||||
|
total_rows_estimate, 0, "rows (controlled by 'max_rows_to_read_leaf' setting)", ErrorCodes::TOO_MANY_ROWS);
|
||||||
}
|
}
|
||||||
|
|
||||||
parts_with_ranges[part_index] = std::move(ranges);
|
parts_with_ranges[part_index] = std::move(ranges);
|
||||||
@ -1014,11 +994,9 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
ThreadPool pool(num_threads);
|
ThreadPool pool(num_threads);
|
||||||
|
|
||||||
for (size_t part_index = 0; part_index < parts.size(); ++part_index)
|
for (size_t part_index = 0; part_index < parts.size(); ++part_index)
|
||||||
pool.scheduleOrThrowOnError([&, part_index, thread_group = CurrentThread::getGroup()] {
|
pool.scheduleOrThrowOnError([&, part_index, thread_group = CurrentThread::getGroup()]
|
||||||
SCOPE_EXIT_SAFE(
|
{
|
||||||
if (thread_group)
|
SCOPE_EXIT_SAFE(if (thread_group) CurrentThread::detachQueryIfNotDetached(););
|
||||||
CurrentThread::detachQueryIfNotDetached();
|
|
||||||
);
|
|
||||||
if (thread_group)
|
if (thread_group)
|
||||||
CurrentThread::attachTo(thread_group);
|
CurrentThread::attachTo(thread_group);
|
||||||
|
|
||||||
@ -1050,7 +1028,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
|
|
||||||
if (metadata_snapshot->hasPrimaryKey())
|
if (metadata_snapshot->hasPrimaryKey())
|
||||||
{
|
{
|
||||||
auto description = key_condition.getDescription();
|
auto description = key_condition->getDescription();
|
||||||
|
|
||||||
index_stats->emplace_back(ReadFromMergeTree::IndexStat{
|
index_stats->emplace_back(ReadFromMergeTree::IndexStat{
|
||||||
.type = ReadFromMergeTree::IndexType::PrimaryKey,
|
.type = ReadFromMergeTree::IndexType::PrimaryKey,
|
||||||
@ -1063,12 +1041,15 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
for (const auto & index_and_condition : useful_indices)
|
for (const auto & index_and_condition : useful_indices)
|
||||||
{
|
{
|
||||||
const auto & index_name = index_and_condition.index->index.name;
|
const auto & index_name = index_and_condition.index->index.name;
|
||||||
LOG_DEBUG(log, "Index {} has dropped {}/{} granules.",
|
LOG_DEBUG(
|
||||||
|
log,
|
||||||
|
"Index {} has dropped {}/{} granules.",
|
||||||
backQuote(index_name),
|
backQuote(index_name),
|
||||||
index_and_condition.granules_dropped, index_and_condition.total_granules);
|
index_and_condition.granules_dropped,
|
||||||
|
index_and_condition.total_granules);
|
||||||
|
|
||||||
std::string description = index_and_condition.index->index.type
|
std::string description
|
||||||
+ " GRANULARITY " + std::to_string(index_and_condition.index->index.granularity);
|
= index_and_condition.index->index.type + " GRANULARITY " + std::to_string(index_and_condition.index->index.granularity);
|
||||||
|
|
||||||
index_stats->emplace_back(ReadFromMergeTree::IndexStat{
|
index_stats->emplace_back(ReadFromMergeTree::IndexStat{
|
||||||
.type = ReadFromMergeTree::IndexType::Skip,
|
.type = ReadFromMergeTree::IndexType::Skip,
|
||||||
@ -1078,18 +1059,45 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
.num_granules_after = index_and_condition.total_granules - index_and_condition.granules_dropped});
|
.num_granules_after = index_and_condition.total_granules - index_and_condition.granules_dropped});
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG_DEBUG(log, "Selected {}/{} parts by partition key, {} parts by primary key, {}/{} marks by primary key, {} marks to read from {} ranges",
|
LOG_DEBUG(
|
||||||
parts.size(), total_parts, parts_with_ranges.size(),
|
log,
|
||||||
|
"Selected {}/{} parts by partition key, {} parts by primary key, {}/{} marks by primary key, {} marks to read from {} ranges",
|
||||||
|
parts.size(),
|
||||||
|
total_parts,
|
||||||
|
parts_with_ranges.size(),
|
||||||
sum_marks_pk.load(std::memory_order_relaxed),
|
sum_marks_pk.load(std::memory_order_relaxed),
|
||||||
total_marks_pk.load(std::memory_order_relaxed),
|
total_marks_pk.load(std::memory_order_relaxed),
|
||||||
sum_marks, sum_ranges);
|
sum_marks,
|
||||||
|
sum_ranges);
|
||||||
|
}
|
||||||
|
|
||||||
if (num_granules_to_read)
|
if (cache)
|
||||||
*num_granules_to_read = sum_marks_pk.load(std::memory_order_relaxed);
|
{
|
||||||
|
if (cache->use_cache)
|
||||||
|
{
|
||||||
|
parts_with_ranges = std::move(cache->parts_with_ranges);
|
||||||
|
sum_marks = cache->sum_marks;
|
||||||
|
sum_ranges = cache->sum_ranges;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// We are asking for ranges_to_read. Return immediately without further planning.
|
||||||
|
cache->parts_with_ranges = std::move(parts_with_ranges);
|
||||||
|
cache->sampling = std::move(sampling);
|
||||||
|
cache->index_stats = std::move(index_stats);
|
||||||
|
cache->sum_marks = sum_marks;
|
||||||
|
cache->sum_ranges = sum_ranges;
|
||||||
|
cache->use_cache = true;
|
||||||
|
return std::make_unique<QueryPlan>();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (parts_with_ranges.empty())
|
if (parts_with_ranges.empty())
|
||||||
return std::make_unique<QueryPlan>();
|
return std::make_unique<QueryPlan>();
|
||||||
|
|
||||||
|
// Check limitations. query_id is used as the quota RAII's resource key.
|
||||||
|
String query_id;
|
||||||
|
{
|
||||||
const auto data_settings = data.getSettings();
|
const auto data_settings = data.getSettings();
|
||||||
auto max_partitions_to_read
|
auto max_partitions_to_read
|
||||||
= settings.max_partitions_to_read.changed ? settings.max_partitions_to_read : data_settings->max_partitions_to_read;
|
= settings.max_partitions_to_read.changed ? settings.max_partitions_to_read : data_settings->max_partitions_to_read;
|
||||||
@ -1106,7 +1114,6 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
max_partitions_to_read);
|
max_partitions_to_read);
|
||||||
}
|
}
|
||||||
|
|
||||||
String query_id;
|
|
||||||
if (data_settings->max_concurrent_queries > 0)
|
if (data_settings->max_concurrent_queries > 0)
|
||||||
{
|
{
|
||||||
if (data_settings->min_marks_to_honor_max_concurrent_queries > 0
|
if (data_settings->min_marks_to_honor_max_concurrent_queries > 0
|
||||||
@ -1117,6 +1124,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
data.insertQueryIdOrThrow(query_id, data_settings->max_concurrent_queries);
|
data.insertQueryIdOrThrow(query_id, data_settings->max_concurrent_queries);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ProfileEvents::increment(ProfileEvents::SelectedParts, parts_with_ranges.size());
|
ProfileEvents::increment(ProfileEvents::SelectedParts, parts_with_ranges.size());
|
||||||
ProfileEvents::increment(ProfileEvents::SelectedRanges, sum_ranges);
|
ProfileEvents::increment(ProfileEvents::SelectedRanges, sum_ranges);
|
||||||
@ -1129,6 +1137,22 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
/// NOTE: It may lead to double computation of expressions.
|
/// NOTE: It may lead to double computation of expressions.
|
||||||
ActionsDAGPtr result_projection;
|
ActionsDAGPtr result_projection;
|
||||||
|
|
||||||
|
Names column_names_to_read = real_column_names;
|
||||||
|
if (!select.final() && sampling.use_sampling)
|
||||||
|
{
|
||||||
|
/// Add columns needed for `sample_by_ast` to `column_names_to_read`.
|
||||||
|
/// Skip this if final was used, because such columns were already added from PK.
|
||||||
|
std::vector<String> add_columns = sampling.filter_expression->getRequiredColumns().getNames();
|
||||||
|
column_names_to_read.insert(column_names_to_read.end(), add_columns.begin(), add_columns.end());
|
||||||
|
std::sort(column_names_to_read.begin(), column_names_to_read.end());
|
||||||
|
column_names_to_read.erase(std::unique(column_names_to_read.begin(), column_names_to_read.end()),
|
||||||
|
column_names_to_read.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto & input_order_info = query_info.input_order_info
|
||||||
|
? query_info.input_order_info
|
||||||
|
: (query_info.projection ? query_info.projection->input_order_info : nullptr);
|
||||||
|
|
||||||
if (select.final())
|
if (select.final())
|
||||||
{
|
{
|
||||||
/// Add columns needed to calculate the sorting expression and the sign.
|
/// Add columns needed to calculate the sorting expression and the sign.
|
||||||
@ -1158,9 +1182,9 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
result_projection,
|
result_projection,
|
||||||
query_id);
|
query_id);
|
||||||
}
|
}
|
||||||
else if ((settings.optimize_read_in_order || settings.optimize_aggregation_in_order) && query_info.input_order_info)
|
else if ((settings.optimize_read_in_order || settings.optimize_aggregation_in_order) && input_order_info)
|
||||||
{
|
{
|
||||||
size_t prefix_size = query_info.input_order_info->order_key_prefix_descr.size();
|
size_t prefix_size = input_order_info->order_key_prefix_descr.size();
|
||||||
auto order_key_prefix_ast = metadata_snapshot->getSortingKey().expression_list_ast->clone();
|
auto order_key_prefix_ast = metadata_snapshot->getSortingKey().expression_list_ast->clone();
|
||||||
order_key_prefix_ast->children.resize(prefix_size);
|
order_key_prefix_ast->children.resize(prefix_size);
|
||||||
|
|
||||||
@ -1181,7 +1205,8 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
settings,
|
settings,
|
||||||
reader_settings,
|
reader_settings,
|
||||||
result_projection,
|
result_projection,
|
||||||
query_id);
|
query_id,
|
||||||
|
input_order_info);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -1203,12 +1228,12 @@ QueryPlanPtr MergeTreeDataSelectExecutor::readFromParts(
|
|||||||
if (!plan)
|
if (!plan)
|
||||||
return std::make_unique<QueryPlan>();
|
return std::make_unique<QueryPlan>();
|
||||||
|
|
||||||
if (use_sampling)
|
if (sampling.use_sampling)
|
||||||
{
|
{
|
||||||
auto sampling_step = std::make_unique<FilterStep>(
|
auto sampling_step = std::make_unique<FilterStep>(
|
||||||
plan->getCurrentDataStream(),
|
plan->getCurrentDataStream(),
|
||||||
filter_expression,
|
sampling.filter_expression,
|
||||||
filter_function->getColumnName(),
|
sampling.filter_function->getColumnName(),
|
||||||
false);
|
false);
|
||||||
|
|
||||||
sampling_step->setStepDescription("Sampling");
|
sampling_step->setStepDescription("Sampling");
|
||||||
@ -1412,11 +1437,10 @@ QueryPlanPtr MergeTreeDataSelectExecutor::spreadMarkRangesAmongStreamsWithOrder(
|
|||||||
const Settings & settings,
|
const Settings & settings,
|
||||||
const MergeTreeReaderSettings & reader_settings,
|
const MergeTreeReaderSettings & reader_settings,
|
||||||
ActionsDAGPtr & out_projection,
|
ActionsDAGPtr & out_projection,
|
||||||
const String & query_id) const
|
const String & query_id,
|
||||||
|
const InputOrderInfoPtr & input_order_info) const
|
||||||
{
|
{
|
||||||
size_t sum_marks = 0;
|
size_t sum_marks = 0;
|
||||||
const InputOrderInfoPtr & input_order_info = query_info.input_order_info;
|
|
||||||
|
|
||||||
size_t adaptive_parts = 0;
|
size_t adaptive_parts = 0;
|
||||||
std::vector<size_t> sum_marks_in_parts(parts.size());
|
std::vector<size_t> sum_marks_in_parts(parts.size());
|
||||||
const auto data_settings = data.getSettings();
|
const auto data_settings = data.getSettings();
|
||||||
|
@ -13,6 +13,22 @@ namespace DB
|
|||||||
|
|
||||||
class KeyCondition;
|
class KeyCondition;
|
||||||
|
|
||||||
|
struct MergeTreeDataSelectSamplingData
|
||||||
|
{
|
||||||
|
bool use_sampling;
|
||||||
|
std::shared_ptr<ASTFunction> filter_function;
|
||||||
|
ActionsDAGPtr filter_expression;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Analysis results of MergeTreeDataSelectExecutor::readFromParts that can be handed from one
/// call to another. When a cache is passed and `use_cache` is false, readFromParts fills the
/// fields below, sets `use_cache` to true and returns an empty plan without further planning.
/// When `use_cache` is already true, the stored values are taken instead of being recomputed.
struct MergeTreeDataSelectCache
|
||||||
|
{
|
||||||
|
/// Parts together with the mark ranges selected for reading.
RangesInDataParts parts_with_ranges;
|
||||||
|
/// Sampling analysis result (see MergeTreeDataSelectSamplingData).
MergeTreeDataSelectSamplingData sampling;
|
||||||
|
/// Index statistics collected while selecting parts and ranges.
std::unique_ptr<ReadFromMergeTree::IndexStats> index_stats;
|
||||||
|
/// Number of marks to read, corresponding to `parts_with_ranges`.
size_t sum_marks = 0;
|
||||||
|
/// Number of ranges to read, corresponding to `parts_with_ranges`.
size_t sum_ranges = 0;
|
||||||
|
/// Set to true once the fields above have been filled by readFromParts.
bool use_cache = false;
|
||||||
|
};
|
||||||
|
|
||||||
/** Executes SELECT queries on data from the merge tree.
|
/** Executes SELECT queries on data from the merge tree.
|
||||||
*/
|
*/
|
||||||
@ -36,18 +52,17 @@ public:
|
|||||||
QueryProcessingStage::Enum processed_stage,
|
QueryProcessingStage::Enum processed_stage,
|
||||||
const PartitionIdToMaxBlock * max_block_numbers_to_read = nullptr) const;
|
const PartitionIdToMaxBlock * max_block_numbers_to_read = nullptr) const;
|
||||||
|
|
||||||
|
|
||||||
QueryPlanPtr readFromParts(
|
QueryPlanPtr readFromParts(
|
||||||
MergeTreeData::DataPartsVector parts,
|
MergeTreeData::DataPartsVector parts,
|
||||||
const Names & column_names,
|
const Names & column_names,
|
||||||
const StorageMetadataPtr & metadata_snapshot_base,
|
const StorageMetadataPtr & metadata_snapshot_base,
|
||||||
|
const StorageMetadataPtr & metadata_snapshot,
|
||||||
const SelectQueryInfo & query_info,
|
const SelectQueryInfo & query_info,
|
||||||
ContextPtr context,
|
ContextPtr context,
|
||||||
UInt64 max_block_size,
|
UInt64 max_block_size,
|
||||||
unsigned num_streams,
|
unsigned num_streams,
|
||||||
const PartitionIdToMaxBlock * max_block_numbers_to_read = nullptr,
|
const PartitionIdToMaxBlock * max_block_numbers_to_read = nullptr,
|
||||||
size_t * num_granules_to_read = nullptr,
|
MergeTreeDataSelectCache * cache = nullptr) const;
|
||||||
bool use_projection_metadata = false) const;
|
|
||||||
|
|
||||||
private:
|
private:
|
||||||
const MergeTreeData & data;
|
const MergeTreeData & data;
|
||||||
@ -83,7 +98,8 @@ private:
|
|||||||
const Settings & settings,
|
const Settings & settings,
|
||||||
const MergeTreeReaderSettings & reader_settings,
|
const MergeTreeReaderSettings & reader_settings,
|
||||||
ActionsDAGPtr & out_projection,
|
ActionsDAGPtr & out_projection,
|
||||||
const String & query_id) const;
|
const String & query_id,
|
||||||
|
const InputOrderInfoPtr & input_order_info) const;
|
||||||
|
|
||||||
QueryPlanPtr spreadMarkRangesAmongStreamsFinal(
|
QueryPlanPtr spreadMarkRangesAmongStreamsFinal(
|
||||||
RangesInDataParts && parts,
|
RangesInDataParts && parts,
|
||||||
|
@ -1,228 +0,0 @@
|
|||||||
#include <Storages/MergeTree/MergeTreeDataUtils.h>
|
|
||||||
|
|
||||||
#include <Interpreters/Context.h>
|
|
||||||
#include <Interpreters/InterpreterSelectQuery.h>
|
|
||||||
|
|
||||||
namespace DB
|
|
||||||
{
|
|
||||||
|
|
||||||
bool getQueryProcessingStageWithAggregateProjection(
|
|
||||||
ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot, SelectQueryInfo & query_info)
|
|
||||||
{
|
|
||||||
const auto & settings = query_context->getSettingsRef();
|
|
||||||
if (!settings.allow_experimental_projection_optimization || query_info.ignore_projections)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
const auto & query_ptr = query_info.query;
|
|
||||||
|
|
||||||
InterpreterSelectQuery select(
|
|
||||||
query_ptr, query_context, SelectQueryOptions{QueryProcessingStage::WithMergeableState}.ignoreProjections().ignoreAlias());
|
|
||||||
const auto & analysis_result = select.getAnalysisResult();
|
|
||||||
|
|
||||||
bool can_use_aggregate_projection = true;
|
|
||||||
/// If the first stage of the query pipeline is more complex than Aggregating - Expression - Filter - ReadFromStorage,
|
|
||||||
/// we cannot use aggregate projection.
|
|
||||||
if (analysis_result.join != nullptr || analysis_result.array_join != nullptr)
|
|
||||||
can_use_aggregate_projection = false;
|
|
||||||
|
|
||||||
/// Check if all needed columns can be provided by some aggregate projection. Here we also try
|
|
||||||
/// to find expression matches. For example, suppose an aggregate projection contains a column
|
|
||||||
/// named sum(x) and the given query also has an expression called sum(x), it's a match. This is
|
|
||||||
/// why we need to ignore all aliases during projection creation and the above query planning.
|
|
||||||
/// It's also worth noting that, sqrt(sum(x)) will also work because we can treat sum(x) as a
|
|
||||||
/// required column.
|
|
||||||
|
|
||||||
/// The ownership of ProjectionDescription is held in metadata_snapshot which lives along with
|
|
||||||
/// InterpreterSelect, thus we can store the raw pointer here.
|
|
||||||
std::vector<ProjectionCandidate> candidates;
|
|
||||||
NameSet keys;
|
|
||||||
std::unordered_map<std::string_view, size_t> key_name_pos_map;
|
|
||||||
size_t pos = 0;
|
|
||||||
for (const auto & desc : select.getQueryAnalyzer()->aggregationKeys())
|
|
||||||
{
|
|
||||||
keys.insert(desc.name);
|
|
||||||
key_name_pos_map.insert({desc.name, pos++});
|
|
||||||
}
|
|
||||||
|
|
||||||
// All required columns should be provided by either current projection or previous actions
|
|
||||||
// Let's traverse backward to finish the check.
|
|
||||||
// TODO what if there is a column with name sum(x) and an aggregate sum(x)?
|
|
||||||
auto rewrite_before_where =
|
|
||||||
[&](ProjectionCandidate & candidate, const ProjectionDescription & projection,
|
|
||||||
NameSet & required_columns, const Block & source_block, const Block & aggregates)
|
|
||||||
{
|
|
||||||
if (analysis_result.before_where)
|
|
||||||
{
|
|
||||||
candidate.before_where = analysis_result.before_where->clone();
|
|
||||||
required_columns = candidate.before_where->foldActionsByProjection(
|
|
||||||
required_columns,
|
|
||||||
projection.sample_block_for_keys,
|
|
||||||
query_ptr->as<const ASTSelectQuery &>().where()->getColumnName());
|
|
||||||
if (required_columns.empty())
|
|
||||||
return false;
|
|
||||||
candidate.before_where->addAggregatesViaProjection(aggregates);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (analysis_result.prewhere_info)
|
|
||||||
{
|
|
||||||
auto & prewhere_info = analysis_result.prewhere_info;
|
|
||||||
candidate.prewhere_info = std::make_shared<PrewhereInfo>();
|
|
||||||
candidate.prewhere_info->prewhere_column_name = prewhere_info->prewhere_column_name;
|
|
||||||
candidate.prewhere_info->remove_prewhere_column = prewhere_info->remove_prewhere_column;
|
|
||||||
candidate.prewhere_info->row_level_column_name = prewhere_info->row_level_column_name;
|
|
||||||
candidate.prewhere_info->need_filter = prewhere_info->need_filter;
|
|
||||||
|
|
||||||
auto actions_settings = ExpressionActionsSettings::fromSettings(query_context->getSettingsRef());
|
|
||||||
auto prewhere_actions = prewhere_info->prewhere_actions->clone();
|
|
||||||
NameSet prewhere_required_columns;
|
|
||||||
prewhere_required_columns = prewhere_actions->foldActionsByProjection(
|
|
||||||
prewhere_required_columns, projection.sample_block_for_keys, prewhere_info->prewhere_column_name);
|
|
||||||
if (prewhere_required_columns.empty())
|
|
||||||
return false;
|
|
||||||
candidate.prewhere_info->prewhere_actions = std::make_shared<ExpressionActions>(prewhere_actions, actions_settings);
|
|
||||||
|
|
||||||
if (prewhere_info->row_level_filter_actions)
|
|
||||||
{
|
|
||||||
auto row_level_filter_actions = prewhere_info->row_level_filter_actions->clone();
|
|
||||||
prewhere_required_columns = row_level_filter_actions->foldActionsByProjection(
|
|
||||||
prewhere_required_columns, projection.sample_block_for_keys, prewhere_info->row_level_column_name);
|
|
||||||
if (prewhere_required_columns.empty())
|
|
||||||
return false;
|
|
||||||
candidate.prewhere_info->row_level_filter
|
|
||||||
= std::make_shared<ExpressionActions>(row_level_filter_actions, actions_settings);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (prewhere_info->alias_actions)
|
|
||||||
{
|
|
||||||
auto alias_actions = prewhere_info->alias_actions->clone();
|
|
||||||
prewhere_required_columns
|
|
||||||
= alias_actions->foldActionsByProjection(prewhere_required_columns, projection.sample_block_for_keys);
|
|
||||||
if (prewhere_required_columns.empty())
|
|
||||||
return false;
|
|
||||||
candidate.prewhere_info->alias_actions = std::make_shared<ExpressionActions>(alias_actions, actions_settings);
|
|
||||||
}
|
|
||||||
required_columns.insert(prewhere_required_columns.begin(), prewhere_required_columns.end());
|
|
||||||
}
|
|
||||||
|
|
||||||
bool match = true;
|
|
||||||
for (const auto & column : required_columns)
|
|
||||||
{
|
|
||||||
/// There are still missing columns, fail to match
|
|
||||||
if (!source_block.has(column))
|
|
||||||
{
|
|
||||||
match = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return match;
|
|
||||||
};
|
|
||||||
|
|
||||||
for (const auto & projection : metadata_snapshot->projections)
|
|
||||||
{
|
|
||||||
ProjectionCandidate candidate{};
|
|
||||||
candidate.desc = &projection;
|
|
||||||
|
|
||||||
if (projection.type == ProjectionDescription::Type::Aggregate && analysis_result.need_aggregate && can_use_aggregate_projection)
|
|
||||||
{
|
|
||||||
bool match = true;
|
|
||||||
Block aggregates;
|
|
||||||
// Let's first check if all aggregates are provided by current projection
|
|
||||||
for (const auto & aggregate : select.getQueryAnalyzer()->aggregates())
|
|
||||||
{
|
|
||||||
const auto * column = projection.sample_block.findByName(aggregate.column_name);
|
|
||||||
if (column)
|
|
||||||
{
|
|
||||||
aggregates.insert(*column);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
match = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!match)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
// Check if all aggregation keys can be either provided by some action, or by current
|
|
||||||
// projection directly. Reshape the `before_aggregation` action DAG so that it only
|
|
||||||
// needs to provide aggregation keys, and certain children DAG might be substituted by
|
|
||||||
// some keys in projection.
|
|
||||||
candidate.before_aggregation = analysis_result.before_aggregation->clone();
|
|
||||||
auto required_columns = candidate.before_aggregation->foldActionsByProjection(keys, projection.sample_block_for_keys);
|
|
||||||
|
|
||||||
if (required_columns.empty())
|
|
||||||
continue;
|
|
||||||
|
|
||||||
// Reorder aggregation keys and attach aggregates
|
|
||||||
candidate.before_aggregation->reorderAggregationKeysForProjection(key_name_pos_map);
|
|
||||||
candidate.before_aggregation->addAggregatesViaProjection(aggregates);
|
|
||||||
|
|
||||||
if (rewrite_before_where(candidate, projection, required_columns, projection.sample_block_for_keys, aggregates))
|
|
||||||
{
|
|
||||||
candidate.required_columns = {required_columns.begin(), required_columns.end()};
|
|
||||||
for (const auto & aggregate : aggregates)
|
|
||||||
candidate.required_columns.push_back(aggregate.name);
|
|
||||||
candidates.push_back(std::move(candidate));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (projection.type == ProjectionDescription::Type::Normal && (analysis_result.hasWhere() || analysis_result.hasPrewhere()))
|
|
||||||
{
|
|
||||||
NameSet required_columns;
|
|
||||||
if (analysis_result.hasWhere())
|
|
||||||
{
|
|
||||||
for (const auto & column : analysis_result.before_where->getResultColumns())
|
|
||||||
required_columns.insert(column.name);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
for (const auto & column : analysis_result.prewhere_info->prewhere_actions->getResultColumns())
|
|
||||||
required_columns.insert(column.name);
|
|
||||||
}
|
|
||||||
if (rewrite_before_where(candidate, projection, required_columns, projection.sample_block, {}))
|
|
||||||
{
|
|
||||||
candidate.required_columns = {required_columns.begin(), required_columns.end()};
|
|
||||||
candidates.push_back(std::move(candidate));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Let's select the best aggregate projection to execute the query.
|
|
||||||
if (!candidates.empty())
|
|
||||||
{
|
|
||||||
size_t min_key_size = std::numeric_limits<size_t>::max();
|
|
||||||
ProjectionCandidate * selected_candidate = nullptr;
|
|
||||||
/// Favor aggregate projections
|
|
||||||
for (auto & candidate : candidates)
|
|
||||||
{
|
|
||||||
// TODO We choose the projection with least key_size. Perhaps we can do better? (key rollups)
|
|
||||||
if (candidate.desc->type == ProjectionDescription::Type::Aggregate && candidate.desc->key_size < min_key_size)
|
|
||||||
{
|
|
||||||
selected_candidate = &candidate;
|
|
||||||
min_key_size = candidate.desc->key_size;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// TODO Select the best normal projection if no aggregate projection is available
|
|
||||||
if (!selected_candidate)
|
|
||||||
{
|
|
||||||
for (auto & candidate : candidates)
|
|
||||||
selected_candidate = &candidate;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!selected_candidate)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (selected_candidate->desc->type == ProjectionDescription::Type::Aggregate)
|
|
||||||
{
|
|
||||||
selected_candidate->aggregation_keys = select.getQueryAnalyzer()->aggregationKeys();
|
|
||||||
selected_candidate->aggregate_descriptions = select.getQueryAnalyzer()->aggregates();
|
|
||||||
}
|
|
||||||
query_info.projection = std::move(*selected_candidate);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,13 +0,0 @@
|
|||||||
#pragma once
|
|
||||||
|
|
||||||
#include <Interpreters/Context_fwd.h>
|
|
||||||
#include <Storages/SelectQueryInfo.h>
|
|
||||||
#include <Storages/StorageInMemoryMetadata.h>
|
|
||||||
|
|
||||||
namespace DB
|
|
||||||
{
|
|
||||||
|
|
||||||
/// Declaration only; the definition is not part of this header.
/// Takes the query context, the table's metadata snapshot and a mutable SelectQueryInfo, and returns a bool.
/// NOTE(review): presumably returns true when a projection was selected and recorded in
/// `query_info` -- confirm against the definition.
bool getQueryProcessingStageWithAggregateProjection(
|
|
||||||
ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot, SelectQueryInfo & query_info);
|
|
||||||
|
|
||||||
}
|
|
75
src/Storages/MergeTree/StorageFromBasePartsOfProjection.h
Normal file
75
src/Storages/MergeTree/StorageFromBasePartsOfProjection.h
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <Core/Defines.h>
|
||||||
|
#include <Processors/QueryPipeline.h>
|
||||||
|
#include <Processors/QueryPlan/BuildQueryPipelineSettings.h>
|
||||||
|
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
|
||||||
|
#include <Processors/QueryPlan/QueryPlan.h>
|
||||||
|
#include <Storages/IStorage.h>
|
||||||
|
#include <Storages/MergeTree/IMergeTreeDataPart.h>
|
||||||
|
#include <Storages/MergeTree/MergeTreeDataSelectExecutor.h>
|
||||||
|
|
||||||
|
#include <ext/shared_ptr_helper.h>
|
||||||
|
|
||||||
|
|
||||||
|
namespace DB
|
||||||
|
{
|
||||||
|
/// A Storage that allows reading from a single MergeTree data part.
|
||||||
|
class StorageFromBasePartsOfProjection final : public ext::shared_ptr_helper<StorageFromBasePartsOfProjection>, public IStorage
|
||||||
|
{
|
||||||
|
friend struct ext::shared_ptr_helper<StorageFromBasePartsOfProjection>;
|
||||||
|
|
||||||
|
public:
|
||||||
|
String getName() const override { return "FromBasePartsOfProjection"; }
|
||||||
|
|
||||||
|
Pipe read(
|
||||||
|
const Names & column_names,
|
||||||
|
const StorageMetadataPtr & metadata_snapshot,
|
||||||
|
SelectQueryInfo & query_info,
|
||||||
|
ContextPtr context,
|
||||||
|
QueryProcessingStage::Enum /*processed_stage*/,
|
||||||
|
size_t max_block_size,
|
||||||
|
unsigned num_streams) override
|
||||||
|
{
|
||||||
|
// NOTE: It's used to read normal parts only
|
||||||
|
QueryPlan query_plan = std::move(*MergeTreeDataSelectExecutor(storage).readFromParts(
|
||||||
|
{},
|
||||||
|
column_names,
|
||||||
|
metadata_snapshot,
|
||||||
|
metadata_snapshot,
|
||||||
|
query_info,
|
||||||
|
context,
|
||||||
|
max_block_size,
|
||||||
|
num_streams,
|
||||||
|
nullptr,
|
||||||
|
query_info.projection ? query_info.projection->merge_tree_data_select_base_cache.get()
|
||||||
|
: query_info.merge_tree_data_select_cache.get()));
|
||||||
|
|
||||||
|
return query_plan.convertToPipe(
|
||||||
|
QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool supportsIndexForIn() const override { return true; }
|
||||||
|
|
||||||
|
bool mayBenefitFromIndexForIn(
|
||||||
|
const ASTPtr & left_in_operand, ContextPtr query_context, const StorageMetadataPtr & metadata_snapshot) const override
|
||||||
|
{
|
||||||
|
return storage.mayBenefitFromIndexForIn(left_in_operand, query_context, metadata_snapshot);
|
||||||
|
}
|
||||||
|
|
||||||
|
NamesAndTypesList getVirtuals() const override { return storage.getVirtuals(); }
|
||||||
|
|
||||||
|
protected:
|
||||||
|
StorageFromBasePartsOfProjection(const MergeTreeData & storage_, const StorageMetadataPtr & metadata_snapshot)
|
||||||
|
: IStorage(storage_.getStorageID()), storage(storage_)
|
||||||
|
{
|
||||||
|
setInMemoryMetadata(*metadata_snapshot);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private:
|
||||||
|
const MergeTreeData & storage;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
@ -31,19 +31,23 @@ public:
|
|||||||
size_t max_block_size,
|
size_t max_block_size,
|
||||||
unsigned num_streams) override
|
unsigned num_streams) override
|
||||||
{
|
{
|
||||||
|
// NOTE: It's used to read normal parts only
|
||||||
QueryPlan query_plan = std::move(*MergeTreeDataSelectExecutor(parts.front()->storage)
|
QueryPlan query_plan = std::move(*MergeTreeDataSelectExecutor(parts.front()->storage)
|
||||||
.readFromParts(
|
.readFromParts(
|
||||||
parts,
|
parts,
|
||||||
column_names,
|
column_names,
|
||||||
metadata_snapshot,
|
metadata_snapshot,
|
||||||
|
metadata_snapshot,
|
||||||
query_info,
|
query_info,
|
||||||
context,
|
context,
|
||||||
max_block_size,
|
max_block_size,
|
||||||
num_streams,
|
num_streams,
|
||||||
nullptr,
|
nullptr,
|
||||||
&num_granules_from_last_read));
|
query_info.projection ? query_info.projection->merge_tree_data_select_base_cache.get()
|
||||||
|
: query_info.merge_tree_data_select_cache.get()));
|
||||||
|
|
||||||
return query_plan.convertToPipe(QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context));
|
return query_plan.convertToPipe(
|
||||||
|
QueryPlanOptimizationSettings::fromContext(context), BuildQueryPipelineSettings::fromContext(context));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -70,8 +74,6 @@ public:
|
|||||||
return parts.front()->storage.getPartitionIDFromQuery(ast, context);
|
return parts.front()->storage.getPartitionIDFromQuery(ast, context);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t getNumGranulesFromLastRead() const { return num_granules_from_last_read; }
|
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
StorageFromMergeTreeDataPart(const MergeTreeData::DataPartPtr & part_)
|
StorageFromMergeTreeDataPart(const MergeTreeData::DataPartPtr & part_)
|
||||||
: IStorage(getIDFromPart(part_))
|
: IStorage(getIDFromPart(part_))
|
||||||
@ -90,8 +92,6 @@ protected:
|
|||||||
private:
|
private:
|
||||||
MergeTreeData::DataPartsVector parts;
|
MergeTreeData::DataPartsVector parts;
|
||||||
|
|
||||||
size_t num_granules_from_last_read = 0;
|
|
||||||
|
|
||||||
static StorageID getIDFromPart(const MergeTreeData::DataPartPtr & part_)
|
static StorageID getIDFromPart(const MergeTreeData::DataPartPtr & part_)
|
||||||
{
|
{
|
||||||
auto table_id = part_->storage.getStorageID();
|
auto table_id = part_->storage.getStorageID();
|
||||||
|
@ -116,6 +116,10 @@ struct InputOrderInfo
|
|||||||
|
|
||||||
class IMergeTreeDataPart;
|
class IMergeTreeDataPart;
|
||||||
|
|
||||||
|
using ManyExpressionActions = std::vector<ExpressionActionsPtr>;
|
||||||
|
|
||||||
|
struct MergeTreeDataSelectCache;
|
||||||
|
|
||||||
// The projection selected to execute current query
|
// The projection selected to execute current query
|
||||||
struct ProjectionCandidate
|
struct ProjectionCandidate
|
||||||
{
|
{
|
||||||
@ -126,6 +130,12 @@ struct ProjectionCandidate
|
|||||||
Names required_columns;
|
Names required_columns;
|
||||||
NamesAndTypesList aggregation_keys;
|
NamesAndTypesList aggregation_keys;
|
||||||
AggregateDescriptions aggregate_descriptions;
|
AggregateDescriptions aggregate_descriptions;
|
||||||
|
bool complete = false;
|
||||||
|
ReadInOrderOptimizerPtr order_optimizer;
|
||||||
|
InputOrderInfoPtr input_order_info;
|
||||||
|
ManyExpressionActions group_by_elements_actions;
|
||||||
|
std::shared_ptr<MergeTreeDataSelectCache> merge_tree_data_select_base_cache;
|
||||||
|
std::shared_ptr<MergeTreeDataSelectCache> merge_tree_data_select_projection_cache;
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Query along with some additional data,
|
/** Query along with some additional data,
|
||||||
@ -159,9 +169,12 @@ struct SelectQueryInfo
|
|||||||
|
|
||||||
ClusterPtr getCluster() const { return !optimized_cluster ? cluster : optimized_cluster; }
|
ClusterPtr getCluster() const { return !optimized_cluster ? cluster : optimized_cluster; }
|
||||||
|
|
||||||
|
Names required_columns;
|
||||||
|
|
||||||
/// If not null, it means we choose a projection to execute current query.
|
/// If not null, it means we choose a projection to execute current query.
|
||||||
std::optional<ProjectionCandidate> projection;
|
std::optional<ProjectionCandidate> projection;
|
||||||
bool ignore_projections = false;
|
bool ignore_projections = false;
|
||||||
|
std::shared_ptr<MergeTreeDataSelectCache> merge_tree_data_select_cache;
|
||||||
};
|
};
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user