ClickHouse/src/Interpreters/InterpreterSelectWithUnionQuery.cpp

Ignoring revisions in .git-blame-ignore-revs. Click here to bypass and see the normal blame view.

424 lines
18 KiB
C++
Raw Normal View History

2022-08-13 13:03:16 +00:00
#include <Access/AccessControl.h>
2020-11-01 14:57:41 +00:00
#include <Columns/getLeastSuperColumn.h>
#include <Interpreters/Context.h>
2023-03-17 16:27:24 +00:00
#include <Interpreters/InterpreterSelectIntersectExceptQuery.h>
#include <Interpreters/InterpreterFactory.h>
2020-11-01 14:57:41 +00:00
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/InterpreterSelectWithUnionQuery.h>
2022-08-13 13:03:16 +00:00
#include <Interpreters/QueryLog.h>
2023-03-17 16:27:24 +00:00
#include <Interpreters/evaluateConstantExpression.h>
#include <Parsers/ASTSelectIntersectExceptQuery.h>
2018-02-27 20:16:58 +00:00
#include <Parsers/ASTSelectQuery.h>
2020-11-01 14:57:41 +00:00
#include <Parsers/ASTSelectWithUnionQuery.h>
#include <Parsers/queryToString.h>
2020-11-01 14:57:41 +00:00
#include <Processors/QueryPlan/DistinctStep.h>
2021-03-25 09:57:14 +00:00
#include <Processors/QueryPlan/ExpressionStep.h>
#include <Processors/QueryPlan/IQueryPlanStep.h>
#include <Processors/QueryPlan/LimitStep.h>
#include <Processors/QueryPlan/OffsetStep.h>
2021-03-04 17:38:12 +00:00
#include <Processors/QueryPlan/Optimizations/QueryPlanOptimizationSettings.h>
2023-03-17 16:27:24 +00:00
#include <Processors/QueryPlan/QueryPlan.h>
#include <Processors/QueryPlan/UnionStep.h>
2022-05-20 19:49:31 +00:00
#include <QueryPipeline/QueryPipelineBuilder.h>
2020-11-01 14:57:41 +00:00
#include <Common/typeid_cast.h>
2019-03-26 18:28:37 +00:00
2020-11-09 15:44:11 +00:00
#include <Interpreters/InDepthNodeVisitor.h>
#include <algorithm>
2022-08-13 13:03:16 +00:00
2018-02-25 00:50:53 +00:00
namespace DB
{
/// Error codes thrown from this translation unit.
/// They are only declared here (`extern`); the definitions are provided elsewhere.
namespace ErrorCodes
{
    /// Thrown on broken internal invariants (e.g. an AST of unexpected type or shape).
    extern const int LOGICAL_ERROR;
    /// Thrown when the SELECTs combined by a UNION return a different number of columns.
    extern const int UNION_ALL_RESULT_STRUCTURES_MISMATCH;
}
/// Convenience overload for callers that hold a read-only context.
/// Makes a mutable copy of `context_` via Context::createCopy and delegates
/// to the ContextMutablePtr overload, which does the actual work.
InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
    const ASTPtr & query_ptr_,
    ContextPtr context_,
    const SelectQueryOptions & options_,
    const Names & required_result_column_names)
    : InterpreterSelectWithUnionQuery(
        query_ptr_,
        Context::createCopy(context_),
        options_,
        required_result_column_names)
{
}
/// Builds one nested interpreter per child of the UNION's `list_of_selects` and deduces the result header.
///
/// @param query_ptr_  AST of the query; must be an ASTSelectWithUnionQuery, otherwise LOGICAL_ERROR is thrown.
/// @param context_    Mutable context shared with the nested interpreters.
/// @param options_    Select options forwarded to the base class and to the nested interpreters.
/// @param required_result_column_names  If non-empty, the columns the caller needs. They are passed as-is to
///        the first SELECT and translated by position for the other SELECTs.
///
/// Throws LOGICAL_ERROR if the AST has an unexpected type, has no children, or a nested interpreter returns
/// columns other than the requested ones; throws UNION_ALL_RESULT_STRUCTURES_MISMATCH if the children
/// return a different number of columns.
InterpreterSelectWithUnionQuery::InterpreterSelectWithUnionQuery(
    const ASTPtr & query_ptr_, ContextMutablePtr context_, const SelectQueryOptions & options_, const Names & required_result_column_names)
    : IInterpreterUnionOrSelectQuery(query_ptr_, context_, options_)
{
    ASTSelectWithUnionQuery * ast = query_ptr->as<ASTSelectWithUnionQuery>();

    /// `as<T>()` returns nullptr on a type mismatch; fail explicitly instead of dereferencing a null pointer below.
    if (!ast)
        throw Exception(ErrorCodes::LOGICAL_ERROR,
            "InterpreterSelectWithUnionQuery expects ASTSelectWithUnionQuery, got {}", query_ptr->getID());

    bool require_full_header = ast->hasNonDefaultUnionMode();

    /// The `limit` / `offset` settings are only taken into account for the outermost query.
    const Settings & settings = context->getSettingsRef();
    if (options.subquery_depth == 0 && (settings.limit > 0 || settings.offset > 0))
        settings_limit_offset_needed = true;

    size_t num_children = ast->list_of_selects->children.size();
    if (!num_children)
        throw Exception(ErrorCodes::LOGICAL_ERROR, "No children in ASTSelectWithUnionQuery");

    /// Note that we pass 'required_result_column_names' to first SELECT.
    /// And for the rest, we pass names at the corresponding positions of 'required_result_column_names' in the result of first SELECT,
    ///  because names could be different.

    nested_interpreters.reserve(num_children);
    std::vector<Names> required_result_column_names_for_other_selects(num_children);

    if (!require_full_header && !required_result_column_names.empty() && num_children > 1)
    {
        /// Result header if there is no filtering by 'required_result_column_names'.
        /// We use it to determine positions of 'required_result_column_names' in SELECT clause.
        Block full_result_header = getCurrentChildResultHeader(ast->list_of_selects->children.at(0), required_result_column_names);

        std::vector<size_t> positions_of_required_result_columns(required_result_column_names.size());

        for (size_t required_result_num = 0, size = required_result_column_names.size(); required_result_num < size; ++required_result_num)
            positions_of_required_result_columns[required_result_num] = full_result_header.getPositionByName(required_result_column_names[required_result_num]);

        for (size_t query_num = 1; query_num < num_children; ++query_num)
        {
            Block full_result_header_for_current_select
                = getCurrentChildResultHeader(ast->list_of_selects->children.at(query_num), required_result_column_names);

            if (full_result_header_for_current_select.columns() != full_result_header.columns())
                throw Exception(ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH,
                                "Different number of columns in UNION ALL elements:\n{}\nand\n{}\n",
                                full_result_header.dumpNames(), full_result_header_for_current_select.dumpNames());

            /// Translate the required names of the first SELECT into the names this SELECT has at the same positions.
            required_result_column_names_for_other_selects[query_num].reserve(required_result_column_names.size());
            for (const auto & pos : positions_of_required_result_columns)
                required_result_column_names_for_other_selects[query_num].push_back(full_result_header_for_current_select.getByPosition(pos).name);
        }
    }

    /// With a single child, fold the `limit` / `offset` settings into the child's own LIMIT / OFFSET
    /// by rewriting its AST, so that no separate step has to be added in buildQueryPlan.
    if (num_children == 1 && settings_limit_offset_needed && !options.settings_limit_offset_done)
    {
        const ASTPtr first_select_ast = ast->list_of_selects->children.at(0);
        ASTSelectQuery * select_query = dynamic_cast<ASTSelectQuery *>(first_select_ast.get());
        if (!select_query)
            throw Exception(ErrorCodes::LOGICAL_ERROR, "Invalid type in list_of_selects: {}", first_select_ast->getID());

        /// Queries using WITH FILL or LIMIT ... WITH TIES are left untouched here.
        if (!select_query->withFill() && !select_query->limit_with_ties)
        {
            UInt64 limit_length = 0;
            UInt64 limit_offset = 0;

            const ASTPtr limit_offset_ast = select_query->limitOffset();
            if (limit_offset_ast)
            {
                /// The query has its own OFFSET: the settings offset is added on top of it.
                limit_offset = evaluateConstantExpressionAsLiteral(limit_offset_ast, context)->as<ASTLiteral &>().value.safeGet<UInt64>();
                UInt64 new_limit_offset = settings.offset + limit_offset;
                ASTPtr new_limit_offset_ast = std::make_shared<ASTLiteral>(new_limit_offset);
                select_query->setExpression(ASTSelectQuery::Expression::LIMIT_OFFSET, std::move(new_limit_offset_ast));
            }
            else if (settings.offset)
            {
                ASTPtr new_limit_offset_ast = std::make_shared<ASTLiteral>(settings.offset.value);
                select_query->setExpression(ASTSelectQuery::Expression::LIMIT_OFFSET, std::move(new_limit_offset_ast));
            }

            const ASTPtr limit_length_ast = select_query->limitLength();
            if (limit_length_ast)
            {
                /// The query has its own LIMIT: shrink it by the settings offset and cap it by the settings limit.
                /// If the settings offset is not smaller than the query LIMIT, the new limit stays 0.
                limit_length = evaluateConstantExpressionAsLiteral(limit_length_ast, context)->as<ASTLiteral &>().value.safeGet<UInt64>();

                UInt64 new_limit_length = 0;
                if (settings.offset == 0)
                    new_limit_length = std::min(limit_length, settings.limit.value);
                else if (settings.offset < limit_length)
                    new_limit_length = settings.limit ? std::min(settings.limit.value, limit_length - settings.offset.value)
                                                      : (limit_length - settings.offset.value);

                ASTPtr new_limit_length_ast = std::make_shared<ASTLiteral>(new_limit_length);
                select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(new_limit_length_ast));
            }
            else if (settings.limit)
            {
                ASTPtr new_limit_length_ast = std::make_shared<ASTLiteral>(settings.limit.value);
                select_query->setExpression(ASTSelectQuery::Expression::LIMIT_LENGTH, std::move(new_limit_length_ast));
            }

            /// Remember that the settings were applied, so that buildQueryPlan does not apply them again.
            options.settings_limit_offset_done = true;
        }
    }

    for (size_t query_num = 0; query_num < num_children; ++query_num)
    {
        const Names & current_required_result_column_names
            = query_num == 0 ? required_result_column_names : required_result_column_names_for_other_selects[query_num];

        nested_interpreters.emplace_back(
            buildCurrentChildInterpreter(ast->list_of_selects->children.at(query_num), require_full_header ? Names() : current_required_result_column_names));

        // We need to propagate the uses_view_source flag from children to the (self) parent since, if one of the children uses
        // a view source that means that the parent uses it too and can be cached globally
        uses_view_source |= nested_interpreters.back()->usesViewSource();
    }

    /// Determine structure of the result.

    if (num_children == 1)
    {
        result_header = nested_interpreters.front()->getSampleBlock();
    }
    else
    {
        Blocks headers(num_children);
        for (size_t query_num = 0; query_num < num_children; ++query_num)
        {
            headers[query_num] = nested_interpreters[query_num]->getSampleBlock();

            /// Here we check that, in case if required_result_column_names were specified,
            /// nested interpreter returns exactly it. Except if query requires full header.
            /// The code above is written in a way that for 0th query required_result_column_names_for_other_selects[0]
            /// is an empty list, and we should use required_result_column_names instead.
            const auto & current_required_result_column_names = (query_num == 0 && !require_full_header)
                ? required_result_column_names
                : required_result_column_names_for_other_selects[query_num];

            if (!current_required_result_column_names.empty())
            {
                const auto & header_columns = headers[query_num].getNames();
                if (current_required_result_column_names != header_columns)
                {
                    throw Exception(ErrorCodes::LOGICAL_ERROR,
                        "Different order of columns in UNION subquery: {} and {}",
                        fmt::join(current_required_result_column_names, ", "),
                        fmt::join(header_columns, ", "));
                }
            }
        }

        result_header = getCommonHeaderForUnion(headers);
    }

    /// InterpreterSelectWithUnionQuery ignores limits if all nested interpreters ignore limits.
    bool all_nested_ignore_limits = true;
    bool all_nested_ignore_quota = true;
    for (auto & interpreter : nested_interpreters)
    {
        if (!interpreter->ignoreLimits())
            all_nested_ignore_limits = false;
        if (!interpreter->ignoreQuota())
            all_nested_ignore_quota = false;
    }
    options.ignore_limits |= all_nested_ignore_limits;
    options.ignore_quota |= all_nested_ignore_quota;
}
2018-02-26 06:12:59 +00:00
2019-04-09 13:07:07 +00:00
/// Deduces the header of a UNION from the headers of its SELECTs.
///
/// Starts from a copy of the first header and replaces every column, position by position,
/// with the result of getLeastSuperColumn over the columns at that position in all headers.
///
/// @param headers  One header per SELECT; must not be empty.
/// @return The common header.
/// Throws LOGICAL_ERROR if `headers` is empty, and UNION_ALL_RESULT_STRUCTURES_MISMATCH
/// if the headers differ in the number of columns.
Block InterpreterSelectWithUnionQuery::getCommonHeaderForUnion(const Blocks & headers)
{
    /// `headers.front()` on an empty vector is undefined behaviour, so reject empty input explicitly.
    if (headers.empty())
        throw Exception(ErrorCodes::LOGICAL_ERROR, "Cannot deduce common header for UNION: the list of headers is empty");

    size_t num_selects = headers.size();
    Block common_header = headers.front();
    size_t num_columns = common_header.columns();

    for (size_t query_num = 1; query_num < num_selects; ++query_num)
    {
        if (headers[query_num].columns() != num_columns)
            throw Exception(ErrorCodes::UNION_ALL_RESULT_STRUCTURES_MISMATCH,
                            "Different number of columns in UNION ALL elements:\n{}\nand\n{}\n",
                            common_header.dumpNames(), headers[query_num].dumpNames());
    }

    /// Scratch buffer, reused for every position: the column at that position in each header.
    std::vector<const ColumnWithTypeAndName *> columns(num_selects);

    for (size_t column_num = 0; column_num < num_columns; ++column_num)
    {
        for (size_t i = 0; i < num_selects; ++i)
            columns[i] = &headers[i].getByPosition(column_num);

        ColumnWithTypeAndName & result_elem = common_header.getByPosition(column_num);
        result_elem = getLeastSuperColumn(columns);
    }

    return common_header;
}
2020-11-01 13:54:07 +00:00
/// Returns the sample block (header) of one child of the UNION.
///
/// A temporary interpreter matching the child's AST type is constructed with a copy of
/// `options` switched to analyze() and noModify(). Note that `required_result_column_names`
/// is forwarded only when the child is itself an ASTSelectWithUnionQuery.
Block InterpreterSelectWithUnionQuery::getCurrentChildResultHeader(const ASTPtr & ast_ptr_, const Names & required_result_column_names)
{
    const auto header_only_options = options.copy().analyze().noModify();

    if (ast_ptr_->as<ASTSelectWithUnionQuery>())
    {
        InterpreterSelectWithUnionQuery nested_union(ast_ptr_, context, header_only_options, required_result_column_names);
        return nested_union.getSampleBlock();
    }

    if (ast_ptr_->as<ASTSelectQuery>())
    {
        InterpreterSelectQuery plain_select(ast_ptr_, context, header_only_options);
        return plain_select.getSampleBlock();
    }

    /// Anything else is handled as an INTERSECT / EXCEPT query.
    InterpreterSelectIntersectExceptQuery intersect_or_except(ast_ptr_, context, header_only_options);
    return intersect_or_except.getSampleBlock();
}
2018-02-26 06:12:59 +00:00
2020-11-01 13:54:07 +00:00
std::unique_ptr<IInterpreterUnionOrSelectQuery>
InterpreterSelectWithUnionQuery::buildCurrentChildInterpreter(const ASTPtr & ast_ptr_, const Names & current_required_result_column_names)
2018-02-25 00:50:53 +00:00
{
2020-11-02 08:02:35 +00:00
if (ast_ptr_->as<ASTSelectWithUnionQuery>())
return std::make_unique<InterpreterSelectWithUnionQuery>(ast_ptr_, context, options, current_required_result_column_names);
2021-08-12 11:42:51 +00:00
else if (ast_ptr_->as<ASTSelectQuery>())
return std::make_unique<InterpreterSelectQuery>(ast_ptr_, context, options, current_required_result_column_names);
2021-08-12 11:42:51 +00:00
else
return std::make_unique<InterpreterSelectIntersectExceptQuery>(ast_ptr_, context, options);
2018-02-25 00:50:53 +00:00
}
2020-11-01 13:54:07 +00:00
/// Defaulted destructor, defined in this source file rather than inline.
/// NOTE(review): presumably kept out of line so that the types owned through `nested_interpreters`
/// only need to be complete here — confirm against the header.
InterpreterSelectWithUnionQuery::~InterpreterSelectWithUnionQuery() = default;
/// Returns the result header of `query_ptr_` without executing the query.
///
/// @param query_ptr_  AST of the SELECT ... UNION ... query.
/// @param context_    Context used for analysis. If it has a query context, the result is memoized
///                    in the context's sample block cache, keyed by the query text.
/// @param is_subquery                   If true, the query is analyzed with subquery() options.
/// @param is_create_parameterized_view  If true, the query is analyzed with createParameterizedView() options.
/// @return The sample block of the query.
Block InterpreterSelectWithUnionQuery::getSampleBlock(const ASTPtr & query_ptr_, ContextPtr context_, bool is_subquery, bool is_create_parameterized_view)
{
    /// Single place that builds the options and runs the analysis; used by both the cached and uncached paths.
    auto analyze_header = [&]
    {
        SelectQueryOptions options;
        if (is_subquery)
            options = options.subquery();
        if (is_create_parameterized_view)
            options = options.createParameterizedView();

        return InterpreterSelectWithUnionQuery(query_ptr_, context_, std::move(options.analyze())).getSampleBlock();
    };

    /// The cache is only used when the context has a query context.
    if (!context_->hasQueryContext())
        return analyze_header();

    auto & cache = context_->getSampleBlockCache();

    /// Using query string because query_ptr changes for every internal SELECT
    auto key = queryToString(query_ptr_);

    /// One lookup serves both the hit test and the fetch.
    if (auto it = cache.find(key); it != cache.end())
        return it->second;

    return cache[key] = analyze_header();
}
2018-02-25 00:50:53 +00:00
/// Fills `query_plan` with the plan of the whole UNION.
///
/// With a single child its plan is used directly. Otherwise every child gets its own plan,
/// converted by position to `result_header` where the structure differs, and the plans are
/// united by a UnionStep, followed by a DistinctStep for UNION DISTINCT. If the `limit` /
/// `offset` settings are needed and were not already applied, a LimitStep or OffsetStep is appended.
void InterpreterSelectWithUnionQuery::buildQueryPlan(QueryPlan & query_plan)
{
    const Settings & settings = context->getSettingsRef();
    const size_t child_count = nested_interpreters.size();

    /// Append the limits of this level and hand the accumulated list to every child.
    storage_limits.emplace_back(getStorageLimits(*context, options));
    for (auto & child : nested_interpreters)
        child->addStorageLimits(storage_limits);

    if (child_count == 1)
    {
        /// Skip union for single interpreter.
        nested_interpreters.front()->buildQueryPlan(query_plan);
    }
    else
    {
        std::vector<std::unique_ptr<QueryPlan>> child_plans;
        child_plans.reserve(child_count);

        DataStreams child_streams;
        child_streams.reserve(child_count);

        for (auto & child : nested_interpreters)
        {
            QueryPlan & child_plan = *child_plans.emplace_back(std::make_unique<QueryPlan>());
            child->buildQueryPlan(child_plan);

            /// Bring the child's output to the common header, matching columns by position.
            if (!blocksHaveEqualStructure(child_plan.getCurrentDataStream().header, result_header))
            {
                auto converting_dag = ActionsDAG::makeConvertingActions(
                    child_plan.getCurrentDataStream().header.getColumnsWithTypeAndName(),
                    result_header.getColumnsWithTypeAndName(),
                    ActionsDAG::MatchColumnsMode::Position);

                auto conversion = std::make_unique<ExpressionStep>(child_plan.getCurrentDataStream(), std::move(converting_dag));
                conversion->setStepDescription("Conversion before UNION");
                child_plan.addStep(std::move(conversion));
            }

            child_streams.push_back(child_plan.getCurrentDataStream());
        }

        auto union_step = std::make_unique<UnionStep>(std::move(child_streams), settings.max_threads);
        query_plan.unitePlans(std::move(union_step), std::move(child_plans));

        const auto & union_query = query_ptr->as<ASTSelectWithUnionQuery &>();
        if (union_query.union_mode == SelectUnionMode::UNION_DISTINCT)
        {
            /// Add distinct transform
            SizeLimits distinct_limits(settings.max_rows_in_distinct, settings.max_bytes_in_distinct, settings.distinct_overflow_mode);

            query_plan.addStep(std::make_unique<DistinctStep>(
                query_plan.getCurrentDataStream(),
                distinct_limits,
                0,
                result_header.getNames(),
                false,
                settings.optimize_distinct_in_order));
        }
    }

    /// Apply the `limit` / `offset` settings unless they were already applied elsewhere.
    const bool apply_settings_limit_offset = settings_limit_offset_needed && !options.settings_limit_offset_done;
    if (apply_settings_limit_offset)
    {
        if (settings.limit > 0)
        {
            auto limit_step = std::make_unique<LimitStep>(
                query_plan.getCurrentDataStream(), settings.limit, settings.offset, settings.exact_rows_before_limit);
            limit_step->setStepDescription("LIMIT OFFSET for SETTINGS");
            query_plan.addStep(std::move(limit_step));
        }
        else
        {
            auto offset_step = std::make_unique<OffsetStep>(query_plan.getCurrentDataStream(), settings.offset);
            offset_step->setStepDescription("OFFSET for SETTINGS");
            query_plan.addStep(std::move(offset_step));
        }
    }

    addAdditionalPostFilter(query_plan);
    query_plan.addInterpreterContext(context);
}
/// Builds the query plan, turns it into a pipeline with optimization and build settings
/// taken from the context, applies the quota to the pipeline and returns it inside a BlockIO.
BlockIO InterpreterSelectWithUnionQuery::execute()
{
    QueryPlan plan;
    buildQueryPlan(plan);

    const auto optimization_settings = QueryPlanOptimizationSettings::fromContext(context);
    const auto pipeline_settings = BuildQueryPipelineSettings::fromContext(context);
    auto pipeline_builder = plan.buildQueryPipeline(optimization_settings, pipeline_settings);

    BlockIO result;
    result.pipeline = QueryPipelineBuilder::getPipeline(std::move(*pipeline_builder));
    setQuota(result.pipeline);
    return result;
}
2018-02-25 06:34:20 +00:00
void InterpreterSelectWithUnionQuery::ignoreWithTotals()
{
2020-11-01 13:54:07 +00:00
for (auto & interpreter : nested_interpreters)
interpreter->ignoreWithTotals();
2018-02-25 06:34:20 +00:00
}
2023-02-01 02:11:54 +00:00
/// Adds to `elem.used_row_policies` the full names of the row policies used by the children.
///
/// Only direct children that are InterpreterSelectQuery and have a row policy filter contribute;
/// children of any other interpreter type are skipped. The AST and context arguments are unused.
void InterpreterSelectWithUnionQuery::extendQueryLogElemImpl(QueryLogElement & elem, const ASTPtr & /*ast*/, ContextPtr /*context_*/) const
{
    for (const auto & child : nested_interpreters)
    {
        const auto * plain_select = dynamic_cast<const InterpreterSelectQuery *>(child.get());
        if (!plain_select)
            continue;

        auto policy_filter = plain_select->getRowPolicyFilter();
        if (!policy_filter)
            continue;

        for (const auto & policy : policy_filter->policies)
            elem.used_row_policies.emplace(policy->getFullName().toString());
    }
}
/// Registers this interpreter in `factory` under the name "InterpreterSelectWithUnionQuery".
/// The registered creator builds the interpreter from the query, context and options of the factory arguments.
void registerInterpreterSelectWithUnionQuery(InterpreterFactory & factory)
{
    auto make_union_interpreter = [](const InterpreterFactory::Arguments & creation_args)
    {
        return std::make_unique<InterpreterSelectWithUnionQuery>(
            creation_args.query,
            creation_args.context,
            creation_args.options);
    };

    factory.registerInterpreter("InterpreterSelectWithUnionQuery", make_union_interpreter);
}
2018-02-25 00:50:53 +00:00
}