Fix ORDER BY that matches projections ORDER BY

In case a projection is selected and it is complete (all parts have such
a projection) and ORDER BY matches the projection's ORDER BY, and
optimize_read_in_order=1 (default), then the sorting is simply not done,
because projections have a separate plan, and so
InterpreterSelectQuery::executeOrder() -> SortingStep has
pipeline.getNumStreams() == 0 and it cannot do sorting.

Fix this by adding a sorting step when creating the plan for reading from
projections.

Signed-off-by: Azat Khuzhin <a.khuzhin@semrush.com>
This commit is contained in:
Azat Khuzhin 2022-07-01 16:36:20 +03:00
parent 04944ff6f5
commit 22d8e532ed
11 changed files with 154 additions and 12 deletions

View File

@ -748,13 +748,20 @@ BlockIO InterpreterSelectQuery::execute()
Block InterpreterSelectQuery::getSampleBlockImpl()
{
auto & select_query = getSelectQuery();
query_info.query = query_ptr;
/// NOTE: this is required for getQueryProcessingStage(), so should be initialized before ExpressionAnalysisResult.
query_info.has_window = query_analyzer->hasWindow();
/// NOTE: this is required only for IStorage::read(), and to be precise MergeTreeData::read(), in case of projections.
query_info.has_order_by = select_query.orderBy() != nullptr;
query_info.need_aggregate = query_analyzer->hasAggregation();
if (storage && !options.only_analyze)
{
auto & query = getSelectQuery();
query_analyzer->makeSetsForIndex(query.where());
query_analyzer->makeSetsForIndex(query.prewhere());
query_analyzer->makeSetsForIndex(select_query.where());
query_analyzer->makeSetsForIndex(select_query.prewhere());
query_info.sets = std::move(query_analyzer->getPreparedSets());
query_info.subquery_for_sets = std::move(query_analyzer->getSubqueriesForSets());

View File

@ -8,13 +8,16 @@
#include <Storages/MergeTree/MergeTreeIndexReader.h>
#include <Storages/MergeTree/KeyCondition.h>
#include <Storages/MergeTree/MergeTreeDataPartUUID.h>
#include <Storages/MergeTree/StorageFromMergeTreeDataPart.h>
#include <Storages/ReadInOrderOptimizer.h>
#include <Storages/VirtualColumnUtils.h>
#include <Parsers/ASTIdentifier.h>
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSampleRatio.h>
#include <Parsers/parseIdentifierOrStringLiteral.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Interpreters/Context.h>
#include <Processors/ConcatProcessor.h>
#include <Processors/QueryPlan/QueryPlan.h>
@ -26,7 +29,9 @@
#include <Processors/QueryPlan/UnionStep.h>
#include <Processors/QueryPlan/QueryIdHolder.h>
#include <Processors/QueryPlan/AggregatingStep.h>
#include <Processors/QueryPlan/SortingStep.h>
#include <Processors/Sources/SourceFromSingleChunk.h>
#include <Processors/Transforms/AggregatingTransform.h>
#include <Core/UUID.h>
#include <DataTypes/DataTypeDate.h>
@ -35,12 +40,7 @@
#include <DataTypes/DataTypeTuple.h>
#include <DataTypes/DataTypesNumber.h>
#include <DataTypes/DataTypeArray.h>
#include <Storages/VirtualColumnUtils.h>
#include <Interpreters/InterpreterSelectQuery.h>
#include <Processors/Transforms/AggregatingTransform.h>
#include <Storages/MergeTree/StorageFromMergeTreeDataPart.h>
#include <IO/WriteBufferFromOStream.h>
namespace DB
@ -184,6 +184,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read(
query_info.projection->desc->type,
query_info.projection->desc->name);
const ASTSelectQuery & select_query = query_info.query->as<ASTSelectQuery &>();
QueryPlanResourceHolder resources;
auto projection_plan = std::make_unique<QueryPlan>();
@ -230,6 +231,25 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read(
expression_before_aggregation->setStepDescription("Before GROUP BY");
projection_plan->addStep(std::move(expression_before_aggregation));
}
/// NOTE: input_order_info (for projection and not) is set only if projection is complete
if (query_info.has_order_by && !query_info.need_aggregate && query_info.projection->input_order_info)
{
chassert(query_info.projection->complete);
SortDescription output_order_descr = InterpreterSelectQuery::getSortDescription(select_query, context);
UInt64 limit = InterpreterSelectQuery::getLimitForSorting(select_query, context);
auto sorting_step = std::make_unique<SortingStep>(
projection_plan->getCurrentDataStream(),
query_info.projection->input_order_info->order_key_prefix_descr,
output_order_descr,
settings.max_block_size,
limit);
sorting_step->setStepDescription("ORDER BY for projections");
projection_plan->addStep(std::move(sorting_step));
}
}
auto ordinary_query_plan = std::make_unique<QueryPlan>();
@ -365,7 +385,7 @@ QueryPlanPtr MergeTreeDataSelectExecutor::read(
InputOrderInfoPtr group_by_info = query_info.projection->input_order_info;
SortDescription group_by_sort_description;
if (group_by_info && settings.optimize_aggregation_in_order)
group_by_sort_description = getSortDescriptionFromGroupBy(query_info.query->as<ASTSelectQuery &>());
group_by_sort_description = getSortDescriptionFromGroupBy(select_query);
else
group_by_info = nullptr;

View File

@ -154,8 +154,6 @@ struct SelectQueryInfoBase
TreeRewriterResultPtr syntax_analyzer_result;
PrewhereInfoPtr prewhere_info;
/// This is an additional filter applied to current table.
/// It is needed only for additional PK filtering.
ASTPtr additional_filter_ast;
@ -168,8 +166,11 @@ struct SelectQueryInfoBase
/// Example: x IN (1, 2, 3)
PreparedSets sets;
/// Cached value of ExpressionAnalysisResult::has_window
/// Cached value of ExpressionAnalysisResult
bool has_window = false;
bool has_order_by = false;
bool need_aggregate = false;
PrewhereInfoPtr prewhere_info;
ClusterPtr getCluster() const { return !optimized_cluster ? cluster : optimized_cluster; }

View File

@ -0,0 +1,13 @@
-- { echoOn }
select t from data_order_by_proj_comp where t > 0 order by t settings optimize_read_in_order=1;
5
5
6
select t from data_order_by_proj_comp where t > 0 order by t settings optimize_read_in_order=0;
5
5
6
select t from data_order_by_proj_comp where t > 0 order by t settings max_threads=1;
5
5
6

View File

@ -0,0 +1,14 @@
-- Checks that `ORDER BY t` returns sorted rows when the table has a projection
-- that is also ordered by t.
-- Variant: the table's own sorting key is t, and the projection tSort is declared
-- in CREATE TABLE, before any data is inserted.
drop table if exists data_order_by_proj_comp;
create table data_order_by_proj_comp (t UInt64, projection tSort (select * order by t)) ENGINE MergeTree() order by t;
-- Merges are stopped before inserting; presumably so that each of the three
-- single-row inserts below remains its own part (TODO confirm).
system stop merges data_order_by_proj_comp;
insert into data_order_by_proj_comp values (5);
insert into data_order_by_proj_comp values (5);
insert into data_order_by_proj_comp values (6);
-- The same query is run three times: with optimize_read_in_order=1, with
-- optimize_read_in_order=0, and with max_threads=1.
-- { echoOn }
select t from data_order_by_proj_comp where t > 0 order by t settings optimize_read_in_order=1;
select t from data_order_by_proj_comp where t > 0 order by t settings optimize_read_in_order=0;
select t from data_order_by_proj_comp where t > 0 order by t settings max_threads=1;
-- { echoOff }

View File

@ -0,0 +1,13 @@
-- { echoOn }
select t from data_order_by_proj_incomp where t > 0 order by t settings optimize_read_in_order=1;
5
5
6
select t from data_order_by_proj_incomp where t > 0 order by t settings optimize_read_in_order=0;
5
5
6
select t from data_order_by_proj_incomp where t > 0 order by t settings max_threads=1;
5
5
6

View File

@ -0,0 +1,16 @@
-- Checks that `ORDER BY t` returns sorted rows when a projection ordered by t
-- is added to the table only after some data has already been inserted.
-- Variant: the table's own sorting key is t.
drop table if exists data_order_by_proj_incomp;
create table data_order_by_proj_incomp (t UInt64) ENGINE MergeTree() order by t;
-- Merges are stopped before inserting; presumably so that each single-row
-- insert below remains its own part (TODO confirm).
system stop merges data_order_by_proj_incomp;
insert into data_order_by_proj_incomp values (5);
insert into data_order_by_proj_incomp values (5);
-- The projection is added after two inserts and before the third one.
-- NOTE(review): presumably the two earlier parts are left without the
-- projection, which would make it "incomplete" -- confirm.
alter table data_order_by_proj_incomp add projection tSort (select * order by t);
insert into data_order_by_proj_incomp values (6);
-- The same query is run three times: with optimize_read_in_order=1, with
-- optimize_read_in_order=0, and with max_threads=1.
-- { echoOn }
select t from data_order_by_proj_incomp where t > 0 order by t settings optimize_read_in_order=1;
select t from data_order_by_proj_incomp where t > 0 order by t settings optimize_read_in_order=0;
select t from data_order_by_proj_incomp where t > 0 order by t settings max_threads=1;
-- { echoOff }

View File

@ -0,0 +1,13 @@
-- { echoOn }
select t from data_proj_order_by_comp where t > 0 order by t settings optimize_read_in_order=1;
5
5
6
select t from data_proj_order_by_comp where t > 0 order by t settings optimize_read_in_order=0;
5
5
6
select t from data_proj_order_by_comp where t > 0 order by t settings max_threads=1;
5
5
6

View File

@ -0,0 +1,16 @@
-- Test from https://github.com/ClickHouse/ClickHouse/issues/37673
-- Checks that `ORDER BY t` returns sorted rows when the table itself has no
-- sorting key (order by tuple()) and only the projection tSort is ordered by t.
-- Variant: the projection is declared in CREATE TABLE, before any data is inserted.
drop table if exists data_proj_order_by_comp;
create table data_proj_order_by_comp (t UInt64, projection tSort (select * order by t)) ENGINE MergeTree() order by tuple();
-- Merges are stopped before inserting; presumably so that each of the three
-- single-row inserts below remains its own part (TODO confirm).
system stop merges data_proj_order_by_comp;
insert into data_proj_order_by_comp values (5);
insert into data_proj_order_by_comp values (5);
insert into data_proj_order_by_comp values (6);
-- The same query is run three times: with optimize_read_in_order=1, with
-- optimize_read_in_order=0, and with max_threads=1.
-- { echoOn }
select t from data_proj_order_by_comp where t > 0 order by t settings optimize_read_in_order=1;
select t from data_proj_order_by_comp where t > 0 order by t settings optimize_read_in_order=0;
select t from data_proj_order_by_comp where t > 0 order by t settings max_threads=1;
-- { echoOff }

View File

@ -0,0 +1,13 @@
-- { echoOn }
select t from data_proj_order_by_incomp where t > 0 order by t settings optimize_read_in_order=1;
5
5
6
select t from data_proj_order_by_incomp where t > 0 order by t settings optimize_read_in_order=0;
5
5
6
select t from data_proj_order_by_incomp where t > 0 order by t settings max_threads=1;
5
5
6

View File

@ -0,0 +1,16 @@
-- Checks that `ORDER BY t` returns sorted rows when the table itself has no
-- sorting key (order by tuple()) and a projection ordered by t is added only
-- after some data has already been inserted.
drop table if exists data_proj_order_by_incomp;
create table data_proj_order_by_incomp (t UInt64) ENGINE MergeTree() order by tuple();
-- Merges are stopped before inserting; presumably so that each single-row
-- insert below remains its own part (TODO confirm).
system stop merges data_proj_order_by_incomp;
insert into data_proj_order_by_incomp values (5);
insert into data_proj_order_by_incomp values (5);
-- The projection is added after two inserts and before the third one.
-- NOTE(review): presumably the two earlier parts are left without the
-- projection, which would make it "incomplete" -- confirm.
alter table data_proj_order_by_incomp add projection tSort (select * order by t);
insert into data_proj_order_by_incomp values (6);
-- The same query is run three times: with optimize_read_in_order=1, with
-- optimize_read_in_order=0, and with max_threads=1.
-- { echoOn }
select t from data_proj_order_by_incomp where t > 0 order by t settings optimize_read_in_order=1;
select t from data_proj_order_by_incomp where t > 0 order by t settings optimize_read_in_order=0;
select t from data_proj_order_by_incomp where t > 0 order by t settings max_threads=1;
-- { echoOff }