Merge pull request #41014 from ClickHouse/distinct_in_order_wo_order_by

Reading in order for DISTINCT without ORDER BY
This commit is contained in:
Igor Nikonov 2022-09-24 18:49:52 +02:00 committed by GitHub
commit c721c6dc0c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 260 additions and 36 deletions

View File

@ -24,6 +24,10 @@ public:
void describeActions(JSONBuilder::JSONMap & map) const override;
void describeActions(FormatSettings & settings) const override;
bool isPreliminary() const { return pre_distinct; }
UInt64 getLimitHint() const { return limit_hint; }
private:
void updateOutputStream() override;

View File

@ -54,16 +54,20 @@ size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan:
/// Update information about prefix sort description in SortingStep.
size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes);
/// Reading in order from MergeTree table if DISTINCT columns match or form a prefix of MergeTree sorting key
size_t tryDistinctReadInOrder(QueryPlan::Node * node, QueryPlan::Nodes & nodes);
inline const auto & getOptimizations()
{
static const std::array<Optimization, 7> optimizations = {{
static const std::array<Optimization, 8> optimizations = {{
{tryLiftUpArrayJoin, "liftUpArrayJoin", &QueryPlanOptimizationSettings::optimize_plan},
{tryPushDownLimit, "pushDownLimit", &QueryPlanOptimizationSettings::optimize_plan},
{trySplitFilter, "splitFilter", &QueryPlanOptimizationSettings::optimize_plan},
{tryMergeExpressions, "mergeExpressions", &QueryPlanOptimizationSettings::optimize_plan},
{tryPushDownFilter, "pushDownFilter", &QueryPlanOptimizationSettings::filter_push_down},
{tryExecuteFunctionsAfterSorting, "liftUpFunctions", &QueryPlanOptimizationSettings::optimize_plan},
{tryReuseStorageOrderingForWindowFunctions, "reuseStorageOrderingForWindowFunctions", &QueryPlanOptimizationSettings::optimize_plan}
{tryReuseStorageOrderingForWindowFunctions, "reuseStorageOrderingForWindowFunctions", &QueryPlanOptimizationSettings::optimize_plan},
{tryDistinctReadInOrder, "distinctReadInOrder", &QueryPlanOptimizationSettings::distinct_in_order},
}};
return optimizations;

View File

@ -11,6 +11,7 @@ QueryPlanOptimizationSettings QueryPlanOptimizationSettings::fromSettings(const
settings.optimize_plan = from.query_plan_enable_optimizations;
settings.max_optimizations_to_apply = from.query_plan_max_optimizations_to_apply;
settings.filter_push_down = from.query_plan_filter_push_down;
settings.distinct_in_order = from.optimize_distinct_in_order;
return settings;
}

View File

@ -21,6 +21,9 @@ struct QueryPlanOptimizationSettings
/// If filter push down optimization is enabled.
bool filter_push_down = true;
/// if distinct in order optimization is enabled
bool distinct_in_order = false;
static QueryPlanOptimizationSettings fromSettings(const Settings & from);
static QueryPlanOptimizationSettings fromContext(ContextPtr from);
};

View File

@ -0,0 +1,97 @@
#include <memory>
#include <Processors/QueryPlan/DistinctStep.h>
#include <Processors/QueryPlan/ITransformingStep.h>
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
#include <Processors/QueryPlan/ReadFromMergeTree.h>
#include <Common/typeid_cast.h>
namespace DB::QueryPlanOptimizations
{
size_t tryDistinctReadInOrder(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
{
/// check if it is preliminary distinct node
DistinctStep * pre_distinct = nullptr;
if (auto * distinct = typeid_cast<DistinctStep *>(parent_node->step.get()); distinct)
{
if (distinct->isPreliminary())
pre_distinct = distinct;
}
if (!pre_distinct)
return 0;
/// walk through the plan
/// (1) check if nodes below preliminary distinct preserve sorting
/// (2) gather transforming steps to update their sorting properties later
std::vector<ITransformingStep *> steps2update;
QueryPlan::Node * node = parent_node;
while (!node->children.empty())
{
auto * step = dynamic_cast<ITransformingStep *>(node->step.get());
if (!step)
return 0;
const ITransformingStep::DataStreamTraits & traits = step->getDataStreamTraits();
if (!traits.preserves_sorting)
return 0;
steps2update.push_back(step);
node = node->children.front();
}
/// check if we read from MergeTree
auto * read_from_merge_tree = typeid_cast<ReadFromMergeTree *>(node->step.get());
if (!read_from_merge_tree)
return 0;
/// find non-const columns in DISTINCT
const ColumnsWithTypeAndName & distinct_columns = pre_distinct->getOutputStream().header.getColumnsWithTypeAndName();
std::set<std::string_view> non_const_columns;
for (const auto & column : distinct_columns)
{
if (!isColumnConst(*column.column))
non_const_columns.emplace(column.name);
}
const Names& sorting_key_columns = read_from_merge_tree->getStorageMetadata()->getSortingKeyColumns();
/// check if DISTINCT has the same columns as sorting key
size_t number_of_sorted_distinct_columns = 0;
for (const auto & column_name : sorting_key_columns)
{
if (non_const_columns.end() == non_const_columns.find(column_name))
break;
++number_of_sorted_distinct_columns;
}
/// apply optimization only when distinct columns match or form prefix of sorting key
/// todo: check if reading in order optimization would be beneficial when sorting key is prefix of columns in DISTINCT
if (number_of_sorted_distinct_columns != non_const_columns.size())
return 0;
/// check if another read in order optimization is already applied
/// apply optimization only if another read in order one uses less sorting columns
/// example: SELECT DISTINCT a, b FROM t ORDER BY a; -- sorting key: a, b
/// if read in order for ORDER BY is already applied, then output sort description will contain only column `a`
/// but we need columns `a, b`, applying read in order for distinct will still benefit `order by`
const DataStream & output_data_stream = read_from_merge_tree->getOutputStream();
const SortDescription & output_sort_desc = output_data_stream.sort_description;
if (output_data_stream.sort_scope != DataStream::SortScope::Chunk && number_of_sorted_distinct_columns <= output_sort_desc.size())
return 0;
/// update input order info in read_from_merge_tree step
const int direction = 0; /// for DISTINCT direction doesn't matter, ReadFromMergeTree will choose proper one
read_from_merge_tree->requestReadingInOrder(number_of_sorted_distinct_columns, direction, pre_distinct->getLimitHint());
/// update data stream's sorting properties for found transforms
const DataStream * input_stream = &read_from_merge_tree->getOutputStream();
while (!steps2update.empty())
{
steps2update.back()->updateInputStream(*input_stream);
input_stream = &steps2update.back()->getOutputStream();
steps2update.pop_back();
}
return 0;
}
}

View File

@ -91,8 +91,6 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node,
window->getWindowDescription().full_sort_description,
query_info.syntax_analyzer_result);
read_from_merge_tree->setQueryInfoOrderOptimizer(order_optimizer);
/// If we don't have filtration, we can pushdown limit to reading stage for optimizations.
UInt64 limit = (select_query->hasFiltration() || select_query->groupBy()) ? 0 : InterpreterSelectQuery::getLimitForSorting(*select_query, context);
@ -103,7 +101,7 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node,
if (order_info)
{
read_from_merge_tree->setQueryInfoInputOrderInfo(order_info);
read_from_merge_tree->requestReadingInOrder(order_info->used_prefix_of_sorting_key_size, order_info->direction, order_info->limit);
sorting->convertToFinishSorting(order_info->sort_description_for_merging);
}

View File

@ -153,7 +153,6 @@ ReadFromMergeTree::ReadFromMergeTree(
}
output_stream->sort_description = std::move(sort_description);
}
}
@ -1019,28 +1018,38 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
return std::make_shared<MergeTreeDataSelectAnalysisResult>(MergeTreeDataSelectAnalysisResult{.result = std::move(result)});
}
void ReadFromMergeTree::setQueryInfoOrderOptimizer(std::shared_ptr<ReadInOrderOptimizer> order_optimizer)
void ReadFromMergeTree::requestReadingInOrder(size_t prefix_size, int direction, size_t limit)
{
if (query_info.projection)
{
query_info.projection->order_optimizer = order_optimizer;
}
else
{
query_info.order_optimizer = order_optimizer;
}
}
/// if dirction is not set, use current one
if (!direction)
direction = getSortDirection();
void ReadFromMergeTree::setQueryInfoInputOrderInfo(InputOrderInfoPtr order_info)
{
auto order_info = std::make_shared<InputOrderInfo>(SortDescription{}, prefix_size, direction, limit);
if (query_info.projection)
{
query_info.projection->input_order_info = order_info;
}
else
{
query_info.input_order_info = order_info;
/// update sort info for output stream
SortDescription sort_description;
const Names & sorting_key_columns = storage_snapshot->getMetadataForQuery()->getSortingKeyColumns();
const Block & header = output_stream->header;
const int sort_direction = getSortDirection();
for (const auto & column_name : sorting_key_columns)
{
if (std::find_if(header.begin(), header.end(), [&](ColumnWithTypeAndName const & col) { return col.name == column_name; })
== header.end())
break;
sort_description.emplace_back(column_name, sort_direction);
}
if (sort_description.empty())
throw Exception(ErrorCodes::LOGICAL_ERROR, "Sort description can't be empty when reading in order");
const size_t used_prefix_of_sorting_key_size = order_info->used_prefix_of_sorting_key_size;
if (sort_description.size() > used_prefix_of_sorting_key_size)
sort_description.resize(used_prefix_of_sorting_key_size);
output_stream->sort_description = std::move(sort_description);
output_stream->sort_scope = DataStream::SortScope::Stream;
}
ReadFromMergeTree::AnalysisResult ReadFromMergeTree::getAnalysisResult() const

View File

@ -151,8 +151,7 @@ public:
const SelectQueryInfo & getQueryInfo() const { return query_info; }
StorageMetadataPtr getStorageMetadata() const { return metadata_for_reading; }
void setQueryInfoOrderOptimizer(std::shared_ptr<ReadInOrderOptimizer> read_in_order_optimizer);
void setQueryInfoInputOrderInfo(InputOrderInfoPtr order_info);
void requestReadingInOrder(size_t prefix_size, int direction, size_t limit);
private:
int getSortDirection() const

View File

@ -202,7 +202,7 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrderImpl(
const ContextPtr & context,
UInt64 limit) const
{
auto sorting_key_columns = metadata_snapshot->getSortingKeyColumns();
const Names & sorting_key_columns = metadata_snapshot->getSortingKeyColumns();
int read_direction = description.at(0).direction;
auto fixed_sorting_columns = getFixedSortingColumns(query, sorting_key_columns, context);

View File

@ -4,15 +4,10 @@
<fill_query>INSERT INTO distinct_cardinality_high SELECT number % 1e6, number % 1e4, number % 1e2 FROM numbers_mt(1e8)</fill_query>
<query>SELECT DISTINCT high FROM distinct_cardinality_high FORMAT Null</query>
<query>SELECT DISTINCT high, low FROM distinct_cardinality_high FORMAT Null</query>
<query>SELECT DISTINCT high, medium FROM distinct_cardinality_high FORMAT Null</query>
<query>SELECT DISTINCT high, medium, low FROM distinct_cardinality_high FORMAT Null</query>
<query>SELECT DISTINCT high, medium FROM distinct_cardinality_high ORDER BY high, medium FORMAT Null</query>
<query>SELECT DISTINCT high, medium FROM distinct_cardinality_high ORDER BY high FORMAT Null</query>
<query>SELECT DISTINCT high, medium FROM distinct_cardinality_high ORDER BY medium FORMAT Null</query>
<query>SELECT DISTINCT high, low FROM distinct_cardinality_high ORDER BY low FORMAT Null</query>
<query>SELECT DISTINCT high, medium, low FROM distinct_cardinality_high ORDER BY low FORMAT Null</query>
<drop_query>DROP TABLE IF EXISTS distinct_cardinality_high</drop_query>
@ -22,14 +17,9 @@
<query>SELECT DISTINCT low FROM distinct_cardinality_low FORMAT Null</query>
<query>SELECT DISTINCT low, medium FROM distinct_cardinality_low FORMAT Null</query>
<query>SELECT DISTINCT low, high FROM distinct_cardinality_low FORMAT Null</query>
<query>SELECT DISTINCT low, medium, high FROM distinct_cardinality_low FORMAT Null</query>
<query>SELECT DISTINCT low, medium FROM distinct_cardinality_low ORDER BY low, medium FORMAT Null</query>
<query>SELECT DISTINCT low, medium FROM distinct_cardinality_low ORDER BY low FORMAT Null</query>
<query>SELECT DISTINCT low, medium FROM distinct_cardinality_low ORDER BY medium FORMAT Null</query>
<query>SELECT DISTINCT low, high FROM distinct_cardinality_low ORDER BY high FORMAT Null</query>
<query>SELECT DISTINCT low, medium, high FROM distinct_cardinality_low ORDER BY high FORMAT Null</query>
<drop_query>DROP TABLE IF EXISTS distinct_cardinality_low</drop_query>
</test>

View File

@ -104,3 +104,9 @@ select distinct a, b, x, y from (select a, b, 1 as x, 2 as y from distinct_in_or
0 3 1 2
0 4 1 2
-- check that distinct in order returns the same result as ordinary distinct
-- check that distinct in order WITH order by returns the same result as ordinary distinct
0
-- check that distinct in order WITHOUT order by returns the same result as ordinary distinct
0
-- check that distinct in order WITHOUT order by and WITH filter returns the same result as ordinary distinct
0

View File

@ -65,11 +65,32 @@ INSERT INTO distinct_cardinality_low SELECT number % 1e1, number % 1e2, number %
drop table if exists distinct_in_order sync;
drop table if exists ordinary_distinct sync;
select '-- check that distinct in order WITH order by returns the same result as ordinary distinct';
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
insert into distinct_in_order select distinct * from distinct_cardinality_low order by high settings optimize_distinct_in_order=1;
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
insert into ordinary_distinct select distinct * from distinct_cardinality_low order by high settings optimize_distinct_in_order=0;
select distinct * from distinct_in_order except select * from ordinary_distinct;
select count() as diff from (select distinct * from distinct_in_order except select * from ordinary_distinct);
drop table if exists distinct_in_order sync;
drop table if exists ordinary_distinct sync;
select '-- check that distinct in order WITHOUT order by returns the same result as ordinary distinct';
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
insert into distinct_in_order select distinct * from distinct_cardinality_low settings optimize_distinct_in_order=1;
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
insert into ordinary_distinct select distinct * from distinct_cardinality_low settings optimize_distinct_in_order=0;
select count() as diff from (select distinct * from distinct_in_order except select * from ordinary_distinct);
drop table if exists distinct_in_order;
drop table if exists ordinary_distinct;
select '-- check that distinct in order WITHOUT order by and WITH filter returns the same result as ordinary distinct';
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
insert into distinct_in_order select distinct * from distinct_cardinality_low where low > 0 settings optimize_distinct_in_order=1;
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
insert into ordinary_distinct select distinct * from distinct_cardinality_low where low > 0 settings optimize_distinct_in_order=0;
select count() as diff from (select distinct * from distinct_in_order except select * from ordinary_distinct);
drop table if exists distinct_in_order;
drop table if exists ordinary_distinct;

View File

@ -15,6 +15,9 @@ DistinctSortedChunkTransform
-- distinct with primary key prefix and order by the same columns -> pre-distinct and final distinct optimization
DistinctSortedStreamTransform
DistinctSortedChunkTransform
-- distinct with primary key prefix and order by columns are prefix of distinct columns -> pre-distinct and final distinct optimization
DistinctSortedTransform
DistinctSortedChunkTransform
-- distinct with primary key prefix and order by column in distinct but non-primary key prefix -> pre-distinct and final distinct optimization
DistinctSortedTransform
DistinctSortedChunkTransform
@ -33,3 +36,48 @@ DistinctTransform
-- distinct with non-primary key prefix and order by _const_ column in distinct -> ordinary distinct
DistinctTransform
DistinctTransform
-- Check reading in order for distinct
-- disabled, distinct columns match sorting key
MergeTreeThread
-- enabled, distinct columns match sorting key
MergeTreeInOrder
-- enabled, distinct columns form prefix of sorting key
MergeTreeInOrder
-- enabled, distinct columns DON't form prefix of sorting key
MergeTreeThread
-- enabled, distinct columns contains constant columns, non-const columns form prefix of sorting key
MergeTreeInOrder
-- enabled, distinct columns contains constant columns, non-const columns match prefix of sorting key
MergeTreeInOrder
-- enabled, only part of distinct columns form prefix of sorting key
MergeTreeThread
-- enabled, check that sorting properties are propagated from ReadFromMergeTree till preliminary distinct
Sorting (Stream): a ASC, b ASC
Sorting (Stream): a ASC, b ASC
Sorting (Stream): a ASC, b ASC
Sorting (Stream): a ASC, b ASC
-- check that reading in order optimization for ORDER BY and DISTINCT applied correctly in the same query
-- disabled, check that sorting description for ReadFromMergeTree match ORDER BY columns
Sorting (Stream): a ASC
Sorting (Stream): a ASC
Sorting (Stream): a ASC
-- enabled, check that ReadFromMergeTree sorting description is overwritten by DISTINCT optimization i.e. it contains columns from DISTINCT clause
Sorting (Stream): a ASC, b ASC
Sorting (Stream): a ASC, b ASC
Sorting (Stream): a ASC, b ASC
-- enabled, check that ReadFromMergeTree sorting description is overwritten by DISTINCT optimization, but direction used from ORDER BY clause
Sorting (Stream): a DESC, b DESC
Sorting (Stream): a DESC, b DESC
Sorting (Stream): a DESC, b DESC
-- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (1), - it contains columns from ORDER BY clause
Sorting (Stream): a ASC, b ASC
Sorting (Stream): a ASC, b ASC
Sorting (Stream): a ASC, b ASC
-- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (2), - direction used from ORDER BY clause
Sorting (Stream): a DESC, b DESC
Sorting (Stream): a DESC, b DESC
Sorting (Stream): a DESC, b DESC
-- enabled, check that disabling other 'read in order' optimizations do not disable distinct in order optimization
Sorting (Stream): a ASC, b ASC
Sorting (Stream): a ASC, b ASC
Sorting (Stream): a ASC, b ASC

View File

@ -10,11 +10,16 @@ DISABLE_OPTIMIZATION="set optimize_distinct_in_order=0"
ENABLE_OPTIMIZATION="set optimize_distinct_in_order=1"
GREP_DISTINCT="grep 'DistinctSortedChunkTransform\|DistinctSortedStreamTransform\|DistinctSortedTransform\|DistinctTransform'"
TRIM_LEADING_SPACES="sed -e 's/^[ \t]*//'"
FIND_DISTINCT="$GREP_DISTINCT | $TRIM_LEADING_SPACES"
REMOVE_NON_LETTERS="sed 's/[^a-zA-Z]//g'"
FIND_DISTINCT="$GREP_DISTINCT | $TRIM_LEADING_SPACES | $REMOVE_NON_LETTERS"
FIND_READING_IN_ORDER="grep 'MergeTreeInOrder' | $TRIM_LEADING_SPACES | $REMOVE_NON_LETTERS"
FIND_READING_DEFAULT="grep 'MergeTreeThread' | $TRIM_LEADING_SPACES | $REMOVE_NON_LETTERS"
FIND_SORTING_PROPERTIES="grep 'Sorting (Stream)' | $TRIM_LEADING_SPACES"
$CLICKHOUSE_CLIENT -q "drop table if exists distinct_in_order_explain sync"
$CLICKHOUSE_CLIENT -q "create table distinct_in_order_explain (a int, b int, c int) engine=MergeTree() order by (a, b)"
$CLICKHOUSE_CLIENT -q "insert into distinct_in_order_explain select number % number, number % 5, number % 10 from numbers(1,10)"
$CLICKHOUSE_CLIENT -q "insert into distinct_in_order_explain select number % number, number % 5, number % 10 from numbers(1,10)"
$CLICKHOUSE_CLIENT -q "select '-- disable optimize_distinct_in_order'"
$CLICKHOUSE_CLIENT -q "select '-- distinct all primary key columns -> ordinary distinct'"
@ -33,6 +38,9 @@ $CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a,
$CLICKHOUSE_CLIENT -q "select '-- distinct with primary key prefix and order by the same columns -> pre-distinct and final distinct optimization'"
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain order by a, b" | eval $FIND_DISTINCT
$CLICKHOUSE_CLIENT -q "select '-- distinct with primary key prefix and order by columns are prefix of distinct columns -> pre-distinct and final distinct optimization'"
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain order by a" | eval $FIND_DISTINCT
$CLICKHOUSE_CLIENT -q "select '-- distinct with primary key prefix and order by column in distinct but non-primary key prefix -> pre-distinct and final distinct optimization'"
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b, c from distinct_in_order_explain order by c" | eval $FIND_DISTINCT
@ -51,4 +59,40 @@ $CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct b,
$CLICKHOUSE_CLIENT -q "select '-- distinct with non-primary key prefix and order by _const_ column in distinct -> ordinary distinct'"
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct b, 1 as x from distinct_in_order_explain order by x" | eval $FIND_DISTINCT
echo "-- Check reading in order for distinct"
echo "-- disabled, distinct columns match sorting key"
$CLICKHOUSE_CLIENT --max_threads=0 -nq "$DISABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain" | eval $FIND_READING_DEFAULT
echo "-- enabled, distinct columns match sorting key"
# read_in_order_two_level_merge_threshold is set here to avoid repeating MergeTreeInOrder in output
$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER
echo "-- enabled, distinct columns form prefix of sorting key"
$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER
echo "-- enabled, distinct columns DON't form prefix of sorting key"
$CLICKHOUSE_CLIENT --max_threads=0 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct b from distinct_in_order_explain" | eval $FIND_READING_DEFAULT
echo "-- enabled, distinct columns contains constant columns, non-const columns form prefix of sorting key"
$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct 1, a from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER
echo "-- enabled, distinct columns contains constant columns, non-const columns match prefix of sorting key"
$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct 1, b, a from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER
echo "-- enabled, only part of distinct columns form prefix of sorting key"
$CLICKHOUSE_CLIENT --max_threads=0 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, c from distinct_in_order_explain" | eval $FIND_READING_DEFAULT
echo "-- enabled, check that sorting properties are propagated from ReadFromMergeTree till preliminary distinct"
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain plan sorting=1 select distinct b, a from distinct_in_order_explain where a > 0" | eval $FIND_SORTING_PROPERTIES
echo "-- check that reading in order optimization for ORDER BY and DISTINCT applied correctly in the same query"
ENABLE_READ_IN_ORDER="set optimize_read_in_order=1"
echo "-- disabled, check that sorting description for ReadFromMergeTree match ORDER BY columns"
$CLICKHOUSE_CLIENT -nq "$DISABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES
echo "-- enabled, check that ReadFromMergeTree sorting description is overwritten by DISTINCT optimization i.e. it contains columns from DISTINCT clause"
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES
echo "-- enabled, check that ReadFromMergeTree sorting description is overwritten by DISTINCT optimization, but direction used from ORDER BY clause"
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC" | eval $FIND_SORTING_PROPERTIES
echo "-- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (1), - it contains columns from ORDER BY clause"
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct a from distinct_in_order_explain order by a, b" | eval $FIND_SORTING_PROPERTIES
echo "-- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (2), - direction used from ORDER BY clause"
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC, b DESC" | eval $FIND_SORTING_PROPERTIES
echo "-- enabled, check that disabling other 'read in order' optimizations do not disable distinct in order optimization"
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;set optimize_read_in_order=0;set optimize_aggregation_in_order=0;set optimize_read_in_window_order=0;explain plan sorting=1 select distinct a,b from distinct_in_order_explain" | eval $FIND_SORTING_PROPERTIES
$CLICKHOUSE_CLIENT -q "drop table if exists distinct_in_order_explain sync"