mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-22 23:52:03 +00:00
Merge pull request #41014 from ClickHouse/distinct_in_order_wo_order_by
Reading in order for DISTINCT without ORDER BY
This commit is contained in:
commit
c721c6dc0c
@ -24,6 +24,10 @@ public:
|
||||
void describeActions(JSONBuilder::JSONMap & map) const override;
|
||||
void describeActions(FormatSettings & settings) const override;
|
||||
|
||||
bool isPreliminary() const { return pre_distinct; }
|
||||
|
||||
UInt64 getLimitHint() const { return limit_hint; }
|
||||
|
||||
private:
|
||||
void updateOutputStream() override;
|
||||
|
||||
|
@ -54,16 +54,20 @@ size_t tryExecuteFunctionsAfterSorting(QueryPlan::Node * parent_node, QueryPlan:
|
||||
/// Update information about prefix sort description in SortingStep.
|
||||
size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node, QueryPlan::Nodes & nodes);
|
||||
|
||||
/// Reading in order from MergeTree table if DISTINCT columns match or form a prefix of MergeTree sorting key
|
||||
size_t tryDistinctReadInOrder(QueryPlan::Node * node, QueryPlan::Nodes & nodes);
|
||||
|
||||
inline const auto & getOptimizations()
|
||||
{
|
||||
static const std::array<Optimization, 7> optimizations = {{
|
||||
static const std::array<Optimization, 8> optimizations = {{
|
||||
{tryLiftUpArrayJoin, "liftUpArrayJoin", &QueryPlanOptimizationSettings::optimize_plan},
|
||||
{tryPushDownLimit, "pushDownLimit", &QueryPlanOptimizationSettings::optimize_plan},
|
||||
{trySplitFilter, "splitFilter", &QueryPlanOptimizationSettings::optimize_plan},
|
||||
{tryMergeExpressions, "mergeExpressions", &QueryPlanOptimizationSettings::optimize_plan},
|
||||
{tryPushDownFilter, "pushDownFilter", &QueryPlanOptimizationSettings::filter_push_down},
|
||||
{tryExecuteFunctionsAfterSorting, "liftUpFunctions", &QueryPlanOptimizationSettings::optimize_plan},
|
||||
{tryReuseStorageOrderingForWindowFunctions, "reuseStorageOrderingForWindowFunctions", &QueryPlanOptimizationSettings::optimize_plan}
|
||||
{tryReuseStorageOrderingForWindowFunctions, "reuseStorageOrderingForWindowFunctions", &QueryPlanOptimizationSettings::optimize_plan},
|
||||
{tryDistinctReadInOrder, "distinctReadInOrder", &QueryPlanOptimizationSettings::distinct_in_order},
|
||||
}};
|
||||
|
||||
return optimizations;
|
||||
|
@ -11,6 +11,7 @@ QueryPlanOptimizationSettings QueryPlanOptimizationSettings::fromSettings(const
|
||||
settings.optimize_plan = from.query_plan_enable_optimizations;
|
||||
settings.max_optimizations_to_apply = from.query_plan_max_optimizations_to_apply;
|
||||
settings.filter_push_down = from.query_plan_filter_push_down;
|
||||
settings.distinct_in_order = from.optimize_distinct_in_order;
|
||||
return settings;
|
||||
}
|
||||
|
||||
|
@ -21,6 +21,9 @@ struct QueryPlanOptimizationSettings
|
||||
/// If filter push down optimization is enabled.
|
||||
bool filter_push_down = true;
|
||||
|
||||
/// If distinct in order optimization is enabled.
|
||||
bool distinct_in_order = false;
|
||||
|
||||
static QueryPlanOptimizationSettings fromSettings(const Settings & from);
|
||||
static QueryPlanOptimizationSettings fromContext(ContextPtr from);
|
||||
};
|
||||
|
@ -0,0 +1,97 @@
|
||||
#include <memory>
|
||||
#include <Processors/QueryPlan/DistinctStep.h>
|
||||
#include <Processors/QueryPlan/ITransformingStep.h>
|
||||
#include <Processors/QueryPlan/Optimizations/Optimizations.h>
|
||||
#include <Processors/QueryPlan/ReadFromMergeTree.h>
|
||||
#include <Common/typeid_cast.h>
|
||||
|
||||
namespace DB::QueryPlanOptimizations
|
||||
{
|
||||
/// Requests reading in order of the sorting key from ReadFromMergeTree for the benefit of a preliminary DISTINCT.
///
/// The optimization is applied only when all of the following hold:
///  - `parent_node` holds a preliminary DistinctStep;
///  - every step on the path from it down to the leaf is an ITransformingStep which preserves sorting;
///  - the leaf step is ReadFromMergeTree;
///  - non-constant DISTINCT columns are exactly the first N (N > 0) columns of the sorting key, in any order;
///  - ReadFromMergeTree does not already provide a non-chunk sort description of at least N columns.
///
/// On success the input order info of ReadFromMergeTree is updated and sorting properties are re-propagated
/// through all steps between the reading step and the preliminary distinct (inclusive).
///
/// Returns 0 on every path, the set of plan nodes is not modified here
/// (NOTE(review): presumably the return value reports structural changes to the plan - confirm against the Optimization contract).
size_t tryDistinctReadInOrder(QueryPlan::Node * parent_node, QueryPlan::Nodes &)
{
    /// the optimization is anchored on a preliminary distinct step only
    auto * pre_distinct = typeid_cast<DistinctStep *>(parent_node->step.get());
    if (!pre_distinct || !pre_distinct->isPreliminary())
        return 0;

    /// walk through the plan
    /// (1) check if nodes below preliminary distinct preserve sorting
    /// (2) gather transforming steps to update their sorting properties later
    std::vector<ITransformingStep *> steps2update;
    QueryPlan::Node * node = parent_node;
    while (!node->children.empty())
    {
        auto * step = dynamic_cast<ITransformingStep *>(node->step.get());
        if (!step)
            return 0;

        if (!step->getDataStreamTraits().preserves_sorting)
            return 0;

        steps2update.push_back(step);
        node = node->children.front();
    }

    /// the walk stops at a leaf, check if we read from MergeTree there
    auto * read_from_merge_tree = typeid_cast<ReadFromMergeTree *>(node->step.get());
    if (!read_from_merge_tree)
        return 0;

    /// find non-const columns in DISTINCT
    /// note: the set stores views of column names, so `distinct_columns` must stay alive while the set is used
    const ColumnsWithTypeAndName & distinct_columns = pre_distinct->getOutputStream().header.getColumnsWithTypeAndName();
    std::set<std::string_view> non_const_columns;
    for (const auto & column : distinct_columns)
    {
        if (!isColumnConst(*column.column))
            non_const_columns.emplace(column.name);
    }

    /// count how many leading sorting key columns are present in DISTINCT
    const Names & sorting_key_columns = read_from_merge_tree->getStorageMetadata()->getSortingKeyColumns();
    size_t number_of_sorted_distinct_columns = 0;
    for (const auto & column_name : sorting_key_columns)
    {
        if (non_const_columns.count(column_name) == 0)
            break;

        ++number_of_sorted_distinct_columns;
    }

    /// apply optimization only when distinct columns match or form prefix of sorting key
    /// todo: check if reading in order optimization would be beneficial when sorting key is prefix of columns in DISTINCT
    if (number_of_sorted_distinct_columns != non_const_columns.size())
        return 0;

    /// nothing to read in order for, e.g. all DISTINCT columns are constants:
    /// requesting reading in order with an empty sorting key prefix makes no sense
    if (number_of_sorted_distinct_columns == 0)
        return 0;

    /// check if another read in order optimization is already applied
    /// apply optimization only if another read in order one uses less sorting columns
    /// example: SELECT DISTINCT a, b FROM t ORDER BY a; -- sorting key: a, b
    /// if read in order for ORDER BY is already applied, then output sort description will contain only column `a`
    /// but we need columns `a, b`, applying read in order for distinct will still benefit `order by`
    const DataStream & output_data_stream = read_from_merge_tree->getOutputStream();
    const SortDescription & output_sort_desc = output_data_stream.sort_description;
    if (output_data_stream.sort_scope != DataStream::SortScope::Chunk && number_of_sorted_distinct_columns <= output_sort_desc.size())
        return 0;

    /// update input order info in read_from_merge_tree step
    const int direction = 0; /// for DISTINCT direction doesn't matter, ReadFromMergeTree will choose proper one
    read_from_merge_tree->requestReadingInOrder(number_of_sorted_distinct_columns, direction, pre_distinct->getLimitHint());

    /// update data stream's sorting properties for found transforms
    /// steps were gathered top-down, so iterate in reverse: from the step right above the reading step up to the distinct
    const DataStream * input_stream = &read_from_merge_tree->getOutputStream();
    for (auto it = steps2update.rbegin(); it != steps2update.rend(); ++it)
    {
        (*it)->updateInputStream(*input_stream);
        input_stream = &(*it)->getOutputStream();
    }

    return 0;
}
|
||||
|
||||
}
|
@ -91,8 +91,6 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node,
|
||||
window->getWindowDescription().full_sort_description,
|
||||
query_info.syntax_analyzer_result);
|
||||
|
||||
read_from_merge_tree->setQueryInfoOrderOptimizer(order_optimizer);
|
||||
|
||||
/// If we don't have filtration, we can pushdown limit to reading stage for optimizations.
|
||||
UInt64 limit = (select_query->hasFiltration() || select_query->groupBy()) ? 0 : InterpreterSelectQuery::getLimitForSorting(*select_query, context);
|
||||
|
||||
@ -103,7 +101,7 @@ size_t tryReuseStorageOrderingForWindowFunctions(QueryPlan::Node * parent_node,
|
||||
|
||||
if (order_info)
|
||||
{
|
||||
read_from_merge_tree->setQueryInfoInputOrderInfo(order_info);
|
||||
read_from_merge_tree->requestReadingInOrder(order_info->used_prefix_of_sorting_key_size, order_info->direction, order_info->limit);
|
||||
sorting->convertToFinishSorting(order_info->sort_description_for_merging);
|
||||
}
|
||||
|
||||
|
@ -153,7 +153,6 @@ ReadFromMergeTree::ReadFromMergeTree(
|
||||
}
|
||||
|
||||
output_stream->sort_description = std::move(sort_description);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -1019,28 +1018,38 @@ MergeTreeDataSelectAnalysisResultPtr ReadFromMergeTree::selectRangesToRead(
|
||||
return std::make_shared<MergeTreeDataSelectAnalysisResult>(MergeTreeDataSelectAnalysisResult{.result = std::move(result)});
|
||||
}
|
||||
|
||||
void ReadFromMergeTree::setQueryInfoOrderOptimizer(std::shared_ptr<ReadInOrderOptimizer> order_optimizer)
|
||||
void ReadFromMergeTree::requestReadingInOrder(size_t prefix_size, int direction, size_t limit)
|
||||
{
|
||||
if (query_info.projection)
|
||||
{
|
||||
query_info.projection->order_optimizer = order_optimizer;
|
||||
}
|
||||
else
|
||||
{
|
||||
query_info.order_optimizer = order_optimizer;
|
||||
}
|
||||
}
|
||||
/// if direction is not set, use current one
|
||||
if (!direction)
|
||||
direction = getSortDirection();
|
||||
|
||||
void ReadFromMergeTree::setQueryInfoInputOrderInfo(InputOrderInfoPtr order_info)
|
||||
{
|
||||
auto order_info = std::make_shared<InputOrderInfo>(SortDescription{}, prefix_size, direction, limit);
|
||||
if (query_info.projection)
|
||||
{
|
||||
query_info.projection->input_order_info = order_info;
|
||||
}
|
||||
else
|
||||
{
|
||||
query_info.input_order_info = order_info;
|
||||
|
||||
/// update sort info for output stream
|
||||
SortDescription sort_description;
|
||||
const Names & sorting_key_columns = storage_snapshot->getMetadataForQuery()->getSortingKeyColumns();
|
||||
const Block & header = output_stream->header;
|
||||
const int sort_direction = getSortDirection();
|
||||
for (const auto & column_name : sorting_key_columns)
|
||||
{
|
||||
if (std::find_if(header.begin(), header.end(), [&](ColumnWithTypeAndName const & col) { return col.name == column_name; })
|
||||
== header.end())
|
||||
break;
|
||||
sort_description.emplace_back(column_name, sort_direction);
|
||||
}
|
||||
if (sort_description.empty())
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Sort description can't be empty when reading in order");
|
||||
|
||||
const size_t used_prefix_of_sorting_key_size = order_info->used_prefix_of_sorting_key_size;
|
||||
if (sort_description.size() > used_prefix_of_sorting_key_size)
|
||||
sort_description.resize(used_prefix_of_sorting_key_size);
|
||||
output_stream->sort_description = std::move(sort_description);
|
||||
output_stream->sort_scope = DataStream::SortScope::Stream;
|
||||
}
|
||||
|
||||
ReadFromMergeTree::AnalysisResult ReadFromMergeTree::getAnalysisResult() const
|
||||
|
@ -151,8 +151,7 @@ public:
|
||||
const SelectQueryInfo & getQueryInfo() const { return query_info; }
|
||||
StorageMetadataPtr getStorageMetadata() const { return metadata_for_reading; }
|
||||
|
||||
void setQueryInfoOrderOptimizer(std::shared_ptr<ReadInOrderOptimizer> read_in_order_optimizer);
|
||||
void setQueryInfoInputOrderInfo(InputOrderInfoPtr order_info);
|
||||
void requestReadingInOrder(size_t prefix_size, int direction, size_t limit);
|
||||
|
||||
private:
|
||||
int getSortDirection() const
|
||||
|
@ -202,7 +202,7 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrderImpl(
|
||||
const ContextPtr & context,
|
||||
UInt64 limit) const
|
||||
{
|
||||
auto sorting_key_columns = metadata_snapshot->getSortingKeyColumns();
|
||||
const Names & sorting_key_columns = metadata_snapshot->getSortingKeyColumns();
|
||||
int read_direction = description.at(0).direction;
|
||||
|
||||
auto fixed_sorting_columns = getFixedSortingColumns(query, sorting_key_columns, context);
|
||||
|
@ -4,15 +4,10 @@
|
||||
<fill_query>INSERT INTO distinct_cardinality_high SELECT number % 1e6, number % 1e4, number % 1e2 FROM numbers_mt(1e8)</fill_query>
|
||||
|
||||
<query>SELECT DISTINCT high FROM distinct_cardinality_high FORMAT Null</query>
|
||||
<query>SELECT DISTINCT high, low FROM distinct_cardinality_high FORMAT Null</query>
|
||||
<query>SELECT DISTINCT high, medium FROM distinct_cardinality_high FORMAT Null</query>
|
||||
<query>SELECT DISTINCT high, medium, low FROM distinct_cardinality_high FORMAT Null</query>
|
||||
|
||||
<query>SELECT DISTINCT high, medium FROM distinct_cardinality_high ORDER BY high, medium FORMAT Null</query>
|
||||
<query>SELECT DISTINCT high, medium FROM distinct_cardinality_high ORDER BY high FORMAT Null</query>
|
||||
<query>SELECT DISTINCT high, medium FROM distinct_cardinality_high ORDER BY medium FORMAT Null</query>
|
||||
<query>SELECT DISTINCT high, low FROM distinct_cardinality_high ORDER BY low FORMAT Null</query>
|
||||
<query>SELECT DISTINCT high, medium, low FROM distinct_cardinality_high ORDER BY low FORMAT Null</query>
|
||||
|
||||
<drop_query>DROP TABLE IF EXISTS distinct_cardinality_high</drop_query>
|
||||
|
||||
@ -22,14 +17,9 @@
|
||||
|
||||
<query>SELECT DISTINCT low FROM distinct_cardinality_low FORMAT Null</query>
|
||||
<query>SELECT DISTINCT low, medium FROM distinct_cardinality_low FORMAT Null</query>
|
||||
<query>SELECT DISTINCT low, high FROM distinct_cardinality_low FORMAT Null</query>
|
||||
<query>SELECT DISTINCT low, medium, high FROM distinct_cardinality_low FORMAT Null</query>
|
||||
|
||||
<query>SELECT DISTINCT low, medium FROM distinct_cardinality_low ORDER BY low, medium FORMAT Null</query>
|
||||
<query>SELECT DISTINCT low, medium FROM distinct_cardinality_low ORDER BY low FORMAT Null</query>
|
||||
<query>SELECT DISTINCT low, medium FROM distinct_cardinality_low ORDER BY medium FORMAT Null</query>
|
||||
<query>SELECT DISTINCT low, high FROM distinct_cardinality_low ORDER BY high FORMAT Null</query>
|
||||
<query>SELECT DISTINCT low, medium, high FROM distinct_cardinality_low ORDER BY high FORMAT Null</query>
|
||||
|
||||
<drop_query>DROP TABLE IF EXISTS distinct_cardinality_low</drop_query>
|
||||
</test>
|
||||
|
@ -104,3 +104,9 @@ select distinct a, b, x, y from (select a, b, 1 as x, 2 as y from distinct_in_or
|
||||
0 3 1 2
|
||||
0 4 1 2
|
||||
-- check that distinct in order returns the same result as ordinary distinct
|
||||
-- check that distinct in order WITH order by returns the same result as ordinary distinct
|
||||
0
|
||||
-- check that distinct in order WITHOUT order by returns the same result as ordinary distinct
|
||||
0
|
||||
-- check that distinct in order WITHOUT order by and WITH filter returns the same result as ordinary distinct
|
||||
0
|
||||
|
@ -65,11 +65,32 @@ INSERT INTO distinct_cardinality_low SELECT number % 1e1, number % 1e2, number %
|
||||
drop table if exists distinct_in_order sync;
|
||||
drop table if exists ordinary_distinct sync;
|
||||
|
||||
select '-- check that distinct in order WITH order by returns the same result as ordinary distinct';
|
||||
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
|
||||
insert into distinct_in_order select distinct * from distinct_cardinality_low order by high settings optimize_distinct_in_order=1;
|
||||
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
|
||||
insert into ordinary_distinct select distinct * from distinct_cardinality_low order by high settings optimize_distinct_in_order=0;
|
||||
select distinct * from distinct_in_order except select * from ordinary_distinct;
|
||||
select count() as diff from (select distinct * from distinct_in_order except select * from ordinary_distinct);
|
||||
|
||||
drop table if exists distinct_in_order sync;
|
||||
drop table if exists ordinary_distinct sync;
|
||||
|
||||
select '-- check that distinct in order WITHOUT order by returns the same result as ordinary distinct';
|
||||
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
|
||||
insert into distinct_in_order select distinct * from distinct_cardinality_low settings optimize_distinct_in_order=1;
|
||||
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
|
||||
insert into ordinary_distinct select distinct * from distinct_cardinality_low settings optimize_distinct_in_order=0;
|
||||
select count() as diff from (select distinct * from distinct_in_order except select * from ordinary_distinct);
|
||||
|
||||
drop table if exists distinct_in_order;
|
||||
drop table if exists ordinary_distinct;
|
||||
|
||||
select '-- check that distinct in order WITHOUT order by and WITH filter returns the same result as ordinary distinct';
|
||||
create table distinct_in_order (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
|
||||
insert into distinct_in_order select distinct * from distinct_cardinality_low where low > 0 settings optimize_distinct_in_order=1;
|
||||
create table ordinary_distinct (low UInt64, medium UInt64, high UInt64) engine=MergeTree() order by (low, medium);
|
||||
insert into ordinary_distinct select distinct * from distinct_cardinality_low where low > 0 settings optimize_distinct_in_order=0;
|
||||
select count() as diff from (select distinct * from distinct_in_order except select * from ordinary_distinct);
|
||||
|
||||
drop table if exists distinct_in_order;
|
||||
drop table if exists ordinary_distinct;
|
||||
|
@ -15,6 +15,9 @@ DistinctSortedChunkTransform
|
||||
-- distinct with primary key prefix and order by the same columns -> pre-distinct and final distinct optimization
|
||||
DistinctSortedStreamTransform
|
||||
DistinctSortedChunkTransform
|
||||
-- distinct with primary key prefix and order by columns are prefix of distinct columns -> pre-distinct and final distinct optimization
|
||||
DistinctSortedTransform
|
||||
DistinctSortedChunkTransform
|
||||
-- distinct with primary key prefix and order by column in distinct but non-primary key prefix -> pre-distinct and final distinct optimization
|
||||
DistinctSortedTransform
|
||||
DistinctSortedChunkTransform
|
||||
@ -33,3 +36,48 @@ DistinctTransform
|
||||
-- distinct with non-primary key prefix and order by _const_ column in distinct -> ordinary distinct
|
||||
DistinctTransform
|
||||
DistinctTransform
|
||||
-- Check reading in order for distinct
|
||||
-- disabled, distinct columns match sorting key
|
||||
MergeTreeThread
|
||||
-- enabled, distinct columns match sorting key
|
||||
MergeTreeInOrder
|
||||
-- enabled, distinct columns form prefix of sorting key
|
||||
MergeTreeInOrder
|
||||
-- enabled, distinct columns DON't form prefix of sorting key
|
||||
MergeTreeThread
|
||||
-- enabled, distinct columns contains constant columns, non-const columns form prefix of sorting key
|
||||
MergeTreeInOrder
|
||||
-- enabled, distinct columns contains constant columns, non-const columns match prefix of sorting key
|
||||
MergeTreeInOrder
|
||||
-- enabled, only part of distinct columns form prefix of sorting key
|
||||
MergeTreeThread
|
||||
-- enabled, check that sorting properties are propagated from ReadFromMergeTree till preliminary distinct
|
||||
Sorting (Stream): a ASC, b ASC
|
||||
Sorting (Stream): a ASC, b ASC
|
||||
Sorting (Stream): a ASC, b ASC
|
||||
Sorting (Stream): a ASC, b ASC
|
||||
-- check that reading in order optimization for ORDER BY and DISTINCT applied correctly in the same query
|
||||
-- disabled, check that sorting description for ReadFromMergeTree match ORDER BY columns
|
||||
Sorting (Stream): a ASC
|
||||
Sorting (Stream): a ASC
|
||||
Sorting (Stream): a ASC
|
||||
-- enabled, check that ReadFromMergeTree sorting description is overwritten by DISTINCT optimization i.e. it contains columns from DISTINCT clause
|
||||
Sorting (Stream): a ASC, b ASC
|
||||
Sorting (Stream): a ASC, b ASC
|
||||
Sorting (Stream): a ASC, b ASC
|
||||
-- enabled, check that ReadFromMergeTree sorting description is overwritten by DISTINCT optimization, but direction used from ORDER BY clause
|
||||
Sorting (Stream): a DESC, b DESC
|
||||
Sorting (Stream): a DESC, b DESC
|
||||
Sorting (Stream): a DESC, b DESC
|
||||
-- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (1), - it contains columns from ORDER BY clause
|
||||
Sorting (Stream): a ASC, b ASC
|
||||
Sorting (Stream): a ASC, b ASC
|
||||
Sorting (Stream): a ASC, b ASC
|
||||
-- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (2), - direction used from ORDER BY clause
|
||||
Sorting (Stream): a DESC, b DESC
|
||||
Sorting (Stream): a DESC, b DESC
|
||||
Sorting (Stream): a DESC, b DESC
|
||||
-- enabled, check that disabling other 'read in order' optimizations do not disable distinct in order optimization
|
||||
Sorting (Stream): a ASC, b ASC
|
||||
Sorting (Stream): a ASC, b ASC
|
||||
Sorting (Stream): a ASC, b ASC
|
||||
|
@ -10,11 +10,16 @@ DISABLE_OPTIMIZATION="set optimize_distinct_in_order=0"
|
||||
ENABLE_OPTIMIZATION="set optimize_distinct_in_order=1"
|
||||
GREP_DISTINCT="grep 'DistinctSortedChunkTransform\|DistinctSortedStreamTransform\|DistinctSortedTransform\|DistinctTransform'"
|
||||
TRIM_LEADING_SPACES="sed -e 's/^[ \t]*//'"
|
||||
FIND_DISTINCT="$GREP_DISTINCT | $TRIM_LEADING_SPACES"
|
||||
REMOVE_NON_LETTERS="sed 's/[^a-zA-Z]//g'"
|
||||
FIND_DISTINCT="$GREP_DISTINCT | $TRIM_LEADING_SPACES | $REMOVE_NON_LETTERS"
|
||||
FIND_READING_IN_ORDER="grep 'MergeTreeInOrder' | $TRIM_LEADING_SPACES | $REMOVE_NON_LETTERS"
|
||||
FIND_READING_DEFAULT="grep 'MergeTreeThread' | $TRIM_LEADING_SPACES | $REMOVE_NON_LETTERS"
|
||||
FIND_SORTING_PROPERTIES="grep 'Sorting (Stream)' | $TRIM_LEADING_SPACES"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "drop table if exists distinct_in_order_explain sync"
|
||||
$CLICKHOUSE_CLIENT -q "create table distinct_in_order_explain (a int, b int, c int) engine=MergeTree() order by (a, b)"
|
||||
$CLICKHOUSE_CLIENT -q "insert into distinct_in_order_explain select number % number, number % 5, number % 10 from numbers(1,10)"
|
||||
$CLICKHOUSE_CLIENT -q "insert into distinct_in_order_explain select number % number, number % 5, number % 10 from numbers(1,10)"
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "select '-- disable optimize_distinct_in_order'"
|
||||
$CLICKHOUSE_CLIENT -q "select '-- distinct all primary key columns -> ordinary distinct'"
|
||||
@ -33,6 +38,9 @@ $CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a,
|
||||
$CLICKHOUSE_CLIENT -q "select '-- distinct with primary key prefix and order by the same columns -> pre-distinct and final distinct optimization'"
|
||||
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain order by a, b" | eval $FIND_DISTINCT
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "select '-- distinct with primary key prefix and order by columns are prefix of distinct columns -> pre-distinct and final distinct optimization'"
|
||||
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain order by a" | eval $FIND_DISTINCT
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "select '-- distinct with primary key prefix and order by column in distinct but non-primary key prefix -> pre-distinct and final distinct optimization'"
|
||||
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b, c from distinct_in_order_explain order by c" | eval $FIND_DISTINCT
|
||||
|
||||
@ -51,4 +59,40 @@ $CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct b,
|
||||
$CLICKHOUSE_CLIENT -q "select '-- distinct with non-primary key prefix and order by _const_ column in distinct -> ordinary distinct'"
|
||||
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct b, 1 as x from distinct_in_order_explain order by x" | eval $FIND_DISTINCT
|
||||
|
||||
echo "-- Check reading in order for distinct"
|
||||
echo "-- disabled, distinct columns match sorting key"
|
||||
$CLICKHOUSE_CLIENT --max_threads=0 -nq "$DISABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain" | eval $FIND_READING_DEFAULT
|
||||
echo "-- enabled, distinct columns match sorting key"
|
||||
# read_in_order_two_level_merge_threshold is set here to avoid repeating MergeTreeInOrder in output
|
||||
$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER
|
||||
echo "-- enabled, distinct columns form prefix of sorting key"
|
||||
$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, b from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER
|
||||
echo "-- enabled, distinct columns DON't form prefix of sorting key"
|
||||
$CLICKHOUSE_CLIENT --max_threads=0 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct b from distinct_in_order_explain" | eval $FIND_READING_DEFAULT
|
||||
echo "-- enabled, distinct columns contains constant columns, non-const columns form prefix of sorting key"
|
||||
$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct 1, a from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER
|
||||
echo "-- enabled, distinct columns contains constant columns, non-const columns match prefix of sorting key"
|
||||
$CLICKHOUSE_CLIENT --read_in_order_two_level_merge_threshold=2 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct 1, b, a from distinct_in_order_explain" | eval $FIND_READING_IN_ORDER
|
||||
echo "-- enabled, only part of distinct columns form prefix of sorting key"
|
||||
$CLICKHOUSE_CLIENT --max_threads=0 -nq "$ENABLE_OPTIMIZATION;explain pipeline select distinct a, c from distinct_in_order_explain" | eval $FIND_READING_DEFAULT
|
||||
|
||||
echo "-- enabled, check that sorting properties are propagated from ReadFromMergeTree till preliminary distinct"
|
||||
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;explain plan sorting=1 select distinct b, a from distinct_in_order_explain where a > 0" | eval $FIND_SORTING_PROPERTIES
|
||||
|
||||
echo "-- check that reading in order optimization for ORDER BY and DISTINCT applied correctly in the same query"
|
||||
ENABLE_READ_IN_ORDER="set optimize_read_in_order=1"
|
||||
echo "-- disabled, check that sorting description for ReadFromMergeTree match ORDER BY columns"
|
||||
$CLICKHOUSE_CLIENT -nq "$DISABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES
|
||||
echo "-- enabled, check that ReadFromMergeTree sorting description is overwritten by DISTINCT optimization i.e. it contains columns from DISTINCT clause"
|
||||
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a" | eval $FIND_SORTING_PROPERTIES
|
||||
echo "-- enabled, check that ReadFromMergeTree sorting description is overwritten by DISTINCT optimization, but direction used from ORDER BY clause"
|
||||
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC" | eval $FIND_SORTING_PROPERTIES
|
||||
echo "-- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (1), - it contains columns from ORDER BY clause"
|
||||
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct a from distinct_in_order_explain order by a, b" | eval $FIND_SORTING_PROPERTIES
|
||||
echo "-- enabled, check that ReadFromMergeTree sorting description is NOT overwritten by DISTINCT optimization (2), - direction used from ORDER BY clause"
|
||||
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;$ENABLE_READ_IN_ORDER;explain plan sorting=1 select distinct b, a from distinct_in_order_explain order by a DESC, b DESC" | eval $FIND_SORTING_PROPERTIES
|
||||
|
||||
echo "-- enabled, check that disabling other 'read in order' optimizations do not disable distinct in order optimization"
|
||||
$CLICKHOUSE_CLIENT -nq "$ENABLE_OPTIMIZATION;set optimize_read_in_order=0;set optimize_aggregation_in_order=0;set optimize_read_in_window_order=0;explain plan sorting=1 select distinct a,b from distinct_in_order_explain" | eval $FIND_SORTING_PROPERTIES
|
||||
|
||||
$CLICKHOUSE_CLIENT -q "drop table if exists distinct_in_order_explain sync"
|
||||
|
Loading…
Reference in New Issue
Block a user