Merge pull request #23375 from ClickHouse/fix-read-in-order-with-collation

Fix read in order optimization if COLLATE is present #22379
This commit is contained in:
alexey-milovidov 2021-04-21 15:00:02 +03:00 committed by GitHub
commit 9c649932bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 41 additions and 9 deletions

View File

@ -308,10 +308,8 @@ function run_tests
01354_order_by_tuple_collate_const
01355_ilike
01411_bayesian_ab_testing
01532_collate_in_low_cardinality
01533_collate_in_nullable
01542_collate_in_array
01543_collate_in_tuple
collate
collation
_orc_
arrow
avro

View File

@ -1510,7 +1510,8 @@ ExpressionAnalysisResult::ExpressionAnalysisResult(
optimize_read_in_order =
settings.optimize_read_in_order
&& storage && query.orderBy()
&& storage
&& query.orderBy()
&& !query_analyzer.hasAggregation()
&& !query_analyzer.hasWindow()
&& !query.final()

View File

@ -44,7 +44,7 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr &
int read_direction = required_sort_description.at(0).direction;
size_t prefix_size = std::min(required_sort_description.size(), sorting_key_columns.size());
auto aliase_columns = metadata_snapshot->getColumns().getAliases();
auto aliased_columns = metadata_snapshot->getColumns().getAliases();
for (size_t i = 0; i < prefix_size; ++i)
{
@ -55,13 +55,18 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr &
/// or in some simple cases when order key element is wrapped into monotonic function.
auto apply_order_judge = [&] (const ExpressionActions::Actions & actions, const String & sort_column)
{
/// If the required order depends on a collation, it cannot be matched with the primary key order,
/// because primary keys cannot have collations.
if (required_sort_description[i].collator)
return false;
int current_direction = required_sort_description[i].direction;
/// For the path: order by (sort_column, ...)
/// For the path: order by (sort_column, ...)
if (sort_column == sorting_key_columns[i] && current_direction == read_direction)
{
return true;
}
/// For the path: order by (function(sort_column), ...)
/// For the path: order by (function(sort_column), ...)
/// Allow only one simple monotonic function with one argument
/// Why not allow multi monotonic functions?
else
@ -125,7 +130,7 @@ InputOrderInfoPtr ReadInOrderOptimizer::getInputOrder(const StorageMetadataPtr &
/// currently we only support alias column without any function wrapper
/// ie: `order by aliased_column` can have this optimization, but `order by function(aliased_column)` can not.
/// This suits most cases.
if (context->getSettingsRef().optimize_respect_aliases && aliase_columns.contains(required_sort_description[i].column_name))
if (context->getSettingsRef().optimize_respect_aliases && aliased_columns.contains(required_sort_description[i].column_name))
{
auto column_expr = metadata_snapshot->getColumns().get(required_sort_description[i].column_name).default_desc.expression->clone();
replaceAliasColumnsInQuery(column_expr, metadata_snapshot->getColumns(), forbidden_columns, context);

View File

@ -0,0 +1,6 @@
a a
A A
b b
B B
c c
C C

View File

@ -0,0 +1,21 @@
-- Regression test: ORDER BY ... COLLATE on a column that is also the table's sorting key.
-- The query result must follow the collation's order rather than the storage order of `v`.

-- Start from a clean state in case a previous run left the table behind.
DROP TABLE IF EXISTS test_collation;
-- Two String columns; the table is sorted by `v`, the same column the final SELECT orders by.
CREATE TABLE test_collation
(
`v` String,
`v2` String
)
ENGINE = MergeTree
ORDER BY v
SETTINGS index_granularity = 8192;
-- Each row is inserted by its own statement, upper-case values first, then lower-case.
-- NOTE(review): presumably one INSERT per row is deliberate so the rows are stored separately -- confirm before merging these statements.
insert into test_collation values ('A', 'A');
insert into test_collation values ('B', 'B');
insert into test_collation values ('C', 'C');
insert into test_collation values ('a', 'a');
insert into test_collation values ('b', 'b');
insert into test_collation values ('c', 'c');
-- Order by the sorting-key column, but with the 'en' collation applied.
SELECT * FROM test_collation ORDER BY v ASC COLLATE 'en';
-- Clean up.
DROP TABLE test_collation;

View File

@ -231,3 +231,4 @@
01801_distinct_group_by_shard
01804_dictionary_decimal256_type
01801_s3_distributed
01833_test_collation_alvarotuso