Consider trivial LIMIT when deciding on parallel replicas

This commit is contained in:
Raúl Marín 2023-07-03 11:36:41 +02:00
parent b9969e8730
commit 088b0527f6
3 changed files with 51 additions and 3 deletions

View File

@ -108,6 +108,9 @@ namespace ProfileEvents
namespace DB
{
static UInt64 getLimitUIntValue(const ASTPtr & node, const ContextPtr & context, const std::string & expr);
static std::pair<UInt64, UInt64> getLimitLengthAndOffset(const ASTSelectQuery & query, const ContextPtr & context);
namespace ErrorCodes
{
extern const int TOO_DEEP_SUBQUERIES;
@ -869,6 +872,28 @@ InterpreterSelectQuery::InterpreterSelectQuery(
if (analysis_result.before_where)
added_filter_nodes.nodes.push_back(&analysis_result.before_where->findInOutputs(analysis_result.where_column_name));
auto [limit_length, limit_offset] = getLimitLengthAndOffset(query, context);
auto local_limits = getStorageLimits(*context, options);
/// Trivial-LIMIT detection: if the SELECT performs no filtering
/// (WHERE / PREWHERE / extra filter ASTs), no DISTINCT, grouping, HAVING,
/// ordering, LIMIT BY, WITH TIES, JOIN, aggregation or window functions,
/// then reading at most (limit_length + limit_offset) rows from storage is
/// guaranteed to be enough to answer the query.
if (!query.distinct
&& !query.limit_with_ties
&& !query.prewhere()
&& !query.where()
&& query_info.filter_asts.empty()
&& !query.groupBy()
&& !query.having()
&& !query.orderBy()
&& !query.limitBy()
&& !query.join()
&& !query_analyzer->hasAggregation()
&& !query_analyzer->hasWindow()
&& query.limitLength()
/// Guard against UInt64 overflow in limit_length + limit_offset below.
&& limit_length <= std::numeric_limits<UInt64>::max() - limit_offset)
{
/// Record the upper bound of rows this query can ever need; it is taken
/// into account when estimating rows to read for parallel replicas.
query_info.limit = limit_length + limit_offset;
}
/// End of trivial-LIMIT handling: query_info.limit, when set, caps the rows to read.
rows_to_read.emplace(

View File

@ -12,3 +12,11 @@
02783_automatic_parallel_replicas-default_2_300k_pure 0
02783_automatic_parallel_replicas-default_2_200k_pure 2
02783_automatic_parallel_replicas-default_2_100k_pure 3
02783_automatic_parallel_replicas-default_3_0_pure 3
02783_automatic_parallel_replicas-default_3_10M_pure 0
02783_automatic_parallel_replicas-default_3_1M_pure 0
02783_automatic_parallel_replicas-default_3_500k_pure 2
02783_automatic_parallel_replicas-default_4_0_pure 3
02783_automatic_parallel_replicas-default_4_10M_pure 0
02783_automatic_parallel_replicas-default_4_1M_pure 3
02783_automatic_parallel_replicas-default_4_500k_pure 3

View File

@ -97,7 +97,7 @@ run_query_with_pure_parallel_replicas "${query_id_base}_0_10M" 10000000 "$whole_
run_query_with_pure_parallel_replicas "${query_id_base}_0_6M" 6000000 "$whole_table_query" # 1.6 replicas -> 1 replica -> No parallel replicas
run_query_with_pure_parallel_replicas "${query_id_base}_0_5M" 5000000 "$whole_table_query"
run_query_with_pure_parallel_replicas "${query_id_base}_0_1M" 1000000 "$whole_table_query"
##### Reading 2M rows without filters as partition (p=3) is pruned completely
query_with_partition_pruning="SELECT sum(number) FROM test_parallel_replicas_automatic_count WHERE p != 3 format Null"
run_query_with_pure_parallel_replicas "${query_id_base}_1_0" 0 "$query_with_partition_pruning"
@ -105,7 +105,7 @@ run_query_with_pure_parallel_replicas "${query_id_base}_1_10M" 10000000 "$query_
run_query_with_pure_parallel_replicas "${query_id_base}_1_1M" 1000000 "$query_with_partition_pruning"
run_query_with_pure_parallel_replicas "${query_id_base}_1_500k" 500000 "$query_with_partition_pruning"
#### Reading ~500k rows as index filter should prune granules from partition=1 and partition=2, and drop p3 completely
query_with_index="SELECT sum(number) FROM test_parallel_replicas_automatic_count WHERE number < 500_000 format Null"
run_query_with_pure_parallel_replicas "${query_id_base}_2_0" 0 "$query_with_index"
run_query_with_pure_parallel_replicas "${query_id_base}_2_1M" 1000000 "$query_with_index"
@ -113,7 +113,22 @@ run_query_with_pure_parallel_replicas "${query_id_base}_2_300k" 300000 "$query_w
run_query_with_pure_parallel_replicas "${query_id_base}_2_200k" 200000 "$query_with_index"
run_query_with_pure_parallel_replicas "${query_id_base}_2_100k" 100000 "$query_with_index"
# Custom key parallel replicas: Not implemented
#### Reading 1M rows (the trivial LIMIT inside the subquery caps the rows to read)
limit_table_query="SELECT sum(number) FROM (SELECT number FROM test_parallel_replicas_automatic_count LIMIT 1_000_000) format Null"
run_query_with_pure_parallel_replicas "${query_id_base}_3_0" 0 "$limit_table_query"
run_query_with_pure_parallel_replicas "${query_id_base}_3_10M" 10000000 "$limit_table_query"
run_query_with_pure_parallel_replicas "${query_id_base}_3_1M" 1000000 "$limit_table_query"
run_query_with_pure_parallel_replicas "${query_id_base}_3_500k" 500000 "$limit_table_query"
#### Reading 10M rows (because the LIMIT is applied after aggregation, the whole table is read)
limit_agg_table_query="SELECT sum(number) FROM test_parallel_replicas_automatic_count LIMIT 1_000_000 format Null"
run_query_with_pure_parallel_replicas "${query_id_base}_4_0" 0 "$limit_agg_table_query"
run_query_with_pure_parallel_replicas "${query_id_base}_4_10M" 10000000 "$limit_agg_table_query"
run_query_with_pure_parallel_replicas "${query_id_base}_4_1M" 1000000 "$limit_agg_table_query"
run_query_with_pure_parallel_replicas "${query_id_base}_4_500k" 500000 "$limit_agg_table_query"
#### Custom key parallel replicas: Not implemented
#whole_table_query="SELECT sum(number) FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), test_parallel_replicas_automatic_count) format Null"
#run_query_with_custom_key_parallel_replicas "${query_id_base}_0_0" 0 "$whole_table_query"
#run_query_with_custom_key_parallel_replicas "${query_id_base}_0_10M" 10000000 "$whole_table_query"