Test automatic decision of nº of parallel replicas

parent f5e0c1bd25
commit b9969e8730
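
The test added below drives the automatic decision through the parallel_replicas_min_number_of_rows_per_replica setting. A minimal sketch of a single invocation, not part of this commit, using only flags, table and cluster names that appear in the test itself:

# Sketch only: one query run the way the new test runs them. With
# parallel_replicas_min_number_of_rows_per_replica set, the server picks how many
# of the max_parallel_replicas to actually use based on the estimated number of
# rows to read, and skips parallel replicas entirely when the estimate is too small.
$CLICKHOUSE_CLIENT \
    --query "SELECT sum(number) FROM test_parallel_replicas_automatic_count FORMAT Null" \
    --max_parallel_replicas 3 \
    --prefer_localhost_replica 1 \
    --use_hedged_requests 0 \
    --cluster_for_parallel_replicas 'test_cluster_one_shard_three_replicas_localhost' \
    --allow_experimental_parallel_reading_from_replicas 1 \
    --parallel_replicas_for_non_replicated_merge_tree 1 \
    --parallel_replicas_min_number_of_rows_per_replica 1000000 \
    --allow_experimental_analyzer 0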
@@ -126,7 +126,7 @@
 02721_url_cluster
 02534_s3_cluster_insert_select_schema_inference
 02765_parallel_replicas_final_modifier
-02784_parallel_replicas_automatic_disabling
+02784_parallel_replicas_automatic_decision
 02581_share_big_sets_between_mutation_tasks_long
 02581_share_big_sets_between_multiple_mutations_tasks_long
 00992_system_parts_race_condition_zookeeper_long
@@ -5,6 +5,7 @@ CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
 . "$CUR_DIR"/../shell_config.sh
 
 function has_used_parallel_replicas () {
+    # Not using current_database = '$CLICKHOUSE_DATABASE' as nested parallel queries aren't run with it
     $CLICKHOUSE_CLIENT --query "
         SELECT
             initial_query_id,
@@ -12,7 +13,7 @@ function has_used_parallel_replicas () {
             sumIf(read_rows, is_initial_query) as read_rows,
             sumIf(read_bytes, is_initial_query) as read_bytes
         FROM system.query_log
-        WHERE event_date >= yesterday() and initial_query_id LIKE '$1%' AND current_database = '$CLICKHOUSE_DATABASE'
+        WHERE event_date >= yesterday() and initial_query_id LIKE '$1%'
         GROUP BY initial_query_id
         ORDER BY min(event_time_microseconds) ASC
     FORMAT TSV"
@@ -34,7 +35,6 @@ function run_query_with_pure_parallel_replicas () {
         --allow_experimental_parallel_reading_from_replicas 1 \
         --allow_experimental_analyzer 0
 
-    # Not implemented yet
     $CLICKHOUSE_CLIENT \
         --query "$2" \
         --query_id "${1}_pure_analyzer" \
@@ -0,0 +1,14 @@
02783_automatic_parallel_replicas-default_0_0_pure 3
02783_automatic_parallel_replicas-default_0_10M_pure 0
02783_automatic_parallel_replicas-default_0_6M_pure 0
02783_automatic_parallel_replicas-default_0_5M_pure 2
02783_automatic_parallel_replicas-default_0_1M_pure 3
02783_automatic_parallel_replicas-default_1_0_pure 3
02783_automatic_parallel_replicas-default_1_10M_pure 0
02783_automatic_parallel_replicas-default_1_1M_pure 2
02783_automatic_parallel_replicas-default_1_500k_pure 3
02783_automatic_parallel_replicas-default_2_0_pure 3
02783_automatic_parallel_replicas-default_2_1M_pure 0
02783_automatic_parallel_replicas-default_2_300k_pure 0
02783_automatic_parallel_replicas-default_2_200k_pure 2
02783_automatic_parallel_replicas-default_2_100k_pure 3
tests/queries/0_stateless/02784_parallel_replicas_automatic_decision.sh (new executable file, 125 lines)
@@ -0,0 +1,125 @@
#!/usr/bin/env bash

CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# shellcheck source=../shell_config.sh
. "$CUR_DIR"/../shell_config.sh

function involved_parallel_replicas () {
    # Not using current_database = '$CLICKHOUSE_DATABASE' as nested parallel queries aren't run with it
    $CLICKHOUSE_CLIENT --query "
        SELECT
            initial_query_id,
            (count() - 2) / 2 as number_of_parallel_replicas
        FROM system.query_log
        WHERE event_date >= yesterday()
          AND initial_query_id LIKE '$1%'
        GROUP BY initial_query_id
        ORDER BY min(event_time_microseconds) ASC
    FORMAT TSV"
}

$CLICKHOUSE_CLIENT --query "
    CREATE TABLE test_parallel_replicas_automatic_count
    (
        number Int64,
        p Int64
    )
    ENGINE=MergeTree()
    ORDER BY number
    PARTITION BY p
    SETTINGS index_granularity = 8192 -- Don't randomize it to avoid flakiness
    AS
    SELECT number, number % 2 AS p FROM numbers(2_000_000)
    UNION ALL
    SELECT number, 3 AS p FROM numbers(10_000_000, 8_000_000)
"

function run_query_with_pure_parallel_replicas () {
    # $1 -> query_id
    # $2 -> min rows per replica
    # $3 -> query
    $CLICKHOUSE_CLIENT \
        --query "$3" \
        --query_id "${1}_pure" \
        --max_parallel_replicas 3 \
        --prefer_localhost_replica 1 \
        --use_hedged_requests 0 \
        --cluster_for_parallel_replicas 'test_cluster_one_shard_three_replicas_localhost' \
        --allow_experimental_parallel_reading_from_replicas 1 \
        --parallel_replicas_for_non_replicated_merge_tree 1 \
        --parallel_replicas_min_number_of_rows_per_replica "$2" \
        --allow_experimental_analyzer 0

    # Analyzer: Not implemented yet
    # $CLICKHOUSE_CLIENT \
    #     --query "$3" \
    #     --query_id "${1}_pure_analyzer" \
    #     --max_parallel_replicas 3 \
    #     --prefer_localhost_replica 1 \
    #     --use_hedged_requests 0 \
    #     --cluster_for_parallel_replicas 'test_cluster_one_shard_three_replicas_localhost' \
    #     --allow_experimental_parallel_reading_from_replicas 1 \
    #     --parallel_replicas_for_non_replicated_merge_tree 1 \
    #     --parallel_replicas_min_number_of_rows_per_replica "$2" \
    #     --allow_experimental_analyzer 0
}

function run_query_with_custom_key_parallel_replicas () {
    $CLICKHOUSE_CLIENT \
        --query "$3" \
        --query_id "${1}_custom_key" \
        --max_parallel_replicas 3 \
        --use_hedged_requests 0 \
        --parallel_replicas_custom_key_filter_type 'default' \
        --parallel_replicas_custom_key "$2" \
        --parallel_replicas_for_non_replicated_merge_tree 1 \
        --parallel_replicas_min_number_of_rows_per_replica "$2" \
        --allow_experimental_analyzer 0

    $CLICKHOUSE_CLIENT \
        --query "$3" \
        --query_id "${1}_custom_key_analyzer" \
        --max_parallel_replicas 3 \
        --use_hedged_requests 0 \
        --parallel_replicas_custom_key_filter_type 'default' \
        --parallel_replicas_custom_key "sipHash64(number)" \
        --parallel_replicas_for_non_replicated_merge_tree 1 \
        --parallel_replicas_min_number_of_rows_per_replica "$2" \
        --allow_experimental_analyzer 1
}

query_id_base="02783_automatic_parallel_replicas-$CLICKHOUSE_DATABASE"

#### Reading 10M rows without filters
whole_table_query="SELECT sum(number) FROM test_parallel_replicas_automatic_count format Null"
run_query_with_pure_parallel_replicas "${query_id_base}_0_0" 0 "$whole_table_query"
run_query_with_pure_parallel_replicas "${query_id_base}_0_10M" 10000000 "$whole_table_query"
run_query_with_pure_parallel_replicas "${query_id_base}_0_6M" 6000000 "$whole_table_query" # 1.6 replicas -> 1 replica -> No parallel replicas
run_query_with_pure_parallel_replicas "${query_id_base}_0_5M" 5000000 "$whole_table_query"
run_query_with_pure_parallel_replicas "${query_id_base}_0_1M" 1000000 "$whole_table_query"
#
##### Reading 2M rows without filters as partition (p=3) is pruned completely
query_with_partition_pruning="SELECT sum(number) FROM test_parallel_replicas_automatic_count WHERE p != 3 format Null"
run_query_with_pure_parallel_replicas "${query_id_base}_1_0" 0 "$query_with_partition_pruning"
run_query_with_pure_parallel_replicas "${query_id_base}_1_10M" 10000000 "$query_with_partition_pruning"
run_query_with_pure_parallel_replicas "${query_id_base}_1_1M" 1000000 "$query_with_partition_pruning"
run_query_with_pure_parallel_replicas "${query_id_base}_1_500k" 500000 "$query_with_partition_pruning"

## Reading ~500k rows as index filter should prune granules from partition=1 and partition=2, and drop p3 completely
query_with_index="SELECT sum(number) FROM test_parallel_replicas_automatic_count WHERE number < 500_000 format Null"
run_query_with_pure_parallel_replicas "${query_id_base}_2_0" 0 "$query_with_index"
run_query_with_pure_parallel_replicas "${query_id_base}_2_1M" 1000000 "$query_with_index"
run_query_with_pure_parallel_replicas "${query_id_base}_2_300k" 300000 "$query_with_index"
run_query_with_pure_parallel_replicas "${query_id_base}_2_200k" 200000 "$query_with_index"
run_query_with_pure_parallel_replicas "${query_id_base}_2_100k" 100000 "$query_with_index"

# Custom key parallel replicas: Not implemented
#whole_table_query="SELECT sum(number) FROM cluster(test_cluster_one_shard_three_replicas_localhost, currentDatabase(), test_parallel_replicas_automatic_count) format Null"
#run_query_with_custom_key_parallel_replicas "${query_id_base}_0_0" 0 "$whole_table_query"
#run_query_with_custom_key_parallel_replicas "${query_id_base}_0_10M" 10000000 "$whole_table_query"
#run_query_with_custom_key_parallel_replicas "${query_id_base}_0_6M" 6000000 "$whole_table_query" # 1.6 replicas -> 1 replica -> No parallel replicas
#run_query_with_custom_key_parallel_replicas "${query_id_base}_0_5M" 5000000 "$whole_table_query"
#run_query_with_custom_key_parallel_replicas "${query_id_base}_0_1M" 1000000 "$whole_table_query"

$CLICKHOUSE_CLIENT --query "SYSTEM FLUSH LOGS"
involved_parallel_replicas "${query_id_base}"
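
involved_parallel_replicas reports (count() - 2) / 2 per initial_query_id; since system.query_log normally records a QueryStart and a QueryFinish row for the initial query and for each secondary per-replica query, that value is the number of replicas that actually read data (0 when parallel replicas were not used). A rough sketch, not part of this commit, of the arithmetic the reference values above appear to follow, with estimated_rows being roughly 10M, 2M and 500k rows for the three query groups after partition and index pruning:

# Hypothetical helper, not part of the commit: reproduces the expected counts in
# the reference file, assuming the decision is
#   replicas = min(max_parallel_replicas, floor(estimated_rows / min_rows_per_replica))
# and that anything <= 1 means parallel replicas are not used at all (shown as 0).
function expected_replicas () {
    local estimated_rows=$1 min_rows_per_replica=$2 max_parallel_replicas=3
    if (( min_rows_per_replica == 0 )); then
        echo "$max_parallel_replicas"    # 0 disables the row threshold entirely
        return
    fi
    local n=$(( estimated_rows / min_rows_per_replica ))    # integer division == floor
    (( n > max_parallel_replicas )) && n=$max_parallel_replicas
    (( n <= 1 )) && n=0                                     # a single replica == no parallel replicas
    echo "$n"
}

expected_replicas 10000000 6000000   # 0: 1.6 replicas -> 1 -> disabled (the _0_6M_pure row)
expected_replicas 10000000 5000000   # 2                                (the _0_5M_pure row)
expected_replicas  2000000  500000   # 3: 4 capped at max_parallel_replicas (the _1_500k_pure row)
expected_replicas   500000  200000   # 2                                (the _2_200k_pure row)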
@@ -1,2 +0,0 @@
10
1
@@ -1,15 +0,0 @@
DROP TABLE IF EXISTS test_parallel_replicas_automatic_disabling;
CREATE TABLE test_parallel_replicas_automatic_disabling (n UInt64) ENGINE=MergeTree() ORDER BY tuple();
INSERT INTO test_parallel_replicas_automatic_disabling SELECT * FROM numbers(10);

SYSTEM FLUSH LOGS;

SET skip_unavailable_shards=1, allow_experimental_parallel_reading_from_replicas=1, max_parallel_replicas=3, use_hedged_requests=0, cluster_for_parallel_replicas='parallel_replicas', parallel_replicas_for_non_replicated_merge_tree=1, parallel_replicas_min_number_of_granules_to_enable=10000;
SET send_logs_level='error';
SELECT count() FROM test_parallel_replicas_automatic_disabling WHERE NOT ignore(*);

SYSTEM FLUSH LOGS;

SELECT count() > 0 FROM system.text_log WHERE event_time >= now() - INTERVAL 2 MINUTE AND message LIKE '%Parallel replicas will be disabled, because the estimated number of granules to read%';

DROP TABLE test_parallel_replicas_automatic_disabling;