Revert "Merge pull request #54961 from ClickHouse/remove-test-hash_table_sizes_stats"

This reverts commit 477922617c, reversing
changes made to f23339b4be.
This commit is contained in:
Nikita Taranov 2024-05-28 15:31:17 +01:00
parent bf38e8b3eb
commit a3f4578864
3 changed files with 304 additions and 0 deletions

View File

@ -0,0 +1,21 @@
1
--
1
--
1
--
1
--
1
1
--
1
--
1
1
--
1
--
1
1
--

View File

@ -0,0 +1,88 @@
#!/usr/bin/env bash
# Tags: long, no-tsan
# shellcheck disable=SC2154
# Absolute path of the directory that contains this script; used to locate the
# files sourced from here.
CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# NOTE(review): CLICKHOUSE_CLIENT and CLICKHOUSE_DATABASE are used below but
# never defined in this file; presumably shell_config.sh provides them - confirm.
# shellcheck source=../shell_config.sh
. "$CURDIR"/../shell_config.sh
# The tests rely on all rows being unique and on max_threads dividing
# table_size evenly: prepare_table inserts table_size / max_threads rows
# (1000005 / 5 = 200001) per INSERT.
table_size=1000005
max_threads=5
# Creates a fresh, randomly named MergeTree table and fills it with table_size
# distinct numbers, split into max_threads INSERTs of equal size.
# Globals:
#   CLICKHOUSE_CLIENT (read) - client command; deliberately expanded unquoted,
#                              presumably because it may hold a command plus
#                              options - confirm against shell_config.sh
#   table_size, max_threads (read)
#   table_name (written)     - name of the created table, read by the test cases
# Arguments:
#   $1 - optional ORDER BY expression; tuple() is used when empty or omitted
prepare_table() {
    local sorting_key="${1:-tuple()}"
    # Loop-invariant: every INSERT carries the same number of rows.
    local rows_per_insert=$((table_size / max_threads))
    local part offset

    table_name="t_hash_table_sizes_stats_$RANDOM$RANDOM"
    $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS $table_name;"
    $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY $sorting_key SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';"
    # NOTE(review): merges are stopped before inserting, presumably so that every
    # INSERT below stays a separate part - confirm.
    $CLICKHOUSE_CLIENT -q "SYSTEM STOP MERGES $table_name;"
    # Consecutive, non-overlapping ranges of numbers(): [0, n), [n, 2n), ...
    for ((part = 0; part < max_threads; part++)); do
        offset=$((part * rows_per_insert))
        $CLICKHOUSE_CLIENT -q "INSERT INTO $table_name SELECT * FROM numbers($offset, $rows_per_insert);"
    done
}
# Thin alias for prepare_table that makes the intent of passing a sorting key
# explicit at the call site.
# Arguments:
#   $1 - ORDER BY expression, forwarded unchanged to prepare_table
prepare_table_with_sorting_key() {
    local order_by_expr=$1
    prepare_table "$order_by_expr"
}
# Runs the SQL held in the global `query` under a freshly generated query id,
# preceded by a fixed list of SET statements.
# Globals:
#   CLICKHOUSE_CLIENT, CLICKHOUSE_DATABASE, table_size, query (read)
#   query_id (written) - id of this run; read afterwards by the check_* helpers
run_query() {
    local script
    local -a session_settings

    query_id="${CLICKHOUSE_DATABASE}_hash_table_sizes_stats_$RANDOM$RANDOM"
    session_settings=(
        "SET max_block_size = $((table_size / 10));"
        "SET merge_tree_min_rows_for_concurrent_read = 1;"
        "SET max_untracked_memory = 0;"
        "SET max_size_to_preallocate_for_aggregation = 1e12;"
    )
    # Every statement goes on its own line, each preceded by a newline, with the
    # actual query last.
    printf -v script '\n%s' "${session_settings[@]}" "$query"
    $CLICKHOUSE_CLIENT --query_id="$query_id" --multiquery -q "$script"
}
# Flushes the logs, then prints how many system.query_log rows of the query
# identified by the global `query_id` report a value of
# ProfileEvents['AggregationPreallocatedElementsInHashTables'] inside the given
# inclusive range.
# Globals:
#   CLICKHOUSE_CLIENT, query_id (read)
# Arguments:
#   $1 - lower bound
#   $2 - optional upper bound; defaults to $1 (exact match) when empty or omitted
# Outputs:
#   whatever the client prints for the COUNT(*) query
check_preallocated_elements() {
    # Rows may be distributed between threads in any way, including "everything
    # goes to one particular thread", hence a range rather than a single value.
    local lower_bound=$1
    local upper_bound=${2:-$1}
    local sql

    $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
    printf -v sql '\n%s' \
        "SELECT COUNT(*)" \
        "FROM system.query_log" \
        "WHERE event_date >= yesterday() AND query_id = {query_id:String} AND current_database = currentDatabase()" \
        "AND ProfileEvents['AggregationPreallocatedElementsInHashTables'] BETWEEN $lower_bound AND $upper_bound"
    $CLICKHOUSE_CLIENT --param_query_id="$query_id" -q "$sql"
}
# Flushes the logs, then prints whether the summed
# ProfileEvents['AggregationHashTablesInitializedAsTwoLevel'] of the query
# identified by the global `query_id` lies between 1 and max_threads.
# Globals:
#   CLICKHOUSE_CLIENT, query_id, max_threads (read)
# Outputs:
#   whatever the client prints for the BETWEEN expression
check_convertion_to_two_level() {
    local sql

    # Rows may be distributed between threads in any way, including "everything
    # goes to one particular thread", so any count from 1 up to max_threads is
    # accepted.
    printf -v sql '\n%s' \
        "SELECT SUM(ProfileEvents['AggregationHashTablesInitializedAsTwoLevel']) BETWEEN 1 AND $max_threads" \
        "FROM system.query_log" \
        "WHERE event_date >= yesterday() AND query_id = {query_id:String} AND current_database = currentDatabase()"
    $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS"
    $CLICKHOUSE_CLIENT --param_query_id="$query_id" -q "$sql"
}
# Writes the "--" separator line to stdout; every test case calls it last.
print_border() {
    printf '%s\n' "--"
}
# shellcheck source=./02151_hash_table_sizes_stats.testcases
source "${CURDIR}/02151_hash_table_sizes_stats.testcases"
# Test cases in execution order. Each one prints its check results followed by
# a "--" border, so the order determines the order of the output.
testcases=(
    test_one_thread_simple_group_by
    test_one_thread_simple_group_by_with_limit
    test_one_thread_simple_group_by_with_join_and_subquery
    test_several_threads_simple_group_by_with_limit_single_level_ht
    test_several_threads_simple_group_by_with_limit_two_level_ht
    test_several_threads_simple_group_by_with_limit_and_rollup_single_level_ht
    test_several_threads_simple_group_by_with_limit_and_rollup_two_level_ht
    test_several_threads_simple_group_by_with_limit_and_cube_single_level_ht
    test_several_threads_simple_group_by_with_limit_and_cube_two_level_ht
)
for testcase in "${testcases[@]}"; do
    "$testcase"
done

View File

@ -0,0 +1,195 @@
# Single-threaded GROUP BY over every row of a fresh table; the expected
# preallocation equals table_size exactly.
# Globals:
#   table_size, table_name (read); expected_size_hint, query (written)
test_one_thread_simple_group_by() {
    local attempt

    expected_size_hint=$table_size
    prepare_table
    printf -v query '\n%s' \
        "-- size_hint = $expected_size_hint --" \
        "SELECT number" \
        "FROM $table_name" \
        "GROUP BY number" \
        "SETTINGS max_threads = 1" \
        "FORMAT Null;"
    # The query runs twice; the check below only sees the last run, because
    # run_query overwrites query_id. NOTE(review): presumably the first run
    # records the statistics that the second one consumes - confirm.
    for attempt in 1 2; do
        run_query
    done
    check_preallocated_elements "$expected_size_hint"
    print_border
}
# Single-threaded GROUP BY with LIMIT 5; the expected preallocation is still
# table_size, i.e. the limit does not shrink it.
# Globals:
#   table_size, table_name (read); expected_size_hint, query (written)
test_one_thread_simple_group_by_with_limit() {
    local attempt

    expected_size_hint=$table_size
    prepare_table
    printf -v query '\n%s' \
        "-- size_hint = $expected_size_hint despite the presence of limit --" \
        "SELECT number" \
        "FROM $table_name" \
        "GROUP BY number" \
        "LIMIT 5" \
        "SETTINGS max_threads = 1" \
        "FORMAT Null;"
    # Two runs; only the last one (the current query_id) is inspected below.
    for attempt in 1 2; do
        run_query
    done
    check_preallocated_elements "$expected_size_hint"
    print_border
}
# Single-threaded query with two aggregations: an inner GROUP BY over the whole
# table (limited to table_size / 2 rows) joined back to the table and grouped
# again. The expected preallocation is the sum of both hints,
# table_size + table_size / 2.
# Globals:
#   table_size, table_name (read); expected_size_hint, query (written)
test_one_thread_simple_group_by_with_join_and_subquery() {
    local attempt
    local outer_rows=$((table_size / 2))

    expected_size_hint=$((table_size + table_size / 2))
    prepare_table
    printf -v query '\n%s' \
        "-- expected two size_hints for different keys: for the inner ($table_size) and the outer aggregation ($outer_rows)" \
        "SELECT number" \
        "FROM $table_name AS t1" \
        "JOIN" \
        "(" \
        "SELECT number" \
        "FROM $table_name AS t2" \
        "GROUP BY number" \
        "LIMIT $outer_rows" \
        ") AS t3 USING(number)" \
        "GROUP BY number" \
        "SETTINGS max_threads = 1," \
        "distributed_product_mode = 'local'" \
        "FORMAT Null;"
    # Two runs; only the last one (the current query_id) is inspected below.
    for attempt in 1 2; do
        run_query
    done
    check_preallocated_elements "$expected_size_hint"
    print_border
}
# Multi-threaded GROUP BY with LIMIT 5 where group_by_two_level_threshold is
# set one above the expected size hint. The preallocation is accepted anywhere
# between hint / max_threads and hint * max_threads.
# Globals:
#   table_size, max_threads, table_name (read); expected_size_hint, query (written)
test_several_threads_simple_group_by_with_limit_single_level_ht() {
    local attempt hint_lower_bound hint_upper_bound

    expected_size_hint=$table_size
    prepare_table
    printf -v query '\n%s' \
        "-- size_hint = $expected_size_hint despite the presence of limit --" \
        "SELECT number" \
        "FROM $table_name" \
        "GROUP BY number" \
        "LIMIT 5" \
        "SETTINGS max_threads = $max_threads," \
        "group_by_two_level_threshold = $((expected_size_hint + 1))," \
        "group_by_two_level_threshold_bytes = $((table_size * 1000))" \
        "FORMAT Null;"
    # Two runs; only the last one (the current query_id) is inspected below.
    for attempt in 1 2; do
        run_query
    done
    hint_lower_bound=$((expected_size_hint / max_threads))
    hint_upper_bound=$((expected_size_hint * max_threads))
    check_preallocated_elements "$hint_lower_bound" "$hint_upper_bound"
    print_border
}
# Multi-threaded GROUP BY with LIMIT 5 where group_by_two_level_threshold
# equals the expected size hint. Besides the preallocation range, this also
# checks the AggregationHashTablesInitializedAsTwoLevel counter.
# Globals:
#   table_size, max_threads, table_name (read); expected_size_hint, query (written)
test_several_threads_simple_group_by_with_limit_two_level_ht() {
    local attempt hint_lower_bound hint_upper_bound

    expected_size_hint=$table_size
    prepare_table
    printf -v query '\n%s' \
        "-- size_hint = $expected_size_hint despite the presence of limit --" \
        "SELECT number" \
        "FROM $table_name" \
        "GROUP BY number" \
        "LIMIT 5" \
        "SETTINGS max_threads = $max_threads," \
        "group_by_two_level_threshold = $expected_size_hint," \
        "group_by_two_level_threshold_bytes = $((table_size * 1000))" \
        "FORMAT Null;"
    # Two runs; only the last one (the current query_id) is inspected below.
    for attempt in 1 2; do
        run_query
    done
    check_convertion_to_two_level
    hint_lower_bound=$((expected_size_hint / max_threads))
    hint_upper_bound=$((expected_size_hint * max_threads))
    check_preallocated_elements "$hint_lower_bound" "$hint_upper_bound"
    print_border
}
# Multi-threaded GROUP BY ... WITH ROLLUP and LIMIT 5 where
# group_by_two_level_threshold is set one above the expected size hint. The
# preallocation is accepted anywhere between hint / max_threads and
# hint * max_threads.
# Globals:
#   table_size, max_threads, table_name (read); expected_size_hint, query (written)
test_several_threads_simple_group_by_with_limit_and_rollup_single_level_ht() {
    local attempt hint_lower_bound hint_upper_bound

    expected_size_hint=$table_size
    prepare_table
    printf -v query '\n%s' \
        "-- size_hint = $expected_size_hint despite the presence of limit --" \
        "SELECT number" \
        "FROM $table_name" \
        "GROUP BY number" \
        "WITH ROLLUP" \
        "LIMIT 5" \
        "SETTINGS max_threads = $max_threads," \
        "group_by_two_level_threshold = $((expected_size_hint + 1))," \
        "group_by_two_level_threshold_bytes = $((table_size * 1000))" \
        "FORMAT Null;"
    # Two runs; only the last one (the current query_id) is inspected below.
    for attempt in 1 2; do
        run_query
    done
    hint_lower_bound=$((expected_size_hint / max_threads))
    hint_upper_bound=$((expected_size_hint * max_threads))
    check_preallocated_elements "$hint_lower_bound" "$hint_upper_bound"
    print_border
}
# Multi-threaded GROUP BY ... WITH ROLLUP and LIMIT 5 where
# group_by_two_level_threshold equals the expected size hint. Besides the
# preallocation range, this also checks the
# AggregationHashTablesInitializedAsTwoLevel counter.
# Globals:
#   table_size, max_threads, table_name (read); expected_size_hint, query (written)
test_several_threads_simple_group_by_with_limit_and_rollup_two_level_ht() {
    local attempt hint_lower_bound hint_upper_bound

    expected_size_hint=$table_size
    prepare_table
    printf -v query '\n%s' \
        "-- size_hint = $expected_size_hint despite the presence of limit --" \
        "SELECT number" \
        "FROM $table_name" \
        "GROUP BY number" \
        "WITH ROLLUP" \
        "LIMIT 5" \
        "SETTINGS max_threads = $max_threads," \
        "group_by_two_level_threshold = $expected_size_hint," \
        "group_by_two_level_threshold_bytes = $((table_size * 1000))" \
        "FORMAT Null;"
    # Two runs; only the last one (the current query_id) is inspected below.
    for attempt in 1 2; do
        run_query
    done
    check_convertion_to_two_level
    hint_lower_bound=$((expected_size_hint / max_threads))
    hint_upper_bound=$((expected_size_hint * max_threads))
    check_preallocated_elements "$hint_lower_bound" "$hint_upper_bound"
    print_border
}
# Multi-threaded GROUP BY ... WITH CUBE and LIMIT 5 where
# group_by_two_level_threshold is set one above the expected size hint. The
# preallocation is accepted anywhere between hint / max_threads and
# hint * max_threads.
# Globals:
#   table_size, max_threads, table_name (read); expected_size_hint, query (written)
test_several_threads_simple_group_by_with_limit_and_cube_single_level_ht() {
    local attempt hint_lower_bound hint_upper_bound

    expected_size_hint=$table_size
    prepare_table
    printf -v query '\n%s' \
        "-- size_hint = $expected_size_hint despite the presence of limit --" \
        "SELECT number" \
        "FROM $table_name" \
        "GROUP BY number" \
        "WITH CUBE" \
        "LIMIT 5" \
        "SETTINGS max_threads = $max_threads," \
        "group_by_two_level_threshold = $((expected_size_hint + 1))," \
        "group_by_two_level_threshold_bytes = $((table_size * 1000))" \
        "FORMAT Null;"
    # Two runs; only the last one (the current query_id) is inspected below.
    for attempt in 1 2; do
        run_query
    done
    hint_lower_bound=$((expected_size_hint / max_threads))
    hint_upper_bound=$((expected_size_hint * max_threads))
    check_preallocated_elements "$hint_lower_bound" "$hint_upper_bound"
    print_border
}
# Multi-threaded GROUP BY ... WITH CUBE and LIMIT 5 where
# group_by_two_level_threshold equals the expected size hint. Besides the
# preallocation range, this also checks the
# AggregationHashTablesInitializedAsTwoLevel counter.
# Globals:
#   table_size, max_threads, table_name (read); expected_size_hint, query (written)
test_several_threads_simple_group_by_with_limit_and_cube_two_level_ht() {
    local attempt hint_lower_bound hint_upper_bound

    expected_size_hint=$table_size
    prepare_table
    printf -v query '\n%s' \
        "-- size_hint = $expected_size_hint despite the presence of limit --" \
        "SELECT number" \
        "FROM $table_name" \
        "GROUP BY number" \
        "WITH CUBE" \
        "LIMIT 5" \
        "SETTINGS max_threads = $max_threads," \
        "group_by_two_level_threshold = $expected_size_hint," \
        "group_by_two_level_threshold_bytes = $((table_size * 1000))" \
        "FORMAT Null;"
    # Two runs; only the last one (the current query_id) is inspected below.
    for attempt in 1 2; do
        run_query
    done
    check_convertion_to_two_level
    hint_lower_bound=$((expected_size_hint / max_threads))
    hint_upper_bound=$((expected_size_hint * max_threads))
    check_preallocated_elements "$hint_lower_bound" "$hint_upper_bound"
    print_border
}