From 016d6653285225575fa692e915f3bf6dd07e5f16 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Tue, 28 May 2024 19:40:59 +0100 Subject: [PATCH] Revert "Remove 02151_hash_table_sizes_stats_distributed (fixes broken CI) (#54969)" This reverts commit aa5c02c1be5e84c502b1b44c1164853d1f662199. --- ...sh_table_sizes_stats_distributed.reference | 33 +++++++ ...2151_hash_table_sizes_stats_distributed.sh | 95 +++++++++++++++++++ 2 files changed, 128 insertions(+) create mode 100644 tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.reference create mode 100755 tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.sh diff --git a/tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.reference b/tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.reference new file mode 100644 index 00000000000..0d10114f4ff --- /dev/null +++ b/tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.reference @@ -0,0 +1,33 @@ +1 +1 +-- +1 +1 +-- +1 +1 +-- +1 +1 +-- +1 +1 +1 +1 +-- +1 +1 +-- +1 +1 +1 +1 +-- +1 +1 +-- +1 +1 +1 +1 +-- diff --git a/tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.sh b/tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.sh new file mode 100755 index 00000000000..77b9b2942c5 --- /dev/null +++ b/tests/queries/0_stateless/02151_hash_table_sizes_stats_distributed.sh @@ -0,0 +1,95 @@ +#!/usr/bin/env bash +# Tags: long, distributed, no-tsan, no-debug + +# These tests don't use `current_database = currentDatabase()` condition, because database name isn't propagated during remote queries. + +# shellcheck disable=SC2154 + +CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CURDIR"/../shell_config.sh + + +# tests rely on that all the rows are unique and max_threads divides table_size +table_size=1000005 +max_threads=5 + + +prepare_table() { + table_name="t_hash_table_sizes_stats_$RANDOM$RANDOM" + $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS $table_name;" + if [ -z "$1" ]; then + $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY tuple() SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';" + else + $CLICKHOUSE_CLIENT -q "CREATE TABLE $table_name(number UInt64) Engine=MergeTree() ORDER BY $1 SETTINGS index_granularity = 8192, index_granularity_bytes = '10Mi';" + fi + $CLICKHOUSE_CLIENT -q "SYSTEM STOP MERGES $table_name;" + for ((i = 1; i <= max_threads; i++)); do + cnt=$((table_size / max_threads)) + from=$(((i - 1) * cnt)) + $CLICKHOUSE_CLIENT -q "INSERT INTO $table_name SELECT * FROM numbers($from, $cnt);" + done + $CLICKHOUSE_CLIENT -q "DROP TABLE IF EXISTS ${table_name}_d;" + $CLICKHOUSE_CLIENT -q "CREATE TABLE ${table_name}_d AS $table_name ENGINE = Distributed(test_cluster_two_shards, currentDatabase(), $table_name);" + table_name="${table_name}_d" +} + +prepare_table_with_sorting_key() { + prepare_table "$1" +} + +run_query() { + query_id="${CLICKHOUSE_DATABASE}_hash_table_sizes_stats_$RANDOM$RANDOM" + $CLICKHOUSE_CLIENT --query_id="$query_id" --multiquery -q " + SET max_block_size = $((table_size / 10)); + SET merge_tree_min_rows_for_concurrent_read = 1; + SET max_untracked_memory = 0; + SET prefer_localhost_replica = 1; + $query" +} + +check_preallocated_elements() { + $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" + # rows may be distributed in any way including "everything goes to the one particular thread" + min=$1 + if [ -z "$2" ]; then + max=$1 + else + max=$2 + fi + $CLICKHOUSE_CLIENT --param_query_id="$query_id" -q " + SELECT COUNT(*) + FROM system.query_log + WHERE event_date >= yesterday() AND (query_id = {query_id:String} OR initial_query_id = {query_id:String}) + AND ProfileEvents['AggregationPreallocatedElementsInHashTables'] BETWEEN $min AND $max + GROUP BY query_id" +} + +check_convertion_to_two_level() { + $CLICKHOUSE_CLIENT -q "SYSTEM FLUSH LOGS" + # rows may be distributed in any way including "everything goes to the one particular thread" + $CLICKHOUSE_CLIENT --param_query_id="$query_id" -q " + SELECT SUM(ProfileEvents['AggregationHashTablesInitializedAsTwoLevel']) BETWEEN 1 AND $max_threads + FROM system.query_log + WHERE event_date >= yesterday() AND (query_id = {query_id:String} OR initial_query_id = {query_id:String}) + GROUP BY query_id" +} + +print_border() { + echo "--" +} + + +# shellcheck source=./02151_hash_table_sizes_stats.testcases +source "$CURDIR"/02151_hash_table_sizes_stats.testcases + + +test_one_thread_simple_group_by +test_one_thread_simple_group_by_with_limit +test_one_thread_simple_group_by_with_join_and_subquery +test_several_threads_simple_group_by_with_limit_single_level_ht +test_several_threads_simple_group_by_with_limit_two_level_ht +test_several_threads_simple_group_by_with_limit_and_rollup_single_level_ht +test_several_threads_simple_group_by_with_limit_and_rollup_two_level_ht +test_several_threads_simple_group_by_with_limit_and_cube_single_level_ht +test_several_threads_simple_group_by_with_limit_and_cube_two_level_ht