From 715649900166b1d1b8aaefce215c9a80e6d60f69 Mon Sep 17 00:00:00 2001 From: Nikita Taranov Date: Sun, 10 Nov 2024 19:16:19 +0100 Subject: [PATCH] don't reserve too much --- src/Interpreters/HashJoin/AddedColumns.h | 2 +- tests/performance/hashjoin_with_large_output.xml | 1 + tests/performance/scripts/perf.py | 3 ++- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/HashJoin/AddedColumns.h b/src/Interpreters/HashJoin/AddedColumns.h index 8316d5df00f..885c1baca8c 100644 --- a/src/Interpreters/HashJoin/AddedColumns.h +++ b/src/Interpreters/HashJoin/AddedColumns.h @@ -169,7 +169,7 @@ public: return; /// Do not allow big allocations when user set max_joined_block_rows to huge value - size_t reserve_size = std::min(max_joined_block_rows, DEFAULT_BLOCK_SIZE * 2); /// rows_to_add + size_t reserve_size = std::min(max_joined_block_rows, rows_to_add * 2); if (need_replicate) /// Reserve 10% more space for columns, because some rows can be repeated diff --git a/tests/performance/hashjoin_with_large_output.xml b/tests/performance/hashjoin_with_large_output.xml index f4b61c15f82..1eb351255d4 100644 --- a/tests/performance/hashjoin_with_large_output.xml +++ b/tests/performance/hashjoin_with_large_output.xml @@ -9,6 +9,7 @@ settings join_algorithm='hash' + join_algorithm='parallel_hash' join_algorithm='grace_hash' diff --git a/tests/performance/scripts/perf.py b/tests/performance/scripts/perf.py index 9931178fcb4..e4a599cc78d 100755 --- a/tests/performance/scripts/perf.py +++ b/tests/performance/scripts/perf.py @@ -478,6 +478,8 @@ for query_index in queries_to_run: client_seconds = time.perf_counter() - start_seconds print(f"client-time\t{query_index}\t{client_seconds}\t{server_seconds}") + median = [statistics.median(t) for t in all_server_times] + print(f"median\t{query_index}\t{median[0]}") # Run additional profiling queries to collect profile data, but only if test times appeared to be different. # We have to do it after normal runs because otherwise it will affect test statistics too much @@ -491,7 +493,6 @@ for query_index in queries_to_run: pvalue = stats.ttest_ind( all_server_times[0], all_server_times[1], equal_var=False ).pvalue - median = [statistics.median(t) for t in all_server_times] # Keep this consistent with the value used in report. Should eventually move # to (median[1] - median[0]) / min(median), which is compatible with "times" # difference we use in report (max(median) / min(median)).