From 715649900166b1d1b8aaefce215c9a80e6d60f69 Mon Sep 17 00:00:00 2001
From: Nikita Taranov <nikita.taranov@clickhouse.com>
Date: Sun, 10 Nov 2024 19:16:19 +0100
Subject: [PATCH] HashJoin: don't reserve more than 2 * rows_to_add in AddedColumns

---
 src/Interpreters/HashJoin/AddedColumns.h         | 2 +-
 tests/performance/hashjoin_with_large_output.xml | 1 +
 tests/performance/scripts/perf.py                | 3 ++-
 3 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/Interpreters/HashJoin/AddedColumns.h b/src/Interpreters/HashJoin/AddedColumns.h
index 8316d5df00f..885c1baca8c 100644
--- a/src/Interpreters/HashJoin/AddedColumns.h
+++ b/src/Interpreters/HashJoin/AddedColumns.h
@@ -169,7 +169,7 @@ public:
             return;
 
         /// Do not allow big allocations when user set max_joined_block_rows to huge value
-        size_t reserve_size = std::min<size_t>(max_joined_block_rows, DEFAULT_BLOCK_SIZE * 2); /// rows_to_add
+        size_t reserve_size = std::min<size_t>(max_joined_block_rows, rows_to_add * 2);
 
         if (need_replicate)
             /// Reserve 10% more space for columns, because some rows can be repeated
diff --git a/tests/performance/hashjoin_with_large_output.xml b/tests/performance/hashjoin_with_large_output.xml
index f4b61c15f82..1eb351255d4 100644
--- a/tests/performance/hashjoin_with_large_output.xml
+++ b/tests/performance/hashjoin_with_large_output.xml
@@ -9,6 +9,7 @@
             <name>settings</name>
             <values>
                 <value>join_algorithm='hash'</value>
+                <value>join_algorithm='parallel_hash'</value>
                 <value>join_algorithm='grace_hash'</value>
             </values>
         </substitution>
diff --git a/tests/performance/scripts/perf.py b/tests/performance/scripts/perf.py
index 9931178fcb4..e4a599cc78d 100755
--- a/tests/performance/scripts/perf.py
+++ b/tests/performance/scripts/perf.py
@@ -478,6 +478,8 @@ for query_index in queries_to_run:
 
     client_seconds = time.perf_counter() - start_seconds
     print(f"client-time\t{query_index}\t{client_seconds}\t{server_seconds}")
+    median = [statistics.median(t) for t in all_server_times]
+    print(f"median\t{query_index}\t{median[0]}")
 
     # Run additional profiling queries to collect profile data, but only if test times appeared to be different.
     # We have to do it after normal runs because otherwise it will affect test statistics too much
@@ -491,7 +493,6 @@ for query_index in queries_to_run:
     pvalue = stats.ttest_ind(
         all_server_times[0], all_server_times[1], equal_var=False
     ).pvalue
-    median = [statistics.median(t) for t in all_server_times]
     # Keep this consistent with the value used in report. Should eventually move
     # to (median[1] - median[0]) / min(median), which is compatible with "times"
     # difference we use in report (max(median) / min(median)).