don't reserve too much

This commit is contained in:
Nikita Taranov 2024-11-10 19:16:19 +01:00
parent 0a79fd0c40
commit 7156499001
3 changed files with 4 additions and 2 deletions

View File

@@ -169,7 +169,7 @@ public:
return; return;
/// Do not allow big allocations when user set max_joined_block_rows to huge value /// Do not allow big allocations when user set max_joined_block_rows to huge value
size_t reserve_size = std::min<size_t>(max_joined_block_rows, DEFAULT_BLOCK_SIZE * 2); /// rows_to_add size_t reserve_size = std::min<size_t>(max_joined_block_rows, rows_to_add * 2);
if (need_replicate) if (need_replicate)
/// Reserve 10% more space for columns, because some rows can be repeated /// Reserve 10% more space for columns, because some rows can be repeated

View File

@@ -9,6 +9,7 @@
<name>settings</name> <name>settings</name>
<values> <values>
<value>join_algorithm='hash'</value> <value>join_algorithm='hash'</value>
<value>join_algorithm='parallel_hash'</value>
<value>join_algorithm='grace_hash'</value> <value>join_algorithm='grace_hash'</value>
</values> </values>
</substitution> </substitution>

View File

@@ -478,6 +478,8 @@ for query_index in queries_to_run:
client_seconds = time.perf_counter() - start_seconds client_seconds = time.perf_counter() - start_seconds
print(f"client-time\t{query_index}\t{client_seconds}\t{server_seconds}") print(f"client-time\t{query_index}\t{client_seconds}\t{server_seconds}")
median = [statistics.median(t) for t in all_server_times]
print(f"median\t{query_index}\t{median[0]}")
# Run additional profiling queries to collect profile data, but only if test times appeared to be different. # Run additional profiling queries to collect profile data, but only if test times appeared to be different.
# We have to do it after normal runs because otherwise it will affect test statistics too much # We have to do it after normal runs because otherwise it will affect test statistics too much
@@ -491,7 +493,6 @@ for query_index in queries_to_run:
pvalue = stats.ttest_ind( pvalue = stats.ttest_ind(
all_server_times[0], all_server_times[1], equal_var=False all_server_times[0], all_server_times[1], equal_var=False
).pvalue ).pvalue
median = [statistics.median(t) for t in all_server_times]
# Keep this consistent with the value used in report. Should eventually move # Keep this consistent with the value used in report. Should eventually move
# to (median[1] - median[0]) / min(median), which is compatible with "times" # to (median[1] - median[0]) / min(median), which is compatible with "times"
# difference we use in report (max(median) / min(median)). # difference we use in report (max(median) / min(median)).