Apply preallocation optimisation more carefully (#44455)

* impl

* add perf test

* fix

* review fixes
This commit is contained in:
Nikita Taranov 2023-01-09 13:30:48 +01:00 committed by GitHub
parent 857799fbca
commit 006fdd32d4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 34 additions and 3 deletions

View File

@ -233,7 +233,8 @@ void initDataVariantsWithSizeHint(
stats_collecting_params.max_size_to_preallocate_for_aggregation,
hint->median_size * max_threads);
}
else
/// https://github.com/ClickHouse/ClickHouse/issues/44402#issuecomment-1359920703
else if ((max_threads > 1 && hint->sum_of_sizes > 100'000) || hint->sum_of_sizes > 500'000)
{
const auto adjusted = std::max(lower_limit, hint->median_size);
if (worthConvertToTwoLevel(

View File

@ -0,0 +1,30 @@
<test>
<settings>
<!-- Sets collect_hash_table_stats_during_aggregation to 1 for this test. -->
<!-- NOTE(review): presumably this turns on the size statistics that feed the aggregation preallocation hint; confirm against the setting's documentation. -->
<collect_hash_table_stats_during_aggregation>1</collect_hash_table_stats_during_aggregation>
</settings>
<substitutions>
<!-- Defines the values substituted for {size}: 100000 to 1000000 in steps of 100000, then 1500000 to 3000000 in steps of 500000. -->
<!-- NOTE(review): the range appears chosen to cover both sides of the 100'000 and 500'000 thresholds in the aggregator's preallocation condition; confirm intent with the author. -->
<substitution>
<name>size</name>
<values>
<value>100000</value>
<value>200000</value>
<value>300000</value>
<value>400000</value>
<value>500000</value>
<value>600000</value>
<value>700000</value>
<value>800000</value>
<value>900000</value>
<value>1000000</value>
<value>1500000</value>
<value>2000000</value>
<value>2500000</value>
<value>3000000</value>
</values>
</substitution>
</substitutions>
<!-- Both queries group by `number` over a {size}-parameterised table function and write the result with format Null. -->
<!-- NOTE(review): numbers() vs numbers_mt() presumably contrasts a single-stream source with a multi-threaded one; confirm against the table function documentation. -->
<query>select number from numbers({size}) group by number format Null</query>
<query>select number from numbers_mt({size}) group by number format Null</query>
</test>

View File

@ -11,7 +11,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# tests rely on all rows being unique and on max_threads dividing table_size evenly
table_size=10000
table_size=1000005
max_threads=5

View File

@ -13,7 +13,7 @@ CURDIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
# tests rely on all rows being unique and on max_threads dividing table_size evenly
table_size=10000
table_size=1000005
max_threads=5