mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-23 08:02:02 +00:00
Speed up reading uniqState (#41089)
* Speed up reading UniquesHashSet
* Improve uniq serialization tests
This commit is contained in:
parent
fe4a485da8
commit
6dac509739
@ -424,14 +424,30 @@ public:
|
||||
|
||||
alloc(new_size_degree);
|
||||
|
||||
for (size_t i = 0; i < m_size; ++i)
|
||||
if (m_size <= 1)
|
||||
{
|
||||
HashValue x = 0;
|
||||
DB::readIntBinary(x, rb);
|
||||
if (x == 0)
|
||||
has_zero = true;
|
||||
else
|
||||
reinsertImpl(x);
|
||||
for (size_t i = 0; i < m_size; ++i)
|
||||
{
|
||||
HashValue x = 0;
|
||||
DB::readIntBinary(x, rb);
|
||||
if (x == 0)
|
||||
has_zero = true;
|
||||
else
|
||||
reinsertImpl(x);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
auto hs = std::make_unique<HashValue[]>(m_size);
|
||||
rb.readStrict(reinterpret_cast<char *>(hs.get()), m_size * sizeof(HashValue));
|
||||
|
||||
for (size_t i = 0; i < m_size; ++i)
|
||||
{
|
||||
if (hs[i] == 0)
|
||||
has_zero = true;
|
||||
else
|
||||
reinsertImpl(hs[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -458,11 +474,24 @@ public:
|
||||
resize(new_size_degree);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < rhs_size; ++i)
|
||||
if (rhs_size <= 1)
|
||||
{
|
||||
HashValue x = 0;
|
||||
DB::readIntBinary(x, rb);
|
||||
insertHash(x);
|
||||
for (size_t i = 0; i < rhs_size; ++i)
|
||||
{
|
||||
HashValue x = 0;
|
||||
DB::readIntBinary(x, rb);
|
||||
insertHash(x);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
auto hs = std::make_unique<HashValue[]>(rhs_size);
|
||||
rb.readStrict(reinterpret_cast<char *>(hs.get()), rhs_size * sizeof(HashValue));
|
||||
|
||||
for (size_t i = 0; i < rhs_size; ++i)
|
||||
{
|
||||
insertHash(hs[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
58
tests/performance/uniq_stored.xml
Normal file
58
tests/performance/uniq_stored.xml
Normal file
@ -0,0 +1,58 @@
|
||||
<test>
|
||||
<create_query>
|
||||
create table matview_1
|
||||
(
|
||||
a String,
|
||||
b_count AggregateFunction(uniq, UInt64)
|
||||
) Engine=MergeTree partition by tuple()
|
||||
ORDER by tuple()
|
||||
SETTINGS index_granularity = 1024;
|
||||
</create_query>
|
||||
|
||||
<create_query>
|
||||
create table matview_10000
|
||||
(
|
||||
a String,
|
||||
b_count AggregateFunction(uniq, String)
|
||||
) Engine=MergeTree partition by tuple()
|
||||
ORDER by tuple()
|
||||
SETTINGS index_granularity = 1024;
|
||||
</create_query>
|
||||
|
||||
|
||||
<drop_query>DROP TABLE IF EXISTS matview_1</drop_query>
|
||||
<drop_query>DROP TABLE IF EXISTS matview_10000</drop_query>
|
||||
|
||||
<fill_query>
|
||||
INSERT INTO matview_10000
|
||||
SELECT a, uniqState(b) b_count
|
||||
FROM
|
||||
(
|
||||
SELECT toString(rand() % 1000) a, toString(number % 10000) b
|
||||
FROM numbers_mt(20000000)
|
||||
)
|
||||
GROUP BY a
|
||||
SETTINGS max_insert_threads=8;
|
||||
</fill_query>
|
||||
<fill_query>OPTIMIZE TABLE matview_10000 FINAL</fill_query>
|
||||
|
||||
<fill_query>
|
||||
INSERT INTO matview_1
|
||||
SELECT '1', uniqState(number) b_count
|
||||
FROM
|
||||
(
|
||||
SELECT *
|
||||
FROM numbers_mt(2000000)
|
||||
)
|
||||
GROUP BY number
|
||||
SETTINGS max_insert_threads=8;
|
||||
</fill_query>
|
||||
<fill_query>OPTIMIZE TABLE matview_1 FINAL</fill_query>
|
||||
|
||||
<!-- Test with ~10000 elements per state -->
|
||||
<query>select a, uniqMerge(b_count) as b_count from matview_10000 prewhere a='55' group by a FORMAT Null SETTINGS max_threads=1;</query>
|
||||
<query>select uniqMerge(b_count) as b_count from matview_10000 FORMAT Null SETTINGS max_threads=1;</query>
|
||||
|
||||
<!-- Test with ~1 element per state -->
|
||||
<query>select uniqMerge(b_count) as b_count FROM matview_1 FORMAT Null SETTINGS max_threads=1;</query>
|
||||
</test>
|
Loading…
Reference in New Issue
Block a user