mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-19 22:22:00 +00:00
DirectDictionary improve performance of dictHas with duplicate keys
This commit is contained in:
parent
9e9969cea7
commit
6443116e80
@ -171,13 +171,13 @@ ColumnUInt8::Ptr DirectDictionary<dictionary_key_type>::hasKeys(
|
||||
auto requested_keys = requested_keys_extractor.extractAllKeys();
|
||||
size_t requested_keys_size = requested_keys.size();
|
||||
|
||||
HashMap<KeyType, size_t> requested_key_to_index;
|
||||
HashMap<KeyType, PaddedPODArray<size_t>> requested_key_to_index;
|
||||
requested_key_to_index.reserve(requested_keys_size);
|
||||
|
||||
for (size_t i = 0; i < requested_keys.size(); ++i)
|
||||
{
|
||||
auto requested_key = requested_keys[i];
|
||||
requested_key_to_index[requested_key] = i;
|
||||
requested_key_to_index[requested_key].push_back(i);
|
||||
}
|
||||
|
||||
auto result = ColumnUInt8::create(requested_keys_size, false);
|
||||
@ -208,10 +208,13 @@ ColumnUInt8::Ptr DirectDictionary<dictionary_key_type>::hasKeys(
|
||||
const auto * it = requested_key_to_index.find(block_key);
|
||||
assert(it);
|
||||
|
||||
size_t result_data_found_index = it->getMapped();
|
||||
/// block_keys_size cannot be used, due to duplicates.
|
||||
keys_found += !result_data[result_data_found_index];
|
||||
result_data[result_data_found_index] = true;
|
||||
auto & result_data_found_indexes = it->getMapped();
|
||||
for (size_t result_data_found_index : result_data_found_indexes)
|
||||
{
|
||||
/// block_keys_size cannot be used, due to duplicates.
|
||||
keys_found += !result_data[result_data_found_index];
|
||||
result_data[result_data_found_index] = true;
|
||||
}
|
||||
|
||||
block_keys_extractor.rollbackCurrentKey();
|
||||
}
|
||||
|
@ -0,0 +1,62 @@
|
||||
0
|
||||
0
|
||||
0
|
||||
1
|
||||
0
|
||||
1
|
||||
0
|
||||
2
|
||||
1
|
||||
0
|
||||
0 0 1
|
||||
1 0 1
|
||||
2 0 1
|
||||
3 1 0
|
||||
4 0 1
|
||||
5 1 0
|
||||
6 0 1
|
||||
7 2 0
|
||||
8 1 0
|
||||
9 0 1
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
1
|
||||
0
|
||||
1
|
||||
0
|
||||
0
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
1
|
||||
0
|
||||
1
|
||||
0
|
||||
0
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
||||
0
|
||||
1
|
||||
0
|
||||
1
|
||||
0
|
||||
0
|
||||
1
|
||||
value_0
|
||||
value_0
|
||||
value_0
|
||||
UNKNOWN
|
||||
value_0
|
||||
UNKNOWN
|
||||
value_0
|
||||
UNKNOWN
|
||||
UNKNOWN
|
||||
value_0
|
||||
4 0
|
||||
6 1
|
@ -0,0 +1,56 @@
|
||||
-- Tags: no-backward-compatibility-check
|
||||
-- Exercises dictHas and dictGetOrDefault on a dictionary declared with LAYOUT(DIRECT())
-- when the same lookup key appears in several rows.
-- Start from a clean database so that the script can be re-run.
DROP DATABASE IF EXISTS 02366_dictionary_db;
|
||||
CREATE DATABASE 02366_dictionary_db;
|
||||
|
||||
-- Source table that the dictionary below reads from.
CREATE TABLE 02366_dictionary_db.dict_data
|
||||
(
|
||||
id UInt64,
|
||||
val String
|
||||
)
|
||||
ENGINE = Memory;
|
||||
|
||||
-- Rows to look up: `lookup_key` is passed to the dictionary functions,
-- `id` is used for ORDER BY / GROUP BY in the queries below.
CREATE TABLE 02366_dictionary_db.lookup_data
|
||||
(
|
||||
id UInt64,
|
||||
lookup_key UInt64,
|
||||
)
|
||||
ENGINE = Memory;
|
||||
|
||||
-- The dictionary source holds a single key: 0.
INSERT INTO 02366_dictionary_db.dict_data VALUES(0, 'value_0');
|
||||
|
||||
-- Ten lookups over three distinct keys: key 0 six times (present in dict_data),
-- key 1 three times and key 2 once (both absent from dict_data).
INSERT INTO 02366_dictionary_db.lookup_data VALUES(0, 0);
|
||||
INSERT INTO 02366_dictionary_db.lookup_data VALUES(1, 0);
|
||||
INSERT INTO 02366_dictionary_db.lookup_data VALUES(2, 0);
|
||||
INSERT INTO 02366_dictionary_db.lookup_data VALUES(3, 1);
|
||||
INSERT INTO 02366_dictionary_db.lookup_data VALUES(4, 0);
|
||||
INSERT INTO 02366_dictionary_db.lookup_data VALUES(5, 1);
|
||||
INSERT INTO 02366_dictionary_db.lookup_data VALUES(6, 0);
|
||||
INSERT INTO 02366_dictionary_db.lookup_data VALUES(7, 2);
|
||||
INSERT INTO 02366_dictionary_db.lookup_data VALUES(8, 1);
|
||||
INSERT INTO 02366_dictionary_db.lookup_data VALUES(9, 0);
|
||||
|
||||
-- Dictionary under test: keyed by `id`, declared with LAYOUT(DIRECT()) over dict_data.
-- NOTE(review): TABLE is given without a database name; presumably it resolves to the
-- dictionary's own database (02366_dictionary_db) -- confirm.
CREATE DICTIONARY 02366_dictionary_db.dict0
|
||||
(
|
||||
id UInt64,
|
||||
val String
|
||||
)
|
||||
PRIMARY KEY id
|
||||
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data'))
|
||||
LAYOUT(DIRECT());
|
||||
|
||||
-- Echo the lookup keys in id order, then show dictHas next to each row's id and key.
SELECT lookup_key FROM 02366_dictionary_db.lookup_data ORDER BY id ASC;
|
||||
SELECT id, lookup_key, dictHas(02366_dictionary_db.dict0, lookup_key) FROM 02366_dictionary_db.lookup_data ORDER BY id ASC;
|
||||
|
||||
-- Wrapping the aggregation in a subquery seems to make dictHas receive all the lookups as a single block, although even then it isn't guaranteed
|
||||
SELECT dictHas(02366_dictionary_db.dict0, lk) FROM (SELECT any(lookup_key) as lk FROM 02366_dictionary_db.lookup_data group by id ORDER BY id ASC);
|
||||
-- The same goes for applying dictHas directly to the result of this GROUP BY
|
||||
SELECT dictHas(02366_dictionary_db.dict0, any(lookup_key)) FROM 02366_dictionary_db.lookup_data GROUP BY id ORDER BY id ASC;
|
||||
|
||||
|
||||
-- Per-row dictHas and dictGetOrDefault (with 'UNKNOWN' passed as the default value),
-- followed by the number of rows for each dictHas result.
SELECT dictHas(02366_dictionary_db.dict0, lookup_key) FROM 02366_dictionary_db.lookup_data ORDER BY id ASC;
|
||||
SELECT dictGetOrDefault(02366_dictionary_db.dict0, 'val', lookup_key, 'UNKNOWN') FROM 02366_dictionary_db.lookup_data ORDER BY id ASC;
|
||||
SELECT count(), has FROM 02366_dictionary_db.lookup_data group by dictHas(02366_dictionary_db.dict0, lookup_key) as has;
|
||||
|
||||
-- Clean up the dictionary and both tables; the database itself is not dropped here.
DROP DICTIONARY 02366_dictionary_db.dict0;
|
||||
DROP TABLE 02366_dictionary_db.lookup_data;
|
||||
DROP TABLE 02366_dictionary_db.dict_data;
|
Loading…
Reference in New Issue
Block a user