Merge pull request #39385 from jawm/jawm/direct-dicthas

Remove broken optimisation in Direct dictionary dictHas implementation
This commit is contained in:
Maksim Kita 2022-07-21 11:43:48 +02:00 committed by GitHub
commit baea3b01b1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 128 additions and 17 deletions

View File

@ -171,15 +171,6 @@ ColumnUInt8::Ptr DirectDictionary<dictionary_key_type>::hasKeys(
auto requested_keys = requested_keys_extractor.extractAllKeys();
size_t requested_keys_size = requested_keys.size();
HashMap<KeyType, size_t> requested_key_to_index;
requested_key_to_index.reserve(requested_keys_size);
for (size_t i = 0; i < requested_keys.size(); ++i)
{
auto requested_key = requested_keys[i];
requested_key_to_index[requested_key] = i;
}
auto result = ColumnUInt8::create(requested_keys_size, false);
auto & result_data = result->getData();
@ -205,15 +196,17 @@ ColumnUInt8::Ptr DirectDictionary<dictionary_key_type>::hasKeys(
{
auto block_key = block_keys_extractor.extractCurrentKey();
const auto * it = requested_key_to_index.find(block_key);
assert(it);
size_t index;
for (index = 0; index < requested_keys_size; ++index)
{
if (!result_data[index] && requested_keys[index] == block_key)
{
keys_found++;
result_data[index] = true;
size_t result_data_found_index = it->getMapped();
/// block_keys_size cannot be used, due to duplicates.
keys_found += !result_data[result_data_found_index];
result_data[result_data_found_index] = true;
block_keys_extractor.rollbackCurrentKey();
block_keys_extractor.rollbackCurrentKey();
}
}
}
block_key_columns.clear();

View File

@ -0,0 +1,62 @@
0
0
0
1
0
1
0
2
1
0
0 0 1
1 0 1
2 0 1
3 1 0
4 0 1
5 1 0
6 0 1
7 2 0
8 1 0
9 0 1
1
1
1
0
1
0
1
0
0
1
1
1
1
0
1
0
1
0
0
1
1
1
1
0
1
0
1
0
0
1
value_0
value_0
value_0
UNKNOWN
value_0
UNKNOWN
value_0
UNKNOWN
UNKNOWN
value_0
4 0
6 1

View File

@ -0,0 +1,56 @@
-- Tags: no-backward-compatibility-check
-- Exercises dictHas / dictGetOrDefault against a dictionary declared with LAYOUT(DIRECT()),
-- using a lookup column in which the same key appears several times.

-- Start from a clean database so that leftovers from an earlier run cannot interfere.
DROP DATABASE IF EXISTS 02366_dictionary_db;
CREATE DATABASE 02366_dictionary_db;
-- Table named as the dictionary source below; it receives exactly one row (id = 0).
CREATE TABLE 02366_dictionary_db.dict_data
(
id UInt64,
val String
)
ENGINE = Memory;
-- Keys to probe. `id` only fixes the row order; `lookup_key` is the value passed to the dictionary functions.
CREATE TABLE 02366_dictionary_db.lookup_data
(
id UInt64,
lookup_key UInt64,
)
ENGINE = Memory;
-- The only key present in the dictionary source is 0.
INSERT INTO 02366_dictionary_db.dict_data VALUES(0, 'value_0');
-- lookup_key sequence in id order: 0, 0, 0, 1, 0, 1, 0, 2, 1, 0.
-- Key 0 (present) occurs six times; keys 1 and 2 (absent) occur four times in total.
INSERT INTO 02366_dictionary_db.lookup_data VALUES(0, 0);
INSERT INTO 02366_dictionary_db.lookup_data VALUES(1, 0);
INSERT INTO 02366_dictionary_db.lookup_data VALUES(2, 0);
INSERT INTO 02366_dictionary_db.lookup_data VALUES(3, 1);
INSERT INTO 02366_dictionary_db.lookup_data VALUES(4, 0);
INSERT INTO 02366_dictionary_db.lookup_data VALUES(5, 1);
INSERT INTO 02366_dictionary_db.lookup_data VALUES(6, 0);
INSERT INTO 02366_dictionary_db.lookup_data VALUES(7, 2);
INSERT INTO 02366_dictionary_db.lookup_data VALUES(8, 1);
INSERT INTO 02366_dictionary_db.lookup_data VALUES(9, 0);
-- Dictionary under test, keyed by `id`, with the DIRECT layout.
-- NOTE(review): the source names TABLE 'dict_data' without a DB clause; presumably it resolves
-- to 02366_dictionary_db.dict_data -- confirm.
CREATE DICTIONARY 02366_dictionary_db.dict0
(
id UInt64,
val String
)
PRIMARY KEY id
SOURCE(CLICKHOUSE(HOST 'localhost' PORT tcpPort() USER 'default' TABLE 'dict_data'))
LAYOUT(DIRECT());
-- Print the raw keys first, so the output shows the input order the later results line up with.
SELECT lookup_key FROM 02366_dictionary_db.lookup_data ORDER BY id ASC;
-- Row id, key and dictHas result side by side; given the data above, dictHas should be 1 only where lookup_key = 0.
SELECT id, lookup_key, dictHas(02366_dictionary_db.dict0, lookup_key) FROM 02366_dictionary_db.lookup_data ORDER BY id ASC;
-- Nesting this way seems to help it make all the lookups as a single block, although even then it isn't guaranteed
SELECT dictHas(02366_dictionary_db.dict0, lk) FROM (SELECT any(lookup_key) as lk FROM 02366_dictionary_db.lookup_data group by id ORDER BY id ASC);
-- Same with this group by
SELECT dictHas(02366_dictionary_db.dict0, any(lookup_key)) FROM 02366_dictionary_db.lookup_data GROUP BY id ORDER BY id ASC;
-- Plain per-row dictHas over the table, without the aggregation wrappers used above.
SELECT dictHas(02366_dictionary_db.dict0, lookup_key) FROM 02366_dictionary_db.lookup_data ORDER BY id ASC;
-- Value lookup over the same keys: rows with key 0 yield 'value_0', the rest fall back to the 'UNKNOWN' default.
SELECT dictGetOrDefault(02366_dictionary_db.dict0, 'val', lookup_key, 'UNKNOWN') FROM 02366_dictionary_db.lookup_data ORDER BY id ASC;
-- Use dictHas as the grouping key: with the data above there are 4 rows with has = 0 and 6 rows with has = 1.
-- NOTE(review): this query has no ORDER BY, so the order of the two output rows is not fixed by the query text -- confirm it is stable.
SELECT count(), has FROM 02366_dictionary_db.lookup_data group by dictHas(02366_dictionary_db.dict0, lookup_key) as has;
-- Cleanup: drop the dictionary, then both tables.
-- NOTE(review): the database itself is not dropped in the visible part of this script -- confirm whether that is intentional.
DROP DICTIONARY 02366_dictionary_db.dict0;
DROP TABLE 02366_dictionary_db.lookup_data;
DROP TABLE 02366_dictionary_db.dict_data;