mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-22 01:30:51 +00:00
Fix bloom filters for String (data skipping indices)
The bloom filter was broken for the first element if all of the following conditions were satisfied: - the filters are created on INSERT (in this case bloom filter hashing uses offsets; in the case of OPTIMIZE it does not, since it already has granules). - the record is not the first in the block - the record is the first per index_granularity (do not confuse this with the data skipping index GRANULARITY). - the type of the field being indexed is "String" (not FixedString) This is because, in this case, both the length and the *data* pointer computed for that string were incorrect.
This commit is contained in:
parent
fc8da2abd3
commit
7f52b615e0
@ -196,18 +196,17 @@ struct BloomFilterHash
|
||||
const ColumnString::Chars & data = index_column->getChars();
|
||||
const ColumnString::Offsets & offsets = index_column->getOffsets();
|
||||
|
||||
ColumnString::Offset current_offset = pos;
|
||||
for (size_t index = 0, size = vec.size(); index < size; ++index)
|
||||
{
|
||||
ColumnString::Offset current_offset = offsets[index + pos - 1];
|
||||
size_t length = offsets[index + pos] - current_offset - 1 /* terminating zero */;
|
||||
UInt64 city_hash = CityHash_v1_0_2::CityHash64(
|
||||
reinterpret_cast<const char *>(&data[current_offset]), offsets[index + pos] - current_offset - 1);
|
||||
reinterpret_cast<const char *>(&data[current_offset]), length);
|
||||
|
||||
if constexpr (is_first)
|
||||
vec[index] = city_hash;
|
||||
else
|
||||
vec[index] = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(vec[index], city_hash));
|
||||
|
||||
current_offset = offsets[index + pos];
|
||||
}
|
||||
}
|
||||
else if (const auto * fixed_string_index_column = typeid_cast<const ColumnFixedString *>(column))
|
||||
|
@ -0,0 +1,4 @@
|
||||
1
|
||||
1
|
||||
1
|
||||
1
|
@ -0,0 +1,8 @@
|
||||
DROP TABLE IF EXISTS test_01307;
|
||||
CREATE TABLE test_01307 (id UInt64, val String, INDEX ind val TYPE bloom_filter() GRANULARITY 1) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity = 2;
|
||||
INSERT INTO test_01307 (id, val) select number as id, toString(number) as val from numbers(4);
|
||||
SELECT count() FROM test_01307 WHERE identity(val) = '2';
|
||||
SELECT count() FROM test_01307 WHERE val = '2';
|
||||
OPTIMIZE TABLE test_01307 FINAL;
|
||||
SELECT count() FROM test_01307 WHERE identity(val) = '2';
|
||||
SELECT count() FROM test_01307 WHERE val = '2';
|
Loading…
Reference in New Issue
Block a user