Merge pull request #11638 from azat/skip-idx-bloom-filter-fix

Fix bloom filters for String (data skipping indices)
This commit is contained in:
Alexander Kuzmenkov 2020-06-15 18:41:04 +03:00 committed by GitHub
commit e460d7c2bd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 18 additions and 7 deletions

View File

@ -196,18 +196,17 @@ struct BloomFilterHash
const ColumnString::Chars & data = index_column->getChars();
const ColumnString::Offsets & offsets = index_column->getOffsets();
ColumnString::Offset current_offset = pos;
for (size_t index = 0, size = vec.size(); index < size; ++index)
{
ColumnString::Offset current_offset = offsets[index + pos - 1];
size_t length = offsets[index + pos] - current_offset - 1 /* terminating zero */;
UInt64 city_hash = CityHash_v1_0_2::CityHash64(
reinterpret_cast<const char *>(&data[current_offset]), offsets[index + pos] - current_offset - 1);
reinterpret_cast<const char *>(&data[current_offset]), length);
if constexpr (is_first)
vec[index] = city_hash;
else
vec[index] = CityHash_v1_0_2::Hash128to64(CityHash_v1_0_2::uint128(vec[index], city_hash));
current_offset = offsets[index + pos];
}
}
else if (const auto * fixed_string_index_column = typeid_cast<const ColumnFixedString *>(column))

View File

@ -43,7 +43,7 @@ SELECT COUNT() FROM bloom_filter_types_test WHERE f32 = 1 SETTINGS max_rows_to_r
SELECT COUNT() FROM bloom_filter_types_test WHERE f64 = 1 SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM bloom_filter_types_test WHERE date = '1970-01-02' SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM bloom_filter_types_test WHERE date_time = toDateTime('1970-01-01 03:00:01', 'Europe/Moscow') SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM bloom_filter_types_test WHERE str = '1' SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM bloom_filter_types_test WHERE str = '1' SETTINGS max_rows_to_read = 12;
SELECT COUNT() FROM bloom_filter_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 12;
SELECT COUNT() FROM bloom_filter_types_test WHERE str IN ( SELECT str FROM bloom_filter_types_test);
@ -122,7 +122,7 @@ SELECT COUNT() FROM bloom_filter_null_types_test WHERE f32 = 1 SETTINGS max_rows
SELECT COUNT() FROM bloom_filter_null_types_test WHERE f64 = 1 SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM bloom_filter_null_types_test WHERE date = '1970-01-02' SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM bloom_filter_null_types_test WHERE date_time = toDateTime('1970-01-01 03:00:01', 'Europe/Moscow') SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM bloom_filter_null_types_test WHERE str = '1' SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM bloom_filter_null_types_test WHERE str = '1' SETTINGS max_rows_to_read = 12;
SELECT COUNT() FROM bloom_filter_null_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 12;
SELECT COUNT() FROM bloom_filter_null_types_test WHERE isNull(i8);
@ -150,7 +150,7 @@ CREATE TABLE bloom_filter_lc_null_types_test (order_key UInt64, str LowCardinali
INSERT INTO bloom_filter_lc_null_types_test SELECT number AS order_key, toString(number) AS str, toFixedString(toString(number), 5) AS fixed_string FROM system.numbers LIMIT 100;
INSERT INTO bloom_filter_lc_null_types_test SELECT 0 AS order_key, NULL AS str, NULL AS fixed_string;
SELECT COUNT() FROM bloom_filter_lc_null_types_test WHERE str = '1' SETTINGS max_rows_to_read = 6;
SELECT COUNT() FROM bloom_filter_lc_null_types_test WHERE str = '1' SETTINGS max_rows_to_read = 12;
SELECT COUNT() FROM bloom_filter_lc_null_types_test WHERE fixed_string = toFixedString('1', 5) SETTINGS max_rows_to_read = 12;
SELECT COUNT() FROM bloom_filter_lc_null_types_test WHERE isNull(str);

View File

@ -0,0 +1,8 @@
DROP TABLE IF EXISTS test_01307;
CREATE TABLE test_01307 (id UInt64, val String, INDEX ind val TYPE bloom_filter() GRANULARITY 1) ENGINE = MergeTree() ORDER BY id SETTINGS index_granularity = 2;
INSERT INTO test_01307 (id, val) select number as id, toString(number) as val from numbers(4);
SELECT count() FROM test_01307 WHERE identity(val) = '2';
SELECT count() FROM test_01307 WHERE val = '2';
OPTIMIZE TABLE test_01307 FINAL;
SELECT count() FROM test_01307 WHERE identity(val) = '2';
SELECT count() FROM test_01307 WHERE val = '2';