Merge pull request #16642 from kitaisreal/bloom-filter-index-added-big-int-and-uuid-support

BloomFilter index added big integers and UUID support
This commit is contained in:
Anton Popov 2020-11-05 15:26:08 +03:00 committed by GitHub
commit bea0984d12
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 55 additions and 4 deletions

View File

@ -38,7 +38,7 @@ struct BloomFilterHash
static UInt64 getNumberTypeHash(const Field & field)
{
/// For negative numbers, convert to the target type first so that the sign bit ends up in the right place.
return field.isNull() ? intHash64(0) : intHash64(ext::bit_cast<UInt64>(FieldType(field.safeGet<FieldGetType>())));
return field.isNull() ? intHash64(0) : DefaultHash64<FieldType>(FieldType(field.safeGet<FieldGetType>()));
}
static UInt64 getStringTypeHash(const Field & field)
@ -79,16 +79,21 @@ struct BloomFilterHash
else if (which.isUInt16()) return build_hash_column(getNumberTypeHash<UInt64, UInt16>(field));
else if (which.isUInt32()) return build_hash_column(getNumberTypeHash<UInt64, UInt32>(field));
else if (which.isUInt64()) return build_hash_column(getNumberTypeHash<UInt64, UInt64>(field));
/// UInt128 must be hashed as UInt128 (not widened to UInt256): the column-side hash uses UInt128,
/// and a different hashed type here would make index lookups miss values that are present.
else if (which.isUInt128()) return build_hash_column(getNumberTypeHash<UInt128, UInt128>(field));
else if (which.isUInt256()) return build_hash_column(getNumberTypeHash<UInt256, UInt256>(field));
else if (which.isInt8()) return build_hash_column(getNumberTypeHash<Int64, Int8>(field));
else if (which.isInt16()) return build_hash_column(getNumberTypeHash<Int64, Int16>(field));
else if (which.isInt32()) return build_hash_column(getNumberTypeHash<Int64, Int32>(field));
else if (which.isInt64()) return build_hash_column(getNumberTypeHash<Int64, Int64>(field));
else if (which.isInt128()) return build_hash_column(getNumberTypeHash<Int128, Int128>(field));
else if (which.isInt256()) return build_hash_column(getNumberTypeHash<Int256, Int256>(field));
else if (which.isEnum8()) return build_hash_column(getNumberTypeHash<Int64, Int8>(field));
else if (which.isEnum16()) return build_hash_column(getNumberTypeHash<Int64, Int16>(field));
else if (which.isDate()) return build_hash_column(getNumberTypeHash<UInt64, UInt16>(field));
else if (which.isDateTime()) return build_hash_column(getNumberTypeHash<UInt64, UInt32>(field));
else if (which.isFloat32()) return build_hash_column(getNumberTypeHash<Float64, Float64>(field));
else if (which.isFloat64()) return build_hash_column(getNumberTypeHash<Float64, Float64>(field));
else if (which.isUUID()) return build_hash_column(getNumberTypeHash<UInt128, UInt128>(field));
else if (which.isString()) return build_hash_column(getStringTypeHash(field));
else if (which.isFixedString()) return build_hash_column(getFixedStringTypeHash(field, data_type));
else throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::BAD_ARGUMENTS);
@ -135,16 +140,21 @@ struct BloomFilterHash
else if (which.isUInt16()) getNumberTypeHash<UInt16, is_first>(column, vec, pos);
else if (which.isUInt32()) getNumberTypeHash<UInt32, is_first>(column, vec, pos);
else if (which.isUInt64()) getNumberTypeHash<UInt64, is_first>(column, vec, pos);
else if (which.isUInt128()) getNumberTypeHash<UInt128, is_first>(column, vec, pos);
else if (which.isUInt256()) getNumberTypeHash<UInt256, is_first>(column, vec, pos);
else if (which.isInt8()) getNumberTypeHash<Int8, is_first>(column, vec, pos);
else if (which.isInt16()) getNumberTypeHash<Int16, is_first>(column, vec, pos);
else if (which.isInt32()) getNumberTypeHash<Int32, is_first>(column, vec, pos);
else if (which.isInt64()) getNumberTypeHash<Int64, is_first>(column, vec, pos);
else if (which.isInt128()) getNumberTypeHash<Int128, is_first>(column, vec, pos);
else if (which.isInt256()) getNumberTypeHash<Int256, is_first>(column, vec, pos);
else if (which.isEnum8()) getNumberTypeHash<Int8, is_first>(column, vec, pos);
else if (which.isEnum16()) getNumberTypeHash<Int16, is_first>(column, vec, pos);
else if (which.isDate()) getNumberTypeHash<UInt16, is_first>(column, vec, pos);
else if (which.isDateTime()) getNumberTypeHash<UInt32, is_first>(column, vec, pos);
else if (which.isFloat32()) getNumberTypeHash<Float32, is_first>(column, vec, pos);
else if (which.isFloat64()) getNumberTypeHash<Float64, is_first>(column, vec, pos);
else if (which.isUUID()) getNumberTypeHash<UInt128, is_first>(column, vec, pos);
else if (which.isString()) getStringTypeHash<is_first>(column, vec, pos);
else if (which.isFixedString()) getStringTypeHash<is_first>(column, vec, pos);
else throw Exception("Unexpected type " + data_type->getName() + " of bloom filter index.", ErrorCodes::BAD_ARGUMENTS);
@ -166,7 +176,7 @@ struct BloomFilterHash
{
for (size_t index = 0, size = vec.size(); index < size; ++index)
{
UInt64 hash = intHash64(ext::bit_cast<UInt64>(Float64(vec_from[index + pos])));
UInt64 hash = DefaultHash64<Float64>(Float64(vec_from[index + pos]));
if constexpr (is_first)
vec[index] = hash;
@ -178,7 +188,7 @@ struct BloomFilterHash
{
for (size_t index = 0, size = vec.size(); index < size; ++index)
{
UInt64 hash = intHash64(ext::bit_cast<UInt64>(vec_from[index + pos]));
UInt64 hash = DefaultHash64<Type>(vec_from[index + pos]);
if constexpr (is_first)
vec[index] = hash;

View File

@ -152,10 +152,14 @@ Field convertFieldToTypeImpl(const Field & src, const IDataType & type, const ID
if (which_type.isUInt16()) return convertNumericType<UInt16>(src, type);
if (which_type.isUInt32()) return convertNumericType<UInt32>(src, type);
if (which_type.isUInt64()) return convertNumericType<UInt64>(src, type);
if (which_type.isUInt128()) return convertNumericType<UInt128>(src, type);
if (which_type.isUInt256()) return convertNumericType<UInt256>(src, type);
if (which_type.isInt8()) return convertNumericType<Int8>(src, type);
if (which_type.isInt16()) return convertNumericType<Int16>(src, type);
if (which_type.isInt32()) return convertNumericType<Int32>(src, type);
if (which_type.isInt64()) return convertNumericType<Int64>(src, type);
if (which_type.isInt128()) return convertNumericType<Int128>(src, type);
if (which_type.isInt256()) return convertNumericType<Int256>(src, type);
if (which_type.isFloat32()) return convertNumericType<Float32>(src, type);
if (which_type.isFloat64()) return convertNumericType<Float64>(src, type);
if (const auto * ptype = typeid_cast<const DataTypeDecimal<Decimal32> *>(&type)) return convertDecimalType(src, *ptype);

View File

@ -85,7 +85,7 @@ static void assertIndexColumnsType(const Block & header)
WhichDataType which(actual_type);
if (!which.isUInt() && !which.isInt() && !which.isString() && !which.isFixedString() && !which.isFloat() &&
!which.isDateOrDateTime() && !which.isEnum())
!which.isDateOrDateTime() && !which.isEnum() && !which.isUUID())
throw Exception("Unexpected type " + type->getName() + " of bloom filter index.",
ErrorCodes::ILLEGAL_COLUMN);
}

View File

@ -0,0 +1,12 @@
1
1
2
1
1
2
1
1
2
00000000-0000-0001-0000-000000000000
00000000-0000-0001-0000-000000000000
00000000-0000-0002-0000-000000000000

View File

@ -0,0 +1,25 @@
-- Exercises a bloom_filter index declared on Int128, Int256, UInt256 and UUID columns.
-- Each section creates the table, inserts three values, runs one equality and one IN
-- query against the indexed column, and drops the table.

-- NOTE(review): presumably required to create the big-integer columns below; confirm.
SET allow_experimental_bigint_types = 1;

-- Int128 column.
CREATE TABLE 01154_test (x Int128, INDEX ix_x x TYPE bloom_filter(0.01) GRANULARITY 1) ENGINE = MergeTree() ORDER BY x SETTINGS index_granularity=8192;
INSERT INTO 01154_test VALUES (1), (2), (3);
SELECT x FROM 01154_test WHERE x = 1;
SELECT x FROM 01154_test WHERE x IN (1, 2);
DROP TABLE 01154_test;

-- Int256 column.
CREATE TABLE 01154_test (x Int256, INDEX ix_x x TYPE bloom_filter(0.01) GRANULARITY 1) ENGINE = MergeTree() ORDER BY x SETTINGS index_granularity=8192;
INSERT INTO 01154_test VALUES (1), (2), (3);
SELECT x FROM 01154_test WHERE x = 1;
SELECT x FROM 01154_test WHERE x IN (1, 2);
DROP TABLE 01154_test;

-- UInt256 column.
CREATE TABLE 01154_test (x UInt256, INDEX ix_x x TYPE bloom_filter(0.01) GRANULARITY 1) ENGINE = MergeTree() ORDER BY x SETTINGS index_granularity=8192;
INSERT INTO 01154_test VALUES (1), (2), (3);
SELECT x FROM 01154_test WHERE x = 1;
SELECT x FROM 01154_test WHERE x IN (1, 2);
DROP TABLE 01154_test;

-- UUID column; values are produced with toUUID() on both the insert and the query side.
CREATE TABLE 01154_test (x UUID, INDEX ix_x x TYPE bloom_filter(0.01) GRANULARITY 1) ENGINE = MergeTree() ORDER BY x SETTINGS index_granularity=8192;
INSERT INTO 01154_test VALUES (toUUID(1)), (toUUID(2)), (toUUID(3));
SELECT x FROM 01154_test WHERE x = toUUID(1);
SELECT x FROM 01154_test WHERE x IN (toUUID(1), toUUID(2));
DROP TABLE 01154_test;