Merge pull request #34911 from larspars/master

Allow LowCardinality strings for ngrambf_v1/tokenbf_v1 indexes. Fixes #21865
This commit is contained in:
Maksim Kita 2022-03-04 19:17:48 +01:00 committed by GitHub
commit 7ae1f0fa3b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 99 additions and 1 deletions

View File

@ -710,9 +710,14 @@ void bloomFilterIndexValidator(const IndexDescription & index, bool /*attach*/)
const auto & array_type = assert_cast<const DataTypeArray &>(*index_data_type);
data_type = WhichDataType(array_type.getNestedType());
}
else if (data_type.isLowCarnality())
{
const auto & low_cardinality = assert_cast<const DataTypeLowCardinality &>(*index_data_type);
data_type = WhichDataType(low_cardinality.getDictionaryType());
}
if (!data_type.isString() && !data_type.isFixedString())
throw Exception("Bloom filter index can be used only with `String`, `FixedString` column or Array with `String` or `FixedString` values column.", ErrorCodes::INCORRECT_QUERY);
throw Exception("Bloom filter index can be used only with `String`, `FixedString`, `LowCardinality(String)`, `LowCardinality(FixedString)` column or Array with `String` or `FixedString` values column.", ErrorCodes::INCORRECT_QUERY);
}
if (index.type == NgramTokenExtractor::getName())

View File

@ -0,0 +1,24 @@
lc_bf_tokenbf
1 K1 K1ZZZZZZ
2 K2 K2ZZZZZZ
lc_fixed_bf_tokenbf
1 K1 K1ZZZZZZ
2 K2 K2ZZZZZZ
lc_ngram
1 K1 K1ZZZZZZ
2 K2 K2ZZZZZZ
lc_fixed_ngram
1 K1 K1ZZZZZZ
2 K2 K2ZZZZZZ
lc_bf_tokenbf
3 abCD3ef abCD3ef\0
4 abCD4ef abCD4ef\0
lc_fixed_bf_tokenbf
3 abCD3ef abCD3ef\0
4 abCD4ef abCD4ef\0
lc_ngram
3 abCD3ef abCD3ef\0
4 abCD4ef abCD4ef\0
lc_fixed_ngram
3 abCD3ef abCD3ef\0
4 abCD4ef abCD4ef\0

View File

@ -0,0 +1,69 @@
-- Regression test: tokenbf_v1 and ngrambf_v1 skipping indexes declared on
-- LowCardinality(String) and LowCardinality(FixedString(8)) columns.
-- Start from a clean state in case a previous run left the tables behind.
DROP TABLE IF EXISTS bf_tokenbf_lowcard_test;
DROP TABLE IF EXISTS bf_ngram_lowcard_test;
-- Table with one tokenbf_v1 index per LowCardinality column.
-- index_granularity = 1 together with GRANULARITY 1 gives every row its own granule / index entry.
CREATE TABLE bf_tokenbf_lowcard_test
(
row_id UInt32,
lc LowCardinality(String),
lc_fixed LowCardinality(FixedString(8)),
INDEX lc_bf_tokenbf lc TYPE tokenbf_v1(256,2,0) GRANULARITY 1,
INDEX lc_fixed_bf_tokenbf lc_fixed TYPE tokenbf_v1(256,2,0) GRANULARITY 1
) Engine=MergeTree() ORDER BY row_id SETTINGS index_granularity = 1;
-- Same layout, but with ngrambf_v1 indexes (first argument 4 is the n-gram size).
CREATE TABLE bf_ngram_lowcard_test
(
row_id UInt32,
lc LowCardinality(String),
lc_fixed LowCardinality(FixedString(8)),
INDEX lc_ngram lc TYPE ngrambf_v1(4,256,2,0) GRANULARITY 1,
INDEX lc_fixed_ngram lc_fixed TYPE ngrambf_v1(4,256,2,0) GRANULARITY 1
) Engine=MergeTree() ORDER BY row_id SETTINGS index_granularity = 1;
-- Rows 1-2: short keys; the lc_fixed values are exactly 8 characters long.
INSERT INTO bf_tokenbf_lowcard_test VALUES (1, 'K1', 'K1ZZZZZZ'), (2, 'K2', 'K2ZZZZZZ');
INSERT INTO bf_ngram_lowcard_test VALUES (1, 'K1', 'K1ZZZZZZ'), (2, 'K2', 'K2ZZZZZZ');
-- Rows 3-4: the lc_fixed values are 7 characters, one short of FixedString(8).
INSERT INTO bf_tokenbf_lowcard_test VALUES (3, 'abCD3ef', 'abCD3ef'), (4, 'abCD4ef', 'abCD4ef');
INSERT INTO bf_ngram_lowcard_test VALUES (3, 'abCD3ef', 'abCD3ef'), (4, 'abCD4ef', 'abCD4ef');
-- Part 1: patterns without wildcards. Every query names its index in
-- force_data_skipping_indices, so it fails instead of silently scanning if the index is not used.
-- The third query of each group uses a value that was never inserted (K3...).
-- Each SELECT '<index name>' prints a label that separates the groups in the output.
SELECT 'lc_bf_tokenbf';
SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc, 'K1') SETTINGS force_data_skipping_indices='lc_bf_tokenbf';
SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc, 'K2') SETTINGS force_data_skipping_indices='lc_bf_tokenbf';
SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc, 'K3') SETTINGS force_data_skipping_indices='lc_bf_tokenbf';
SELECT 'lc_fixed_bf_tokenbf';
SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc_fixed, 'K1ZZZZZZ') SETTINGS force_data_skipping_indices='lc_fixed_bf_tokenbf';
SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc_fixed, 'K2ZZZZZZ') SETTINGS force_data_skipping_indices='lc_fixed_bf_tokenbf';
SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc_fixed, 'K3ZZZZZZ') SETTINGS force_data_skipping_indices='lc_fixed_bf_tokenbf';
SELECT 'lc_ngram';
SELECT * FROM bf_ngram_lowcard_test WHERE like(lc, 'K1') SETTINGS force_data_skipping_indices='lc_ngram';
SELECT * FROM bf_ngram_lowcard_test WHERE like(lc, 'K2') SETTINGS force_data_skipping_indices='lc_ngram';
SELECT * FROM bf_ngram_lowcard_test WHERE like(lc, 'K3') SETTINGS force_data_skipping_indices='lc_ngram';
SELECT 'lc_fixed_ngram';
SELECT * FROM bf_ngram_lowcard_test WHERE like(lc_fixed, 'K1ZZZZZZ') SETTINGS force_data_skipping_indices='lc_fixed_ngram';
SELECT * FROM bf_ngram_lowcard_test WHERE like(lc_fixed, 'K2ZZZZZZ') SETTINGS force_data_skipping_indices='lc_fixed_ngram';
SELECT * FROM bf_ngram_lowcard_test WHERE like(lc_fixed, 'K3ZZZZZZ') SETTINGS force_data_skipping_indices='lc_fixed_ngram';
-- Part 2: substring patterns ('%...%') against rows 3-4.
-- '%CD5%' occurs in no inserted value, so the third query of each group should return nothing.
SELECT 'lc_bf_tokenbf';
SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc, '%CD3%') SETTINGS force_data_skipping_indices='lc_bf_tokenbf';
SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc, '%CD4%') SETTINGS force_data_skipping_indices='lc_bf_tokenbf';
SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc, '%CD5%') SETTINGS force_data_skipping_indices='lc_bf_tokenbf';
SELECT 'lc_fixed_bf_tokenbf';
SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc_fixed, '%CD3%') SETTINGS force_data_skipping_indices='lc_fixed_bf_tokenbf';
SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc_fixed, '%CD4%') SETTINGS force_data_skipping_indices='lc_fixed_bf_tokenbf';
SELECT * FROM bf_tokenbf_lowcard_test WHERE like(lc_fixed, '%CD5%') SETTINGS force_data_skipping_indices='lc_fixed_bf_tokenbf';
SELECT 'lc_ngram';
SELECT * FROM bf_ngram_lowcard_test WHERE like(lc, '%CD3%') SETTINGS force_data_skipping_indices='lc_ngram';
SELECT * FROM bf_ngram_lowcard_test WHERE like(lc, '%CD4%') SETTINGS force_data_skipping_indices='lc_ngram';
SELECT * FROM bf_ngram_lowcard_test WHERE like(lc, '%CD5%') SETTINGS force_data_skipping_indices='lc_ngram';
SELECT 'lc_fixed_ngram';
SELECT * FROM bf_ngram_lowcard_test WHERE like(lc_fixed, '%CD3%') SETTINGS force_data_skipping_indices='lc_fixed_ngram';
SELECT * FROM bf_ngram_lowcard_test WHERE like(lc_fixed, '%CD4%') SETTINGS force_data_skipping_indices='lc_fixed_ngram';
SELECT * FROM bf_ngram_lowcard_test WHERE like(lc_fixed, '%CD5%') SETTINGS force_data_skipping_indices='lc_fixed_ngram';
-- Clean up.
DROP TABLE bf_tokenbf_lowcard_test;
DROP TABLE bf_ngram_lowcard_test;