Merge pull request #48128 from ClickHouse/rs/follow-up-to-46252

Cosmetic follow-up to #46252
This commit is contained in:
Robert Schulze 2023-03-28 23:11:17 +02:00 committed by GitHub
commit b4b492450d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 31 additions and 32 deletions

View File

@ -482,10 +482,8 @@ Indexes of type `set` can be utilized by all functions. The other index types ar
| hasTokenCaseInsensitiveOrNull (*) | ✗ | ✗ | ✗ | ✔ | ✗ | ✗ |
Functions with a constant argument that is less than ngram size cant be used by `ngrambf_v1` for query optimization.
(*) For `hasTokenCaseInsensitve` and `hasTokenCaseInsensitive` to be effective, the data skipping index of type `tokenbf_v1` must be created on lowercased data, for example:
```
CREATE TABLE tab (id UInt64, s String, INDEX tok_bf_idx (s, lower(s)) TYPE tokenbf_v1(512, 3, 0) GRANULARITY 1) ... . ) ENGINE = MergeTree()
```
(*) For `hasTokenCaseInsensitve` and `hasTokenCaseInsensitive` to be effective, the `tokenbf_v1` index must be created on lowercased data, for example `INDEX idx (lower(str_col)) TYPE tokenbf_v1(512, 3, 0)`.
:::note
Bloom filters can have false positive matches, so the `ngrambf_v1`, `tokenbf_v1`, and `bloom_filter` indexes can not be used for optimizing queries where the result of a function is expected to be false.

View File

@ -12,6 +12,13 @@ insert into bloom_filter select number+2000, 'abc,def,zzz' from numbers(8);
insert into bloom_filter select number+3000, 'yyy,uuu' from numbers(1024);
insert into bloom_filter select number+3000, 'abcdefzzz' from numbers(1024);
SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'abc,def,zzz'); -- { serverError BAD_ARGUMENTS }
SELECT max(id) FROM bloom_filter WHERE hasTokenCaseInsensitive(s, 'abc,def,zzz'); -- { serverError BAD_ARGUMENTS }
SELECT max(id) FROM bloom_filter WHERE hasTokenOrNull(s, 'abc,def,zzz');
SELECT max(id) FROM bloom_filter WHERE hasTokenCaseInsensitiveOrNull(s, 'abc,def,zzz');
-- as table "bloom_filter" but w/o index_granularity_bytes
drop table if exists bloom_filter2;
create table bloom_filter2
(
@ -25,12 +32,6 @@ insert into bloom_filter2 select number+2000, 'ABC,def,zzz' from numbers(8);
insert into bloom_filter2 select number+3000, 'yyy,uuu' from numbers(1024);
insert into bloom_filter2 select number+3000, 'abcdefzzz' from numbers(1024);
SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'abc,def,zzz'); -- { serverError BAD_ARGUMENTS }
SELECT max(id) FROM bloom_filter WHERE hasTokenCaseInsensitive(s, 'abc,def,zzz'); -- { serverError BAD_ARGUMENTS }
SELECT max(id) FROM bloom_filter WHERE hasTokenOrNull(s, 'abc,def,zzz');
SELECT max(id) FROM bloom_filter WHERE hasTokenCaseInsensitiveOrNull(s, 'abc,def,zzz');
set max_rows_to_read = 16;
SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'abc');