Merge pull request #48128 from ClickHouse/rs/follow-up-to-46252

Cosmetic follow-up to #46252
2024-09-20 08:40:50 +00:00 · 2023-03-28 23:11:17 +02:00 · 2023-03-28 23:11:17 +02:00 · b4b492450d
commit b4b492450d
parent 614f8b313e 44d0a8075d
2 changed files with 31 additions and 32 deletions
--- a/docs/en/engines/table-engines/mergetree-family/mergetree.md
+++ b/docs/en/engines/table-engines/mergetree-family/mergetree.md
@ -482,10 +482,8 @@ Indexes of type `set` can be utilized by all functions. The other index types ar
 | hasTokenCaseInsensitiveOrNull (*)                                                                          | ✗           | ✗      | ✗          | ✔          | ✗            | ✗        |

 Functions with a constant argument that is less than ngram size can’t be used by `ngrambf_v1` for query optimization.
-(*) For `hasTokenCaseInsensitve` and `hasTokenCaseInsensitive` to be effective, the data skipping index of type `tokenbf_v1` must be created on lowercased data, for example:
-```
-CREATE TABLE tab (id UInt64, s String, INDEX tok_bf_idx (s, lower(s)) TYPE tokenbf_v1(512, 3, 0) GRANULARITY 1) ... . ) ENGINE = MergeTree()
-```
+
+(*) For `hasTokenCaseInsensitive` and `hasTokenCaseInsensitiveOrNull` to be effective, the `tokenbf_v1` index must be created on lowercased data, for example `INDEX idx (lower(str_col)) TYPE tokenbf_v1(512, 3, 0)`.

 :::note
 Bloom filters can have false positive matches, so the `ngrambf_v1`, `tokenbf_v1`, and `bloom_filter` indexes can not be used for optimizing queries where the result of a function is expected to be false.
--- a/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql
+++ b/tests/queries/0_stateless/00990_hasToken_and_tokenbf.sql
@ -12,6 +12,13 @@ insert into bloom_filter select number+2000, 'abc,def,zzz' from numbers(8);
 insert into bloom_filter select number+3000, 'yyy,uuu' from numbers(1024);
 insert into bloom_filter select number+3000, 'abcdefzzz' from numbers(1024);

+SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'abc,def,zzz'); -- { serverError BAD_ARGUMENTS }
+SELECT max(id) FROM bloom_filter WHERE hasTokenCaseInsensitive(s, 'abc,def,zzz'); -- { serverError BAD_ARGUMENTS }
+
+SELECT max(id) FROM bloom_filter WHERE hasTokenOrNull(s, 'abc,def,zzz');
+SELECT max(id) FROM bloom_filter WHERE hasTokenCaseInsensitiveOrNull(s, 'abc,def,zzz');
+
+-- same as table "bloom_filter" but without index_granularity_bytes
 drop table if exists bloom_filter2;
 create table bloom_filter2
 (
@ -25,12 +32,6 @@ insert into bloom_filter2 select number+2000, 'ABC,def,zzz' from numbers(8);
 insert into bloom_filter2 select number+3000, 'yyy,uuu' from numbers(1024);
 insert into bloom_filter2 select number+3000, 'abcdefzzz' from numbers(1024);

-SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'abc,def,zzz'); -- { serverError BAD_ARGUMENTS }
-SELECT max(id) FROM bloom_filter WHERE hasTokenCaseInsensitive(s, 'abc,def,zzz'); -- { serverError BAD_ARGUMENTS }
-
-SELECT max(id) FROM bloom_filter WHERE hasTokenOrNull(s, 'abc,def,zzz');
-SELECT max(id) FROM bloom_filter WHERE hasTokenCaseInsensitiveOrNull(s, 'abc,def,zzz');
-
 set max_rows_to_read = 16;

 SELECT max(id) FROM bloom_filter WHERE hasToken(s, 'abc');