mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-10 01:25:21 +00:00
Merge pull request #57878 from Jpnock/master
Apply full-text skipping index when using `hasAny()`
This commit is contained in:
commit
7f675ddf80
@ -520,7 +520,7 @@ Indexes of type `set` can be utilized by all functions. The other index types ar
|
||||
| [empty](/docs/en/sql-reference/functions/array-functions#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ |
|
||||
| [notEmpty](/docs/en/sql-reference/functions/array-functions#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ | ✗ |
|
||||
| [has](/docs/en/sql-reference/functions/array-functions#function-has) | ✗ | ✗ | ✔ | ✔ | ✔ | ✔ |
|
||||
| [hasAny](/docs/en/sql-reference/functions/array-functions#function-hasAny) | ✗ | ✗ | ✗ | ✗ | ✔ | ✗ |
|
||||
| [hasAny](/docs/en/sql-reference/functions/array-functions#function-hasAny) | ✗ | ✗ | ✔ | ✔ | ✔ | ✗ |
|
||||
| [hasAll](/docs/en/sql-reference/functions/array-functions#function-hasAll) | ✗ | ✗ | ✗ | ✗ | ✔ | ✗ |
|
||||
| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ | ✔ |
|
||||
| hasTokenOrNull | ✗ | ✗ | ✗ | ✔ | ✗ | ✔ |
|
||||
|
@ -369,6 +369,9 @@ INDEX b (u64 * length(str), i32 + f64 * 100, date, str) TYPE set(100) GRANULARIT
|
||||
| [greaterOrEquals (\>=)](../../../sql-reference/functions/comparison-functions.md#greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [empty](../../../sql-reference/functions/array-functions.md#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [notEmpty](../../../sql-reference/functions/array-functions.md#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [has](../../../sql-reference/functions/array-functions.md#function-has) | ✗ | ✗ | ✔ | ✔ | ✔ |
|
||||
| [hasAny](../../../sql-reference/functions/array-functions.md#function-hasAny) | ✗ | ✗ | ✔ | ✔ | ✔ |
|
||||
| [hasAll](../../../sql-reference/functions/array-functions.md#function-hasAll) | ✗ | ✗ | ✗ | ✗ | ✔ |
|
||||
| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ |
|
||||
|
||||
Функции с постоянным аргументом, который меньше, чем размер ngram, не могут использовать индекс `ngrambf_v1` для оптимизации запроса.
|
||||
|
@ -364,6 +364,9 @@ WHERE 子句中的条件可以包含对某列数据进行运算的函数表达
|
||||
| [greaterOrEquals (\>=)](../../../sql-reference/functions/comparison-functions.md#greaterorequals) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [empty](../../../sql-reference/functions/array-functions.md#function-empty) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [notEmpty](../../../sql-reference/functions/array-functions.md#function-notempty) | ✔ | ✔ | ✗ | ✗ | ✗ |
|
||||
| [has](../../../sql-reference/functions/array-functions.md#function-has) | ✗ | ✗ | ✔ | ✔ | ✔ |
|
||||
| [hasAny](../../../sql-reference/functions/array-functions.md#function-hasAny) | ✗ | ✗ | ✔ | ✔ | ✔ |
|
||||
| [hasAll](../../../sql-reference/functions/array-functions.md#function-hasAll) | ✗ | ✗ | ✗ | ✗ | ✔ |
|
||||
| hasToken | ✗ | ✗ | ✗ | ✔ | ✗ |
|
||||
|
||||
常量参数小于 ngram 大小的函数不能使用 `ngrambf_v1` 进行查询优化。
|
||||
|
@ -201,6 +201,7 @@ bool MergeTreeConditionFullText::alwaysUnknownOrTrue() const
|
||||
|| element.function == RPNElement::FUNCTION_IN
|
||||
|| element.function == RPNElement::FUNCTION_NOT_IN
|
||||
|| element.function == RPNElement::FUNCTION_MULTI_SEARCH
|
||||
|| element.function == RPNElement::FUNCTION_HAS_ANY
|
||||
|| element.function == RPNElement::ALWAYS_FALSE)
|
||||
{
|
||||
rpn_stack.push_back(false);
|
||||
@ -274,7 +275,8 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
|
||||
if (element.function == RPNElement::FUNCTION_NOT_IN)
|
||||
rpn_stack.back() = !rpn_stack.back();
|
||||
}
|
||||
else if (element.function == RPNElement::FUNCTION_MULTI_SEARCH)
|
||||
else if (element.function == RPNElement::FUNCTION_MULTI_SEARCH
|
||||
|| element.function == RPNElement::FUNCTION_HAS_ANY)
|
||||
{
|
||||
std::vector<bool> result(element.set_bloom_filters.back().size(), true);
|
||||
|
||||
@ -395,7 +397,8 @@ bool MergeTreeConditionFullText::extractAtomFromTree(const RPNBuilderTreeNode &
|
||||
function_name.starts_with("hasToken") ||
|
||||
function_name == "startsWith" ||
|
||||
function_name == "endsWith" ||
|
||||
function_name == "multiSearchAny")
|
||||
function_name == "multiSearchAny" ||
|
||||
function_name == "hasAny")
|
||||
{
|
||||
Field const_value;
|
||||
DataTypePtr const_type;
|
||||
@ -574,10 +577,13 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
|
||||
token_extractor->stringToBloomFilter(value.data(), value.size(), *out.bloom_filter);
|
||||
return true;
|
||||
}
|
||||
else if (function_name == "multiSearchAny")
|
||||
else if (function_name == "multiSearchAny"
|
||||
|| function_name == "hasAny")
|
||||
{
|
||||
out.key_column = *key_index;
|
||||
out.function = RPNElement::FUNCTION_MULTI_SEARCH;
|
||||
out.function = function_name == "multiSearchAny" ?
|
||||
RPNElement::FUNCTION_MULTI_SEARCH :
|
||||
RPNElement::FUNCTION_HAS_ANY;
|
||||
|
||||
/// 2d vector is not needed here but is used because already exists for FUNCTION_IN
|
||||
std::vector<std::vector<BloomFilter>> bloom_filters;
|
||||
|
@ -92,6 +92,7 @@ private:
|
||||
FUNCTION_IN,
|
||||
FUNCTION_NOT_IN,
|
||||
FUNCTION_MULTI_SEARCH,
|
||||
FUNCTION_HAS_ANY,
|
||||
FUNCTION_UNKNOWN, /// Can take any value.
|
||||
/// Operators of the logical expression.
|
||||
FUNCTION_NOT,
|
||||
@ -107,13 +108,13 @@ private:
|
||||
: function(function_), key_column(key_column_), bloom_filter(std::move(const_bloom_filter_)) {}
|
||||
|
||||
Function function = FUNCTION_UNKNOWN;
|
||||
/// For FUNCTION_EQUALS, FUNCTION_NOT_EQUALS and FUNCTION_MULTI_SEARCH
|
||||
/// For FUNCTION_EQUALS, FUNCTION_NOT_EQUALS, FUNCTION_MULTI_SEARCH and FUNCTION_HAS_ANY
|
||||
size_t key_column;
|
||||
|
||||
/// For FUNCTION_EQUALS, FUNCTION_NOT_EQUALS
|
||||
std::unique_ptr<BloomFilter> bloom_filter;
|
||||
|
||||
/// For FUNCTION_IN, FUNCTION_NOT_IN and FUNCTION_MULTI_SEARCH
|
||||
/// For FUNCTION_IN, FUNCTION_NOT_IN, FUNCTION_MULTI_SEARCH and FUNCTION_HAS_ANY
|
||||
std::vector<std::vector<BloomFilter>> set_bloom_filters;
|
||||
|
||||
/// For FUNCTION_IN and FUNCTION_NOT_IN
|
||||
|
@ -0,0 +1,17 @@
|
||||
1 ['this is a test','example.com']
|
||||
--
|
||||
1 ['this is a test','example.com']
|
||||
--
|
||||
2 ['another test','another example']
|
||||
--
|
||||
1 ['this is a test','example.com']
|
||||
2 ['another test','another example']
|
||||
--
|
||||
1 ['this is a test','example.com']
|
||||
--
|
||||
1 ['this is a test','example.com']
|
||||
--
|
||||
2 ['another test','another example']
|
||||
--
|
||||
1 ['this is a test','example.com']
|
||||
2 ['another test','another example']
|
@ -0,0 +1,39 @@
|
||||
DROP TABLE IF EXISTS tokenbf_v1_hasany_test;
|
||||
DROP TABLE IF EXISTS ngrambf_v1_hasany_test;
|
||||
|
||||
CREATE TABLE tokenbf_v1_hasany_test
|
||||
(
|
||||
id UInt32,
|
||||
array Array(String),
|
||||
INDEX idx_array_tokenbf_v1 array TYPE tokenbf_v1(512,3,0) GRANULARITY 1,
|
||||
) Engine=MergeTree() ORDER BY id SETTINGS index_granularity = 1;
|
||||
|
||||
CREATE TABLE ngrambf_v1_hasany_test
|
||||
(
|
||||
id UInt32,
|
||||
array Array(String),
|
||||
INDEX idx_array_ngrambf_v1 array TYPE ngrambf_v1(3,512,3,0) GRANULARITY 1,
|
||||
) Engine=MergeTree() ORDER BY id SETTINGS index_granularity = 1;
|
||||
|
||||
INSERT INTO tokenbf_v1_hasany_test VALUES (1, ['this is a test', 'example.com']), (2, ['another test', 'another example']);
|
||||
INSERT INTO ngrambf_v1_hasany_test VALUES (1, ['this is a test', 'example.com']), (2, ['another test', 'another example']);
|
||||
|
||||
SELECT * FROM tokenbf_v1_hasany_test WHERE hasAny(array, ['this is a test']) SETTINGS force_data_skipping_indices='idx_array_tokenbf_v1';
|
||||
SELECT '--';
|
||||
SELECT * FROM tokenbf_v1_hasany_test WHERE hasAny(array, ['example.com']) SETTINGS force_data_skipping_indices='idx_array_tokenbf_v1';
|
||||
SELECT '--';
|
||||
SELECT * FROM tokenbf_v1_hasany_test WHERE hasAny(array, ['another test']) SETTINGS force_data_skipping_indices='idx_array_tokenbf_v1';
|
||||
SELECT '--';
|
||||
SELECT * FROM tokenbf_v1_hasany_test WHERE hasAny(array, ['another example', 'example.com']) ORDER BY id ASC SETTINGS force_data_skipping_indices='idx_array_tokenbf_v1';
|
||||
SELECT '--';
|
||||
|
||||
SELECT * FROM ngrambf_v1_hasany_test WHERE hasAny(array, ['this is a test']) SETTINGS force_data_skipping_indices='idx_array_ngrambf_v1';
|
||||
SELECT '--';
|
||||
SELECT * FROM ngrambf_v1_hasany_test WHERE hasAny(array, ['example.com']) SETTINGS force_data_skipping_indices='idx_array_ngrambf_v1';
|
||||
SELECT '--';
|
||||
SELECT * FROM ngrambf_v1_hasany_test WHERE hasAny(array, ['another test']) SETTINGS force_data_skipping_indices='idx_array_ngrambf_v1';
|
||||
SELECT '--';
|
||||
SELECT * FROM ngrambf_v1_hasany_test WHERE hasAny(array, ['another example', 'example.com']) ORDER BY id ASC SETTINGS force_data_skipping_indices='idx_array_ngrambf_v1';
|
||||
|
||||
DROP TABLE tokenbf_v1_hasany_test;
|
||||
DROP TABLE ngrambf_v1_hasany_test;
|
Loading…
Reference in New Issue
Block a user