mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-12-14 18:32:29 +00:00
Some minor adjustments
This commit is contained in:
parent
83d4b72961
commit
6df2548417
@ -1,23 +1,23 @@
|
|||||||
#include <Storages/MergeTree/MergeTreeIndexFullText.h>
|
#include <Storages/MergeTree/MergeTreeIndexFullText.h>
|
||||||
|
|
||||||
#include <Columns/ColumnArray.h>
|
#include <Columns/ColumnArray.h>
|
||||||
#include <DataTypes/DataTypesNumber.h>
|
#include <Common/OptimizedRegularExpression.h>
|
||||||
|
#include <Core/Defines.h>
|
||||||
#include <DataTypes/DataTypeArray.h>
|
#include <DataTypes/DataTypeArray.h>
|
||||||
#include <IO/WriteHelpers.h>
|
#include <DataTypes/DataTypesNumber.h>
|
||||||
#include <IO/ReadHelpers.h>
|
#include <IO/ReadHelpers.h>
|
||||||
|
#include <IO/WriteHelpers.h>
|
||||||
#include <Interpreters/ExpressionActions.h>
|
#include <Interpreters/ExpressionActions.h>
|
||||||
#include <Interpreters/ExpressionAnalyzer.h>
|
#include <Interpreters/ExpressionAnalyzer.h>
|
||||||
#include <Interpreters/TreeRewriter.h>
|
#include <Interpreters/TreeRewriter.h>
|
||||||
#include <Interpreters/misc.h>
|
#include <Interpreters/misc.h>
|
||||||
#include <Storages/MergeTree/MergeTreeData.h>
|
|
||||||
#include <Storages/MergeTree/RPNBuilder.h>
|
|
||||||
#include <Storages/MergeTree/MergeTreeIndexUtils.h>
|
|
||||||
#include <Parsers/ASTIdentifier.h>
|
#include <Parsers/ASTIdentifier.h>
|
||||||
#include <Parsers/ASTLiteral.h>
|
#include <Parsers/ASTLiteral.h>
|
||||||
#include <Parsers/ASTSubquery.h>
|
|
||||||
#include <Parsers/ASTSelectQuery.h>
|
#include <Parsers/ASTSelectQuery.h>
|
||||||
#include <Core/Defines.h>
|
#include <Parsers/ASTSubquery.h>
|
||||||
#include <Common/OptimizedRegularExpression.h>
|
#include <Storages/MergeTree/MergeTreeData.h>
|
||||||
|
#include <Storages/MergeTree/MergeTreeIndexUtils.h>
|
||||||
|
#include <Storages/MergeTree/RPNBuilder.h>
|
||||||
|
|
||||||
#include <Poco/Logger.h>
|
#include <Poco/Logger.h>
|
||||||
|
|
||||||
@ -243,20 +243,6 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
|
|||||||
|
|
||||||
/// Check like in KeyCondition.
|
/// Check like in KeyCondition.
|
||||||
std::vector<BoolMask> rpn_stack;
|
std::vector<BoolMask> rpn_stack;
|
||||||
|
|
||||||
auto multi_funtion_processor = [&rpn_stack, &granule] (const RPNElement & element)
|
|
||||||
{
|
|
||||||
std::vector<bool> result(element.set_bloom_filters.back().size(), true);
|
|
||||||
|
|
||||||
const auto & bloom_filters = element.set_bloom_filters[0];
|
|
||||||
|
|
||||||
for (size_t row = 0; row < bloom_filters.size(); ++row)
|
|
||||||
result[row] = result[row] && granule->bloom_filters[element.key_column].contains(bloom_filters[row]);
|
|
||||||
|
|
||||||
rpn_stack.emplace_back(
|
|
||||||
std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
|
|
||||||
};
|
|
||||||
|
|
||||||
for (const auto & element : rpn)
|
for (const auto & element : rpn)
|
||||||
{
|
{
|
||||||
if (element.function == RPNElement::FUNCTION_UNKNOWN)
|
if (element.function == RPNElement::FUNCTION_UNKNOWN)
|
||||||
@ -294,17 +280,32 @@ bool MergeTreeConditionFullText::mayBeTrueOnGranule(MergeTreeIndexGranulePtr idx
|
|||||||
else if (element.function == RPNElement::FUNCTION_MULTI_SEARCH
|
else if (element.function == RPNElement::FUNCTION_MULTI_SEARCH
|
||||||
|| element.function == RPNElement::FUNCTION_HAS_ANY)
|
|| element.function == RPNElement::FUNCTION_HAS_ANY)
|
||||||
{
|
{
|
||||||
multi_funtion_processor(element);
|
std::vector<bool> result(element.set_bloom_filters.back().size(), true);
|
||||||
|
|
||||||
|
const auto & bloom_filters = element.set_bloom_filters[0];
|
||||||
|
|
||||||
|
for (size_t row = 0; row < bloom_filters.size(); ++row)
|
||||||
|
result[row] = result[row] && granule->bloom_filters[element.key_column].contains(bloom_filters[row]);
|
||||||
|
|
||||||
|
rpn_stack.emplace_back(std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
|
||||||
}
|
}
|
||||||
else if (element.function == RPNElement::FUNCTION_MATCH)
|
else if (element.function == RPNElement::FUNCTION_MATCH)
|
||||||
{
|
{
|
||||||
if (!element.set_bloom_filters.empty())
|
if (!element.set_bloom_filters.empty())
|
||||||
{
|
{
|
||||||
multi_funtion_processor(element);
|
/// Alternative substrings
|
||||||
|
std::vector<bool> result(element.set_bloom_filters.back().size(), true);
|
||||||
|
|
||||||
|
const auto & bloom_filters = element.set_bloom_filters[0];
|
||||||
|
|
||||||
|
for (size_t row = 0; row < bloom_filters.size(); ++row)
|
||||||
|
result[row] = result[row] && granule->bloom_filters[element.key_column].contains(bloom_filters[row]);
|
||||||
|
|
||||||
|
rpn_stack.emplace_back(std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
|
||||||
}
|
}
|
||||||
// If set_bloom_filters is not empty means we got alternative substring
|
|
||||||
else if (element.bloom_filter)
|
else if (element.bloom_filter)
|
||||||
{
|
{
|
||||||
|
/// Required substrings
|
||||||
rpn_stack.emplace_back(granule->bloom_filters[element.key_column].contains(*element.bloom_filter), true);
|
rpn_stack.emplace_back(granule->bloom_filters[element.key_column].contains(*element.bloom_filter), true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -535,38 +536,6 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (function_name == "match")
|
|
||||||
{
|
|
||||||
out.key_column = *key_index;
|
|
||||||
out.function = RPNElement::FUNCTION_MATCH;
|
|
||||||
out.bloom_filter = std::make_unique<BloomFilter>(params);
|
|
||||||
|
|
||||||
auto & string_view = const_value.get<String>();
|
|
||||||
String required_substring;
|
|
||||||
std::vector<String> alternatives;
|
|
||||||
bool tmp_var;
|
|
||||||
OptimizedRegularExpression::analyze(string_view, required_substring, tmp_var, tmp_var, alternatives);
|
|
||||||
|
|
||||||
if (required_substring.empty() && alternatives.empty())
|
|
||||||
return false;
|
|
||||||
|
|
||||||
if (!alternatives.empty())
|
|
||||||
{
|
|
||||||
std::vector<std::vector<BloomFilter>> bloom_filters;
|
|
||||||
bloom_filters.emplace_back();
|
|
||||||
for (const auto & alternative : alternatives)
|
|
||||||
{
|
|
||||||
bloom_filters.back().emplace_back(params);
|
|
||||||
token_extractor->stringToBloomFilter(alternative.data(), alternative.size(), bloom_filters.back().back());
|
|
||||||
}
|
|
||||||
out.set_bloom_filters = std::move(bloom_filters);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
token_extractor->stringToBloomFilter(required_substring.data(), required_substring.size(), *out.bloom_filter);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
else if (function_name == "has")
|
else if (function_name == "has")
|
||||||
{
|
{
|
||||||
out.key_column = *key_index;
|
out.key_column = *key_index;
|
||||||
@ -654,6 +623,39 @@ bool MergeTreeConditionFullText::traverseTreeEquals(
|
|||||||
out.set_bloom_filters = std::move(bloom_filters);
|
out.set_bloom_filters = std::move(bloom_filters);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
else if (function_name == "match")
|
||||||
|
{
|
||||||
|
out.key_column = *key_index;
|
||||||
|
out.function = RPNElement::FUNCTION_MATCH;
|
||||||
|
out.bloom_filter = std::make_unique<BloomFilter>(params);
|
||||||
|
|
||||||
|
auto & value = const_value.get<String>();
|
||||||
|
String required_substring;
|
||||||
|
bool dummy_is_trivial, dummy_required_substring_is_prefix;
|
||||||
|
std::vector<String> alternatives;
|
||||||
|
OptimizedRegularExpression::analyze(value, required_substring, dummy_is_trivial, dummy_required_substring_is_prefix, alternatives);
|
||||||
|
|
||||||
|
if (required_substring.empty() && alternatives.empty())
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/// out.set_bloom_filters means alternatives exist
|
||||||
|
/// out.bloom_filter means required_substring exists
|
||||||
|
if (!alternatives.empty())
|
||||||
|
{
|
||||||
|
std::vector<std::vector<BloomFilter>> bloom_filters;
|
||||||
|
bloom_filters.emplace_back();
|
||||||
|
for (const auto & alternative : alternatives)
|
||||||
|
{
|
||||||
|
bloom_filters.back().emplace_back(params);
|
||||||
|
token_extractor->stringToBloomFilter(alternative.data(), alternative.size(), bloom_filters.back().back());
|
||||||
|
}
|
||||||
|
out.set_bloom_filters = std::move(bloom_filters);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
token_extractor->stringToBloomFilter(required_substring.data(), required_substring.size(), *out.bloom_filter);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,26 @@
|
|||||||
|
1 Hello ClickHouse
|
||||||
|
2 Hello World
|
||||||
|
1 Hello ClickHouse
|
||||||
|
2 Hello World
|
||||||
|
Granules: 6/6
|
||||||
|
Granules: 2/6
|
||||||
|
Granules: 6/6
|
||||||
|
Granules: 2/6
|
||||||
|
---
|
||||||
|
1 Hello ClickHouse
|
||||||
|
2 Hello World
|
||||||
|
6 World Champion
|
||||||
|
1 Hello ClickHouse
|
||||||
|
2 Hello World
|
||||||
|
6 World Champion
|
||||||
|
Granules: 6/6
|
||||||
|
Granules: 3/6
|
||||||
|
Granules: 6/6
|
||||||
|
Granules: 3/6
|
||||||
|
---
|
||||||
|
5 OLAP Database
|
||||||
|
5 OLAP Database
|
||||||
|
Granules: 6/6
|
||||||
|
Granules: 1/6
|
||||||
|
Granules: 6/6
|
||||||
|
Granules: 1/6
|
@ -0,0 +1,107 @@
|
|||||||
|
DROP TABLE IF EXISTS tokenbf_tab;
|
||||||
|
DROP TABLE IF EXISTS ngrambf_tab;
|
||||||
|
|
||||||
|
CREATE TABLE tokenbf_tab
|
||||||
|
(
|
||||||
|
id UInt32,
|
||||||
|
str String,
|
||||||
|
INDEX idx str TYPE tokenbf_v1(256, 2, 0)
|
||||||
|
)
|
||||||
|
ENGINE = MergeTree
|
||||||
|
ORDER BY id
|
||||||
|
SETTINGS index_granularity = 1;
|
||||||
|
|
||||||
|
CREATE TABLE ngrambf_tab
|
||||||
|
(
|
||||||
|
id UInt32,
|
||||||
|
str String,
|
||||||
|
INDEX idx str TYPE ngrambf_v1(3, 256, 2, 0)
|
||||||
|
)
|
||||||
|
ENGINE = MergeTree
|
||||||
|
ORDER BY id
|
||||||
|
SETTINGS index_granularity = 1;
|
||||||
|
|
||||||
|
INSERT INTO tokenbf_tab VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3, 'Good Weather'), (4, 'Say Hello'), (5, 'OLAP Database'), (6, 'World Champion');
|
||||||
|
INSERT INTO ngrambf_tab VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3, 'Good Weather'), (4, 'Say Hello'), (5, 'OLAP Database'), (6, 'World Champion');
|
||||||
|
|
||||||
|
SELECT * FROM tokenbf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id;
|
||||||
|
SELECT * FROM ngrambf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id;
|
||||||
|
|
||||||
|
-- Skip 4/6 granules
|
||||||
|
-- Required string: 'Hello '
|
||||||
|
-- Alternatives: 'Hello ClickHouse', 'Hello World'
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
EXPLAIN PLAN indexes=1
|
||||||
|
SELECT * FROM tokenbf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id
|
||||||
|
)
|
||||||
|
WHERE
|
||||||
|
explain LIKE '%Granules: %';
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
EXPLAIN PLAN indexes=1
|
||||||
|
SELECT * FROM ngrambf_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id
|
||||||
|
)
|
||||||
|
WHERE
|
||||||
|
explain LIKE '%Granules: %';
|
||||||
|
|
||||||
|
SELECT '---';
|
||||||
|
|
||||||
|
SELECT * FROM tokenbf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id;
|
||||||
|
SELECT * FROM ngrambf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id;
|
||||||
|
|
||||||
|
-- Skip 3/6 granules
|
||||||
|
-- Required string: -
|
||||||
|
-- Alternatives: 'ClickHouse', 'World'
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
EXPLAIN PLAN indexes = 1
|
||||||
|
SELECT * FROM tokenbf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id
|
||||||
|
)
|
||||||
|
WHERE
|
||||||
|
explain LIKE '%Granules: %';
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
EXPLAIN PLAN indexes = 1
|
||||||
|
SELECT * FROM ngrambf_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id
|
||||||
|
)
|
||||||
|
WHERE
|
||||||
|
explain LIKE '%Granules: %';
|
||||||
|
|
||||||
|
SELECT '---';
|
||||||
|
|
||||||
|
SELECT * FROM tokenbf_tab WHERE match(str, 'OLAP.*') ORDER BY id;
|
||||||
|
SELECT * FROM ngrambf_tab WHERE match(str, 'OLAP.*') ORDER BY id;
|
||||||
|
|
||||||
|
-- Skip 5/6 granules
|
||||||
|
-- Required string: 'OLAP'
|
||||||
|
-- Alternatives: -
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
EXPLAIN PLAN indexes = 1
|
||||||
|
SELECT * FROM tokenbf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id
|
||||||
|
)
|
||||||
|
WHERE
|
||||||
|
explain LIKE '%Granules: %';
|
||||||
|
|
||||||
|
SELECT *
|
||||||
|
FROM
|
||||||
|
(
|
||||||
|
EXPLAIN PLAN indexes = 1
|
||||||
|
SELECT * FROM ngrambf_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id
|
||||||
|
)
|
||||||
|
WHERE
|
||||||
|
explain LIKE '%Granules: %';
|
||||||
|
|
||||||
|
DROP TABLE tokenbf_tab;
|
||||||
|
DROP TABLE ngrambf_tab;
|
@ -1,10 +0,0 @@
|
|||||||
Granules: 5/5
|
|
||||||
Granules: 2/5
|
|
||||||
|
|
||||||
|
|
||||||
Granules: 5/5
|
|
||||||
Granules: 2/5
|
|
||||||
|
|
||||||
|
|
||||||
Granules: 5/5
|
|
||||||
Granules: 1/5
|
|
@ -1,68 +0,0 @@
|
|||||||
-- Tags: no-parallel
|
|
||||||
|
|
||||||
DROP DATABASE IF EXISTS test_tokenbf_match;
|
|
||||||
|
|
||||||
CREATE DATABASE test_tokenbf_match;
|
|
||||||
|
|
||||||
CREATE TABLE test_tokenbf_match.test_tokenbf
|
|
||||||
(
|
|
||||||
`id` UInt32,
|
|
||||||
`str` String,
|
|
||||||
INDEX str_idx str TYPE tokenbf_v1(256, 2, 0) GRANULARITY 1
|
|
||||||
)
|
|
||||||
ENGINE = MergeTree
|
|
||||||
ORDER BY id
|
|
||||||
SETTINGS index_granularity = 1;
|
|
||||||
|
|
||||||
INSERT INTO test_tokenbf_match.test_tokenbf VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3, 'Hello Github'), (4, 'Hello Cloud'), (5, 'OLAP Database');
|
|
||||||
|
|
||||||
--SKIP 3 GRANUS
|
|
||||||
--Required String: Hello
|
|
||||||
--Alternative String: Hello ClickHouse
|
|
||||||
--Alternative String: Hello World
|
|
||||||
SELECT
|
|
||||||
*
|
|
||||||
FROM
|
|
||||||
(
|
|
||||||
EXPLAIN indexes=1
|
|
||||||
SELECT * FROM test_tokenbf_match.test_tokenbf WHERE match(str, 'Hello (ClickHouse|World)')
|
|
||||||
)
|
|
||||||
WHERE
|
|
||||||
explain like '%Granules%';
|
|
||||||
|
|
||||||
|
|
||||||
SELECT '';
|
|
||||||
SELECT '';
|
|
||||||
|
|
||||||
|
|
||||||
--SKIP 3 GRANUS
|
|
||||||
--No Required String
|
|
||||||
--Alternative String: ClickHouse
|
|
||||||
--Alternative String: World
|
|
||||||
SELECT
|
|
||||||
*
|
|
||||||
FROM
|
|
||||||
(
|
|
||||||
EXPLAIN indexes = 1
|
|
||||||
SELECT * FROM test_tokenbf_match.test_tokenbf where match(str, '(.*?)* (ClickHouse|World)')
|
|
||||||
)
|
|
||||||
WHERE
|
|
||||||
explain like '%Granules%';
|
|
||||||
|
|
||||||
SELECT '';
|
|
||||||
SELECT '';
|
|
||||||
|
|
||||||
--SKIP 4 GRANUS
|
|
||||||
--Required String: OLAP
|
|
||||||
--No Alternative String
|
|
||||||
SELECT
|
|
||||||
*
|
|
||||||
FROM
|
|
||||||
(
|
|
||||||
EXPLAIN indexes = 1
|
|
||||||
SELECT * FROM test_tokenbf_match.test_tokenbf where match(str, 'OLAP (.*?)*')
|
|
||||||
)
|
|
||||||
WHERE
|
|
||||||
explain like '%Granules%';
|
|
||||||
|
|
||||||
DROP DATABASE IF EXISTS test_tokenbf_match;
|
|
Loading…
Reference in New Issue
Block a user