Some fixups

This commit is contained in:
Robert Schulze 2024-01-12 14:05:19 +00:00
parent 4bd6cb9cc9
commit d11ed921be
No known key found for this signature in database
GPG Key ID: 26703B55FB13728A
4 changed files with 75 additions and 30 deletions

View File

@ -249,8 +249,8 @@ bool MergeTreeConditionInverted::alwaysUnknownOrTrue() const
|| element.function == RPNElement::FUNCTION_IN
|| element.function == RPNElement::FUNCTION_NOT_IN
|| element.function == RPNElement::FUNCTION_MULTI_SEARCH
|| element.function == RPNElement::ALWAYS_FALSE
|| element.function == RPNElement::FUNCTION_MATCH)
|| element.function == RPNElement::FUNCTION_MATCH
|| element.function == RPNElement::ALWAYS_FALSE)
{
rpn_stack.push_back(false);
}
@ -317,8 +317,7 @@ bool MergeTreeConditionInverted::mayBeTrueOnGranuleInPart(MergeTreeIndexGranuleP
result[row] = result[row] && granule->gin_filters[key_idx].contains(gin_filters[row], cache_store);
}
rpn_stack.emplace_back(
std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
rpn_stack.emplace_back(std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
if (element.function == RPNElement::FUNCTION_NOT_IN)
rpn_stack.back() = !rpn_stack.back();
}
@ -331,13 +330,13 @@ bool MergeTreeConditionInverted::mayBeTrueOnGranuleInPart(MergeTreeIndexGranuleP
for (size_t row = 0; row < gin_filters.size(); ++row)
result[row] = result[row] && granule->gin_filters[element.key_column].contains(gin_filters[row], cache_store);
rpn_stack.emplace_back(
std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
rpn_stack.emplace_back(std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
}
else if (element.function == RPNElement::FUNCTION_MATCH)
{
if (!element.set_gin_filters.empty())
{
/// Alternative substrings
std::vector<bool> result(element.set_gin_filters.back().size(), true);
const auto & gin_filters = element.set_gin_filters[0];
@ -345,12 +344,12 @@ bool MergeTreeConditionInverted::mayBeTrueOnGranuleInPart(MergeTreeIndexGranuleP
for (size_t row = 0; row < gin_filters.size(); ++row)
result[row] = result[row] && granule->gin_filters[element.key_column].contains(gin_filters[row], cache_store);
rpn_stack.emplace_back(
std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
rpn_stack.emplace_back(std::find(std::cbegin(result), std::cend(result), true) != std::end(result), true);
}
else if (element.gin_filter)
{
rpn_stack.emplace_back(granule->gin_filters[element.key_column].contains(*element.gin_filter, cache_store), true);
}
}
else if (element.function == RPNElement::FUNCTION_NOT)
@ -653,26 +652,26 @@ bool MergeTreeConditionInverted::traverseASTEquals(
out.key_column = key_column_num;
out.function = RPNElement::FUNCTION_MATCH;
String required_substring;
std::vector<String> alternatives;
bool dummy_is_trivial, dummy_required_substring_is_prefix;
auto & value = const_value.get<String>();
String required_substring;
bool dummy_is_trivial, dummy_required_substring_is_prefix;
std::vector<String> alternatives;
OptimizedRegularExpression::analyze(value, required_substring, dummy_is_trivial, dummy_required_substring_is_prefix, alternatives);
if (required_substring.empty() && alternatives.empty())
return false;
/// A non-empty out.set_gin_filters means alternatives exist (it holds one GinFilter per alternative)
/// A set out.gin_filter means required_substring exists
if (!alternatives.empty())
{
std::vector<GinFilters> gin_filters;
gin_filters.emplace_back();
for (const auto & alternative : alternatives)
{
gin_filters.back().emplace_back(params);
token_extractor->stringToGinFilter(alternative.data(), alternative.size(), gin_filters.back().back());
}
out.set_gin_filters = std::move(gin_filters);
}
else

View File

@ -117,6 +117,7 @@ private:
: function(function_), key_column(key_column_), gin_filter(std::move(const_gin_filter_)) {}
Function function = FUNCTION_UNKNOWN;
/// For FUNCTION_EQUALS, FUNCTION_NOT_EQUALS and FUNCTION_MULTI_SEARCH
size_t key_column;

View File

@ -1,14 +1,20 @@
1 Hello ClickHouse
2 Hello World
Granules: 6/6
Granules: 2/6
Granules: 6/6
Granules: 2/6
---
1 Hello ClickHouse
2 Hello World
6 World Champion
Granules: 6/6
Granules: 3/6
Granules: 6/6
Granules: 3/6
---
5 OLAP Database
Granules: 6/6
Granules: 1/6
Granules: 6/6
Granules: 1/6

View File

@ -1,8 +1,8 @@
SET allow_experimental_analyzer = 1;
SET allow_experimental_inverted_index = true;
DROP TABLE IF EXISTS inverted_tab;
CREATE TABLE inverted_tab
DROP TABLE IF EXISTS tab;
CREATE TABLE tab
(
id UInt32,
str String,
@ -12,11 +12,11 @@ ENGINE = MergeTree
ORDER BY id
SETTINGS index_granularity = 1;
INSERT INTO inverted_tab VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3, 'Good Weather'), (4, 'Say Hello'), (5, 'OLAP Database'), (6, 'World Champion');
INSERT INTO tab VALUES (1, 'Hello ClickHouse'), (2, 'Hello World'), (3, 'Good Weather'), (4, 'Say Hello'), (5, 'OLAP Database'), (6, 'World Champion');
SELECT * FROM inverted_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id;
SELECT * FROM tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id;
-- Skip 2/6 granules
-- Read 2/6 granules
-- Required string: 'Hello '
-- Alternatives: 'Hello ClickHouse', 'Hello World'
@ -24,16 +24,29 @@ SELECT *
FROM
(
EXPLAIN PLAN indexes=1
SELECT * FROM inverted_tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id
SELECT * FROM tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id
)
WHERE
explain LIKE '%Granules: %';
explain LIKE '%Granules: %'
SETTINGS
allow_experimental_analyzer = 0;
SELECT *
FROM
(
EXPLAIN PLAN indexes=1
SELECT * FROM tab WHERE match(str, 'Hello (ClickHouse|World)') ORDER BY id
)
WHERE
explain LIKE '%Granules: %'
SETTINGS
allow_experimental_analyzer = 1;
SELECT '---';
SELECT * FROM inverted_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id;
SELECT * FROM tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id;
-- Skip 3/6 granules
-- Read 3/6 granules
-- Required string: -
-- Alternatives: 'ClickHouse', 'World'
@ -41,16 +54,29 @@ SELECT *
FROM
(
EXPLAIN PLAN indexes = 1
SELECT * FROM inverted_tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id
SELECT * FROM tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id
)
WHERE
explain LIKE '%Granules: %';
explain LIKE '%Granules: %'
SETTINGS
allow_experimental_analyzer = 0;
SELECT *
FROM
(
EXPLAIN PLAN indexes = 1
SELECT * FROM tab WHERE match(str, '.*(ClickHouse|World)') ORDER BY id
)
WHERE
explain LIKE '%Granules: %'
SETTINGS
allow_experimental_analyzer = 1;
SELECT '---';
SELECT * FROM inverted_tab WHERE match(str, 'OLAP.*') ORDER BY id;
SELECT * FROM tab WHERE match(str, 'OLAP.*') ORDER BY id;
-- Skip 5/6 granules
-- Read 1/6 granules
-- Required string: 'OLAP'
-- Alternatives: -
@ -58,9 +84,22 @@ SELECT *
FROM
(
EXPLAIN PLAN indexes = 1
SELECT * FROM inverted_tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id
SELECT * FROM tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id
)
WHERE
explain LIKE '%Granules: %';
explain LIKE '%Granules: %'
SETTINGS
allow_experimental_analyzer = 0;
SELECT *
FROM
(
EXPLAIN PLAN indexes = 1
SELECT * FROM tab WHERE match(str, 'OLAP (.*?)*') ORDER BY id
)
WHERE
explain LIKE '%Granules: %'
SETTINGS
allow_experimental_analyzer = 1;
DROP TABLE tab;