Add ability to ignore index

This commit is contained in:
Boris Kuschel 2023-05-29 13:46:27 -07:00
parent 707abc85f4
commit 068b1fbbcc
4 changed files with 112 additions and 12 deletions

View File

@ -201,6 +201,8 @@ class IColumn;
M(Bool, force_primary_key, false, "Throw an exception if there is primary key in a table, and it is not used.", 0) \
M(Bool, use_skip_indexes, true, "Use data skipping indexes during query execution.", 0) \
M(Bool, use_skip_indexes_if_final, false, "If query has FINAL, then skipping data based on indexes may produce incorrect result, hence disabled by default.", 0) \
M(String, ignore_data_skipping_indices, "", "Comma separated list of strings or literals with the name of the data skipping indices that should be excluded during query execution.", 0) \
\
M(String, force_data_skipping_indices, "", "Comma separated list of strings or literals with the name of the data skipping indices that should be used during query execution, otherwise an exception will be thrown.", 0) \
\
M(Float, max_streams_to_max_threads_ratio, 1, "Allows you to use more sources than the number of threads - to more evenly distribute work across threads. It is assumed that this is a temporary solution, since it will be possible in the future to make the number of sources equal to the number of threads, but for each source to dynamically select available work for itself.", 0) \

View File

@ -16,6 +16,7 @@
#include <Parsers/ASTLiteral.h>
#include <Parsers/ASTFunction.h>
#include <Parsers/ASTSampleRatio.h>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/parseIdentifierOrStringLiteral.h>
#include <Interpreters/ExpressionAnalyzer.h>
#include <Interpreters/InterpreterSelectQuery.h>
@ -948,25 +949,52 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd
std::list<DataSkippingIndexAndCondition> useful_indices;
std::map<std::pair<String, size_t>, MergedDataSkippingIndexAndCondition> merged_indices;
std::unordered_set<std::string> ignored_index_names;
if (use_skip_indexes && settings.ignore_data_skipping_indices.changed)
{
const auto & indices = settings.ignore_data_skipping_indices.toString();
Tokens tokens(indices.data(), &indices[indices.size()], settings.max_query_size);
IParser::Pos pos(tokens, static_cast<unsigned>(settings.max_parser_depth));
Expected expected;
/// Collect the parsed names into an unordered set (rather than a vector of strings) so the membership check below is a simple lookup.
auto parse_single_id_or_literal = [&]
{
String str;
if (!parseIdentifierOrStringLiteral(pos, expected, str))
return false;
ignored_index_names.insert(std::move(str));
return true;
};
if (!ParserList::parseUtil(pos, expected, parse_single_id_or_literal, false))
throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse ignore_data_skipping_indices ('{}')", indices);
}
if (use_skip_indexes)
{
for (const auto & index : metadata_snapshot->getSecondaryIndices())
{
auto index_helper = MergeTreeIndexFactory::instance().get(index);
if (index_helper->isMergeable())
{
auto [it, inserted] = merged_indices.try_emplace({index_helper->index.type, index_helper->getGranularity()});
if (inserted)
it->second.condition = index_helper->createIndexMergedCondition(query_info, metadata_snapshot);
it->second.addIndex(index_helper);
}
else
auto index_helper = MergeTreeIndexFactory::instance().get(index);
if(!ignored_index_names.contains(index.name))
{
auto condition = index_helper->createIndexCondition(query_info, context);
if (!condition->alwaysUnknownOrTrue())
useful_indices.emplace_back(index_helper, condition);
if (index_helper->isMergeable())
{
auto [it, inserted] = merged_indices.try_emplace({index_helper->index.type, index_helper->getGranularity()});
if (inserted)
it->second.condition = index_helper->createIndexMergedCondition(query_info, metadata_snapshot);
it->second.addIndex(index_helper);
}
else
{
auto condition = index_helper->createIndexCondition(query_info, context);
if (!condition->alwaysUnknownOrTrue())
useful_indices.emplace_back(index_helper, condition);
}
}
}
}

View File

@ -0,0 +1,44 @@
1 2 3
1 2 3
1 2 3
Expression ((Projection + Before ORDER BY))
Filter (WHERE)
ReadFromMergeTree (default.data_02771)
Indexes:
PrimaryKey
Condition: true
Parts: 1/1
Granules: 1/1
Skip
Name: x_idx
Description: minmax GRANULARITY 1
Parts: 0/1
Granules: 0/1
Skip
Name: y_idx
Description: minmax GRANULARITY 1
Parts: 0/0
Granules: 0/0
Skip
Name: xy_idx
Description: minmax GRANULARITY 1
Parts: 0/0
Granules: 0/0
Expression ((Projection + Before ORDER BY))
Filter (WHERE)
ReadFromMergeTree (default.data_02771)
Indexes:
PrimaryKey
Condition: true
Parts: 1/1
Granules: 1/1
Skip
Name: x_idx
Description: minmax GRANULARITY 1
Parts: 0/1
Granules: 0/1
Skip
Name: y_idx
Description: minmax GRANULARITY 1
Parts: 0/0
Granules: 0/0

View File

@ -0,0 +1,26 @@
-- Test for the ignore_data_skipping_indices setting: named skip indexes must be excluded from query execution.
DROP TABLE IF EXISTS data_02771;
-- Table with three minmax skip indexes: one per column (x_idx, y_idx) and one over both columns (xy_idx).
CREATE TABLE data_02771
(
key Int,
x Int,
y Int,
INDEX x_idx x TYPE minmax GRANULARITY 1,
INDEX y_idx y TYPE minmax GRANULARITY 1,
INDEX xy_idx (x,y) TYPE minmax GRANULARITY 1
)
Engine=MergeTree()
ORDER BY key;
-- Single row: key = 1, x = 2, y = 3.
INSERT INTO data_02771 VALUES (1, 2, 3);
-- Baseline read without the setting.
SELECT * FROM data_02771;
-- An explicitly set but empty list is rejected: the test expects server error 6 (CANNOT_PARSE_TEXT).
SELECT * FROM data_02771 SETTINGS ignore_data_skipping_indices=''; -- { serverError 6 }
-- Ignoring an existing index must not change the query result.
SELECT * FROM data_02771 SETTINGS ignore_data_skipping_indices='x_idx';
-- A name that matches none of the table's indexes (na_idx) is expected to be accepted without error.
SELECT * FROM data_02771 SETTINGS ignore_data_skipping_indices='na_idx';
-- Ignoring and forcing the same index conflict: the test expects server error 277 (INDEX_NOT_USED), since the forced index is never used.
SELECT * FROM data_02771 WHERE x = 1 AND y = 1 SETTINGS ignore_data_skipping_indices='xy_idx',force_data_skipping_indices='xy_idx' ; -- { serverError 277 }
-- The only row has x = 2, y = 3, so this filter matches nothing; the query must still succeed with xy_idx ignored.
SELECT * FROM data_02771 WHERE x = 1 AND y = 2 SETTINGS ignore_data_skipping_indices='xy_idx';
-- Plan without the setting: x_idx, y_idx and xy_idx are all expected under "Indexes".
EXPLAIN indexes = 1 SELECT * FROM data_02771 WHERE x = 1 AND y = 2;
-- Plan with xy_idx ignored: only x_idx and y_idx are expected to remain.
EXPLAIN indexes = 1 SELECT * FROM data_02771 WHERE x = 1 AND y = 2 SETTINGS ignore_data_skipping_indices='xy_idx';
DROP TABLE data_02771;