Add ability to ignore index

2024-09-20 08:40:50 +00:00 · 2023-05-29 13:46:27 -07:00 · 2023-05-29 13:46:27 -07:00 · 068b1fbbcc
commit 068b1fbbcc
parent 707abc85f4
4 changed files with 112 additions and 12 deletions
--- a/src/Core/Settings.h
+++ b/src/Core/Settings.h
@ -201,6 +201,8 @@ class IColumn;
    M(Bool, force_primary_key, false, "Throw an exception if there is primary key in a table, and it is not used.", 0) \
    M(Bool, use_skip_indexes, true, "Use data skipping indexes during query execution.", 0) \
    M(Bool, use_skip_indexes_if_final, false, "If query has FINAL, then skipping data based on indexes may produce incorrect result, hence disabled by default.", 0) \
+    M(String, ignore_data_skipping_indices, "", "Comma separated list of strings or literals with the name of the data skipping indices that should be excluded during query execution.", 0) \
+    \
    M(String, force_data_skipping_indices, "", "Comma separated list of strings or literals with the name of the data skipping indices that should be used during query execution, otherwise an exception will be thrown.", 0) \
    \
    M(Float, max_streams_to_max_threads_ratio, 1, "Allows you to use more sources than the number of threads - to more evenly distribute work across threads. It is assumed that this is a temporary solution, since it will be possible in the future to make the number of sources equal to the number of threads, but for each source to dynamically select available work for itself.", 0) \
--- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
+++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp
@ -16,6 +16,7 @@
 #include <Parsers/ASTLiteral.h>
 #include <Parsers/ASTFunction.h>
 #include <Parsers/ASTSampleRatio.h>
+#include <Parsers/ExpressionListParsers.h>
 #include <Parsers/parseIdentifierOrStringLiteral.h>
 #include <Interpreters/ExpressionAnalyzer.h>
 #include <Interpreters/InterpreterSelectQuery.h>
@ -948,25 +949,52 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd

    std::list<DataSkippingIndexAndCondition> useful_indices;
    std::map<std::pair<String, size_t>, MergedDataSkippingIndexAndCondition> merged_indices;
+    std::unordered_set<std::string> ignored_index_names;
+
+    if (use_skip_indexes && settings.ignore_data_skipping_indices.changed)
+    {
+        const auto & indices = settings.ignore_data_skipping_indices.toString();
+        Tokens tokens(indices.data(), &indices[indices.size()], settings.max_query_size);
+        IParser::Pos pos(tokens, static_cast<unsigned>(settings.max_parser_depth));
+        Expected expected;
+
+        /// Use an unordered list rather than string vector
+        auto parse_single_id_or_literal = [&]
+        {
+            String str;
+            if (!parseIdentifierOrStringLiteral(pos, expected, str))
+                return false;
+
+            ignored_index_names.insert(std::move(str));
+            return true;
+        };
+
+        if (!ParserList::parseUtil(pos, expected, parse_single_id_or_literal, false))
+            throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "Cannot parse ignore_data_skipping_indices ('{}')", indices);
+    }

    if (use_skip_indexes)
    {
        for (const auto & index : metadata_snapshot->getSecondaryIndices())
        {
-            auto index_helper = MergeTreeIndexFactory::instance().get(index);
-            if (index_helper->isMergeable())
-            {
-                auto [it, inserted] = merged_indices.try_emplace({index_helper->index.type, index_helper->getGranularity()});
-                if (inserted)
-                    it->second.condition = index_helper->createIndexMergedCondition(query_info, metadata_snapshot);

-                it->second.addIndex(index_helper);
-            }
-            else
+            auto index_helper = MergeTreeIndexFactory::instance().get(index);
+            if(!ignored_index_names.contains(index.name))
            {
-                auto condition = index_helper->createIndexCondition(query_info, context);
-                if (!condition->alwaysUnknownOrTrue())
-                    useful_indices.emplace_back(index_helper, condition);
+                if (index_helper->isMergeable())
+                {
+                    auto [it, inserted] = merged_indices.try_emplace({index_helper->index.type, index_helper->getGranularity()});
+                    if (inserted)
+                        it->second.condition = index_helper->createIndexMergedCondition(query_info, metadata_snapshot);
+
+                    it->second.addIndex(index_helper);
+                }
+                else
+                {
+                    auto condition = index_helper->createIndexCondition(query_info, context);
+                    if (!condition->alwaysUnknownOrTrue())
+                        useful_indices.emplace_back(index_helper, condition);
+                }
            }
        }
    }
--- a/tests/queries/0_stateless/02771_ignore_data_skipping_indices.reference
+++ b/tests/queries/0_stateless/02771_ignore_data_skipping_indices.reference
@ -0,0 +1,44 @@
+1	2	3
+1	2	3
+1	2	3
+Expression ((Projection + Before ORDER BY))
+  Filter (WHERE)
+    ReadFromMergeTree (default.data_02771)
+    Indexes:
+      PrimaryKey
+        Condition: true
+        Parts: 1/1
+        Granules: 1/1
+      Skip
+        Name: x_idx
+        Description: minmax GRANULARITY 1
+        Parts: 0/1
+        Granules: 0/1
+      Skip
+        Name: y_idx
+        Description: minmax GRANULARITY 1
+        Parts: 0/0
+        Granules: 0/0
+      Skip
+        Name: xy_idx
+        Description: minmax GRANULARITY 1
+        Parts: 0/0
+        Granules: 0/0
+Expression ((Projection + Before ORDER BY))
+  Filter (WHERE)
+    ReadFromMergeTree (default.data_02771)
+    Indexes:
+      PrimaryKey
+        Condition: true
+        Parts: 1/1
+        Granules: 1/1
+      Skip
+        Name: x_idx
+        Description: minmax GRANULARITY 1
+        Parts: 0/1
+        Granules: 0/1
+      Skip
+        Name: y_idx
+        Description: minmax GRANULARITY 1
+        Parts: 0/0
+        Granules: 0/0
--- a/tests/queries/0_stateless/02771_ignore_data_skipping_indices.sql
+++ b/tests/queries/0_stateless/02771_ignore_data_skipping_indices.sql
@ -0,0 +1,26 @@
+DROP TABLE IF EXISTS data_02771;
+CREATE TABLE data_02771
+(
+    key Int,
+    x Int,
+    y Int,
+    INDEX x_idx x TYPE minmax GRANULARITY 1,
+    INDEX y_idx y TYPE minmax GRANULARITY 1,
+    INDEX xy_idx (x,y) TYPE minmax GRANULARITY 1
+)
+Engine=MergeTree()
+ORDER BY key;
+
+INSERT INTO data_02771 VALUES (1, 2, 3);
+
+SELECT * FROM data_02771;
+SELECT * FROM data_02771 SETTINGS ignore_data_skipping_indices=''; -- { serverError 6 }
+SELECT * FROM data_02771 SETTINGS ignore_data_skipping_indices='x_idx';
+SELECT * FROM data_02771 SETTINGS ignore_data_skipping_indices='na_idx';
+
+SELECT * FROM data_02771 WHERE x = 1 AND y = 1 SETTINGS ignore_data_skipping_indices='xy_idx',force_data_skipping_indices='xy_idx' ; -- { serverError 277 }
+SELECT * FROM data_02771 WHERE x = 1 AND y = 2 SETTINGS ignore_data_skipping_indices='xy_idx';
+EXPLAIN indexes = 1 SELECT * FROM data_02771 WHERE x = 1 AND y = 2;
+EXPLAIN indexes = 1 SELECT * FROM data_02771 WHERE x = 1 AND y = 2 SETTINGS ignore_data_skipping_indices='xy_idx';
+
+DROP TABLE data_02771;