From 75e612fc16544e1902a517d75c2d971f7a260783 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 6 Oct 2020 09:46:12 +0300 Subject: [PATCH] Use full featured parser for force_data_skipping_indices --- docs/en/operations/settings/settings.md | 23 ++++++++++++++++ src/Core/Settings.h | 2 +- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 26 ++++++++++++------- .../01515_force_data_skipping_indices.sql | 13 ++++++---- 4 files changed, 49 insertions(+), 15 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index e901e478605..e7ac9c4a17d 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -74,6 +74,29 @@ If `force_primary_key=1`, ClickHouse checks to see if the query has a primary ke Disables query execution if passed data skipping indices wasn't used. +Consider the following example: + +```sql +CREATE TABLE data +( + key Int, + d1 Int, + d1_null Nullable(Int), + INDEX d1_idx d1 TYPE minmax GRANULARITY 1, + INDEX d1_null_idx assumeNotNull(d1_null) TYPE minmax GRANULARITY 1 +) +Engine=MergeTree() +ORDER BY key; + +SELECT * FROM data_01515; +SELECT * FROM data_01515 SETTINGS force_data_skipping_indices=''; -- query will produce CANNOT_PARSE_TEXT error. +SELECT * FROM data_01515 SETTINGS force_data_skipping_indices='d1_idx'; -- query will produce INDEX_NOT_USED error. +SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices='d1_idx'; -- Ok. +SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices='`d1_idx`'; -- Ok (example of full featured parser). +SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices='`d1_idx`, d1_null_idx'; -- query will produce INDEX_NOT_USED error, since d1_null_idx is not used. +SELECT * FROM data_01515 WHERE d1 = 0 AND assumeNotNull(d1_null) = 0 SETTINGS force_data_skipping_indices='`d1_idx`, d1_null_idx'; -- Ok. +``` + Works with tables in the MergeTree family. ## format\_schema {#format-schema} diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 901c4fd0b03..6d3437defb2 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -135,7 +135,7 @@ class IColumn; \ M(Bool, force_index_by_date, 0, "Throw an exception if there is a partition key in a table, and it is not used.", 0) \ M(Bool, force_primary_key, 0, "Throw an exception if there is primary key in a table, and it is not used.", 0) \ - M(String, force_data_skipping_indices, "", "Comma separated list of data skipping indices that should be used, otherwise an exception will be thrown.", 0) \ + M(String, force_data_skipping_indices, "", "Comma separated list of strings or literals with the name of the data skipping indices that should be used during query execution, otherwise an exception will be thrown.", 0) \ \ M(Float, max_streams_to_max_threads_ratio, 1, "Allows you to use more sources than the number of threads - to more evenly distribute work across threads. It is assumed that this is a temporary solution, since it will be possible in the future to make the number of sources equal to the number of threads, but for each source to dynamically select available work for itself.", 0) \ M(Float, max_streams_multiplier_for_merge_tables, 5, "Ask more streams when reading from Merge table. Streams will be spread across tables that Merge table will use. This allows more even distribution of work across threads and especially helpful when merged tables differ in size.", 0) \ diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 09ab3201085..f30e37d3e31 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -1,5 +1,4 @@ #include /// For calculations related to sampling coefficients. -#include #include #include #include @@ -20,6 +19,7 @@ #include #include #include +#include #include #include @@ -61,6 +61,7 @@ namespace ErrorCodes extern const int ILLEGAL_COLUMN; extern const int ARGUMENT_OUT_OF_BOUND; extern const int TOO_MANY_ROWS; + extern const int CANNOT_PARSE_TEXT; } @@ -556,20 +557,27 @@ Pipe MergeTreeDataSelectExecutor::readFromParts( if (settings.force_data_skipping_indices.changed) { + const auto & indices = settings.force_data_skipping_indices.toString(); + + Strings forced_indices; + { + Tokens tokens(&indices[0], &indices[indices.size()], settings.max_query_size); + IParser::Pos pos(tokens, settings.max_parser_depth); + Expected expected; + if (!parseIdentifiersOrStringLiterals(pos, expected, forced_indices)) + throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, + "Cannot parse force_data_skipping_indices ('{}')", indices); + } + + if (forced_indices.empty()) + throw Exception(ErrorCodes::CANNOT_PARSE_TEXT, "No indices parsed from force_data_skipping_indices ('{}')", indices); + std::unordered_set useful_indices_names; for (const auto & useful_index : useful_indices) useful_indices_names.insert(useful_index.first->index.name); - std::vector forced_indices; - boost::split(forced_indices, - settings.force_data_skipping_indices.toString(), - [](char c){ return c == ','; }); - for (const auto & index_name : forced_indices) { - if (index_name.empty()) - continue; - if (!useful_indices_names.count(index_name)) { throw Exception(ErrorCodes::INDEX_NOT_USED, diff --git a/tests/queries/0_stateless/01515_force_data_skipping_indices.sql b/tests/queries/0_stateless/01515_force_data_skipping_indices.sql index 7a78cfe5ab5..5e45018707d 100644 --- a/tests/queries/0_stateless/01515_force_data_skipping_indices.sql +++ b/tests/queries/0_stateless/01515_force_data_skipping_indices.sql @@ -10,19 +10,22 @@ CREATE TABLE data_01515 Engine=MergeTree() ORDER BY key; -SELECT * FROM data_01515 SETTINGS force_data_skipping_indices=''; +SELECT * FROM data_01515; +SELECT * FROM data_01515 SETTINGS force_data_skipping_indices=''; -- { serverError 6 } SELECT * FROM data_01515 SETTINGS force_data_skipping_indices='d1_idx'; -- { serverError 277 } SELECT * FROM data_01515 SETTINGS force_data_skipping_indices='d1_null_idx'; -- { serverError 277 } SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices='d1_idx'; -SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices=',d1_idx,'; -SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices=',,d1_idx,,'; +SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices='`d1_idx`'; +SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices=' d1_idx '; +SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices=' d1_idx '; SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices='d1_idx,d1_null_idx'; -- { serverError 277 } SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices='d1_null_idx,d1_idx'; -- { serverError 277 } SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices='d1_null_idx,d1_idx,,'; -- { serverError 277 } -SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices=',,d1_null_idx,d1_idx'; -- { serverError 277 } +SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices=' d1_null_idx,d1_idx'; -- { serverError 277 } +SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices=' `d1_null_idx`,d1_idx'; -- { serverError 277 } SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices='d1_null_idx'; -- { serverError 277 } -SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices=',,d1_null_idx,,'; -- { serverError 277 } +SELECT * FROM data_01515 WHERE d1 = 0 SETTINGS force_data_skipping_indices=' d1_null_idx '; -- { serverError 277 } SELECT * FROM data_01515 WHERE d1_null = 0 SETTINGS force_data_skipping_indices='d1_null_idx'; -- { serverError 277 } SELECT * FROM data_01515 WHERE assumeNotNull(d1_null) = 0 SETTINGS force_data_skipping_indices='d1_null_idx';