From 041533eae204a2bfc478ed551e3554032d940ef4 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 16 Jun 2020 21:49:04 +0300 Subject: [PATCH 1/8] Disable optimize_skip_unused_shards if sharding_key has non-deterministic func Example of such functions is rand() And this patch disables only optimize_skip_unused_shards, i.e. INSERT code path does not changed, so it will work as before. --- src/Storages/StorageDistributed.cpp | 18 +++++++++++++++++- src/Storages/StorageDistributed.h | 1 + ...01071_force_optimize_skip_unused_shards.sql | 6 ++++++ ...nused_shards_no_non_deterministic.reference | 0 ...skip_unused_shards_no_non_deterministic.sql | 10 ++++++++++ 5 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 tests/queries/0_stateless/01320_optimize_skip_unused_shards_no_non_deterministic.reference create mode 100644 tests/queries/0_stateless/01320_optimize_skip_unused_shards_no_non_deterministic.sql diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index 77ed0470d4a..d434aa4b0b9 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -188,6 +189,18 @@ ExpressionActionsPtr buildShardingKeyExpression(const ASTPtr & sharding_key, con return ExpressionAnalyzer(query, syntax_result, context).getActions(project); } +bool isExpressionActionsDeterministics(const ExpressionActionsPtr & actions) +{ + for (const auto & action : actions->getActions()) + { + if (action.type != ExpressionAction::APPLY_FUNCTION) + continue; + if (!action.function_base->isDeterministic()) + return false; + } + return true; +} + class ReplacingConstantExpressionsMatcher { public: @@ -292,6 +305,7 @@ StorageDistributed::StorageDistributed( { sharding_key_expr = buildShardingKeyExpression(sharding_key_, *global_context, getColumns().getAllPhysical(), false); sharding_key_column_name = sharding_key_->getColumnName(); + 
sharding_key_is_deterministic = isExpressionActionsDeterministics(sharding_key_expr); } if (!relative_data_path.empty()) @@ -687,7 +701,7 @@ ClusterPtr StorageDistributed::getOptimizedCluster(const Context & context, cons ClusterPtr cluster = getCluster(); const Settings & settings = context.getSettingsRef(); - if (has_sharding_key) + if (has_sharding_key && sharding_key_is_deterministic) { ClusterPtr optimized = skipUnusedShards(cluster, query_ptr, context); if (optimized) @@ -700,6 +714,8 @@ ClusterPtr StorageDistributed::getOptimizedCluster(const Context & context, cons std::stringstream exception_message; if (!has_sharding_key) exception_message << "No sharding key"; + else if (!sharding_key_is_deterministic) + exception_message << "Sharding key is not deterministic"; else exception_message << "Sharding key " << sharding_key_column_name << " is not used"; diff --git a/src/Storages/StorageDistributed.h b/src/Storages/StorageDistributed.h index ecd2b17b48e..02da81a1172 100644 --- a/src/Storages/StorageDistributed.h +++ b/src/Storages/StorageDistributed.h @@ -142,6 +142,7 @@ public: const String cluster_name; bool has_sharding_key; + bool sharding_key_is_deterministic = false; ExpressionActionsPtr sharding_key_expr; String sharding_key_column_name; diff --git a/tests/queries/0_stateless/01071_force_optimize_skip_unused_shards.sql b/tests/queries/0_stateless/01071_force_optimize_skip_unused_shards.sql index dbbc2c735ce..98878f4fdd8 100644 --- a/tests/queries/0_stateless/01071_force_optimize_skip_unused_shards.sql +++ b/tests/queries/0_stateless/01071_force_optimize_skip_unused_shards.sql @@ -24,6 +24,12 @@ set force_optimize_skip_unused_shards=1; select * from dist_01071; -- { serverError 507 } set force_optimize_skip_unused_shards=2; select * from dist_01071; -- { serverError 507 } +drop table if exists dist_01071; + +-- non deterministic function (i.e. 
rand()) +create table dist_01071 as data_01071 Engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01071, key + rand()); +set force_optimize_skip_unused_shards=1; +select * from dist_01071 where key = 0; -- { serverError 507 } drop table if exists data_01071; drop table if exists dist_01071; diff --git a/tests/queries/0_stateless/01320_optimize_skip_unused_shards_no_non_deterministic.reference b/tests/queries/0_stateless/01320_optimize_skip_unused_shards_no_non_deterministic.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01320_optimize_skip_unused_shards_no_non_deterministic.sql b/tests/queries/0_stateless/01320_optimize_skip_unused_shards_no_non_deterministic.sql new file mode 100644 index 00000000000..ca58f7be94c --- /dev/null +++ b/tests/queries/0_stateless/01320_optimize_skip_unused_shards_no_non_deterministic.sql @@ -0,0 +1,10 @@ +drop table if exists data_01320; +drop table if exists dist_01320; + +create table data_01320 (key Int) Engine=Null(); +-- non deterministic function (i.e. rand()) +create table dist_01320 as data_01320 Engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01320, key + rand()); + +set optimize_skip_unused_shards=1; +set force_optimize_skip_unused_shards=1; +select * from dist_01320 where key = 0; -- { serverError 507 } From 65072473284812856e032eded7c8a356563dee7c Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 16 Jun 2020 22:02:06 +0300 Subject: [PATCH 2/8] optimize_skip_unused_shards=2 will disable it for nested distributed queries P.S. Looks like settings can be converted between SettingUInt64 and SettingBool without breaking binary protocol. 
FWIW maybe it is a good idea to change the semantics of the settings as follow (but I guess that changing semantic is not a good idea, better to add new settings and deprecate old ones): - optimize_skip_unused_shards -- accept nesting level on which the optimization will work - force_skip_optimize_shards_nesting -- accept nesting level on which the optimization will work --- docs/en/operations/settings/settings.md | 3 ++- docs/ru/operations/settings/settings.md | 3 ++- src/Core/Settings.h | 2 +- src/Interpreters/ClusterProxy/executeQuery.cpp | 8 ++++++++ ...optimize_skip_unused_shards_no_nested.reference | 0 ...01319_optimize_skip_unused_shards_no_nested.sql | 14 ++++++++++++++ 6 files changed, 27 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.reference create mode 100644 tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.sql diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 237058f1b83..89a3e60b6e7 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1121,7 +1121,8 @@ Enables or disables skipping of unused shards for [SELECT](../../sql-reference/s Possible values: - 0 — Disabled. -- 1 — Enabled. +- 1 — Enabled, including nested `Distributed()` tables. +- 2 — Enabled, excluding nested `Distributed()` tables. Default value: 0 diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 5e34affcaac..05492700ee7 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1032,7 +1032,8 @@ ClickHouse генерирует исключение Возможные значения: - 0 — Выключена. -- 1 — Включена. +- 1 — Включена, включая вложенные `Distributed` таблицы. +- 2 — Включена, исключая вложенные `Distributed` таблицы. 
Значение по умолчанию: 0 diff --git a/src/Core/Settings.h b/src/Core/Settings.h index 1f3a8f42400..daef73a002f 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -121,7 +121,7 @@ struct Settings : public SettingsCollection \ M(SettingBool, distributed_group_by_no_merge, false, "Do not merge aggregation states from different servers for distributed query processing - in case it is for certain that there are different keys on different shards.", 0) \ M(SettingBool, parallel_distributed_insert_select, false, "If true, distributed insert select query in the same cluster will be processed on local tables on every shard", 0) \ - M(SettingBool, optimize_skip_unused_shards, false, "Assumes that data is distributed by sharding_key. Optimization to skip unused shards if SELECT query filters by sharding_key.", 0) \ + M(SettingUInt64, optimize_skip_unused_shards, 0, "Assumes that data is distributed by sharding_key. Optimization to skip unused shards if SELECT query filters by sharding_key (if 1 - includes nested Distributed, 2 - disable for nested Distributed).", 0) \ M(SettingBool, optimize_distributed_group_by_sharding_key, false, "Optimize GROUP BY sharding_key queries (by avodiing costly aggregation on the initiator server).", 0) \ M(SettingUInt64, force_optimize_skip_unused_shards, 0, "Throw an exception if unused shards cannot be skipped (1 - throw only if the table has the sharding key, 2 - always throw.", 0) \ M(SettingBool, force_optimize_skip_unused_shards_no_nested, false, "Do not apply force_optimize_skip_unused_shards for nested Distributed tables.", 0) \ diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index fa977249eaa..64aae175598 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -17,6 +17,8 @@ namespace ClusterProxy Context removeUserRestrictionsFromSettings(const Context & context, const Settings & settings) { + static 
const UInt64 OPTIMIZE_SKIP_UNUSED_SHARDS_NO_NESTED = 2; + Settings new_settings = settings; new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.max_execution_time); @@ -34,6 +36,12 @@ Context removeUserRestrictionsFromSettings(const Context & context, const Settin new_settings.force_optimize_skip_unused_shards.changed = false; } + if (settings.optimize_skip_unused_shards == OPTIMIZE_SKIP_UNUSED_SHARDS_NO_NESTED) + { + new_settings.optimize_skip_unused_shards = 0; + new_settings.optimize_skip_unused_shards.changed = false; + } + Context new_context(context); new_context.setSettings(new_settings); diff --git a/tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.reference b/tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.reference new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.sql b/tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.sql new file mode 100644 index 00000000000..293ab42dcf4 --- /dev/null +++ b/tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.sql @@ -0,0 +1,14 @@ +drop table if exists data_01319; +drop table if exists dist_01319; +drop table if exists dist_layer_01319; + +create table data_01319 (key Int, sub_key Int) Engine=Null(); + +set force_optimize_skip_unused_shards=2; +set optimize_skip_unused_shards=1; + +create table dist_layer_01319 as data_01319 Engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01319, sub_key%2); +create table dist_01319 as data_01319 Engine=Distributed(test_cluster_two_shards, currentDatabase(), dist_layer_01319, key%2); +select * from dist_01319 where key = 1; -- { serverError 507 } +set optimize_skip_unused_shards=2; -- no nested +select * from dist_01319 where key = 1; From d34e6217bcf325f5f2273c079d4b1b9d3ac87c0f Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 18 Jun 2020 21:45:39 +0300 
Subject: [PATCH 3/8] Add logging of adjusting conditional settings for distributed queries --- src/Interpreters/ClusterProxy/executeQuery.cpp | 14 +++++++++++--- src/Interpreters/ClusterProxy/executeQuery.h | 4 ++-- src/Storages/StorageDistributed.cpp | 4 ++-- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 64aae175598..38ad60f30bf 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -15,7 +15,7 @@ namespace DB namespace ClusterProxy { -Context removeUserRestrictionsFromSettings(const Context & context, const Settings & settings) +Context removeUserRestrictionsFromSettings(const Context & context, const Settings & settings, Poco::Logger * log) { static const UInt64 OPTIMIZE_SKIP_UNUSED_SHARDS_NO_NESTED = 2; @@ -34,12 +34,18 @@ Context removeUserRestrictionsFromSettings(const Context & context, const Settin { new_settings.force_optimize_skip_unused_shards = 0; new_settings.force_optimize_skip_unused_shards.changed = false; + + if (log) + LOG_TRACE(log, "Disabling force_optimize_skip_unused_shards (due to force_optimize_skip_unused_shards_no_nested)"); } if (settings.optimize_skip_unused_shards == OPTIMIZE_SKIP_UNUSED_SHARDS_NO_NESTED) { new_settings.optimize_skip_unused_shards = 0; new_settings.optimize_skip_unused_shards.changed = false; + + if (log) + LOG_TRACE(log, "Disabling optimize_skip_unused_shards (due to optimize_skip_unused_shards=2)"); } Context new_context(context); @@ -49,14 +55,16 @@ Context removeUserRestrictionsFromSettings(const Context & context, const Settin } Pipes executeQuery( - IStreamFactory & stream_factory, const ClusterPtr & cluster, + IStreamFactory & stream_factory, const ClusterPtr & cluster, Poco::Logger * log, const ASTPtr & query_ast, const Context & context, const Settings & settings, const SelectQueryInfo & query_info) { + assert(log); + Pipes res; const 
std::string query = queryToString(query_ast); - Context new_context = removeUserRestrictionsFromSettings(context, settings); + Context new_context = removeUserRestrictionsFromSettings(context, settings, log); ThrottlerPtr user_level_throttler; if (auto * process_list_element = context.getProcessListElement()) diff --git a/src/Interpreters/ClusterProxy/executeQuery.h b/src/Interpreters/ClusterProxy/executeQuery.h index fed8b83db03..dcbbe0c7e95 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.h +++ b/src/Interpreters/ClusterProxy/executeQuery.h @@ -21,13 +21,13 @@ class IStreamFactory; /// removes different restrictions (like max_concurrent_queries_for_user, max_memory_usage_for_user, etc.) /// from settings and creates new context with them -Context removeUserRestrictionsFromSettings(const Context & context, const Settings & settings); +Context removeUserRestrictionsFromSettings(const Context & context, const Settings & settings, Poco::Logger * log = nullptr); /// Execute a distributed query, creating a vector of BlockInputStreams, from which the result can be read. /// `stream_factory` object encapsulates the logic of creating streams for a different type of query /// (currently SELECT, DESCRIBE). 
Pipes executeQuery( - IStreamFactory & stream_factory, const ClusterPtr & cluster, + IStreamFactory & stream_factory, const ClusterPtr & cluster, Poco::Logger * log, const ASTPtr & query_ast, const Context & context, const Settings & settings, const SelectQueryInfo & query_info); } diff --git a/src/Storages/StorageDistributed.cpp b/src/Storages/StorageDistributed.cpp index d434aa4b0b9..201aeb7273b 100644 --- a/src/Storages/StorageDistributed.cpp +++ b/src/Storages/StorageDistributed.cpp @@ -519,8 +519,8 @@ Pipes StorageDistributed::read( : ClusterProxy::SelectStreamFactory( header, processed_stage, StorageID{remote_database, remote_table}, scalars, has_virtual_shard_num_column, context.getExternalTables()); - return ClusterProxy::executeQuery( - select_stream_factory, cluster, modified_query_ast, context, context.getSettingsRef(), query_info); + return ClusterProxy::executeQuery(select_stream_factory, cluster, log, + modified_query_ast, context, context.getSettingsRef(), query_info); } From 724c09a22c75bded4f043ac6d7e2616d70b54307 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Thu, 18 Jun 2020 21:45:39 +0300 Subject: [PATCH 4/8] Add missing DROP TABLE in 01319_mv_constants_bug --- tests/queries/0_stateless/01319_mv_constants_bug.sql | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/queries/0_stateless/01319_mv_constants_bug.sql b/tests/queries/0_stateless/01319_mv_constants_bug.sql index 975a33d7b71..191183ab286 100644 --- a/tests/queries/0_stateless/01319_mv_constants_bug.sql +++ b/tests/queries/0_stateless/01319_mv_constants_bug.sql @@ -3,6 +3,7 @@ DROP TABLE IF EXISTS distributed_table_1; DROP TABLE IF EXISTS distributed_table_2; DROP TABLE IF EXISTS local_table_1; DROP TABLE IF EXISTS local_table_2; +DROP TABLE IF EXISTS local_table_merged; CREATE TABLE local_table_1 (id String) ENGINE = MergeTree ORDER BY (id); CREATE TABLE local_table_2(id String) ENGINE = MergeTree ORDER BY (id); From 0e218b0f15a502ed5c95499e13fa89f8d52fa006 Mon Sep 17 00:00:00 2001 
From: Azat Khuzhin Date: Thu, 18 Jun 2020 21:45:40 +0300 Subject: [PATCH 5/8] Improve 01319_optimize_skip_unused_shards_no_nested Previously there was no check that optimize_skip_unused_shards was working for the first level; use a cluster with an unavailable shard to guarantee this. --- .../01319_optimize_skip_unused_shards_no_nested.sql | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.sql b/tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.sql index 293ab42dcf4..6bf8e17a56c 100644 --- a/tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.sql +++ b/tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.sql @@ -7,8 +7,10 @@ create table data_01319 (key Int, sub_key Int) Engine=Null(); set force_optimize_skip_unused_shards=2; set optimize_skip_unused_shards=1; -create table dist_layer_01319 as data_01319 Engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01319, sub_key%2); -create table dist_01319 as data_01319 Engine=Distributed(test_cluster_two_shards, currentDatabase(), dist_layer_01319, key%2); +create table dist_layer_01319 as data_01319 Engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01319, sub_key); +-- test_unavailable_shard here to check that optimize_skip_unused_shards always +-- remove some nodes from the cluster for the first nesting level +create table dist_01319 as data_01319 Engine=Distributed(test_unavailable_shard, currentDatabase(), dist_layer_01319, key+1); select * from dist_01319 where key = 1; -- { serverError 507 } set optimize_skip_unused_shards=2; -- no nested select * from dist_01319 where key = 1; From fb30629ff246c0d74a3be01f0b47f3b89201574b Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Sun, 21 Jun 2020 14:29:54 +0300 Subject: [PATCH 6/8] Add settings to control nesting level for shards skipping optimization - optimize_skip_unused_shards_nesting (allows control 
nesting level for shards skipping optimization) - force_optimize_skip_unused_shards_nesting (allows control of the nesting level for checking whether shards were skipped or not) - deprecates force_optimize_skip_unused_shards_no_nested --- docs/en/operations/settings/settings.md | 25 +++++++---- docs/es/operations/settings/settings.md | 11 ----- docs/fa/operations/settings/settings.md | 11 ----- docs/fr/operations/settings/settings.md | 11 ----- docs/ja/operations/settings/settings.md | 11 ----- docs/tr/operations/settings/settings.md | 11 ----- docs/zh/operations/settings/settings.md | 11 ----- src/Core/Settings.h | 6 ++- .../ClusterProxy/executeQuery.cpp | 44 ++++++++++++++----- ...1071_force_optimize_skip_unused_shards.sql | 2 +- ...mize_skip_unused_shards_nesting.reference} | 0 ...9_optimize_skip_unused_shards_nesting.sql} | 14 ++++-- 12 files changed, 64 insertions(+), 93 deletions(-) rename tests/queries/0_stateless/{01319_optimize_skip_unused_shards_no_nested.reference => 01319_optimize_skip_unused_shards_nesting.reference} (100%) rename tests/queries/0_stateless/{01319_optimize_skip_unused_shards_no_nested.sql => 01319_optimize_skip_unused_shards_nesting.sql} (69%) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 89a3e60b6e7..267e75436ed 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1126,6 +1126,18 @@ Possible values: Default value: 0 +## optimize\_skip\_unused\_shards\_nesting {#optimize-skip-unused-shards-nesting} + +Controls nesting level for [`optimize_skip_unused_shards`](#optimize-skip-unused-shards) (hence still requires [`optimize_skip_unused_shards`](#optimize-skip-unused-shards)). + +Possible values: + +- 0 — Disabled. 
+- 1 — Enables `optimize_skip_unused_shards` only for the first level +- 2 — Enables `optimize_skip_unused_shards` up to the second level + +Default value: 0 + ## force\_optimize\_skip\_unused\_shards {#force-optimize-skip-unused-shards} Enables or disables query execution if [optimize\_skip\_unused\_shards](#optimize-skip-unused-shards) is enabled and skipping of unused shards is not possible. If the skipping is not possible and the setting is enabled, an exception will be thrown. @@ -1138,16 +1150,13 @@ Possible values: Default value: 0 -## force\_optimize\_skip\_unused\_shards\_no\_nested {#settings-force_optimize_skip_unused_shards_no_nested} +## force\_optimize\_skip\_unused\_shards\_nesting {#settings-force_optimize_skip_unused_shards_nesting} -Reset [`optimize_skip_unused_shards`](#optimize-skip-unused-shards) for nested `Distributed` table +Controls nesting level for [`force_optimize_skip_unused_shards`](#force-optimize-skip-unused-shards) (hence still requires [`force_optimize_skip_unused_shards`](#force-optimize-skip-unused-shards)). -Possible values: - -- 1 — Enabled. -- 0 — Disabled. - -Default value: 0. +- Type: unsigned int +- 1 — Enables `force_optimize_skip_unused_shards_nesting` only for the first level +- 2 — Enables `force_optimize_skip_unused_shards_nesting` up to the second level ## optimize\_throw\_if\_noop {#setting-optimize_throw_if_noop} diff --git a/docs/es/operations/settings/settings.md b/docs/es/operations/settings/settings.md index 1989bb71036..d709bb69bc8 100644 --- a/docs/es/operations/settings/settings.md +++ b/docs/es/operations/settings/settings.md @@ -1048,17 +1048,6 @@ Valores posibles: Valor predeterminado: 0 -## force\_optimize\_skip\_unused\_shards\_no\_nested {#settings-force_optimize_skip_unused_shards_no_nested} - -Restablecer [`optimize_skip_unused_shards`](#settings-force_optimize_skip_unused_shards) para anidados `Distributed` tabla - -Valores posibles: - -- 1 — Enabled. -- 0 — Disabled. - -Valor predeterminado: 0. 
- ## Optize\_throw\_if\_noop {#setting-optimize_throw_if_noop} Habilita o deshabilita el lanzamiento de una excepción [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) la consulta no realizó una fusión. diff --git a/docs/fa/operations/settings/settings.md b/docs/fa/operations/settings/settings.md index 3de3f3b7230..6b820dcf5c2 100644 --- a/docs/fa/operations/settings/settings.md +++ b/docs/fa/operations/settings/settings.md @@ -1048,17 +1048,6 @@ The results of the compilation are saved in the build directory in the form of . مقدار پیشفرض: 0 -## به زور \_بهتیتیتیتی\_سکیپ\_اس\_ش\_شارد\_مایش داده میشود {#settings-force_optimize_skip_unused_shards_no_nested} - -بازنشانی [`optimize_skip_unused_shards`](#settings-force_optimize_skip_unused_shards) برای تو در تو `Distributed` جدول - -مقادیر ممکن: - -- 1 — Enabled. -- 0 — Disabled. - -مقدار پیش فرض: 0. - ## ا\_فزون\_ف\_کوپ {#setting-optimize_throw_if_noop} را قادر می سازد و یا غیر فعال پرتاب یک استثنا اگر یک [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) پرس و جو یک ادغام انجام نمی. diff --git a/docs/fr/operations/settings/settings.md b/docs/fr/operations/settings/settings.md index ab26a114bcf..06748ad8c70 100644 --- a/docs/fr/operations/settings/settings.md +++ b/docs/fr/operations/settings/settings.md @@ -1048,17 +1048,6 @@ Valeurs possibles: Valeur par défaut: 0 -## force\_optimize\_skip\_unused\_shards\_no\_nested {#settings-force_optimize_skip_unused_shards_no_nested} - -Réinitialiser [`optimize_skip_unused_shards`](#settings-force_optimize_skip_unused_shards) pour imbriquée `Distributed` table - -Valeurs possibles: - -- 1 — Enabled. -- 0 — Disabled. - -Valeur par défaut: 0. - ## optimize\_throw\_if\_noop {#setting-optimize_throw_if_noop} Active ou désactive le lancement d'une exception si [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) la requête n'a pas effectué de fusion. 
diff --git a/docs/ja/operations/settings/settings.md b/docs/ja/operations/settings/settings.md index be97c0934b7..721f161ebda 100644 --- a/docs/ja/operations/settings/settings.md +++ b/docs/ja/operations/settings/settings.md @@ -1048,17 +1048,6 @@ PREWHERE/WHEREにシャーディングキー条件があるSELECTクエリの未 デフォルト値:0 -## force\_optimize\_skip\_unused\_shards\_no\_nested {#settings-force_optimize_skip_unused_shards_no_nested} - -リセット [`optimize_skip_unused_shards`](#settings-force_optimize_skip_unused_shards) 入れ子の場合 `Distributed` テーブル - -可能な値: - -- 1 — Enabled. -- 0 — Disabled. - -デフォルト値は0です。 - ## optimize\_throw\_if\_noop {#setting-optimize_throw_if_noop} 例外のスローを有効または無効にします。 [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) クエリがマージを実行しませんでした。 diff --git a/docs/tr/operations/settings/settings.md b/docs/tr/operations/settings/settings.md index 342c35caab2..f942da86c10 100644 --- a/docs/tr/operations/settings/settings.md +++ b/docs/tr/operations/settings/settings.md @@ -1048,17 +1048,6 @@ Olası değerler: Varsayılan değer: 0 -## force\_optimize\_skip\_unused\_shards\_no\_nested {#settings-force_optimize_skip_unused_shards_no_nested} - -Sıfırlamak [`optimize_skip_unused_shards`](#settings-force_optimize_skip_unused_shards) iç içe geçmiş için `Distributed` Tablo - -Olası değerler: - -- 1 — Enabled. -- 0 — Disabled. - -Varsayılan değer: 0. - ## optimize\_throw\_if\_noop {#setting-optimize_throw_if_noop} Bir özel durum atmayı etkinleştirir veya devre dışı bırakır. [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) sorgu birleştirme gerçekleştirmedi. 
diff --git a/docs/zh/operations/settings/settings.md b/docs/zh/operations/settings/settings.md index 07362dcaceb..01e65e92c42 100644 --- a/docs/zh/operations/settings/settings.md +++ b/docs/zh/operations/settings/settings.md @@ -1048,17 +1048,6 @@ ClickHouse生成异常 默认值:0 -## force\_optimize\_skip\_unused\_shards\_no\_nested {#settings-force_optimize_skip_unused_shards_no_nested} - -重置 [`optimize_skip_unused_shards`](#settings-force_optimize_skip_unused_shards) 对于嵌套 `Distributed` 表 - -可能的值: - -- 1 — Enabled. -- 0 — Disabled. - -默认值:0。 - ## optimize\_throw\_if\_noop {#setting-optimize_throw_if_noop} 启用或禁用抛出异常,如果 [OPTIMIZE](../../sql-reference/statements/misc.md#misc_operations-optimize) 查询未执行合并。 diff --git a/src/Core/Settings.h b/src/Core/Settings.h index daef73a002f..2a58a79d8dc 100644 --- a/src/Core/Settings.h +++ b/src/Core/Settings.h @@ -121,10 +121,11 @@ struct Settings : public SettingsCollection \ M(SettingBool, distributed_group_by_no_merge, false, "Do not merge aggregation states from different servers for distributed query processing - in case it is for certain that there are different keys on different shards.", 0) \ M(SettingBool, parallel_distributed_insert_select, false, "If true, distributed insert select query in the same cluster will be processed on local tables on every shard", 0) \ - M(SettingUInt64, optimize_skip_unused_shards, 0, "Assumes that data is distributed by sharding_key. Optimization to skip unused shards if SELECT query filters by sharding_key (if 1 - includes nested Distributed, 2 - disable for nested Distributed).", 0) \ M(SettingBool, optimize_distributed_group_by_sharding_key, false, "Optimize GROUP BY sharding_key queries (by avodiing costly aggregation on the initiator server).", 0) \ + M(SettingBool, optimize_skip_unused_shards, false, "Assumes that data is distributed by sharding_key. 
Optimization to skip unused shards if SELECT query filters by sharding_key.", 0) \ M(SettingUInt64, force_optimize_skip_unused_shards, 0, "Throw an exception if unused shards cannot be skipped (1 - throw only if the table has the sharding key, 2 - always throw.", 0) \ - M(SettingBool, force_optimize_skip_unused_shards_no_nested, false, "Do not apply force_optimize_skip_unused_shards for nested Distributed tables.", 0) \ + M(SettingUInt64, optimize_skip_unused_shards_nesting, 0, "Same as optimize_skip_unused_shards, but accept nesting level until which it will work.", 0) \ + M(SettingUInt64, force_optimize_skip_unused_shards_nesting, 0, "Same as force_optimize_skip_unused_shards, but accept nesting level until which it will work.", 0) \ \ M(SettingBool, input_format_parallel_parsing, true, "Enable parallel parsing for some data formats.", 0) \ M(SettingUInt64, min_chunk_bytes_for_parallel_parsing, (10 * 1024 * 1024), "The minimum chunk size in bytes, which each thread will parse in parallel.", 0) \ @@ -395,6 +396,7 @@ struct Settings : public SettingsCollection M(SettingBool, partial_merge_join, false, "Obsolete. Use join_algorithm='prefer_partial_merge' instead.", 0) \ M(SettingUInt64, max_memory_usage_for_all_queries, 0, "Obsolete. Will be removed after 2020-10-20", 0) \ \ + M(SettingBool, force_optimize_skip_unused_shards_no_nested, false, "Obsolete setting, does nothing. Will be removed after 2020-12-01. Use force_optimize_skip_unused_shards_nesting instead.", 0) \ M(SettingBool, experimental_use_processors, true, "Obsolete setting, does nothing. 
Will be removed after 2020-11-29.", 0) #define FORMAT_FACTORY_SETTINGS(M) \ diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 38ad60f30bf..1af154d0180 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -17,8 +17,6 @@ namespace ClusterProxy Context removeUserRestrictionsFromSettings(const Context & context, const Settings & settings, Poco::Logger * log) { - static const UInt64 OPTIMIZE_SKIP_UNUSED_SHARDS_NO_NESTED = 2; - Settings new_settings = settings; new_settings.queue_max_wait_ms = Cluster::saturate(new_settings.queue_max_wait_ms, settings.max_execution_time); @@ -30,22 +28,44 @@ Context removeUserRestrictionsFromSettings(const Context & context, const Settin new_settings.max_concurrent_queries_for_user.changed = false; new_settings.max_memory_usage_for_user.changed = false; - if (settings.force_optimize_skip_unused_shards_no_nested) + if (settings.force_optimize_skip_unused_shards_nesting) { - new_settings.force_optimize_skip_unused_shards = 0; - new_settings.force_optimize_skip_unused_shards.changed = false; + if (new_settings.force_optimize_skip_unused_shards_nesting == 1) + { + new_settings.force_optimize_skip_unused_shards = 0; + new_settings.force_optimize_skip_unused_shards.changed = false; - if (log) - LOG_TRACE(log, "Disabling force_optimize_skip_unused_shards (due to force_optimize_skip_unused_shards_no_nested)"); + if (log) + LOG_TRACE(log, "Disabling force_optimize_skip_unused_shards for nested queries (force_optimize_skip_unused_shards_nesting exceeded)"); + } + else + { + new_settings.force_optimize_skip_unused_shards_nesting.value--; + new_settings.force_optimize_skip_unused_shards_nesting.changed = true; + + if (log) + LOG_TRACE(log, "force_optimize_skip_unused_shards_nesting is now {}", new_settings.force_optimize_skip_unused_shards_nesting); + } } - if (settings.optimize_skip_unused_shards == 
OPTIMIZE_SKIP_UNUSED_SHARDS_NO_NESTED) + if (settings.optimize_skip_unused_shards_nesting) { - new_settings.optimize_skip_unused_shards = 0; - new_settings.optimize_skip_unused_shards.changed = false; + if (new_settings.optimize_skip_unused_shards_nesting == 1) + { + new_settings.optimize_skip_unused_shards = 0; + new_settings.optimize_skip_unused_shards.changed = false; - if (log) - LOG_TRACE(log, "Disabling optimize_skip_unused_shards (due to optimize_skip_unused_shards=2)"); + if (log) + LOG_TRACE(log, "Disabling optimize_skip_unused_shards for nested queries (optimize_skip_unused_shards_nesting exceeded)"); + } + else + { + new_settings.optimize_skip_unused_shards_nesting.value--; + new_settings.optimize_skip_unused_shards_nesting.changed = true; + + if (log) + LOG_TRACE(log, "optimize_skip_unused_shards_nesting is now {}", new_settings.optimize_skip_unused_shards_nesting); + } } Context new_context(context); diff --git a/tests/queries/0_stateless/01071_force_optimize_skip_unused_shards.sql b/tests/queries/0_stateless/01071_force_optimize_skip_unused_shards.sql index 98878f4fdd8..33806cc7416 100644 --- a/tests/queries/0_stateless/01071_force_optimize_skip_unused_shards.sql +++ b/tests/queries/0_stateless/01071_force_optimize_skip_unused_shards.sql @@ -41,7 +41,7 @@ create table data2_01071 (key Int, sub_key Int) Engine=Null(); create table dist2_layer_01071 as data2_01071 Engine=Distributed(test_cluster_two_shards, currentDatabase(), data2_01071, sub_key%2); create table dist2_01071 as data2_01071 Engine=Distributed(test_cluster_two_shards, currentDatabase(), dist2_layer_01071, key%2); select * from dist2_01071 where key = 1; -- { serverError 507 } -set force_optimize_skip_unused_shards_no_nested=1; +set force_optimize_skip_unused_shards_nesting=1; select * from dist2_01071 where key = 1; drop table if exists data2_01071; drop table if exists dist2_layer_01071; diff --git a/tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.reference 
b/tests/queries/0_stateless/01319_optimize_skip_unused_shards_nesting.reference similarity index 100% rename from tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.reference rename to tests/queries/0_stateless/01319_optimize_skip_unused_shards_nesting.reference diff --git a/tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.sql b/tests/queries/0_stateless/01319_optimize_skip_unused_shards_nesting.sql similarity index 69% rename from tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.sql rename to tests/queries/0_stateless/01319_optimize_skip_unused_shards_nesting.sql index 6bf8e17a56c..b8a48c27e5f 100644 --- a/tests/queries/0_stateless/01319_optimize_skip_unused_shards_no_nested.sql +++ b/tests/queries/0_stateless/01319_optimize_skip_unused_shards_nesting.sql @@ -4,13 +4,19 @@ drop table if exists dist_layer_01319; create table data_01319 (key Int, sub_key Int) Engine=Null(); -set force_optimize_skip_unused_shards=2; -set optimize_skip_unused_shards=1; - create table dist_layer_01319 as data_01319 Engine=Distributed(test_cluster_two_shards, currentDatabase(), data_01319, sub_key); -- test_unavailable_shard here to check that optimize_skip_unused_shards always -- remove some nodes from the cluster for the first nesting level create table dist_01319 as data_01319 Engine=Distributed(test_unavailable_shard, currentDatabase(), dist_layer_01319, key+1); + +set optimize_skip_unused_shards=1; +set force_optimize_skip_unused_shards=1; + +set force_optimize_skip_unused_shards_nesting=2; +set optimize_skip_unused_shards_nesting=2; select * from dist_01319 where key = 1; -- { serverError 507 } -set optimize_skip_unused_shards=2; -- no nested +set force_optimize_skip_unused_shards_nesting=1; +select * from dist_01319 where key = 1; +set force_optimize_skip_unused_shards_nesting=2; +set optimize_skip_unused_shards_nesting=1; select * from dist_01319 where key = 1; From 38b018fa0435aab45079da0e3f3f116211aeb44f Mon Sep 17 
00:00:00 2001 From: alexey-milovidov Date: Sun, 21 Jun 2020 21:29:11 +0300 Subject: [PATCH 7/8] Update executeQuery.cpp --- src/Interpreters/ClusterProxy/executeQuery.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/Interpreters/ClusterProxy/executeQuery.cpp b/src/Interpreters/ClusterProxy/executeQuery.cpp index 1af154d0180..aac78b755da 100644 --- a/src/Interpreters/ClusterProxy/executeQuery.cpp +++ b/src/Interpreters/ClusterProxy/executeQuery.cpp @@ -32,7 +32,7 @@ Context removeUserRestrictionsFromSettings(const Context & context, const Settin { if (new_settings.force_optimize_skip_unused_shards_nesting == 1) { - new_settings.force_optimize_skip_unused_shards = 0; + new_settings.force_optimize_skip_unused_shards = false; new_settings.force_optimize_skip_unused_shards.changed = false; if (log) @@ -40,7 +40,7 @@ Context removeUserRestrictionsFromSettings(const Context & context, const Settin } else { - new_settings.force_optimize_skip_unused_shards_nesting.value--; + --new_settings.force_optimize_skip_unused_shards_nesting.value; new_settings.force_optimize_skip_unused_shards_nesting.changed = true; if (log) @@ -52,7 +52,7 @@ Context removeUserRestrictionsFromSettings(const Context & context, const Settin { if (new_settings.optimize_skip_unused_shards_nesting == 1) { - new_settings.optimize_skip_unused_shards = 0; + new_settings.optimize_skip_unused_shards = false; new_settings.optimize_skip_unused_shards.changed = false; if (log) @@ -60,7 +60,7 @@ Context removeUserRestrictionsFromSettings(const Context & context, const Settin } else { - new_settings.optimize_skip_unused_shards_nesting.value--; + --new_settings.optimize_skip_unused_shards_nesting.value; new_settings.optimize_skip_unused_shards_nesting.changed = true; if (log) From b0779dbfe7d17203267e9e3e918b4761c566c0ae Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Tue, 23 Jun 2020 21:07:41 +0300 Subject: [PATCH 8/8] Fix documentation --- docs/en/operations/settings/settings.md 
| 23 ++++++++------- docs/ru/operations/settings/settings.md | 37 ++++++++++++++++++++----- 2 files changed, 43 insertions(+), 17 deletions(-) diff --git a/docs/en/operations/settings/settings.md b/docs/en/operations/settings/settings.md index 267e75436ed..422a4ba87db 100644 --- a/docs/en/operations/settings/settings.md +++ b/docs/en/operations/settings/settings.md @@ -1121,20 +1121,19 @@ Enables or disables skipping of unused shards for [SELECT](../../sql-reference/s Possible values: - 0 — Disabled. -- 1 — Enabled, including nested `Distributed()` tables. -- 2 — Enabled, excluding nested `Distributed()` tables. +- 1 — Enabled. Default value: 0 ## optimize\_skip\_unused\_shards\_nesting {#optimize-skip-unused-shards-nesting} -Controls nesting level for [`optimize_skip_unused_shards`](#optimize-skip-unused-shards) (hence still requires [`optimize_skip_unused_shards`](#optimize-skip-unused-shards)). +Controls how [`optimize_skip_unused_shards`](#optimize-skip-unused-shards) is applied (and hence still requires [`optimize_skip_unused_shards`](#optimize-skip-unused-shards) to be enabled) depending on the nesting level of the distributed query (the case when a `Distributed` table looks into another `Distributed` table). Possible values: -- 0 — Disabled. -- 1 — Enables `optimize_skip_unused_shards` only for the first level -- 2 — Enables `optimize_skip_unused_shards` up to the second level +- 0 — Disabled, `optimize_skip_unused_shards` works at all nesting levels. +- 1 — Enables `optimize_skip_unused_shards` only for the first level. +- 2 — Enables `optimize_skip_unused_shards` up to the second level. Default value: 0 @@ -1152,11 +1151,15 @@ Default value: 0 ## force\_optimize\_skip\_unused\_shards\_nesting {#settings-force_optimize_skip_unused_shards_nesting} -Controls nesting level for [`force_optimize_skip_unused_shards`](#force-optimize-skip-unused-shards) (hence still requires [`force_optimize_skip_unused_shards`](#force-optimize-skip-unused-shards)).
+Controls how [`force_optimize_skip_unused_shards`](#force-optimize-skip-unused-shards) is applied (and hence still requires [`force_optimize_skip_unused_shards`](#force-optimize-skip-unused-shards) to be enabled) depending on the nesting level of the distributed query (the case when a `Distributed` table looks into another `Distributed` table). -- Type: unsigned int -- 1 — Enables `force_optimize_skip_unused_shards_nesting` only for the first level -- 2 — Enables `force_optimize_skip_unused_shards_nesting` up to the second level +Possible values: + +- 0 — Disabled, `force_optimize_skip_unused_shards` works at all nesting levels. +- 1 — Enables `force_optimize_skip_unused_shards` only for the first level. +- 2 — Enables `force_optimize_skip_unused_shards` up to the second level. + +Default value: 0 ## optimize\_throw\_if\_noop {#setting-optimize_throw_if_noop} diff --git a/docs/ru/operations/settings/settings.md b/docs/ru/operations/settings/settings.md index 05492700ee7..bd29a0cbb67 100644 --- a/docs/ru/operations/settings/settings.md +++ b/docs/ru/operations/settings/settings.md @@ -1025,27 +1025,50 @@ ClickHouse генерирует исключение Значение по умолчанию: 0. -## optimize_skip_unused_shards {#optimize-skip-unused-shards} +## optimize\_skip\_unused\_shards {#optimize-skip-unused-shards} Включает или отключает пропуск неиспользуемых шардов для запросов [SELECT](../../sql-reference/statements/select/index.md) , в которых условие ключа шардирования задано в секции `WHERE/PREWHERE`. Предполагается, что данные распределены с помощью ключа шардирования, в противном случае настройка ничего не делает. Возможные значения: - 0 — Выключена. -- 1 — Включена, включая вложенные `Distributed` таблицы. -- 2 — Включена, исключая вложенные `Distributed` таблицы. +- 1 — Включена.
Значение по умолчанию: 0 -## force_optimize_skip_unused_shards {#force-optimize-skip-unused-shards} +## optimize\_skip\_unused\_shards\_nesting {#optimize-skip-unused-shards-nesting} + +Контролирует настройку [`optimize_skip_unused_shards`](#optimize-skip-unused-shards) (поэтому все еще требует `optimize_skip_unused_shards`) в зависимости от вложенности распределенного запроса (когда у вас есть `Distributed` таблица, которая смотрит на другую `Distributed` таблицу). + +Возможные значения: + +- 0 — Выключена, `optimize_skip_unused_shards` работает всегда. +- 1 — Включает `optimize_skip_unused_shards` только для 1-ого уровня вложенности. +- 2 — Включает `optimize_skip_unused_shards` для 1-ого и 2-ого уровня вложенности. + +Значение по умолчанию: 0 + +## force\_optimize\_skip\_unused\_shards {#force-optimize-skip-unused-shards} Разрешает или запрещает выполнение запроса, если настройка [optimize_skip_unused_shards](#optimize-skip-unused-shards) включена, а пропуск неиспользуемых шардов невозможен. Если данная настройка включена и пропуск невозможен, ClickHouse генерирует исключение. Возможные значения: -- 0 — Выключена. ClickHouse не генерирует исключение. -- 1 — Включена. Выполнение запроса запрещается, только если у таблицы есть ключ шардирования. -- 2 — Включена. Выполнение запроса запрещается, даже если для таблицы не определен ключ шардирования. +- 0 — Выключена. ClickHouse не генерирует исключение. +- 1 — Включена. Выполнение запроса запрещается, только если у таблицы есть ключ шардирования. +- 2 — Включена. Выполнение запроса запрещается, даже если для таблицы не определен ключ шардирования.
+ +Значение по умолчанию: 0 + +## force\_optimize\_skip\_unused\_shards\_nesting {#settings-force_optimize_skip_unused_shards_nesting} + +Контролирует настройку [`force_optimize_skip_unused_shards`](#force-optimize-skip-unused-shards) (поэтому все еще требует `force_optimize_skip_unused_shards`) в зависимости от вложенности распределенного запроса (когда у вас есть `Distributed` таблица, которая смотрит на другую `Distributed` таблицу). + +Возможные значения: + +- 0 — Выключена, `force_optimize_skip_unused_shards` работает всегда. +- 1 — Включает `force_optimize_skip_unused_shards` только для 1-ого уровня вложенности. +- 2 — Включает `force_optimize_skip_unused_shards` для 1-ого и 2-ого уровня вложенности. Значение по умолчанию: 0