From 574f26f21fbf5673fcac056d4205c33bacefb893 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 19 Nov 2024 23:16:47 +0000 Subject: [PATCH 1/5] PR: skip index analysis on followers --- .../MergeTree/MergeTreeDataSelectExecutor.cpp | 21 +++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 52ea6db787d..c47736c7c31 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -80,6 +80,7 @@ namespace Setting extern const SettingsUInt64 parallel_replica_offset; extern const SettingsUInt64 parallel_replicas_count; extern const SettingsParallelReplicasMode parallel_replicas_mode; + extern const SettingsBool parallel_replicas_local_plan; } namespace MergeTreeSetting @@ -631,10 +632,26 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd bool use_skip_indexes, bool find_exact_ranges) { - RangesInDataParts parts_with_ranges; - parts_with_ranges.resize(parts.size()); const Settings & settings = context->getSettingsRef(); + if (context->canUseParallelReplicasOnFollower() && settings[Setting::parallel_replicas_local_plan]) + { + RangesInDataParts parts_with_ranges; + parts_with_ranges.reserve(parts.size()); + for (size_t part_index = 0; part_index < parts.size(); ++part_index) + { + const auto & part = parts[part_index]; + // LOG_DEBUG(getLogger(__PRETTY_FUNCTION__), "part {}", part->getNameWithState()); + + MarkRanges ranges; + ranges.emplace_back(0, part->getMarksCount()); + parts_with_ranges.emplace_back(part, part_index, std::move(ranges)); + } + return parts_with_ranges; + } + + RangesInDataParts parts_with_ranges; + parts_with_ranges.resize(parts.size()); if (use_skip_indexes && settings[Setting::force_data_skipping_indices].changed) { const auto & indices_str = settings[Setting::force_data_skipping_indices].toString(); From 4fc32ea96fb59bcc95ef6a9a471f47f3c7ace1b6 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Tue, 19 Nov 2024 23:52:54 +0000 Subject: [PATCH 2/5] Polish --- src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index c47736c7c31..36bc7588542 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -650,8 +650,6 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd return parts_with_ranges; } - RangesInDataParts parts_with_ranges; - parts_with_ranges.resize(parts.size()); if (use_skip_indexes && settings[Setting::force_data_skipping_indices].changed) { const auto & indices_str = settings[Setting::force_data_skipping_indices].toString(); @@ -690,6 +688,8 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd std::atomic sum_marks_pk = 0; std::atomic sum_parts_pk = 0; + RangesInDataParts parts_with_ranges(parts.size()); + /// Let's find what range to read from each part. { auto mark_cache = context->getIndexMarkCache(); From 5a95219e89aa9ac9f8d56980073d8bc18aea18d1 Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 20 Nov 2024 22:07:57 +0000 Subject: [PATCH 3/5] Add setting parallel_replicas_skip_index_analysis_on_workers --- src/Core/Settings.cpp | 3 +++ src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp | 4 +++- .../integration/test_parallel_replicas_all_marks_read/test.py | 1 + 3 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/Core/Settings.cpp b/src/Core/Settings.cpp index 140a77011dd..2cd3d272490 100644 --- a/src/Core/Settings.cpp +++ b/src/Core/Settings.cpp @@ -5652,6 +5652,9 @@ Parts virtually divided into segments to be distributed between replicas for par )", BETA) \ DECLARE(Bool, parallel_replicas_local_plan, true, R"( Build local plan for local replica +)", BETA) \ + DECLARE(Bool, parallel_replicas_skip_index_analysis_on_workers, true, R"( +Skip index analysis on workers. Effective only with enabled parallel_replicas_local_plan )", BETA) \ \ DECLARE(Bool, allow_experimental_analyzer, true, R"( diff --git a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp index 36bc7588542..f1c0a781502 100644 --- a/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp +++ b/src/Storages/MergeTree/MergeTreeDataSelectExecutor.cpp @@ -81,6 +81,7 @@ namespace Setting extern const SettingsUInt64 parallel_replicas_count; extern const SettingsParallelReplicasMode parallel_replicas_mode; extern const SettingsBool parallel_replicas_local_plan; + extern const SettingsBool parallel_replicas_skip_index_analysis_on_workers; } namespace MergeTreeSetting @@ -634,7 +635,8 @@ RangesInDataParts MergeTreeDataSelectExecutor::filterPartsByPrimaryKeyAndSkipInd { const Settings & settings = context->getSettingsRef(); - if (context->canUseParallelReplicasOnFollower() && settings[Setting::parallel_replicas_local_plan]) + if (context->canUseParallelReplicasOnFollower() && settings[Setting::parallel_replicas_local_plan] + && settings[Setting::parallel_replicas_skip_index_analysis_on_workers]) { RangesInDataParts parts_with_ranges; parts_with_ranges.reserve(parts.size()); diff --git a/tests/integration/test_parallel_replicas_all_marks_read/test.py b/tests/integration/test_parallel_replicas_all_marks_read/test.py index 593b98126df..9f91a980474 100644 --- a/tests/integration/test_parallel_replicas_all_marks_read/test.py +++ b/tests/integration/test_parallel_replicas_all_marks_read/test.py @@ -71,6 +71,7 @@ def _get_result_with_parallel_replicas( "cluster_for_parallel_replicas": f"{cluster_name}", "parallel_replicas_mark_segment_size": parallel_replicas_mark_segment_size, "query_id": query_id, + "parallel_replicas_skip_index_analysis_on_workers": True, }, ) From c0195f5482ddae3ac5b8a76d04b3c9c0ad1cbb6e Mon Sep 17 00:00:00 2001 From: Igor Nikonov Date: Wed, 20 Nov 2024 22:56:36 +0000 Subject: [PATCH 4/5] Update settings history --- src/Core/SettingsChangesHistory.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Core/SettingsChangesHistory.cpp b/src/Core/SettingsChangesHistory.cpp index f0d3e001362..c4cf5cf22a1 100644 --- a/src/Core/SettingsChangesHistory.cpp +++ b/src/Core/SettingsChangesHistory.cpp @@ -90,6 +90,7 @@ static std::initializer_list Date: Wed, 20 Nov 2024 22:57:09 +0000 Subject: [PATCH 5/5] Fix test_parallel_replicas_all_marks_read --- tests/integration/test_parallel_replicas_all_marks_read/test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/test_parallel_replicas_all_marks_read/test.py b/tests/integration/test_parallel_replicas_all_marks_read/test.py index 9f91a980474..92317afabbe 100644 --- a/tests/integration/test_parallel_replicas_all_marks_read/test.py +++ b/tests/integration/test_parallel_replicas_all_marks_read/test.py @@ -71,7 +71,7 @@ def _get_result_with_parallel_replicas( "cluster_for_parallel_replicas": f"{cluster_name}", "parallel_replicas_mark_segment_size": parallel_replicas_mark_segment_size, "query_id": query_id, - "parallel_replicas_skip_index_analysis_on_workers": True, + "parallel_replicas_skip_index_analysis_on_workers": False, }, )