From 1ccf10505acfc9d564af9e83670156855bab7c4c Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Tue, 13 Oct 2020 21:25:45 +0300 Subject: [PATCH] add tests and comments --- .../MergeTree/MergeTreeDataMergerMutator.cpp | 6 +++++- .../MergeTree/MergeTreeDataMergerMutator.h | 4 +++- src/Storages/StorageMergeTree.cpp | 4 ++++ src/Storages/StorageReplicatedMergeTree.cpp | 10 +++++++++- ...3_optimize_skip_merged_partitions.reference | 2 ++ .../01533_optimize_skip_merged_partitions.sql | 18 ++++++++++++++++++ 6 files changed, 41 insertions(+), 3 deletions(-) create mode 100644 tests/queries/0_stateless/01533_optimize_skip_merged_partitions.reference create mode 100644 tests/queries/0_stateless/01533_optimize_skip_merged_partitions.sql diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp index 3c1a40342de..ac08cd31dd2 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.cpp @@ -358,7 +358,7 @@ bool MergeTreeDataMergerMutator::selectAllPartsToMergeWithinPartition( const AllowedMergingPredicate & can_merge, const String & partition_id, bool final, - bool * is_single_merged_part, + bool * is_single_merged_part, String * out_disable_reason) { MergeTreeData::DataPartsVector parts = selectAllPartsFromPartition(partition_id); @@ -373,6 +373,8 @@ bool MergeTreeDataMergerMutator::selectAllPartsToMergeWithinPartition( return false; } + /// If final, optimize_skip_merged_partitions is true and we have only one part in partition with level > 0 + /// than we don't select it to merge if (final && data.getSettings()->optimize_skip_merged_partitions && parts.size() == 1 && parts[0]->info.level > 0) { *is_single_merged_part = true; @@ -637,6 +639,8 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataMergerMutator::mergePartsToTempor { static const String TMP_PREFIX = "tmp_merge_"; + + if (merges_blocker.isCancelled()) throw Exception("Cancelled merging parts", ErrorCodes::ABORTED); diff --git a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h index 4118c09fa89..481fa24637b 100644 --- a/src/Storages/MergeTree/MergeTreeDataMergerMutator.h +++ b/src/Storages/MergeTree/MergeTreeDataMergerMutator.h @@ -88,7 +88,9 @@ public: String * out_disable_reason = nullptr); /** Select all the parts in the specified partition for merge, if possible. - * final - choose to merge even a single part - that is, allow to merge one part "with itself". + * final - choose to merge even a single part - that is, allow to merge one part "with itself", + * but if setting optimize_skip_merged_partitions is true (it's true as default) than single part with level > 0 + * won't be merged with itself. */ bool selectAllPartsToMergeWithinPartition( FutureMergedMutatedPart & future_part, diff --git a/src/Storages/StorageMergeTree.cpp b/src/Storages/StorageMergeTree.cpp index 6826ab8f220..574c65502f1 100644 --- a/src/Storages/StorageMergeTree.cpp +++ b/src/Storages/StorageMergeTree.cpp @@ -654,6 +654,9 @@ bool StorageMergeTree::merge( }; bool selected = false; + + /// This flag is true when there is only one part in partition, it's level > 0 + /// and setting optimize_skip_merged_partitions is true bool is_single_merged_part = false; if (partition_id.empty()) @@ -710,6 +713,7 @@ bool StorageMergeTree::merge( if (!selected) { + /// If is_single_merged_part is true we treat this part as already merged if (final && is_single_merged_part) { return true; diff --git a/src/Storages/StorageReplicatedMergeTree.cpp b/src/Storages/StorageReplicatedMergeTree.cpp index f8122f59046..19ce88ad0b8 100644 --- a/src/Storages/StorageReplicatedMergeTree.cpp +++ b/src/Storages/StorageReplicatedMergeTree.cpp @@ -3761,12 +3761,16 @@ bool StorageReplicatedMergeTree::optimize( ReplicatedMergeTreeMergePredicate can_merge = queue.getMergePredicate(zookeeper); FutureMergedMutatedPart future_merged_part; + + /// This flag is true when there is only one part in partition, it's level > 0 + /// and setting optimize_skip_merged_partitions is true bool is_single_merged_part = false; bool selected = merger_mutator.selectAllPartsToMergeWithinPartition( future_merged_part, disk_space, can_merge, partition_id, true, &is_single_merged_part, nullptr); if (!selected) { + /// If is_single_merged_part is true we treat this part as already merged if (is_single_merged_part) return true; break; @@ -3789,7 +3793,7 @@ bool StorageReplicatedMergeTree::optimize( } if (try_no == max_retries) return handle_noop("Can't create merge queue node in ZooKeeper, because log was updated in every of " - + toString(max_retries) + " tries"); + + toString(max_retries) + " tries"); } } else @@ -3803,6 +3807,9 @@ bool StorageReplicatedMergeTree::optimize( FutureMergedMutatedPart future_merged_part; String disable_reason; bool selected = false; + + /// This flag is true when there is only one part in partition, it's level > 0 + /// and setting optimize_skip_merged_partitions is true bool is_single_merged_part = false; if (!partition) { @@ -3819,6 +3826,7 @@ bool StorageReplicatedMergeTree::optimize( if (!selected) { + /// If is_single_merged_part is true we treat this part as already merged if (final && is_single_merged_part) return true; std::stringstream message; diff --git a/tests/queries/0_stateless/01533_optimize_skip_merged_partitions.reference b/tests/queries/0_stateless/01533_optimize_skip_merged_partitions.reference new file mode 100644 index 00000000000..300e1103c0f --- /dev/null +++ b/tests/queries/0_stateless/01533_optimize_skip_merged_partitions.reference @@ -0,0 +1,2 @@ +optimize_final 200001 1 1 +optimize_final 202001 1 1 diff --git a/tests/queries/0_stateless/01533_optimize_skip_merged_partitions.sql b/tests/queries/0_stateless/01533_optimize_skip_merged_partitions.sql new file mode 100644 index 00000000000..325aaa2d6bb --- /dev/null +++ b/tests/queries/0_stateless/01533_optimize_skip_merged_partitions.sql @@ -0,0 +1,18 @@ +DROP TABLE IF EXISTS optimize_final; + +CREATE TABLE optimize_final(t DateTime, x Int32) ENGINE = MergeTree() PARTITION BY toYYYYMM(t) ORDER BY x; + +INSERT INTO optimize_final SELECT toDate('2000-01-01'), number FROM numbers(5); +INSERT INTO optimize_final SELECT toDate('2000-01-01'), number + 5 FROM numbers(5); + +OPTIMIZE TABLE optimize_final FINAL; + +INSERT INTO optimize_final SELECT toDate('2020-01-01'), number FROM numbers(5); +INSERT INTO optimize_final SELECT toDate('2020-01-01'), number + 5 FROM numbers(5); + +OPTIMIZE TABLE optimize_final FINAL; + +SELECT table, partition, active, level from system.parts where table = 'optimize_final' and active = 1; + +DROP TABLE optimize_final; +