From f0163c2acfe41c78124e49582301e896ee3f8240 Mon Sep 17 00:00:00 2001 From: Pavel Kruglov Date: Fri, 12 Feb 2021 17:02:04 +0300 Subject: [PATCH] Don't create empty parts on INSERT --- src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp | 6 ++++++ src/Storages/MergeTree/MergeTreeDataWriter.cpp | 5 +++++ .../MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp | 5 +++++ .../queries/0_stateless/01560_optimize_on_insert.reference | 1 + tests/queries/0_stateless/01560_optimize_on_insert.sql | 7 +++++++ 5 files changed, 24 insertions(+) diff --git a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp index 904081cc1df..bb5644567ae 100644 --- a/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/MergeTreeBlockOutputStream.cpp @@ -29,6 +29,12 @@ void MergeTreeBlockOutputStream::write(const Block & block) Stopwatch watch; MergeTreeData::MutableDataPartPtr part = storage.writer.writeTempPart(current_block, metadata_snapshot, optimize_on_insert); + + /// If optimize_on_insert setting is true, current_block could become empty after merge + /// and in that case no part is created. + if (!part) + continue; + storage.renameTempPartAndAdd(part, &storage.increment); PartLog::addNewPart(storage.global_context, part, watch.elapsed()); diff --git a/src/Storages/MergeTree/MergeTreeDataWriter.cpp b/src/Storages/MergeTree/MergeTreeDataWriter.cpp index 5a9bdd90bc8..5929293d714 100644 --- a/src/Storages/MergeTree/MergeTreeDataWriter.cpp +++ b/src/Storages/MergeTree/MergeTreeDataWriter.cpp @@ -327,6 +327,11 @@ MergeTreeData::MutableDataPartPtr MergeTreeDataWriter::writeTempPart(BlockWithPa /// Size of part would not be greater than block.bytes() + epsilon size_t expected_size = block.bytes(); + /// If optimize_on_insert is true, block may become empty after merge. + /// There is no need to create an empty part. 
+ if (expected_size == 0) + return nullptr; + DB::IMergeTreeDataPart::TTLInfos move_ttl_infos; const auto & move_ttl_entries = metadata_snapshot->getMoveTTLs(); for (const auto & ttl_entry : move_ttl_entries) diff --git a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp index 7046a510f75..6f90d9f00a9 100644 --- a/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp +++ b/src/Storages/MergeTree/ReplicatedMergeTreeBlockOutputStream.cpp @@ -144,6 +144,11 @@ void ReplicatedMergeTreeBlockOutputStream::write(const Block & block) MergeTreeData::MutableDataPartPtr part = storage.writer.writeTempPart(current_block, metadata_snapshot, optimize_on_insert); + /// If optimize_on_insert setting is true, current_block could become empty after merge + /// and in that case no part is created. + if (!part) + continue; + String block_id; if (deduplicate) diff --git a/tests/queries/0_stateless/01560_optimize_on_insert.reference b/tests/queries/0_stateless/01560_optimize_on_insert.reference index 7ace2043be0..477f48be7a9 100644 --- a/tests/queries/0_stateless/01560_optimize_on_insert.reference +++ b/tests/queries/0_stateless/01560_optimize_on_insert.reference @@ -11,3 +11,4 @@ Summing Merge Tree Aggregating Merge Tree 1 5 2020-01-01 00:00:00 2 5 2020-01-02 00:00:00 +Check creating empty parts diff --git a/tests/queries/0_stateless/01560_optimize_on_insert.sql b/tests/queries/0_stateless/01560_optimize_on_insert.sql index 9f6dac686bb..f64f4c75cfe 100644 --- a/tests/queries/0_stateless/01560_optimize_on_insert.sql +++ b/tests/queries/0_stateless/01560_optimize_on_insert.sql @@ -33,3 +33,10 @@ INSERT INTO aggregating_merge_tree VALUES (1, 1, '2020-01-01'), (2, 1, '2020-01- SELECT * FROM aggregating_merge_tree ORDER BY key; DROP TABLE aggregating_merge_tree; +SELECT 'Check creating empty parts'; +DROP TABLE IF EXISTS empty; +CREATE TABLE empty (key UInt32, val UInt32, date Datetime) 
ENGINE=SummingMergeTree(val) PARTITION BY date ORDER BY key; +INSERT INTO empty VALUES (1, 1, '2020-01-01'), (1, 1, '2020-01-01'), (1, -2, '2020-01-01'); +SELECT * FROM empty ORDER BY key; +SELECT table, partition, active FROM system.parts where table = 'empty' and active = 1; +DROP TABLE empty;