From d14cc1691cb0e9efc573856e31423a704b42f6a8 Mon Sep 17 00:00:00 2001 From: Smita Kulkarni Date: Mon, 17 Apr 2023 18:53:26 +0200 Subject: [PATCH] =?UTF-8?q?Added=20an=20option=20=E2=80=98force=E2=80=99?= =?UTF-8?q?=20to=20clearOldTemporaryDirectories,=20which=20is=20currently?= =?UTF-8?q?=20used=20by=20dropAllData=20to=20remove=20blobs=20when=20zero?= =?UTF-8?q?=20copy=20replication=20is=20enabled.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/Storages/MergeTree/MergeTreeData.cpp | 6 +++--- src/Storages/MergeTree/MergeTreeData.h | 3 ++- .../queries/0_stateless/02432_s3_parallel_parts_cleanup.sql | 2 +- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Storages/MergeTree/MergeTreeData.cpp b/src/Storages/MergeTree/MergeTreeData.cpp index 45759c449f6..5c189887e23 100644 --- a/src/Storages/MergeTree/MergeTreeData.cpp +++ b/src/Storages/MergeTree/MergeTreeData.cpp @@ -1960,7 +1960,7 @@ static bool isOldPartDirectory(const DiskPtr & disk, const String & directory_pa } -size_t MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds, const NameSet & valid_prefixes) +size_t MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds, const NameSet & valid_prefixes, const bool & force) { /// If the method is already called from another thread, then we don't need to do anything. std::unique_lock lock(clear_old_temporary_directories_mutex, std::defer_lock); @@ -2018,7 +2018,7 @@ size_t MergeTreeData::clearOldTemporaryDirectories(size_t custom_directories_lif /// We don't control the amount of refs for temporary parts so we cannot decide can we remove blobs /// or not. 
So we are not doing it bool keep_shared = false; - if (disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication) + if (disk->supportZeroCopyReplication() && settings->allow_remote_fs_zero_copy_replication && !force) { LOG_WARNING(log, "Since zero-copy replication is enabled we are not going to remove blobs from shared storage for {}", full_path); keep_shared = true; @@ -2724,7 +2724,7 @@ void MergeTreeData::dropAllData() } LOG_INFO(log, "dropAllData: clearing temporary directories"); - clearOldTemporaryDirectories(0, {"tmp_", "delete_tmp_", "tmp-fetch_"}); + clearOldTemporaryDirectories(0, {"tmp_", "delete_tmp_", "tmp-fetch_"}, /* force */ true); column_sizes.clear(); diff --git a/src/Storages/MergeTree/MergeTreeData.h b/src/Storages/MergeTree/MergeTreeData.h index b03b7d4a71e..3053657e37b 100644 --- a/src/Storages/MergeTree/MergeTreeData.h +++ b/src/Storages/MergeTree/MergeTreeData.h @@ -667,7 +667,8 @@ public: /// Delete all directories which names begin with "tmp" /// Must be called with locked lockForShare() because it's using relative_data_path. 
- size_t clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds, const NameSet & valid_prefixes = {"tmp_", "tmp-fetch_"}); + /// 'force' is used by dropAllData(): when it is set, blobs are removed even if zero-copy replication is enabled. + size_t clearOldTemporaryDirectories(size_t custom_directories_lifetime_seconds, const NameSet & valid_prefixes = {"tmp_", "tmp-fetch_"}, const bool & force = false); size_t clearEmptyParts(); diff --git a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql index 3688a649d5e..0230f30bf05 100644 --- a/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql +++ b/tests/queries/0_stateless/02432_s3_parallel_parts_cleanup.sql @@ -55,7 +55,7 @@ select sleep(3); select count(), sum(n), sum(m) from rmt; select count(), sum(n), sum(m) from rmt2; --- So there will be at least 2 parts (just in case no parts are removed until drop) +-- So there will be at least 2 parts (just in case no parts are removed until drop). insert into rmt(n) values (10); drop table rmt;