From d24b9874bc4dba88b260ef114950c576e64e9932 Mon Sep 17 00:00:00 2001 From: Alexander Tokmakov Date: Fri, 12 Aug 2022 12:47:25 +0200 Subject: [PATCH] fix Cannot quickly remove directory --- .../MergeTree/DataPartStorageOnDisk.cpp | 17 +++++++++++++---- src/Storages/MergeTree/DataPartStorageOnDisk.h | 4 ++++ src/Storages/MergeTree/IDataPartStorage.h | 3 +++ src/Storages/MergeTree/IMergeTreeDataPart.cpp | 2 +- src/Storages/MergeTree/IMergeTreeDataPart.h | 3 ++- 5 files changed, 23 insertions(+), 6 deletions(-) diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp index f3b228a0748..db59ebc7978 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.cpp +++ b/src/Storages/MergeTree/DataPartStorageOnDisk.cpp @@ -208,6 +208,8 @@ void DataPartStorageOnDisk::remove( const NameSet & names_not_to_remove, const MergeTreeDataPartChecksums & checksums, std::list projections, + bool is_temp, + MergeTreeDataPartState state, Poco::Logger * log) const { /// NOTE We rename part to delete_tmp_ instead of delete_tmp_ to avoid race condition @@ -276,7 +278,7 @@ void DataPartStorageOnDisk::remove( clearDirectory( fs::path(to) / proj_dir_name, - can_remove_shared_data, names_not_to_remove, projection.checksums, {}, log, true); + can_remove_shared_data, names_not_to_remove, projection.checksums, {}, is_temp, state, log, true); } /// It is possible that we are removing the part which have a written but not loaded projection. @@ -303,7 +305,7 @@ void DataPartStorageOnDisk::remove( clearDirectory( fs::path(to) / name, - can_remove_shared_data, names_not_to_remove, tmp_checksums, {}, log, true); + can_remove_shared_data, names_not_to_remove, tmp_checksums, {}, is_temp, state, log, true); } catch (...) { @@ -313,7 +315,7 @@ void DataPartStorageOnDisk::remove( } } - clearDirectory(to, can_remove_shared_data, names_not_to_remove, checksums, projection_directories, log, false); + clearDirectory(to, can_remove_shared_data, names_not_to_remove, checksums, projection_directories, is_temp, state, log, false); } void DataPartStorageOnDisk::clearDirectory( @@ -322,12 +324,19 @@ void DataPartStorageOnDisk::clearDirectory( const NameSet & names_not_to_remove, const MergeTreeDataPartChecksums & checksums, const std::unordered_set & skip_directories, + bool is_temp, + MergeTreeDataPartState state, Poco::Logger * log, bool is_projection) const { auto disk = volume->getDisk(); - if (checksums.empty()) + /// It does not make sense to try fast path for incomplete temporary parts, because some files are probably absent. + /// Sometimes we add something to checksums.files before actually writing checksums and columns on disk. + /// Also sometimes we write checksums.txt and columns.txt in arbitrary order, so this check becomes complex... + bool looks_like_temporary_part = is_temp || state == MergeTreeDataPartState::Temporary; + bool incomplete_temporary_part = looks_like_temporary_part && (!disk->exists(fs::path(dir) / "checksums.txt") || !disk->exists(fs::path(dir) / "columns.txt")); + if (checksums.empty() || incomplete_temporary_part) { if (is_projection) { diff --git a/src/Storages/MergeTree/DataPartStorageOnDisk.h b/src/Storages/MergeTree/DataPartStorageOnDisk.h index 2426b5eee80..5b5cff8e636 100644 --- a/src/Storages/MergeTree/DataPartStorageOnDisk.h +++ b/src/Storages/MergeTree/DataPartStorageOnDisk.h @@ -49,6 +49,8 @@ public: const NameSet & names_not_to_remove, const MergeTreeDataPartChecksums & checksums, std::list projections, + bool is_temp, + MergeTreeDataPartState state, Poco::Logger * log) const override; std::string getRelativePathForPrefix(Poco::Logger * log, const String & prefix, bool detached) const override; @@ -120,6 +122,8 @@ private: const NameSet & names_not_to_remove, const MergeTreeDataPartChecksums & checksums, const std::unordered_set & skip_directories, + bool is_temp, + MergeTreeDataPartState state, Poco::Logger * log, bool is_projection) const; }; diff --git a/src/Storages/MergeTree/IDataPartStorage.h b/src/Storages/MergeTree/IDataPartStorage.h index f0173baecb7..54b3927d077 100644 --- a/src/Storages/MergeTree/IDataPartStorage.h +++ b/src/Storages/MergeTree/IDataPartStorage.h @@ -3,6 +3,7 @@ #include #include #include +#include #include namespace DB @@ -116,6 +117,8 @@ public: const NameSet & names_not_to_remove, const MergeTreeDataPartChecksums & checksums, std::list projections, + bool is_temp, + MergeTreeDataPartState state, Poco::Logger * log) const = 0; /// Get a name like 'prefix_partdir_tryN' which does not exist in a root dir. diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.cpp b/src/Storages/MergeTree/IMergeTreeDataPart.cpp index 188a159c4b4..d9fba3ef6bd 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.cpp +++ b/src/Storages/MergeTree/IMergeTreeDataPart.cpp @@ -1455,7 +1455,7 @@ void IMergeTreeDataPart::remove() const projection_checksums.emplace_back(IDataPartStorage::ProjectionChecksums{.name = p_name, .checksums = projection_part->checksums}); } - data_part_storage->remove(can_remove, files_not_to_remove, checksums, projection_checksums, storage.log); + data_part_storage->remove(can_remove, files_not_to_remove, checksums, projection_checksums, is_temp, getState(), storage.log); } String IMergeTreeDataPart::getRelativePathForPrefix(const String & prefix, bool detached) const diff --git a/src/Storages/MergeTree/IMergeTreeDataPart.h b/src/Storages/MergeTree/IMergeTreeDataPart.h index 2a5e95e23d1..86ba34a0744 100644 --- a/src/Storages/MergeTree/IMergeTreeDataPart.h +++ b/src/Storages/MergeTree/IMergeTreeDataPart.h @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -42,6 +41,8 @@ class IMergeTreeDataPartWriter; class MarkCache; class UncompressedCache; class MergeTreeTransaction; +class IDataPartStorage; +using DataPartStoragePtr = std::shared_ptr; /// Description of the data part. class IMergeTreeDataPart : public std::enable_shared_from_this