From b1ba2cf2bca67255376bc12f05f9a6760c2e8f34 Mon Sep 17 00:00:00 2001
From: Pavel Kovalenko
Date: Tue, 30 Mar 2021 13:20:03 +0300
Subject: [PATCH] Disk S3 possibility to restore parts to 'detached' directory.

---
Review notes (free text in this section is not part of the commit):

 * The restore file may now carry an optional fourth line holding a boolean
   'detached' flag. readRestoreInformation() returns early when that line is
   absent, so RestoreInformation::detached is given a default of 'false';
   without it the flag would be left unassigned on that path and then read
   by restore().
 * When 'detached' is set, restoreFileOperations() collects the destination
   of every replayed rename and the directory of every replayed hardlink in
   'renames' (a std::set<String>), then moves each collected path to
   pathToDetached(path) with send_metadata switched off.
 * NOTE(review): pathToDetached() takes the last directory component of its
   argument; this presumably relies on the path ending with '/'. Confirm
   that rename targets recorded in 'to_path' always do.

 src/Disks/S3/DiskS3.cpp | 46 ++++++++++++++++++++++++++++++++++++++---
 src/Disks/S3/DiskS3.h   |  5 ++++-
 2 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/src/Disks/S3/DiskS3.cpp b/src/Disks/S3/DiskS3.cpp
index 1de4ab843ac..95579851f54 100644
--- a/src/Disks/S3/DiskS3.cpp
+++ b/src/Disks/S3/DiskS3.cpp
@@ -1173,6 +1173,7 @@ struct DiskS3::RestoreInformation
     UInt64 revision = LATEST_REVISION;
     String source_bucket;
     String source_path;
+    bool detached = false;
 };
 
 void DiskS3::readRestoreInformation(DiskS3::RestoreInformation & restore_information)
@@ -1201,6 +1202,12 @@ void DiskS3::readRestoreInformation(DiskS3::RestoreInformation & restore_informa
         readText(restore_information.source_path, buffer);
         assertChar('\n', buffer);
 
+        if (!buffer.hasPendingData())
+            return;
+
+        readBoolText(restore_information.detached, buffer);
+        assertChar('\n', buffer);
+
         if (buffer.hasPendingData())
             throw Exception("Extra information at the end of restore file", ErrorCodes::UNKNOWN_FORMAT);
     }
@@ -1253,7 +1260,7 @@ void DiskS3::restore()
         removeSharedRecursive(root + '/', !cleanup_s3);
 
         restoreFiles(information.source_bucket, information.source_path, information.revision);
-        restoreFileOperations(information.source_bucket, information.source_path, information.revision);
+        restoreFileOperations(information.source_bucket, information.source_path, information.revision, information.detached);
 
         Poco::File restore_file(metadata_path + RESTORE_FILE_NAME);
         restore_file.remove();
@@ -1348,14 +1355,16 @@ void DiskS3::processRestoreFiles(const String & source_bucket, const String & so
     }
 }
 
-void DiskS3::restoreFileOperations(const String & source_bucket, const String & source_path, UInt64 target_revision)
+void DiskS3::restoreFileOperations(const String & source_bucket, const String & source_path, UInt64 target_revision, bool detached)
 {
     LOG_INFO(&Poco::Logger::get("DiskS3"), "Starting restore file operations for disk {}", name);
 
     /// Enable recording file operations if we restore to different bucket / path.
     send_metadata = bucket != source_bucket || s3_root_path != source_path;
 
-    listObjects(source_bucket, source_path + "operations/", [this, &source_bucket, &target_revision](auto list_result)
+    std::set<String> renames;
+
+    listObjects(source_bucket, source_path + "operations/", [this, &source_bucket, &target_revision, &detached, &renames](auto list_result)
     {
         const String rename = "rename";
         const String hardlink = "hardlink";
@@ -1389,6 +1398,16 @@ void DiskS3::restoreFileOperations(const String & source_bucket, const String &
                 {
                     moveFile(from_path, to_path);
                     LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Revision {}. Restored rename {} -> {}", revision, from_path, to_path);
+
+                    if (detached)
+                    {
+                        /// We don't need path, which is already renamed.
+                        auto it = renames.find(from_path);
+                        if (it != renames.end())
+                            renames.erase(it);
+
+                        renames.insert(to_path);
+                    }
                 }
             }
             else if (operation == hardlink)
@@ -1400,6 +1419,9 @@ void DiskS3::restoreFileOperations(const String & source_bucket, const String &
                     createDirectories(directoryPath(dst_path));
                     createHardLink(src_path, dst_path);
                     LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Revision {}. Restored hardlink {} -> {}", revision, src_path, dst_path);
+
+                    if (detached)
+                        renames.insert(directoryPath(dst_path));
                 }
             }
         }
@@ -1407,6 +1429,18 @@ void DiskS3::restoreFileOperations(const String & source_bucket, const String &
         return true;
     });
 
+    if (detached)
+    {
+        send_metadata = false;
+
+        for (const auto & path : renames)
+        {
+            auto detached_path = pathToDetached(path);
+            moveFile(path, detached_path);
+            LOG_DEBUG(&Poco::Logger::get("DiskS3"), "Move directory to detached {} -> {}", path, detached_path);
+        }
+    }
+
     send_metadata = true;
 
     LOG_INFO(&Poco::Logger::get("DiskS3"), "File operations restored for disk {}", name);
@@ -1451,4 +1485,10 @@ void DiskS3::onFreeze(const String & path)
     revision_file_buf.finalize();
 }
 
+String DiskS3::pathToDetached(const String & source_path)
+{
+    Poco::Path path (source_path);
+    return Poco::Path(path).parent().append(Poco::Path("detached")).append(path.directory(path.depth() - 1)).toString();
+}
+
 }
diff --git a/src/Disks/S3/DiskS3.h b/src/Disks/S3/DiskS3.h
index 87aab71fc44..4ed2a13112a 100644
--- a/src/Disks/S3/DiskS3.h
+++ b/src/Disks/S3/DiskS3.h
@@ -167,13 +167,16 @@ private:
     void readRestoreInformation(RestoreInformation & restore_information);
     void restoreFiles(const String & source_bucket, const String & source_path, UInt64 target_revision);
     void processRestoreFiles(const String & source_bucket, const String & source_path, std::vector keys);
-    void restoreFileOperations(const String & source_bucket, const String & source_path, UInt64 target_revision);
+    void restoreFileOperations(const String & source_bucket, const String & source_path, UInt64 target_revision, bool detached);
 
     /// Remove 'path' prefix from 'key' to get relative key.
     /// It's needed to store keys to metadata files in RELATIVE_PATHS version.
     static String shrinkKey(const String & path, const String & key);
     std::tuple extractRevisionAndOperationFromKey(const String & key);
 
+    /// Forms detached path '../../detached/part_name/' from '../../part_name/'
+    static String pathToDetached(const String & source_path);
+
     const String name;
     std::shared_ptr client;
     std::shared_ptr proxy_configuration;