From f0934d134d50b10cab57a978c7790240601b589b Mon Sep 17 00:00:00 2001 From: kssenii Date: Fri, 8 Nov 2024 16:35:32 +0100 Subject: [PATCH 1/3] Allow to shrink file segment only to aligned size --- src/Interpreters/Cache/FileCache.cpp | 15 ++----- src/Interpreters/Cache/FileCache.h | 2 + src/Interpreters/Cache/FileCacheUtils.cpp | 0 src/Interpreters/Cache/FileCacheUtils.h | 17 ++++++++ src/Interpreters/Cache/FileSegment.cpp | 51 +++++++++++++++++++---- src/Interpreters/Cache/FileSegment.h | 1 + src/Interpreters/Cache/Metadata.cpp | 36 ---------------- src/Interpreters/Cache/Metadata.h | 2 - 8 files changed, 66 insertions(+), 58 deletions(-) create mode 100644 src/Interpreters/Cache/FileCacheUtils.cpp create mode 100644 src/Interpreters/Cache/FileCacheUtils.h diff --git a/src/Interpreters/Cache/FileCache.cpp b/src/Interpreters/Cache/FileCache.cpp index 7de3f7af78d..170ee6856bd 100644 --- a/src/Interpreters/Cache/FileCache.cpp +++ b/src/Interpreters/Cache/FileCache.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -53,16 +54,6 @@ namespace ErrorCodes namespace { - size_t roundDownToMultiple(size_t num, size_t multiple) - { - return (num / multiple) * multiple; - } - - size_t roundUpToMultiple(size_t num, size_t multiple) - { - return roundDownToMultiple(num + multiple - 1, multiple); - } - std::string getCommonUserID() { auto user_from_context = DB::Context::getGlobalContextInstance()->getFilesystemCacheUser(); @@ -601,8 +592,8 @@ FileCache::getOrSet( /// 2. max_file_segments_limit FileSegment::Range result_range = initial_range; - const auto aligned_offset = roundDownToMultiple(initial_range.left, boundary_alignment); - auto aligned_end_offset = std::min(roundUpToMultiple(initial_range.right + 1, boundary_alignment), file_size) - 1; + const auto aligned_offset = FileCacheUtils::roundDownToMultiple(initial_range.left, boundary_alignment); + auto aligned_end_offset = std::min(FileCacheUtils::roundUpToMultiple(initial_range.right + 1, boundary_alignment), file_size) - 1; chassert(aligned_offset <= initial_range.left); chassert(aligned_end_offset >= initial_range.right); diff --git a/src/Interpreters/Cache/FileCache.h b/src/Interpreters/Cache/FileCache.h index 810ed481300..ae052cab4e3 100644 --- a/src/Interpreters/Cache/FileCache.h +++ b/src/Interpreters/Cache/FileCache.h @@ -161,6 +161,8 @@ public: size_t getMaxFileSegmentSize() const { return max_file_segment_size; } + size_t getBoundaryAlignment() const { return boundary_alignment; } + bool tryReserve( FileSegment & file_segment, size_t size, diff --git a/src/Interpreters/Cache/FileCacheUtils.cpp b/src/Interpreters/Cache/FileCacheUtils.cpp new file mode 100644 index 00000000000..e69de29bb2d diff --git a/src/Interpreters/Cache/FileCacheUtils.h b/src/Interpreters/Cache/FileCacheUtils.h new file mode 100644 index 00000000000..b35ce867a79 --- /dev/null +++ b/src/Interpreters/Cache/FileCacheUtils.h @@ -0,0 +1,17 @@ +#pragma once +#include + +namespace FileCacheUtils +{ + +static size_t roundDownToMultiple(size_t num, size_t multiple) +{ + return (num / multiple) * multiple; +} + +static size_t roundUpToMultiple(size_t num, size_t multiple) +{ + return roundDownToMultiple(num + multiple - 1, multiple); +} + +} diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 541f0f5607a..95cc84fb03e 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -85,7 +86,6 @@ FileSegment::FileSegment( break; } /// DOWNLOADED is used either on initial cache metadata load into memory on server startup - /// or on shrinkFileSegmentToDownloadedSize() -- when file segment object is updated. case (State::DOWNLOADED): { reserved_size = downloaded_size = size_; @@ -629,6 +629,46 @@ void FileSegment::completePartAndResetDownloader() LOG_TEST(log, "Complete batch. ({})", getInfoForLogUnlocked(lk)); } +void FileSegment::shrinkFileSegmentToDownloadedSize(const LockedKey & locked_key, const FileSegmentGuard::Lock & lock) +{ + if (downloaded_size == range().size()) + { + /// Nothing to resize; + return; + } + + if (!locked_key.isLastOwnerOfFileSegment(offset())) + { + throw Exception( + ErrorCodes::LOGICAL_ERROR, + "Shrinking of file segment can be done only by the last holder: {}", + getInfoForLog()); + } + + const size_t aligned_downloaded_size = FileCacheUtils::roundUpToMultiple(downloaded_size, cache->getBoundaryAlignment()); + + chassert(aligned_downloaded_size <= range().size()); + chassert(aligned_downloaded_size >= downloaded_size); + + if (aligned_downloaded_size == range().size()) + { + /// Nothing to resize; + return; + } + + if (downloaded_size == aligned_downloaded_size) + setDownloadState(State::DOWNLOADED, lock); + else + setDownloadState(State::PARTIALLY_DOWNLOADED, lock); + + segment_range.right = segment_range.left + aligned_downloaded_size - 1; + + const size_t diff = reserved_size - downloaded_size; + chassert(reserved_size >= downloaded_size); + if (diff) + getQueueIterator()->decrementSize(diff); +} + void FileSegment::complete(bool allow_background_download) { ProfileEventTimeIncrement watch(ProfileEvents::FileSegmentCompleteMicroseconds); @@ -716,8 +756,7 @@ void FileSegment::complete(bool allow_background_download) if (!added_to_download_queue) { - locked_key->shrinkFileSegmentToDownloadedSize(offset(), segment_lock); - setDetachedState(segment_lock); /// See comment below. + shrinkFileSegmentToDownloadedSize(*locked_key, segment_lock); } } break; @@ -747,11 +786,7 @@ void FileSegment::complete(bool allow_background_download) /// but current file segment should remain PARRTIALLY_DOWNLOADED_NO_CONTINUATION and with detached state, /// because otherwise an invariant that getOrSet() returns a contiguous range of file segments will be broken /// (this will be crucial for other file segment holder, not for current one). - locked_key->shrinkFileSegmentToDownloadedSize(offset(), segment_lock); - - /// We mark current file segment with state DETACHED, even though the data is still in cache - /// (but a separate file segment) because is_last_holder is satisfied, so it does not matter. - setDetachedState(segment_lock); + shrinkFileSegmentToDownloadedSize(*locked_key, segment_lock); } } break; diff --git a/src/Interpreters/Cache/FileSegment.h b/src/Interpreters/Cache/FileSegment.h index 21d5f9dab5f..d9f56f7e1a8 100644 --- a/src/Interpreters/Cache/FileSegment.h +++ b/src/Interpreters/Cache/FileSegment.h @@ -239,6 +239,7 @@ private: void setDownloadedUnlocked(const FileSegmentGuard::Lock &); void setDownloadFailedUnlocked(const FileSegmentGuard::Lock &); + void shrinkFileSegmentToDownloadedSize(const LockedKey &, const FileSegmentGuard::Lock &); void assertNotDetached() const; void assertNotDetachedUnlocked(const FileSegmentGuard::Lock &) const; diff --git a/src/Interpreters/Cache/Metadata.cpp b/src/Interpreters/Cache/Metadata.cpp index 231545212cd..93bb91ff0fe 100644 --- a/src/Interpreters/Cache/Metadata.cpp +++ b/src/Interpreters/Cache/Metadata.cpp @@ -990,42 +990,6 @@ KeyMetadata::iterator LockedKey::removeFileSegmentImpl( return key_metadata->erase(it); } -void LockedKey::shrinkFileSegmentToDownloadedSize( - size_t offset, - const FileSegmentGuard::Lock & segment_lock) -{ - /** - * In case file was partially downloaded and it's download cannot be continued - * because of no space left in cache, we need to be able to cut file segment's size to downloaded_size. - */ - - auto file_segment_metadata = getByOffset(offset); - const auto & file_segment = file_segment_metadata->file_segment; - chassert(file_segment->assertCorrectnessUnlocked(segment_lock)); - - const size_t downloaded_size = file_segment->getDownloadedSize(); - if (downloaded_size == file_segment->range().size()) - { - throw Exception( - ErrorCodes::LOGICAL_ERROR, - "Nothing to reduce, file segment fully downloaded: {}", - file_segment->getInfoForLogUnlocked(segment_lock)); - } - - chassert(file_segment->reserved_size >= downloaded_size); - int64_t diff = file_segment->reserved_size - downloaded_size; - - file_segment_metadata->file_segment = std::make_shared( - getKey(), offset, downloaded_size, FileSegment::State::DOWNLOADED, - CreateFileSegmentSettings(file_segment->getKind()), false, - file_segment->cache, key_metadata, file_segment->queue_iterator); - - if (diff) - file_segment_metadata->getQueueIterator()->decrementSize(diff); - - chassert(file_segment_metadata->file_segment->assertCorrectnessUnlocked(segment_lock)); -} - bool LockedKey::addToDownloadQueue(size_t offset, const FileSegmentGuard::Lock &) { auto it = key_metadata->find(offset); diff --git a/src/Interpreters/Cache/Metadata.h b/src/Interpreters/Cache/Metadata.h index 0e85ead3265..ef69658246d 100644 --- a/src/Interpreters/Cache/Metadata.h +++ b/src/Interpreters/Cache/Metadata.h @@ -325,8 +325,6 @@ struct LockedKey : private boost::noncopyable bool can_be_broken = false, bool invalidate_queue_entry = true); - void shrinkFileSegmentToDownloadedSize(size_t offset, const FileSegmentGuard::Lock &); - bool addToDownloadQueue(size_t offset, const FileSegmentGuard::Lock &); bool isLastOwnerOfFileSegment(size_t offset) const; From 847f0441e3713860f807bd9a6fb37c2baff0096a Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 12 Nov 2024 12:41:05 +0100 Subject: [PATCH 2/3] Fix assert --- src/Interpreters/Cache/FileSegment.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 95cc84fb03e..82154c8c9a4 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -645,9 +645,9 @@ void FileSegment::shrinkFileSegmentToDownloadedSize(const LockedKey & locked_key getInfoForLog()); } - const size_t aligned_downloaded_size = FileCacheUtils::roundUpToMultiple(downloaded_size, cache->getBoundaryAlignment()); + size_t aligned_downloaded_size = FileCacheUtils::roundUpToMultiple(downloaded_size, cache->getBoundaryAlignment()); + aligned_downloaded_size = std::min(aligned_downloaded_size, range().size()); - chassert(aligned_downloaded_size <= range().size()); chassert(aligned_downloaded_size >= downloaded_size); if (aligned_downloaded_size == range().size()) From 844594de18a43eee1c9dc4e87ef7e60514cc775f Mon Sep 17 00:00:00 2001 From: kssenii Date: Tue, 12 Nov 2024 16:58:30 +0100 Subject: [PATCH 3/3] Fix --- src/Interpreters/Cache/FileSegment.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/Interpreters/Cache/FileSegment.cpp b/src/Interpreters/Cache/FileSegment.cpp index 82154c8c9a4..85208a5541f 100644 --- a/src/Interpreters/Cache/FileSegment.cpp +++ b/src/Interpreters/Cache/FileSegment.cpp @@ -646,8 +646,6 @@ void FileSegment::shrinkFileSegmentToDownloadedSize(const LockedKey & locked_key } size_t aligned_downloaded_size = FileCacheUtils::roundUpToMultiple(downloaded_size, cache->getBoundaryAlignment()); - aligned_downloaded_size = std::min(aligned_downloaded_size, range().size()); - chassert(aligned_downloaded_size >= downloaded_size); if (aligned_downloaded_size == range().size()) @@ -655,13 +653,18 @@ void FileSegment::shrinkFileSegmentToDownloadedSize(const LockedKey & locked_key /// Nothing to resize; return; } - - if (downloaded_size == aligned_downloaded_size) + else if (aligned_downloaded_size > range().size() + || downloaded_size == aligned_downloaded_size) + { + /// Does not make sense to resize upwords. setDownloadState(State::DOWNLOADED, lock); + segment_range.right = segment_range.left + downloaded_size - 1; + } else + { setDownloadState(State::PARTIALLY_DOWNLOADED, lock); - - segment_range.right = segment_range.left + aligned_downloaded_size - 1; + segment_range.right = segment_range.left + aligned_downloaded_size - 1; + } const size_t diff = reserved_size - downloaded_size; chassert(reserved_size >= downloaded_size);