#include "FileSegment.h"
#include
#include
#include
#include
#include
#include
namespace DB
{
namespace ErrorCodes
{
extern const int REMOTE_FS_OBJECT_CACHE_ERROR;
extern const int LOGICAL_ERROR;
}
FileSegment::FileSegment(
size_t offset_,
size_t size_,
const Key & key_,
IFileCache * cache_,
State download_state_)
: segment_range(offset_, offset_ + size_ - 1)
, download_state(download_state_)
, file_key(key_)
, cache(cache_)
#ifndef NDEBUG
, log(&Poco::Logger::get(fmt::format("FileSegment({}) : {}", getHexUIntLowercase(key_), range().toString())))
#else
, log(&Poco::Logger::get("FileSegment"))
#endif
{
/// On creation, file segment state can be EMPTY, DOWNLOADED, DOWNLOADING.
switch (download_state)
{
/// EMPTY is used when file segment is not in cache and
/// someone will _potentially_ want to download it (after calling getOrSetDownloader()).
case (State::EMPTY):
{
break;
}
/// DOWNLOADED is used either on initial cache metadata load into memory on server startup
/// or on reduceSizeToDownloaded() -- when file segment object is updated.
case (State::DOWNLOADED):
{
reserved_size = downloaded_size = size_;
break;
}
/// DOWNLOADING is used only for write-through caching (e.g. getOrSetDownloader() is not
/// needed, downloader is set on file segment creation).
case (State::DOWNLOADING):
{
/// On write-through cache we do not check downloader id.
is_write_through_cache = true;
downloader_id = getCallerId();
break;
}
default:
{
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Can create cell with either EMPTY, DOWNLOADED, DOWNLOADING state");
}
}
}
FileSegment::State FileSegment::state() const
{
std::lock_guard segment_lock(mutex);
return download_state;
}
size_t FileSegment::getDownloadOffset() const
{
std::lock_guard segment_lock(mutex);
return range().left + getDownloadedSize(segment_lock);
}
size_t FileSegment::getDownloadedSize() const
{
std::lock_guard segment_lock(mutex);
return getDownloadedSize(segment_lock);
}
size_t FileSegment::getAvailableSize() const
{
std::lock_guard segment_lock(mutex);
return range().size() - downloaded_size;
}
size_t FileSegment::getDownloadedSize(std::lock_guard & /* segment_lock */) const
{
if (download_state == State::DOWNLOADED)
return downloaded_size;
std::lock_guard download_lock(download_mutex);
return downloaded_size;
}
String FileSegment::getCallerId()
{
return getCallerIdImpl();
}
String FileSegment::getCallerIdImpl()
{
if (!CurrentThread::isInitialized()
|| !CurrentThread::get().getQueryContext()
|| CurrentThread::getQueryId().size == 0)
return "None:" + toString(getThreadId());
return CurrentThread::getQueryId().toString() + ":" + toString(getThreadId());
}
String FileSegment::getOrSetDownloader()
{
std::lock_guard segment_lock(mutex);
if (detached)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Cannot set downloader for a detached file segment");
if (downloader_id.empty())
{
assert(download_state != State::DOWNLOADING);
if (download_state != State::EMPTY
&& download_state != State::PARTIALLY_DOWNLOADED)
return "None";
downloader_id = getCallerId();
download_state = State::DOWNLOADING;
}
else if (downloader_id == getCallerId())
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Attempt to set the same downloader for segment {} for the second time", range().toString());
return downloader_id;
}
void FileSegment::resetDownloader()
{
std::lock_guard segment_lock(mutex);
if (downloader_id.empty())
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "There is no downloader");
if (getCallerId() != downloader_id)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Downloader can be reset only by downloader");
resetDownloaderImpl(segment_lock);
}
void FileSegment::resetDownloaderImpl(std::lock_guard & segment_lock)
{
if (downloaded_size == range().size())
setDownloaded(segment_lock);
else
download_state = State::PARTIALLY_DOWNLOADED;
downloader_id.clear();
}
String FileSegment::getDownloader() const
{
std::lock_guard segment_lock(mutex);
return downloader_id;
}
bool FileSegment::isDownloader() const
{
std::lock_guard segment_lock(mutex);
return getCallerId() == downloader_id;
}
bool FileSegment::isDownloaderImpl(std::lock_guard & /* segment_lock */) const
{
return getCallerId() == downloader_id;
}
FileSegment::RemoteFileReaderPtr FileSegment::getRemoteFileReader()
{
if (!isDownloader())
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Only downloader can use remote filesystem file reader");
return remote_file_reader;
}
void FileSegment::setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_)
{
if (!isDownloader())
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Only downloader can use remote filesystem file reader");
if (remote_file_reader)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Remote file reader already exists");
remote_file_reader = remote_file_reader_;
}
void FileSegment::resetRemoteFileReader()
{
if (!isDownloader())
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Only downloader can use remote filesystem file reader");
if (!remote_file_reader)
throw Exception(ErrorCodes::LOGICAL_ERROR, "Remote file reader does not exist");
remote_file_reader.reset();
}
void FileSegment::write(const char * from, size_t size, size_t offset_)
{
if (!size)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Writing zero size is not allowed");
if (availableSize() < size)
throw Exception(
ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Not enough space is reserved. Available: {}, expected: {}", availableSize(), size);
if (!isDownloader() && !is_write_through_cache)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Only downloader can do the downloading. (CallerId: {}, DownloaderId: {})",
getCallerId(), downloader_id);
if (downloaded_size == range().size())
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Attempt to write {} bytes to offset: {}, but current file segment is already fully downloaded",
size, offset_);
auto download_offset = range().left + downloaded_size;
if (offset_ != download_offset)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Attempt to write {} bytes to offset: {}, but current download offset is {}",
size, offset_, download_offset);
{
std::lock_guard segment_lock(mutex);
assertNotDetached(segment_lock);
}
if (!cache_writer)
{
if (downloaded_size > 0)
throw Exception(ErrorCodes::LOGICAL_ERROR,
"Cache writer was finalized (downloaded size: {}, state: {})",
downloaded_size, stateToString(download_state));
auto download_path = cache->getPathInLocalCache(key(), offset());
cache_writer = std::make_unique(download_path);
}
try
{
cache_writer->write(from, size);
std::lock_guard download_lock(download_mutex);
cache_writer->next();
downloaded_size += size;
}
catch (Exception & e)
{
std::lock_guard segment_lock(mutex);
wrapWithCacheInfo(e, "while writing into cache", segment_lock);
setDownloadFailed(segment_lock);
cv.notify_all();
throw;
}
assert(getDownloadOffset() == offset_ + size);
}
FileSegment::State FileSegment::wait()
{
std::unique_lock segment_lock(mutex);
if (downloader_id.empty())
return download_state;
if (download_state == State::EMPTY)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Cannot wait on a file segment with empty state");
if (download_state == State::DOWNLOADING)
{
LOG_TEST(log, "{} waiting on: {}, current downloader: {}", getCallerId(), range().toString(), downloader_id);
assert(!downloader_id.empty());
assert(downloader_id != getCallerId());
cv.wait_for(segment_lock, std::chrono::seconds(60));
}
return download_state;
}
bool FileSegment::reserve(size_t size)
{
if (!size)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Zero space reservation is not allowed");
{
std::lock_guard segment_lock(mutex);
assertNotDetached(segment_lock);
auto caller_id = getCallerId();
bool is_downloader = caller_id == downloader_id;
if (!is_downloader && !is_write_through_cache)
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Space can be reserved only by downloader (current: {}, expected: {})", caller_id, downloader_id);
if (downloaded_size + size > range().size())
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Attempt to reserve space too much space ({}) for file segment with range: {} (downloaded size: {})",
size, range().toString(), downloaded_size);
assert(reserved_size >= downloaded_size);
}
/**
* It is possible to have downloaded_size < reserved_size when reserve is called
* in case previous downloader did not fully download current file_segment
* and the caller is going to continue;
*/
size_t free_space = reserved_size - downloaded_size;
size_t size_to_reserve = size - free_space;
std::lock_guard cache_lock(cache->mutex);
bool reserved = cache->tryReserve(key(), offset(), size_to_reserve, cache_lock);
if (reserved)
reserved_size += size;
return reserved;
}
void FileSegment::setDownloaded(std::lock_guard & /* segment_lock */)
{
if (is_downloaded)
return;
download_state = State::DOWNLOADED;
is_downloaded = true;
downloader_id.clear();
if (cache_writer)
{
cache_writer->finalize();
cache_writer.reset();
remote_file_reader.reset();
}
}
void FileSegment::setDownloadFailed(std::lock_guard & /* segment_lock */)
{
download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION;
downloader_id.clear();
if (cache_writer)
{
cache_writer->finalize();
cache_writer.reset();
remote_file_reader.reset();
}
}
void FileSegment::completeBatchAndResetDownloader()
{
std::lock_guard segment_lock(mutex);
assertNotDetached(segment_lock);
if (!isDownloaderImpl(segment_lock))
{
cv.notify_all();
throw Exception(
ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"File segment can be completed only by downloader ({} != {})",
downloader_id, getCallerId());
}
resetDownloaderImpl(segment_lock);
LOG_TEST(log, "Complete batch. Current downloaded size: {}", downloaded_size);
cv.notify_all();
}
void FileSegment::complete(State state)
{
std::lock_guard cache_lock(cache->mutex);
std::lock_guard segment_lock(mutex);
assertNotDetached(segment_lock);
bool is_downloader = isDownloaderImpl(segment_lock);
if (!is_downloader && !is_write_through_cache)
{
cv.notify_all();
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"File segment can be completed only by downloader or downloader's FileSegmentsHodler");
}
if (state != State::DOWNLOADED
&& state != State::PARTIALLY_DOWNLOADED
&& state != State::PARTIALLY_DOWNLOADED_NO_CONTINUATION)
{
cv.notify_all();
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cannot complete file segment with state: {}", stateToString(state));
}
if (state == State::DOWNLOADED)
setDownloaded(segment_lock);
download_state = state;
try
{
completeImpl(cache_lock, segment_lock);
}
catch (...)
{
if (!downloader_id.empty() && is_downloader)
downloader_id.clear();
cv.notify_all();
throw;
}
cv.notify_all();
}
void FileSegment::complete(std::lock_guard & cache_lock)
{
std::lock_guard segment_lock(mutex);
completeUnlocked(cache_lock, segment_lock);
}
void FileSegment::completeUnlocked(std::lock_guard & cache_lock, std::lock_guard & segment_lock)
{
if (download_state == State::SKIP_CACHE || detached)
return;
if (isDownloaderImpl(segment_lock)
&& download_state != State::DOWNLOADED
&& getDownloadedSize(segment_lock) == range().size())
{
setDownloaded(segment_lock);
}
assertNotDetached(segment_lock);
if (download_state == State::DOWNLOADING || download_state == State::EMPTY)
{
/// Segment state can be changed from DOWNLOADING or EMPTY only if the caller is the
/// downloader or the only owner of the segment.
bool can_update_segment_state = isDownloaderImpl(segment_lock)
|| cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock);
if (can_update_segment_state)
download_state = State::PARTIALLY_DOWNLOADED;
}
try
{
completeImpl(cache_lock, segment_lock);
}
catch (...)
{
if (!downloader_id.empty() && isDownloaderImpl(segment_lock))
downloader_id.clear();
cv.notify_all();
throw;
}
cv.notify_all();
}
void FileSegment::completeImpl(std::lock_guard & cache_lock, std::lock_guard & segment_lock)
{
bool is_last_holder = cache->isLastFileSegmentHolder(key(), offset(), cache_lock, segment_lock);
if (is_last_holder
&& (download_state == State::PARTIALLY_DOWNLOADED || download_state == State::PARTIALLY_DOWNLOADED_NO_CONTINUATION))
{
size_t current_downloaded_size = getDownloadedSize(segment_lock);
if (current_downloaded_size == 0)
{
download_state = State::SKIP_CACHE;
LOG_TEST(log, "Remove cell {} (nothing downloaded)", range().toString());
cache->remove(key(), offset(), cache_lock, segment_lock);
}
else
{
/**
* Only last holder of current file segment can resize the cell,
* because there is an invariant that file segments returned to users
* in FileSegmentsHolder represent a contiguous range, so we can resize
* it only when nobody needs it.
*/
download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION;
LOG_TEST(log, "Resize cell {} to downloaded: {}", range().toString(), current_downloaded_size);
cache->reduceSizeToDownloaded(key(), offset(), cache_lock, segment_lock);
}
detached = true;
if (cache_writer)
{
cache_writer->finalize();
cache_writer.reset();
remote_file_reader.reset();
}
}
if (!downloader_id.empty() && (isDownloaderImpl(segment_lock) || is_last_holder))
{
LOG_TEST(log, "Clearing downloader id: {}, current state: {}", downloader_id, stateToString(download_state));
downloader_id.clear();
}
LOG_TEST(log, "Completed file segment: {}", getInfoForLogImpl(segment_lock));
assertCorrectnessImpl(segment_lock);
}
String FileSegment::getInfoForLog() const
{
std::lock_guard segment_lock(mutex);
return getInfoForLogImpl(segment_lock);
}
String FileSegment::getInfoForLogImpl(std::lock_guard & segment_lock) const
{
WriteBufferFromOwnString info;
info << "File segment: " << range().toString() << ", ";
info << "state: " << download_state << ", ";
info << "downloaded size: " << getDownloadedSize(segment_lock) << ", ";
info << "downloader id: " << downloader_id << ", ";
info << "caller id: " << getCallerId();
return info.str();
}
void FileSegment::wrapWithCacheInfo(Exception & e, const String & message, std::lock_guard & segment_lock) const
{
e.addMessage(fmt::format("{}, current cache state: {}", message, getInfoForLogImpl(segment_lock)));
}
String FileSegment::stateToString(FileSegment::State state)
{
switch (state)
{
case FileSegment::State::DOWNLOADED:
return "DOWNLOADED";
case FileSegment::State::EMPTY:
return "EMPTY";
case FileSegment::State::DOWNLOADING:
return "DOWNLOADING";
case FileSegment::State::PARTIALLY_DOWNLOADED:
return "PARTIALLY DOWNLOADED";
case FileSegment::State::PARTIALLY_DOWNLOADED_NO_CONTINUATION:
return "PARTIALLY DOWNLOADED NO CONTINUATION";
case FileSegment::State::SKIP_CACHE:
return "SKIP_CACHE";
}
__builtin_unreachable();
}
void FileSegment::assertCorrectness() const
{
std::lock_guard segment_lock(mutex);
assertCorrectnessImpl(segment_lock);
}
void FileSegment::assertCorrectnessImpl(std::lock_guard & /* segment_lock */) const
{
assert(downloader_id.empty() == (download_state != FileSegment::State::DOWNLOADING));
assert(!downloader_id.empty() == (download_state == FileSegment::State::DOWNLOADING));
assert(download_state != FileSegment::State::DOWNLOADED || std::filesystem::file_size(cache->getPathInLocalCache(key(), offset())) > 0);
}
void FileSegment::assertNotDetached(std::lock_guard & /* segment_lock */) const
{
if (detached)
{
throw Exception(
ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR,
"Cache file segment is in detached state, operation not allowed. "
"It can happen when cache was concurrently dropped with SYSTEM DROP FILESYSTEM CACHE FORCE. "
"Please, retry");
}
}
void FileSegment::assertDetachedStatus(std::lock_guard & segment_lock) const
{
if (download_state != State::EMPTY && !hasFinalizedState())
{
throw Exception(
ErrorCodes::LOGICAL_ERROR, "Cache get into inconsistent state {}", getInfoForLogImpl(segment_lock));
}
}
FileSegmentPtr FileSegment::getSnapshot(const FileSegmentPtr & file_segment, std::lock_guard & /* cache_lock */)
{
auto snapshot = std::make_shared(
file_segment->offset(),
file_segment->range().size(),
file_segment->key(),
nullptr,
State::EMPTY);
snapshot->hits_count = file_segment->getHitsCount();
snapshot->ref_count = file_segment.use_count();
snapshot->downloaded_size = file_segment->getDownloadedSize();
snapshot->download_state = file_segment->state();
return snapshot;
}
bool FileSegment::hasFinalizedState() const
{
return download_state == State::DOWNLOADED
|| download_state == State::PARTIALLY_DOWNLOADED_NO_CONTINUATION
|| download_state == State::SKIP_CACHE;
}
void FileSegment::detach(std::lock_guard & /* cache_lock */, std::lock_guard & segment_lock)
{
if (detached)
return;
detached = true;
download_state = State::PARTIALLY_DOWNLOADED_NO_CONTINUATION;
downloader_id.clear();
LOG_TEST(log, "Detached file segment: {}", getInfoForLogImpl(segment_lock));
}
FileSegmentsHolder::~FileSegmentsHolder()
{
/// In CacheableReadBufferFromRemoteFS file segment's downloader removes file segments from
/// FileSegmentsHolder right after calling file_segment->complete(), so on destruction here
/// remain only uncompleted file segments.
IFileCache * cache = nullptr;
for (auto file_segment_it = file_segments.begin(); file_segment_it != file_segments.end();)
{
auto current_file_segment_it = file_segment_it;
auto & file_segment = *current_file_segment_it;
if (!cache)
cache = file_segment->cache;
{
bool detached = false;
{
std::lock_guard segment_lock(file_segment->mutex);
detached = file_segment->isDetached(segment_lock);
if (detached)
file_segment->assertDetachedStatus(segment_lock);
}
if (detached)
{
/// This file segment is not owned by cache, so it will be destructed
/// at this point, therefore no completion required.
file_segment_it = file_segments.erase(current_file_segment_it);
continue;
}
}
try
{
/// File segment pointer must be reset right after calling complete() and
/// under the same mutex, because complete() checks for segment pointers.
std::lock_guard cache_lock(cache->mutex);
file_segment->complete(cache_lock);
file_segment_it = file_segments.erase(current_file_segment_it);
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
assert(false);
}
}
}
String FileSegmentsHolder::toString()
{
String ranges;
for (const auto & file_segment : file_segments)
{
if (!ranges.empty())
ranges += ", ";
ranges += file_segment->range().toString();
}
return ranges;
}
FileSegmentRangeWriter::FileSegmentRangeWriter(
IFileCache * cache_,
const FileSegment::Key & key_,
size_t max_file_segment_size_)
: cache(cache_)
, key(key_)
, max_file_segment_size(max_file_segment_size_)
{
}
void FileSegmentRangeWriter::allocateFileSegment(size_t offset)
{
std::lock_guard cache_lock(cache->mutex);
auto file_segment = cache->setDownloading(key, offset, max_file_segment_size, cache_lock);
current_file_segment = file_segment;
file_segments_holder.add(std::move(file_segment));
}
bool FileSegmentRangeWriter::write(char * data, size_t size, size_t offset)
{
/**
* We want to write eventually some size, which is not known until the very end.
* Therefore we allocate file segments lazily. Each file segment is assigned capacity
* of max_file_segment_size, but reserved_size remains 0, until call to tryReserve().
* Once current file segment is full (reached max_file_segment_size), we allocate a
* new file segment. All allocated file segments resize in file segments holder.
* If at the end of all writes, the last file segment is not full, then it is resized.
*/
std::lock_guard lock(mutex);
if (finalized)
return false;
if (current_file_segment.expired())
allocateFileSegment(current_file_segment_start_offset);
auto file_segment = current_file_segment.lock();
if (file_segment->getAvailableSize() == 0)
{
file_segment->complete(FileSegment::State::DOWNLOADED);
allocateFileSegment(current_file_segment_start_offset);
file_segment = current_file_segment.lock();
}
bool reserved = file_segment->reserve(size);
if (!reserved)
return false;
file_segment->write(data, size, offset);
current_file_segment_start_offset += size;
return true;
}
void FileSegmentRangeWriter::finalize()
{
std::lock_guard lock(mutex);
if (finalized)
return;
if (file_segments_holder.file_segments.empty() || current_file_segment.expired())
return;
auto file_segment = current_file_segment.lock();
std::lock_guard cache_lock(cache->mutex);
file_segment->complete(cache_lock);
finalized = true;
}
FileSegmentRangeWriter::~FileSegmentRangeWriter()
{
try
{
if (!finalized)
finalize();
}
catch (...)
{
tryLogCurrentException(__PRETTY_FUNCTION__);
}
}
void FileSegmentRangeWriter::clearDownloaded()
{
std::lock_guard lock(mutex);
current_file_segment.reset();
auto & file_segments = file_segments_holder.file_segments;
if (file_segments.empty())
return;
std::lock_guard cache_lock(cache->mutex);
for (auto file_segment_it = file_segments.begin(); file_segment_it != file_segments.end();)
{
auto file_segment = *file_segment_it;
std::lock_guard segment_lock(file_segment->mutex);
cache->remove(key, file_segment->offset(), cache_lock, segment_lock);
}
file_segments.clear();
}
}