mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-09-23 02:00:49 +00:00
Fix race
This commit is contained in:
parent
5a8606e10b
commit
2dcfe66236
@ -70,16 +70,27 @@ String FileSegment::getOrSetDownloader()
|
|||||||
download_state);
|
download_state);
|
||||||
|
|
||||||
downloader_id = getCallerId();
|
downloader_id = getCallerId();
|
||||||
LOG_TEST(log, "Set downloader: {}, prev state: {}", downloader_id, stateToString(download_state));
|
|
||||||
download_state = State::DOWNLOADING;
|
download_state = State::DOWNLOADING;
|
||||||
}
|
}
|
||||||
else if (downloader_id == getCallerId())
|
else if (downloader_id == getCallerId())
|
||||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to set the same downloader for segment {} for the second time", range().toString());
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Attempt to set the same downloader for segment {} for the second time", range().toString());
|
||||||
|
|
||||||
LOG_TEST(log, "Returning with downloader: {} and state: {}", downloader_id, stateToString(download_state));
|
|
||||||
return downloader_id;
|
return downloader_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void FileSegment::resetDownloader()
|
||||||
|
{
|
||||||
|
std::lock_guard segment_lock(mutex);
|
||||||
|
|
||||||
|
if (downloader_id.empty())
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "There is no downloader");
|
||||||
|
|
||||||
|
if (getCallerId() != downloader_id)
|
||||||
|
throw Exception(ErrorCodes::LOGICAL_ERROR, "Downloader can be reset only by downloader");
|
||||||
|
|
||||||
|
downloader_id.clear();
|
||||||
|
}
|
||||||
|
|
||||||
String FileSegment::getDownloader() const
|
String FileSegment::getDownloader() const
|
||||||
{
|
{
|
||||||
std::lock_guard segment_lock(mutex);
|
std::lock_guard segment_lock(mutex);
|
||||||
@ -158,6 +169,9 @@ FileSegment::State FileSegment::wait()
|
|||||||
{
|
{
|
||||||
std::unique_lock segment_lock(mutex);
|
std::unique_lock segment_lock(mutex);
|
||||||
|
|
||||||
|
if (downloader_id.empty())
|
||||||
|
return download_state;
|
||||||
|
|
||||||
if (download_state == State::EMPTY)
|
if (download_state == State::EMPTY)
|
||||||
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Cannot wait on a file segment with empty state");
|
throw Exception(ErrorCodes::REMOTE_FS_OBJECT_CACHE_ERROR, "Cannot wait on a file segment with empty state");
|
||||||
|
|
||||||
|
@ -105,6 +105,8 @@ public:
|
|||||||
|
|
||||||
String getDownloader() const;
|
String getDownloader() const;
|
||||||
|
|
||||||
|
void resetDownloader();
|
||||||
|
|
||||||
bool isDownloader() const;
|
bool isDownloader() const;
|
||||||
|
|
||||||
bool isDownloaded() const { return is_downloaded.load(); }
|
bool isDownloaded() const { return is_downloaded.load(); }
|
||||||
|
@ -4,6 +4,7 @@
|
|||||||
#include <Common/hex.h>
|
#include <Common/hex.h>
|
||||||
#include <IO/createReadBufferFromFileBase.h>
|
#include <IO/createReadBufferFromFileBase.h>
|
||||||
#include <IO/ReadBufferFromFile.h>
|
#include <IO/ReadBufferFromFile.h>
|
||||||
|
#include <base/scope_guard.h>
|
||||||
|
|
||||||
|
|
||||||
namespace ProfileEvents
|
namespace ProfileEvents
|
||||||
@ -65,7 +66,7 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getCacheReadBuffer(size_t of
|
|||||||
return std::make_shared<ReadBufferFromFile>(cache->getPathInLocalCache(cache_key, offset), settings.local_fs_buffer_size);
|
return std::make_shared<ReadBufferFromFile>(cache->getPathInLocalCache(cache_key, offset), settings.local_fs_buffer_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getRemoteFSReadBuffer(FileSegmentPtr file_segment, ReadType read_type_)
|
SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getRemoteFSReadBuffer(FileSegmentPtr & file_segment, ReadType read_type_)
|
||||||
{
|
{
|
||||||
switch (read_type_)
|
switch (read_type_)
|
||||||
{
|
{
|
||||||
@ -114,7 +115,7 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getRemoteFSReadBuffer(FileSe
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getReadBufferForFileSegment(FileSegmentPtr file_segment)
|
SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getReadBufferForFileSegment(FileSegmentPtr & file_segment)
|
||||||
{
|
{
|
||||||
auto range = file_segment->range();
|
auto range = file_segment->range();
|
||||||
|
|
||||||
@ -184,28 +185,29 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getReadBufferForFileSegment(
|
|||||||
}
|
}
|
||||||
case FileSegment::State::PARTIALLY_DOWNLOADED:
|
case FileSegment::State::PARTIALLY_DOWNLOADED:
|
||||||
{
|
{
|
||||||
size_t download_offset = file_segment->getDownloadOffset();
|
|
||||||
bool can_start_from_cache = download_offset > file_offset_of_buffer_end;
|
|
||||||
|
|
||||||
if (can_start_from_cache)
|
|
||||||
{
|
|
||||||
/// segment{k}
|
|
||||||
/// cache: [______|___________
|
|
||||||
/// ^
|
|
||||||
/// download_offset
|
|
||||||
/// requested_range: [__________]
|
|
||||||
/// ^
|
|
||||||
/// file_offset_of_buffer_end
|
|
||||||
|
|
||||||
read_type = ReadType::CACHED;
|
|
||||||
return getCacheReadBuffer(range.left);
|
|
||||||
}
|
|
||||||
|
|
||||||
LOG_TEST(log, "Current download offset: {}, file offset of buffer end: {}", download_offset, file_offset_of_buffer_end);
|
|
||||||
|
|
||||||
auto downloader_id = file_segment->getOrSetDownloader();
|
auto downloader_id = file_segment->getOrSetDownloader();
|
||||||
if (downloader_id == file_segment->getCallerId())
|
if (downloader_id == file_segment->getCallerId())
|
||||||
{
|
{
|
||||||
|
size_t download_offset = file_segment->getDownloadOffset();
|
||||||
|
bool can_start_from_cache = download_offset > file_offset_of_buffer_end;
|
||||||
|
|
||||||
|
LOG_TEST(log, "Current download offset: {}, file offset of buffer end: {}", download_offset, file_offset_of_buffer_end);
|
||||||
|
|
||||||
|
if (can_start_from_cache)
|
||||||
|
{
|
||||||
|
/// segment{k}
|
||||||
|
/// cache: [______|___________
|
||||||
|
/// ^
|
||||||
|
/// download_offset
|
||||||
|
/// requested_range: [__________]
|
||||||
|
/// ^
|
||||||
|
/// file_offset_of_buffer_end
|
||||||
|
|
||||||
|
read_type = ReadType::CACHED;
|
||||||
|
file_segment->resetDownloader();
|
||||||
|
return getCacheReadBuffer(range.left);
|
||||||
|
}
|
||||||
|
|
||||||
if (download_offset < file_offset_of_buffer_end)
|
if (download_offset < file_offset_of_buffer_end)
|
||||||
{
|
{
|
||||||
/// segment{1}
|
/// segment{1}
|
||||||
@ -220,6 +222,10 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getReadBufferForFileSegment(
|
|||||||
bytes_to_predownload = file_offset_of_buffer_end - file_segment->getDownloadOffset();
|
bytes_to_predownload = file_offset_of_buffer_end - file_segment->getDownloadOffset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
download_offset = file_segment->getDownloadOffset();
|
||||||
|
can_start_from_cache = download_offset > file_offset_of_buffer_end;
|
||||||
|
assert(!can_start_from_cache);
|
||||||
|
|
||||||
read_type = ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE;
|
read_type = ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE;
|
||||||
return getRemoteFSReadBuffer(file_segment, read_type);
|
return getRemoteFSReadBuffer(file_segment, read_type);
|
||||||
}
|
}
|
||||||
@ -247,7 +253,7 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getReadBufferForFileSegment(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getImplementationBuffer(FileSegmentPtr file_segment)
|
SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getImplementationBuffer(FileSegmentPtr & file_segment)
|
||||||
{
|
{
|
||||||
assert(!file_segment->isDownloader());
|
assert(!file_segment->isDownloader());
|
||||||
assert(file_offset_of_buffer_end >= file_segment->range().left);
|
assert(file_offset_of_buffer_end >= file_segment->range().left);
|
||||||
@ -266,9 +272,21 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getImplementationBuffer(File
|
|||||||
LOG_TEST(log, "Current file segment: {}, read type: {}, current file offset: {}",
|
LOG_TEST(log, "Current file segment: {}, read type: {}, current file offset: {}",
|
||||||
range.toString(), toString(read_type), file_offset_of_buffer_end);
|
range.toString(), toString(read_type), file_offset_of_buffer_end);
|
||||||
|
|
||||||
// auto last_non_downloaded_offset = getLastNonDownloadedOffset();
|
if (read_type != ReadType::CACHED)
|
||||||
// read_buffer_for_file_segment->setReadUntilPosition(last_non_downloaded_offset ? *last_non_downloaded_offset : range.right + 1); /// [..., range.right]
|
{
|
||||||
read_buffer_for_file_segment->setReadUntilPosition(range.right + 1); /// [..., range.right]
|
// auto last_non_downloaded_offset = getLastNonDownloadedOffset();
|
||||||
|
// read_buffer_for_file_segment->setReadUntilPosition(last_non_downloaded_offset ? *last_non_downloaded_offset : range.right + 1); /// [..., range.right]
|
||||||
|
read_buffer_for_file_segment->setReadUntilPosition(range.right + 1); /// [..., range.right]
|
||||||
|
|
||||||
|
auto current_segment_file_offset = read_buffer_for_file_segment->getFileOffsetOfBufferEnd();
|
||||||
|
auto current_segment_file_read_position = current_segment_file_offset - read_buffer_for_file_segment->available();
|
||||||
|
|
||||||
|
if (read_buffer_for_file_segment->hasPendingData()
|
||||||
|
&& file_offset_of_buffer_end < current_segment_file_offset
|
||||||
|
&& file_offset_of_buffer_end >= current_segment_file_read_position)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
switch (read_type)
|
switch (read_type)
|
||||||
{
|
{
|
||||||
@ -282,7 +300,7 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getImplementationBuffer(File
|
|||||||
auto state = file_segment->state();
|
auto state = file_segment->state();
|
||||||
|
|
||||||
LOG_TEST(log, "Cache file: {}. Cached seek to: {}, file size: {}, file segment state: {}, download offset: {}",
|
LOG_TEST(log, "Cache file: {}. Cached seek to: {}, file size: {}, file segment state: {}, download offset: {}",
|
||||||
file_reader->getFileName(), seek_offset, file_size, state, file_segment->getDownloadOffset());
|
file_reader->getFileName(), seek_offset, file_size, state, file_segment->getDownloadOffset());
|
||||||
|
|
||||||
assert(file_size > 0);
|
assert(file_size > 0);
|
||||||
break;
|
break;
|
||||||
@ -308,12 +326,13 @@ SeekableReadBufferPtr CachedReadBufferFromRemoteFS::getImplementationBuffer(File
|
|||||||
|
|
||||||
auto impl_range = read_buffer_for_file_segment->getRemainingReadRange();
|
auto impl_range = read_buffer_for_file_segment->getRemainingReadRange();
|
||||||
auto download_offset = file_segment->getDownloadOffset();
|
auto download_offset = file_segment->getDownloadOffset();
|
||||||
if (download_offset != impl_range.left)
|
if (download_offset != static_cast<size_t>(read_buffer_for_file_segment->getPosition()))
|
||||||
throw Exception(
|
throw Exception(
|
||||||
ErrorCodes::LOGICAL_ERROR,
|
ErrorCodes::LOGICAL_ERROR,
|
||||||
"Buffer's offsets mismatch; cached buffer offset: {}, implementation buffer offset: {}, "
|
"Buffer's offsets mismatch; cached buffer offset: {}, download_offset: {}, position: {}, implementation buffer offset: {}, "
|
||||||
"implementation buffer reading until: {}, file segment info: {}",
|
"implementation buffer reading until: {}, file segment info: {}",
|
||||||
file_offset_of_buffer_end, impl_range.left, *impl_range.right, file_segment->getInfoForLog());
|
file_offset_of_buffer_end, download_offset, read_buffer_for_file_segment->getPosition(),
|
||||||
|
impl_range.left, *impl_range.right, file_segment->getInfoForLog());
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -348,7 +367,7 @@ bool CachedReadBufferFromRemoteFS::completeFileSegmentAndGetNext()
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr file_segment)
|
void CachedReadBufferFromRemoteFS::predownload(FileSegmentPtr & file_segment)
|
||||||
{
|
{
|
||||||
if (bytes_to_predownload)
|
if (bytes_to_predownload)
|
||||||
{
|
{
|
||||||
@ -428,11 +447,28 @@ bool CachedReadBufferFromRemoteFS::nextImpl()
|
|||||||
if (current_file_segment_it == file_segments_holder->file_segments.end())
|
if (current_file_segment_it == file_segments_holder->file_segments.end())
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
// SCOPE_EXIT({
|
||||||
|
// if (current_file_segment_it == file_segments_holder->file_segments.end())
|
||||||
|
// return;
|
||||||
|
|
||||||
|
// auto & file_segment = *current_file_segment_it;
|
||||||
|
|
||||||
|
// bool download_current_segment = read_type == ReadType::REMOTE_FS_READ_AND_PUT_IN_CACHE;
|
||||||
|
// if (download_current_segment)
|
||||||
|
// {
|
||||||
|
// bool file_segment_already_completed = !file_segment->isDownloader();
|
||||||
|
// if (!file_segment_already_completed)
|
||||||
|
// file_segment->completeBatchAndResetDownloader();
|
||||||
|
// }
|
||||||
|
|
||||||
|
// assert(!file_segment->isDownloader());
|
||||||
|
// });
|
||||||
|
|
||||||
bytes_to_predownload = 0;
|
bytes_to_predownload = 0;
|
||||||
|
|
||||||
if (implementation_buffer)
|
if (implementation_buffer)
|
||||||
{
|
{
|
||||||
auto file_segment = *current_file_segment_it;
|
auto & file_segment = *current_file_segment_it;
|
||||||
auto current_read_range = file_segment->range();
|
auto current_read_range = file_segment->range();
|
||||||
auto current_state = file_segment->state();
|
auto current_state = file_segment->state();
|
||||||
|
|
||||||
@ -524,26 +560,10 @@ bool CachedReadBufferFromRemoteFS::nextImpl()
|
|||||||
"Incorrect segment state. Having read type: {}, Caller id: {}, downloader id: {}, file segment state: {}",
|
"Incorrect segment state. Having read type: {}, Caller id: {}, downloader id: {}, file segment state: {}",
|
||||||
toString(read_type), file_segment->getCallerId(), file_segment->getDownloader(), file_segment->state());
|
toString(read_type), file_segment->getCallerId(), file_segment->getDownloader(), file_segment->state());
|
||||||
|
|
||||||
try
|
if (!result)
|
||||||
{
|
{
|
||||||
if (!result)
|
result = implementation_buffer->next();
|
||||||
{
|
size = implementation_buffer->buffer().size();
|
||||||
result = implementation_buffer->next();
|
|
||||||
size = implementation_buffer->buffer().size();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (...)
|
|
||||||
{
|
|
||||||
tryLogCurrentException(__PRETTY_FUNCTION__);
|
|
||||||
|
|
||||||
if (download_current_segment)
|
|
||||||
file_segment->complete(FileSegment::State::PARTIALLY_DOWNLOADED, true);
|
|
||||||
|
|
||||||
/// Note: If exception happens in another place -- out of scope of this buffer, then
|
|
||||||
/// downloader's FileSegmentsHolder is responsible to call file_segment->complete().
|
|
||||||
|
|
||||||
/// (download_path (if exists) is removed from inside cache)
|
|
||||||
throw;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (result)
|
if (result)
|
||||||
|
@ -34,15 +34,15 @@ public:
|
|||||||
private:
|
private:
|
||||||
void initialize(size_t offset, size_t size);
|
void initialize(size_t offset, size_t size);
|
||||||
|
|
||||||
SeekableReadBufferPtr getImplementationBuffer(FileSegmentPtr file_segment);
|
SeekableReadBufferPtr getImplementationBuffer(FileSegmentPtr & file_segment);
|
||||||
|
|
||||||
SeekableReadBufferPtr getReadBufferForFileSegment(FileSegmentPtr file_segment);
|
SeekableReadBufferPtr getReadBufferForFileSegment(FileSegmentPtr & file_segment);
|
||||||
|
|
||||||
SeekableReadBufferPtr getCacheReadBuffer(size_t offset) const;
|
SeekableReadBufferPtr getCacheReadBuffer(size_t offset) const;
|
||||||
|
|
||||||
std::optional<size_t> getLastNonDownloadedOffset() const;
|
std::optional<size_t> getLastNonDownloadedOffset() const;
|
||||||
|
|
||||||
void predownload(FileSegmentPtr file_segment);
|
void predownload(FileSegmentPtr & file_segment);
|
||||||
|
|
||||||
enum class ReadType
|
enum class ReadType
|
||||||
{
|
{
|
||||||
@ -51,7 +51,7 @@ private:
|
|||||||
REMOTE_FS_READ_AND_PUT_IN_CACHE,
|
REMOTE_FS_READ_AND_PUT_IN_CACHE,
|
||||||
};
|
};
|
||||||
|
|
||||||
SeekableReadBufferPtr getRemoteFSReadBuffer(FileSegmentPtr file_segment, ReadType read_type_);
|
SeekableReadBufferPtr getRemoteFSReadBuffer(FileSegmentPtr & file_segment, ReadType read_type_);
|
||||||
|
|
||||||
size_t getTotalSizeToRead();
|
size_t getTotalSizeToRead();
|
||||||
bool completeFileSegmentAndGetNext();
|
bool completeFileSegmentAndGetNext();
|
||||||
|
@ -37,15 +37,17 @@ SeekableReadBufferPtr ReadBufferFromS3Gather::createImplementationBuffer(const S
|
|||||||
{
|
{
|
||||||
current_path = path;
|
current_path = path;
|
||||||
|
|
||||||
|
auto cache = settings.remote_fs_cache;
|
||||||
|
bool with_cache = cache && settings.remote_fs_enable_cache && !IFileCache::shouldBypassCache();
|
||||||
|
|
||||||
auto remote_file_reader_creator = [=, this]()
|
auto remote_file_reader_creator = [=, this]()
|
||||||
{
|
{
|
||||||
return std::make_unique<ReadBufferFromS3>(
|
return std::make_unique<ReadBufferFromS3>(
|
||||||
client_ptr, bucket, fs::path(metadata.remote_fs_root_path) / path, max_single_read_retries,
|
client_ptr, bucket, fs::path(metadata.remote_fs_root_path) / path, max_single_read_retries,
|
||||||
settings, /* use_external_buffer */true, read_until_position, true);
|
settings, /* use_external_buffer */true, read_until_position, /* restricted_seek */true);
|
||||||
};
|
};
|
||||||
|
|
||||||
auto cache = settings.remote_fs_cache;
|
if (with_cache)
|
||||||
if (cache && settings.remote_fs_enable_cache && !IFileCache::shouldBypassCache())
|
|
||||||
{
|
{
|
||||||
return std::make_shared<CachedReadBufferFromRemoteFS>(
|
return std::make_shared<CachedReadBufferFromRemoteFS>(
|
||||||
path, cache, remote_file_reader_creator, settings, read_until_position ? read_until_position : file_size);
|
path, cache, remote_file_reader_creator, settings, read_until_position ? read_until_position : file_size);
|
||||||
|
@ -158,10 +158,11 @@ off_t ReadBufferFromS3::seek(off_t offset_, int whence)
|
|||||||
return offset;
|
return offset;
|
||||||
|
|
||||||
if (impl && restricted_seek)
|
if (impl && restricted_seek)
|
||||||
throw Exception(
|
impl.reset(); /// TODO: Fix this
|
||||||
ErrorCodes::CANNOT_SEEK_THROUGH_FILE,
|
// throw Exception(
|
||||||
"Seek is allowed only before first read attempt from the buffer (current offset: {}, new offset: {}, reading until position: {})",
|
// ErrorCodes::CANNOT_SEEK_THROUGH_FILE,
|
||||||
offset, offset_, read_until_position);
|
// "Seek is allowed only before first read attempt from the buffer (current offset: {}, new offset: {}, reading until position: {}, available: {})",
|
||||||
|
// offset, offset_, read_until_position, available());
|
||||||
|
|
||||||
if (whence != SEEK_SET)
|
if (whence != SEEK_SET)
|
||||||
throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
|
throw Exception("Only SEEK_SET mode is allowed.", ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
|
||||||
|
Loading…
Reference in New Issue
Block a user