ClickHouse/src/Common/FileSegment.h

233 lines
7.7 KiB
C++
Raw Normal View History

2022-01-22 22:56:24 +00:00
#pragma once
#include <boost/noncopyable.hpp>
#include <IO/WriteBufferFromFile.h>
#include <Core/Types.h>
#include <IO/SeekableReadBuffer.h>
2022-01-22 22:56:24 +00:00
#include <list>
namespace Poco { class Logger; }
2022-01-22 22:56:24 +00:00
namespace DB
{
2022-02-18 15:38:23 +00:00
class IFileCache;
2022-01-22 22:56:24 +00:00
/// Forward declaration so that the aliases below can refer to the class before it is defined.
class FileSegment;
/// Shared-ownership pointer to a file segment.
using FileSegmentPtr = std::shared_ptr<FileSegment>;
/// A list of shared file segment pointers.
using FileSegments = std::list<FileSegmentPtr>;
class FileSegment : boost::noncopyable
{
friend class LRUFileCache;
2022-01-24 22:07:02 +00:00
friend struct FileSegmentsHolder;
2022-01-22 22:56:24 +00:00
public:
2022-01-23 16:51:18 +00:00
using Key = UInt128;
using RemoteFileReaderPtr = std::shared_ptr<SeekableReadBuffer>;
using LocalCacheWriterPtr = std::unique_ptr<WriteBufferFromFile>;
2022-01-23 16:51:18 +00:00
2022-01-22 22:56:24 +00:00
enum class State
{
DOWNLOADED,
/**
* When file segment is first created and returned to user, it has state EMPTY.
* EMPTY state can become DOWNLOADING when getOrSetDownaloder is called successfully
* by any owner of EMPTY state file segment.
*/
EMPTY,
/**
* A newly created file segment never has DOWNLOADING state until call to getOrSetDownloader
* because each cache user might acquire multiple file segments and reads them one by one,
* so only user which actually needs to read this segment earlier than others - becomes a downloader.
*/
DOWNLOADING,
/**
* Space reservation for a file segment is incremental, i.e. downaloder reads buffer_size bytes
* from remote fs -> tries to reserve buffer_size bytes to put them to cache -> writes to cache
2022-01-23 16:51:18 +00:00
* on successful reservation and stops cache write otherwise. Those, who waited for the same file
* file segment, will read downloaded part from cache and remaining part directly from remote fs.
2022-01-22 22:56:24 +00:00
*/
PARTIALLY_DOWNLOADED_NO_CONTINUATION,
/**
2022-01-23 16:51:18 +00:00
* If downloader did not finish download of current file segment for any reason apart from running
2022-01-22 22:56:24 +00:00
* out of cache space, then download can be continued by other owners of this file segment.
*/
PARTIALLY_DOWNLOADED,
/**
* If file segment cannot possibly be downloaded (first space reservation attempt failed), mark
* this file segment as out of cache scope.
*/
SKIP_CACHE,
};
FileSegment(
size_t offset_, size_t size_, const Key & key_,
2022-02-18 15:38:23 +00:00
IFileCache * cache_, State download_state_);
2022-01-22 22:56:24 +00:00
2022-01-23 16:51:18 +00:00
State state() const;
2022-01-22 22:56:24 +00:00
static String stateToString(FileSegment::State state);
2022-01-22 22:56:24 +00:00
/// Represents an interval [left, right] including both boundaries.
struct Range
{
size_t left;
size_t right;
Range(size_t left_, size_t right_) : left(left_), right(right_) {}
2022-01-26 09:35:46 +00:00
bool operator==(const Range & other) const { return left == other.left && right == other.right; }
2022-01-22 22:56:24 +00:00
size_t size() const { return right - left + 1; }
2022-02-18 15:38:23 +00:00
String toString() const { return fmt::format("[{}, {}]", std::to_string(left), std::to_string(right)); }
2022-01-22 22:56:24 +00:00
};
const Range & range() const { return segment_range; }
2022-01-23 16:51:18 +00:00
const Key & key() const { return file_key; }
2022-01-22 22:56:24 +00:00
2022-01-23 16:51:18 +00:00
size_t offset() const { return range().left; }
2022-01-22 22:56:24 +00:00
State wait();
bool reserve(size_t size);
2022-03-16 13:29:21 +00:00
void write(const char * from, size_t size, size_t offset_);
2022-01-22 22:56:24 +00:00
2022-04-07 16:46:46 +00:00
/**
* writeInMemory and finalizeWrite are used together to write a single file with delay.
* Both can be called only once, one after another. Used for writing cache via threadpool
* on wrote operations. TODO: this solution is temporary, until adding a separate cache layer.
*/
2022-04-01 14:45:15 +00:00
void writeInMemory(const char * from, size_t size);
size_t finalizeWrite();
RemoteFileReaderPtr getRemoteFileReader();
2022-01-22 22:56:24 +00:00
void setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_);
2022-01-26 09:35:46 +00:00
2022-03-17 19:29:07 +00:00
void resetRemoteFileReader();
2022-01-22 22:56:24 +00:00
String getOrSetDownloader();
String getDownloader() const;
2022-03-06 19:33:07 +00:00
void resetDownloader();
2022-01-22 22:56:24 +00:00
bool isDownloader() const;
2022-02-23 10:12:14 +00:00
bool isDownloaded() const { return is_downloaded.load(); }
static String getCallerId();
2022-02-18 15:38:23 +00:00
size_t getDownloadOffset() const;
2022-01-23 16:51:18 +00:00
2022-04-07 16:46:46 +00:00
size_t getDownloadedSize() const;
void completeBatchAndResetDownloader();
2022-03-11 11:17:17 +00:00
void complete(State state);
2022-01-22 22:56:24 +00:00
2022-02-02 14:25:25 +00:00
String getInfoForLog() const;
2022-04-07 16:46:46 +00:00
size_t getHitsCount() const { return hits_count; }
size_t getRefCount() const { return ref_count; }
2022-03-23 14:35:15 +00:00
2022-04-07 16:46:46 +00:00
void incrementHitsCount() { ++hits_count; }
2022-03-23 14:35:15 +00:00
2022-03-21 18:48:13 +00:00
void assertCorrectness() const;
2022-04-07 23:58:55 +00:00
static FileSegmentPtr getSnapshot(const FileSegmentPtr & file_segment, std::lock_guard<std::mutex> & cache_lock);
2022-04-07 16:46:46 +00:00
2022-04-27 18:01:59 +00:00
void detach(std::lock_guard<std::mutex> & cache_lock, std::lock_guard<std::mutex> & segment_lock);
2022-04-26 10:09:58 +00:00
2022-01-22 22:56:24 +00:00
private:
2022-01-24 22:07:02 +00:00
size_t availableSize() const { return reserved_size - downloaded_size; }
2022-03-17 19:29:07 +00:00
size_t getDownloadedSize(std::lock_guard<std::mutex> & segment_lock) const;
2022-03-18 10:01:35 +00:00
String getInfoForLogImpl(std::lock_guard<std::mutex> & segment_lock) const;
2022-03-21 18:48:13 +00:00
void assertCorrectnessImpl(std::lock_guard<std::mutex> & segment_lock) const;
2022-04-11 15:51:49 +00:00
void assertNotDetached() const;
2022-04-28 20:50:19 +00:00
void assertDetachedStatus(std::lock_guard<std::mutex> & segment_lock) const;
2022-04-28 10:57:22 +00:00
bool hasFinalizedState() const;
2022-04-28 20:50:19 +00:00
bool isDetached(std::lock_guard<std::mutex> & /* segment_lock */) const { return detached; }
2022-04-20 12:10:31 +00:00
2022-04-20 12:10:31 +00:00
2022-02-23 10:12:14 +00:00
void setDownloaded(std::lock_guard<std::mutex> & segment_lock);
2022-04-07 16:46:46 +00:00
void setDownloadFailed(std::lock_guard<std::mutex> & segment_lock);
2022-04-14 11:17:04 +00:00
bool isDownloaderImpl(std::lock_guard<std::mutex> & segment_lock) const;
2022-04-07 16:46:46 +00:00
void wrapWithCacheInfo(Exception & e, const String & message, std::lock_guard<std::mutex> & segment_lock) const;
2022-03-17 19:29:07 +00:00
bool lastFileSegmentHolder() const;
/// complete() without any completion state is called from destructor of
/// FileSegmentsHolder. complete() might check if the caller of the method
/// is the last alive holder of the segment. Therefore, complete() and destruction
/// of the file segment pointer must be done under the same cache mutex.
void complete(std::lock_guard<std::mutex> & cache_lock);
2022-04-27 18:01:59 +00:00
void completeUnlocked(std::lock_guard<std::mutex> & cache_lock, std::lock_guard<std::mutex> & segment_lock);
2022-03-17 19:29:07 +00:00
void completeImpl(
std::lock_guard<std::mutex> & cache_lock,
2022-04-07 16:46:46 +00:00
std::lock_guard<std::mutex> & segment_lock);
2022-03-17 19:29:07 +00:00
2022-04-07 16:46:46 +00:00
static String getCallerIdImpl();
2022-03-17 19:29:07 +00:00
2022-03-09 09:36:52 +00:00
void resetDownloaderImpl(std::lock_guard<std::mutex> & segment_lock);
2022-01-22 22:56:24 +00:00
2022-01-23 16:51:18 +00:00
const Range segment_range;
2022-01-22 22:56:24 +00:00
2022-01-23 16:51:18 +00:00
State download_state;
String downloader_id;
2022-01-22 22:56:24 +00:00
RemoteFileReaderPtr remote_file_reader;
LocalCacheWriterPtr cache_writer;
2022-01-22 22:56:24 +00:00
size_t downloaded_size = 0;
size_t reserved_size = 0;
mutable std::mutex mutex;
std::condition_variable cv;
2022-03-14 16:33:29 +00:00
/// Protects downloaded_size access with actual write into fs.
/// downloaded_size is not protected by download_mutex in methods which
/// can never be run in parallel to FileSegment::write() method
/// as downloaded_size is updated only in FileSegment::write() method.
/// Such methods are identified by isDownloader() check at their start,
/// e.g. they are executed strictly by the same thread, sequentially.
mutable std::mutex download_mutex;
2022-01-23 16:51:18 +00:00
Key file_key;
2022-02-18 15:38:23 +00:00
IFileCache * cache;
Poco::Logger * log;
2022-02-15 10:27:44 +00:00
2022-04-11 15:51:49 +00:00
/// "detached" file segment means that it is not owned by cache ("detached" from cache).
/// In general case, all file segments are owned by cache.
2022-02-15 10:27:44 +00:00
bool detached = false;
2022-02-23 10:12:14 +00:00
std::atomic<bool> is_downloaded{false};
2022-04-07 16:46:46 +00:00
std::atomic<size_t> hits_count = 0; /// cache hits.
std::atomic<size_t> ref_count = 0; /// Used for getting snapshot state
2022-01-22 22:56:24 +00:00
};
2022-02-18 15:38:23 +00:00
struct FileSegmentsHolder : private boost::noncopyable
2022-01-22 22:56:24 +00:00
{
2022-02-21 10:43:03 +00:00
explicit FileSegmentsHolder(FileSegments && file_segments_) : file_segments(std::move(file_segments_)) {}
2022-01-22 22:56:24 +00:00
FileSegmentsHolder(FileSegmentsHolder && other) : file_segments(std::move(other.file_segments)) {}
2022-03-17 19:29:07 +00:00
~FileSegmentsHolder();
2022-01-22 22:56:24 +00:00
2022-01-24 22:07:02 +00:00
FileSegments file_segments{};
2022-02-12 22:20:05 +00:00
String toString();
2022-01-22 22:56:24 +00:00
};
}