2022-01-22 22:56:24 +00:00
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <boost/noncopyable.hpp>
|
|
|
|
#include <IO/WriteBufferFromFile.h>
|
2022-01-26 18:43:23 +00:00
|
|
|
#include <Core/Types.h>
|
|
|
|
#include <IO/SeekableReadBuffer.h>
|
2022-01-22 22:56:24 +00:00
|
|
|
#include <list>
|
|
|
|
|
2022-01-30 11:35:28 +00:00
|
|
|
/// Forward declaration only: this header refers to Poco::Logger solely through a pointer.
namespace Poco
{
class Logger;
}
|
2022-01-22 22:56:24 +00:00
|
|
|
|
|
|
|
namespace DB
|
|
|
|
{
|
|
|
|
|
2022-02-18 15:38:23 +00:00
|
|
|
/// Forward declaration: FileSegment refers to the cache only through a pointer.
class IFileCache;

class FileSegment;

/// Shared-ownership handle to a file segment.
using FileSegmentPtr = std::shared_ptr<FileSegment>;

/// List of file segment handles.
using FileSegments = std::list<FileSegmentPtr>;
|
|
|
|
|
|
|
|
|
|
|
|
class FileSegment : boost::noncopyable
|
|
|
|
{
|
|
|
|
friend class LRUFileCache;
|
2022-01-24 22:07:02 +00:00
|
|
|
friend struct FileSegmentsHolder;
|
2022-01-22 22:56:24 +00:00
|
|
|
|
|
|
|
public:
|
2022-01-23 16:51:18 +00:00
|
|
|
using Key = UInt128;
|
2022-01-26 18:43:23 +00:00
|
|
|
using RemoteFileReaderPtr = std::shared_ptr<SeekableReadBuffer>;
|
|
|
|
using LocalCacheWriterPtr = std::unique_ptr<WriteBufferFromFile>;
|
2022-01-23 16:51:18 +00:00
|
|
|
|
2022-01-22 22:56:24 +00:00
|
|
|
enum class State
|
|
|
|
{
|
|
|
|
DOWNLOADED,
|
|
|
|
/**
|
|
|
|
* When file segment is first created and returned to user, it has state EMPTY.
|
|
|
|
* EMPTY state can become DOWNLOADING when getOrSetDownaloder is called successfully
|
|
|
|
* by any owner of EMPTY state file segment.
|
|
|
|
*/
|
|
|
|
EMPTY,
|
|
|
|
/**
|
|
|
|
* A newly created file segment never has DOWNLOADING state until call to getOrSetDownloader
|
|
|
|
* because each cache user might acquire multiple file segments and reads them one by one,
|
|
|
|
* so only user which actually needs to read this segment earlier than others - becomes a downloader.
|
|
|
|
*/
|
|
|
|
DOWNLOADING,
|
|
|
|
/**
|
|
|
|
* Space reservation for a file segment is incremental, i.e. downaloder reads buffer_size bytes
|
|
|
|
* from remote fs -> tries to reserve buffer_size bytes to put them to cache -> writes to cache
|
2022-01-23 16:51:18 +00:00
|
|
|
* on successful reservation and stops cache write otherwise. Those, who waited for the same file
|
|
|
|
* file segment, will read downloaded part from cache and remaining part directly from remote fs.
|
2022-01-22 22:56:24 +00:00
|
|
|
*/
|
|
|
|
PARTIALLY_DOWNLOADED_NO_CONTINUATION,
|
|
|
|
/**
|
2022-01-23 16:51:18 +00:00
|
|
|
* If downloader did not finish download of current file segment for any reason apart from running
|
2022-01-22 22:56:24 +00:00
|
|
|
* out of cache space, then download can be continued by other owners of this file segment.
|
|
|
|
*/
|
|
|
|
PARTIALLY_DOWNLOADED,
|
|
|
|
/**
|
|
|
|
* If file segment cannot possibly be downloaded (first space reservation attempt failed), mark
|
|
|
|
* this file segment as out of cache scope.
|
|
|
|
*/
|
|
|
|
SKIP_CACHE,
|
|
|
|
};
|
|
|
|
|
2022-01-26 18:43:23 +00:00
|
|
|
FileSegment(
|
|
|
|
size_t offset_, size_t size_, const Key & key_,
|
2022-02-18 15:38:23 +00:00
|
|
|
IFileCache * cache_, State download_state_);
|
2022-01-22 22:56:24 +00:00
|
|
|
|
2022-01-23 16:51:18 +00:00
|
|
|
State state() const;
|
2022-01-22 22:56:24 +00:00
|
|
|
|
2022-01-26 18:43:23 +00:00
|
|
|
static String stateToString(FileSegment::State state);
|
2022-01-22 22:56:24 +00:00
|
|
|
|
|
|
|
/// Represents an interval [left, right] including both boundaries.
|
|
|
|
struct Range
|
|
|
|
{
|
|
|
|
size_t left;
|
|
|
|
size_t right;
|
|
|
|
|
|
|
|
Range(size_t left_, size_t right_) : left(left_), right(right_) {}
|
|
|
|
|
2022-01-26 09:35:46 +00:00
|
|
|
bool operator==(const Range & other) const { return left == other.left && right == other.right; }
|
|
|
|
|
2022-01-22 22:56:24 +00:00
|
|
|
size_t size() const { return right - left + 1; }
|
|
|
|
|
2022-02-18 15:38:23 +00:00
|
|
|
String toString() const { return fmt::format("[{}, {}]", std::to_string(left), std::to_string(right)); }
|
2022-01-22 22:56:24 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
const Range & range() const { return segment_range; }
|
|
|
|
|
2022-01-23 16:51:18 +00:00
|
|
|
const Key & key() const { return file_key; }
|
2022-01-22 22:56:24 +00:00
|
|
|
|
2022-01-23 16:51:18 +00:00
|
|
|
size_t offset() const { return range().left; }
|
2022-01-22 22:56:24 +00:00
|
|
|
|
|
|
|
State wait();
|
|
|
|
|
|
|
|
bool reserve(size_t size);
|
|
|
|
|
|
|
|
void write(const char * from, size_t size);
|
|
|
|
|
2022-01-26 18:43:23 +00:00
|
|
|
RemoteFileReaderPtr getRemoteFileReader();
|
2022-01-22 22:56:24 +00:00
|
|
|
|
2022-01-26 18:43:23 +00:00
|
|
|
void setRemoteFileReader(RemoteFileReaderPtr remote_file_reader_);
|
2022-01-26 09:35:46 +00:00
|
|
|
|
2022-01-22 22:56:24 +00:00
|
|
|
String getOrSetDownloader();
|
|
|
|
|
2022-01-26 18:43:23 +00:00
|
|
|
String getDownloader() const;
|
|
|
|
|
2022-01-22 22:56:24 +00:00
|
|
|
bool isDownloader() const;
|
|
|
|
|
2022-01-26 18:43:23 +00:00
|
|
|
static String getCallerId();
|
|
|
|
|
2022-02-18 15:38:23 +00:00
|
|
|
size_t getDownloadOffset() const;
|
2022-01-23 16:51:18 +00:00
|
|
|
|
2022-01-26 18:43:23 +00:00
|
|
|
void completeBatchAndResetDownloader();
|
|
|
|
|
2022-02-15 09:11:33 +00:00
|
|
|
void complete(State state, bool complete_because_of_error = false);
|
2022-01-22 22:56:24 +00:00
|
|
|
|
2022-02-02 14:25:25 +00:00
|
|
|
String getInfoForLog() const;
|
|
|
|
|
2022-01-22 22:56:24 +00:00
|
|
|
private:
|
2022-01-24 22:07:02 +00:00
|
|
|
size_t availableSize() const { return reserved_size - downloaded_size; }
|
2022-01-23 16:51:18 +00:00
|
|
|
bool lastFileSegmentHolder() const;
|
2022-01-24 22:07:02 +00:00
|
|
|
void complete();
|
|
|
|
void completeImpl(std::lock_guard<std::mutex> & /* segment_lock */);
|
2022-01-22 22:56:24 +00:00
|
|
|
|
2022-01-23 16:51:18 +00:00
|
|
|
const Range segment_range;
|
2022-01-22 22:56:24 +00:00
|
|
|
|
2022-01-23 16:51:18 +00:00
|
|
|
State download_state;
|
2022-01-26 18:43:23 +00:00
|
|
|
String downloader_id;
|
2022-01-22 22:56:24 +00:00
|
|
|
|
2022-01-26 18:43:23 +00:00
|
|
|
RemoteFileReaderPtr remote_file_reader;
|
|
|
|
LocalCacheWriterPtr cache_writer;
|
2022-01-22 22:56:24 +00:00
|
|
|
|
|
|
|
size_t downloaded_size = 0;
|
|
|
|
size_t reserved_size = 0;
|
|
|
|
|
|
|
|
mutable std::mutex mutex;
|
|
|
|
std::condition_variable cv;
|
|
|
|
|
2022-01-23 16:51:18 +00:00
|
|
|
Key file_key;
|
2022-02-18 15:38:23 +00:00
|
|
|
IFileCache * cache;
|
2022-01-30 11:35:28 +00:00
|
|
|
|
|
|
|
Poco::Logger * log;
|
2022-02-15 10:27:44 +00:00
|
|
|
|
|
|
|
bool detached = false;
|
2022-01-22 22:56:24 +00:00
|
|
|
};
|
|
|
|
|
2022-02-18 15:38:23 +00:00
|
|
|
struct FileSegmentsHolder : private boost::noncopyable
|
2022-01-22 22:56:24 +00:00
|
|
|
{
|
2022-02-21 10:43:03 +00:00
|
|
|
explicit FileSegmentsHolder(FileSegments && file_segments_) : file_segments(std::move(file_segments_)) {}
|
2022-01-22 22:56:24 +00:00
|
|
|
FileSegmentsHolder(FileSegmentsHolder && other) : file_segments(std::move(other.file_segments)) {}
|
|
|
|
|
|
|
|
~FileSegmentsHolder()
|
|
|
|
{
|
2022-02-12 22:20:05 +00:00
|
|
|
std::lock_guard lock(mutex);
|
|
|
|
|
2022-01-23 16:51:18 +00:00
|
|
|
/// In CacheableReadBufferFromRemoteFS file segment's downloader removes file segments from
|
|
|
|
/// FileSegmentsHolder right after calling file_segment->complete(), so on destruction here
|
|
|
|
/// remain only uncompleted file segments.
|
2022-01-22 22:56:24 +00:00
|
|
|
|
|
|
|
for (auto & segment : file_segments)
|
2022-01-23 16:51:18 +00:00
|
|
|
segment->complete();
|
2022-01-22 22:56:24 +00:00
|
|
|
}
|
|
|
|
|
2022-01-24 22:07:02 +00:00
|
|
|
FileSegments file_segments{};
|
2022-02-12 22:20:05 +00:00
|
|
|
|
|
|
|
std::mutex mutex;
|
|
|
|
String toString();
|
2022-01-22 22:56:24 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
}
|