mirror of
https://github.com/ClickHouse/ClickHouse.git
synced 2024-11-20 14:42:02 +00:00
fix style
This commit is contained in:
parent
7f54fa726b
commit
ffaf44c1c1
390
src/Common/FileCache.h
Normal file
390
src/Common/FileCache.h
Normal file
@ -0,0 +1,390 @@
|
||||
#pragma once
|
||||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <boost/functional/hash.hpp>
|
||||
#include <boost/noncopyable.hpp>
|
||||
#include <map>
|
||||
|
||||
#include "FileCache_fwd.h"
|
||||
#include <IO/ReadSettings.h>
|
||||
#include <Common/logger_useful.h>
|
||||
#include <Common/FileSegment.h>
|
||||
#include <Common/IFileCachePriority.h>
|
||||
#include <Common/LRUFileCache.h>
|
||||
#include <Core/Types.h>
|
||||
|
||||
namespace DB
|
||||
{
|
||||
|
||||
namespace ErrorCodes
|
||||
{
|
||||
extern const int LOGICAL_ERROR;
|
||||
}
|
||||
|
||||
class IFileCache;
|
||||
using FileCachePtr = std::shared_ptr<IFileCache>;
|
||||
|
||||
/**
|
||||
* Local cache for remote filesystem files, represented as a set of non-overlapping non-empty file segments.
|
||||
*/
|
||||
class IFileCache : private boost::noncopyable
|
||||
{
|
||||
friend class FileSegment;
|
||||
friend struct FileSegmentsHolder;
|
||||
friend class FileSegmentRangeWriter;
|
||||
|
||||
public:
|
||||
using Key = UInt128;
|
||||
using Downloader = std::unique_ptr<SeekableReadBuffer>;
|
||||
|
||||
IFileCache(
|
||||
const String & cache_base_path_,
|
||||
const FileCacheSettings & cache_settings_);
|
||||
|
||||
virtual ~IFileCache() = default;
|
||||
|
||||
/// Restore cache from local filesystem.
|
||||
virtual void initialize() = 0;
|
||||
|
||||
virtual void remove(const Key & key) = 0;
|
||||
|
||||
virtual void remove() = 0;
|
||||
|
||||
static bool isReadOnly();
|
||||
|
||||
/// Cache capacity in bytes.
|
||||
size_t capacity() const { return max_size; }
|
||||
|
||||
static Key hash(const String & path);
|
||||
|
||||
String getPathInLocalCache(const Key & key, size_t offset);
|
||||
|
||||
String getPathInLocalCache(const Key & key);
|
||||
|
||||
const String & getBasePath() const { return cache_base_path; }
|
||||
|
||||
virtual std::vector<String> tryGetCachePaths(const Key & key) = 0;
|
||||
|
||||
/**
|
||||
* Given an `offset` and `size` representing [offset, offset + size) bytes interval,
|
||||
* return list of cached non-overlapping non-empty
|
||||
* file segments `[segment1, ..., segmentN]` which intersect with given interval.
|
||||
*
|
||||
* Segments in returned list are ordered in ascending order and represent a full contiguous
|
||||
* interval (no holes). Each segment in returned list has state: DOWNLOADED, DOWNLOADING or EMPTY.
|
||||
*
|
||||
* As long as pointers to returned file segments are hold
|
||||
* it is guaranteed that these file segments are not removed from cache.
|
||||
*/
|
||||
virtual FileSegmentsHolder getOrSet(const Key & key, size_t offset, size_t size) = 0;
|
||||
|
||||
/**
|
||||
* Segments in returned list are ordered in ascending order and represent a full contiguous
|
||||
* interval (no holes). Each segment in returned list has state: DOWNLOADED, DOWNLOADING or EMPTY.
|
||||
*
|
||||
* If file segment has state EMPTY, then it is also marked as "detached". E.g. it is "detached"
|
||||
* from cache (not owned by cache), and as a result will never change it's state and will be destructed
|
||||
* with the destruction of the holder, while in getOrSet() EMPTY file segments can eventually change
|
||||
* it's state (and become DOWNLOADED).
|
||||
*/
|
||||
virtual FileSegmentsHolder get(const Key & key, size_t offset, size_t size) = 0;
|
||||
|
||||
virtual FileSegmentsHolder setDownloading(const Key & key, size_t offset, size_t size) = 0;
|
||||
|
||||
virtual FileSegments getSnapshot() const = 0;
|
||||
|
||||
/// For debug.
|
||||
virtual String dumpStructure(const Key & key) = 0;
|
||||
|
||||
virtual size_t getUsedCacheSize() const = 0;
|
||||
|
||||
virtual size_t getFileSegmentsNum() const = 0;
|
||||
|
||||
protected:
|
||||
String cache_base_path;
|
||||
size_t max_size;
|
||||
size_t max_element_size;
|
||||
size_t max_file_segment_size;
|
||||
|
||||
bool is_initialized = false;
|
||||
|
||||
mutable std::mutex mutex;
|
||||
|
||||
virtual bool tryReserve(
|
||||
const Key & key, size_t offset, size_t size,
|
||||
std::lock_guard<std::mutex> & cache_lock) = 0;
|
||||
|
||||
virtual void remove(
|
||||
Key key, size_t offset,
|
||||
std::lock_guard<std::mutex> & cache_lock,
|
||||
std::lock_guard<std::mutex> & segment_lock) = 0;
|
||||
|
||||
virtual bool isLastFileSegmentHolder(
|
||||
const Key & key, size_t offset,
|
||||
std::lock_guard<std::mutex> & cache_lock,
|
||||
std::lock_guard<std::mutex> & segment_lock) = 0;
|
||||
|
||||
/// If file segment was partially downloaded and then space reservation fails (because of no
|
||||
/// space left), then update corresponding cache cell metadata (file segment size).
|
||||
virtual void reduceSizeToDownloaded(
|
||||
const Key & key, size_t offset,
|
||||
std::lock_guard<std::mutex> & cache_lock,
|
||||
std::lock_guard<std::mutex> & segment_lock) = 0;
|
||||
|
||||
void assertInitialized() const;
|
||||
|
||||
protected:
|
||||
using KeyAndOffset = std::pair<Key, size_t>;
|
||||
|
||||
struct KeyAndOffsetHash
|
||||
{
|
||||
std::size_t operator()(const KeyAndOffset & key) const
|
||||
{
|
||||
return std::hash<UInt128>()(key.first) ^ std::hash<UInt64>()(key.second);
|
||||
}
|
||||
};
|
||||
|
||||
using FileCacheRecords = std::unordered_map<KeyAndOffset, IFileCachePriority::Iterator, KeyAndOffsetHash>;
|
||||
|
||||
/// Used to track and control the cache access of each query.
|
||||
/// Through it, we can realize the processing of different queries by the cache layer.
|
||||
struct QueryContext
|
||||
{
|
||||
FileCacheRecords records;
|
||||
FileCachePriorityPtr priority;
|
||||
|
||||
size_t cache_size = 0;
|
||||
size_t max_cache_size;
|
||||
|
||||
bool skip_download_if_exceeds_query_cache;
|
||||
|
||||
QueryContext(size_t max_cache_size_, bool skip_download_if_exceeds_query_cache_)
|
||||
: priority(std::make_shared<LRUFileCache>())
|
||||
, max_cache_size(max_cache_size_)
|
||||
, skip_download_if_exceeds_query_cache(skip_download_if_exceeds_query_cache_) {}
|
||||
|
||||
void remove(const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock)
|
||||
{
|
||||
if (cache_size < size)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Deleted cache size exceeds existing cache size");
|
||||
|
||||
if (!skip_download_if_exceeds_query_cache)
|
||||
{
|
||||
auto record = records.find({key, offset});
|
||||
if (record != records.end())
|
||||
{
|
||||
record->second->remove(cache_lock);
|
||||
records.erase({key, offset});
|
||||
}
|
||||
}
|
||||
cache_size -= size;
|
||||
}
|
||||
|
||||
void reserve(const Key & key, size_t offset, size_t size, std::lock_guard<std::mutex> & cache_lock)
|
||||
{
|
||||
if (cache_size + size > max_cache_size)
|
||||
throw Exception(ErrorCodes::LOGICAL_ERROR, "Reserved cache size exceeds the remaining cache size");
|
||||
|
||||
if (!skip_download_if_exceeds_query_cache)
|
||||
{
|
||||
auto record = records.find({key, offset});
|
||||
if (record == records.end())
|
||||
{
|
||||
auto queue_iter = priority->add(key, offset, 0, cache_lock);
|
||||
record = records.insert({{key, offset}, queue_iter}).first;
|
||||
}
|
||||
record->second->incrementSize(size, cache_lock);
|
||||
}
|
||||
cache_size += size;
|
||||
}
|
||||
|
||||
void use(const Key & key, size_t offset, std::lock_guard<std::mutex> & cache_lock)
|
||||
{
|
||||
if (!skip_download_if_exceeds_query_cache)
|
||||
{
|
||||
auto record = records.find({key, offset});
|
||||
if (record != records.end())
|
||||
record->second->use(cache_lock);
|
||||
}
|
||||
}
|
||||
|
||||
size_t getMaxCacheSize() { return max_cache_size; }
|
||||
|
||||
size_t getCacheSize() { return cache_size; }
|
||||
|
||||
FileCachePriorityPtr getPriority() { return priority; }
|
||||
|
||||
bool isSkipDownloadIfExceed() { return skip_download_if_exceeds_query_cache; }
|
||||
};
|
||||
|
||||
using QueryContextPtr = std::shared_ptr<QueryContext>;
|
||||
using QueryContextMap = std::unordered_map<String, QueryContextPtr>;
|
||||
|
||||
QueryContextMap query_map;
|
||||
|
||||
bool enable_filesystem_query_cache_limit;
|
||||
|
||||
QueryContextPtr getCurrentQueryContext(std::lock_guard<std::mutex> & cache_lock);
|
||||
|
||||
QueryContextPtr getQueryContext(const String & query_id, std::lock_guard<std::mutex> & cache_lock);
|
||||
|
||||
void removeQueryContext(const String & query_id);
|
||||
|
||||
QueryContextPtr getOrSetQueryContext(const String & query_id, const ReadSettings & settings, std::lock_guard<std::mutex> &);
|
||||
|
||||
public:
|
||||
/// Save a query context information, and adopt different cache policies
|
||||
/// for different queries through the context cache layer.
|
||||
struct QueryContextHolder : private boost::noncopyable
|
||||
{
|
||||
explicit QueryContextHolder(const String & query_id_, IFileCache * cache_, QueryContextPtr context_);
|
||||
|
||||
QueryContextHolder() = default;
|
||||
|
||||
~QueryContextHolder();
|
||||
|
||||
String query_id {};
|
||||
IFileCache * cache = nullptr;
|
||||
QueryContextPtr context = nullptr;
|
||||
};
|
||||
|
||||
QueryContextHolder getQueryContextHolder(const String & query_id, const ReadSettings & settings);
|
||||
};
|
||||
|
||||
class FileCache final : public IFileCache
|
||||
{
|
||||
public:
|
||||
FileCache(
|
||||
const String & cache_base_path_,
|
||||
const FileCacheSettings & cache_settings_);
|
||||
|
||||
FileSegmentsHolder getOrSet(const Key & key, size_t offset, size_t size) override;
|
||||
|
||||
FileSegmentsHolder get(const Key & key, size_t offset, size_t size) override;
|
||||
|
||||
FileSegments getSnapshot() const override;
|
||||
|
||||
void initialize() override;
|
||||
|
||||
void remove(const Key & key) override;
|
||||
|
||||
void remove() override;
|
||||
|
||||
std::vector<String> tryGetCachePaths(const Key & key) override;
|
||||
|
||||
size_t getUsedCacheSize() const override;
|
||||
|
||||
size_t getFileSegmentsNum() const override;
|
||||
|
||||
private:
|
||||
struct FileSegmentCell : private boost::noncopyable
|
||||
{
|
||||
FileSegmentPtr file_segment;
|
||||
|
||||
/// Iterator is put here on first reservation attempt, if successful.
|
||||
IFileCachePriority::Iterator queue_iterator;
|
||||
|
||||
/// Pointer to file segment is always hold by the cache itself.
|
||||
/// Apart from pointer in cache, it can be hold by cache users, when they call
|
||||
/// getorSet(), but cache users always hold it via FileSegmentsHolder.
|
||||
bool releasable() const { return file_segment.unique(); }
|
||||
|
||||
size_t size() const { return file_segment->reserved_size; }
|
||||
|
||||
FileSegmentCell(FileSegmentPtr file_segment_, FileCache * cache, std::lock_guard<std::mutex> & cache_lock);
|
||||
|
||||
FileSegmentCell(FileSegmentCell && other) noexcept
|
||||
: file_segment(std::move(other.file_segment))
|
||||
, queue_iterator(other.queue_iterator) {}
|
||||
};
|
||||
|
||||
using FileSegmentsByOffset = std::map<size_t, FileSegmentCell>;
|
||||
using CachedFiles = std::unordered_map<Key, FileSegmentsByOffset>;
|
||||
|
||||
CachedFiles files;
|
||||
FileCachePriorityPtr main_priority;
|
||||
|
||||
FileCacheRecords stash_records;
|
||||
FileCachePriorityPtr stash_priority;
|
||||
|
||||
size_t max_stash_element_size;
|
||||
size_t enable_cache_hits_threshold;
|
||||
|
||||
Poco::Logger * log;
|
||||
|
||||
FileSegments getImpl(
|
||||
const Key & key, const FileSegment::Range & range,
|
||||
std::lock_guard<std::mutex> & cache_lock);
|
||||
|
||||
FileSegmentCell * getCell(
|
||||
const Key & key, size_t offset, std::lock_guard<std::mutex> & cache_lock);
|
||||
|
||||
FileSegmentCell * addCell(
|
||||
const Key & key, size_t offset, size_t size,
|
||||
FileSegment::State state, std::lock_guard<std::mutex> & cache_lock);
|
||||
|
||||
void useCell(const FileSegmentCell & cell, FileSegments & result, std::lock_guard<std::mutex> & cache_lock);
|
||||
|
||||
bool tryReserve(
|
||||
const Key & key, size_t offset, size_t size,
|
||||
std::lock_guard<std::mutex> & cache_lock) override;
|
||||
|
||||
bool tryReserveForMainList(
|
||||
const Key & key, size_t offset, size_t size,
|
||||
QueryContextPtr query_context,
|
||||
std::lock_guard<std::mutex> & cache_lock);
|
||||
|
||||
void remove(
|
||||
Key key, size_t offset,
|
||||
std::lock_guard<std::mutex> & cache_lock,
|
||||
std::lock_guard<std::mutex> & segment_lock) override;
|
||||
|
||||
bool isLastFileSegmentHolder(
|
||||
const Key & key, size_t offset,
|
||||
std::lock_guard<std::mutex> & cache_lock,
|
||||
std::lock_guard<std::mutex> & segment_lock) override;
|
||||
|
||||
void reduceSizeToDownloaded(
|
||||
const Key & key, size_t offset,
|
||||
std::lock_guard<std::mutex> & cache_lock,
|
||||
std::lock_guard<std::mutex> & segment_lock) override;
|
||||
|
||||
size_t getAvailableCacheSize() const;
|
||||
|
||||
void loadCacheInfoIntoMemory(std::lock_guard<std::mutex> & cache_lock);
|
||||
|
||||
FileSegments splitRangeIntoCells(
|
||||
const Key & key, size_t offset, size_t size, FileSegment::State state, std::lock_guard<std::mutex> & cache_lock);
|
||||
|
||||
String dumpStructureUnlocked(const Key & key_, std::lock_guard<std::mutex> & cache_lock);
|
||||
|
||||
void fillHolesWithEmptyFileSegments(
|
||||
FileSegments & file_segments, const Key & key, const FileSegment::Range & range, bool fill_with_detached_file_segments, std::lock_guard<std::mutex> & cache_lock);
|
||||
|
||||
FileSegmentsHolder setDownloading(const Key & key, size_t offset, size_t size) override;
|
||||
|
||||
size_t getUsedCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const;
|
||||
|
||||
size_t getAvailableCacheSizeUnlocked(std::lock_guard<std::mutex> & cache_lock) const;
|
||||
|
||||
size_t getFileSegmentsNumUnlocked(std::lock_guard<std::mutex> & cache_lock) const;
|
||||
|
||||
void assertCacheCellsCorrectness(const FileSegmentsByOffset & cells_by_offset, std::lock_guard<std::mutex> & cache_lock);
|
||||
|
||||
public:
|
||||
String dumpStructure(const Key & key_) override;
|
||||
|
||||
void assertCacheCorrectness(const Key & key, std::lock_guard<std::mutex> & cache_lock);
|
||||
|
||||
void assertCacheCorrectness(std::lock_guard<std::mutex> & cache_lock);
|
||||
|
||||
void assertPriorityCorrectness(std::lock_guard<std::mutex> & cache_lock);
|
||||
};
|
||||
|
||||
}
|
@ -41,7 +41,7 @@ public:
|
||||
|
||||
virtual void next() = 0;
|
||||
|
||||
virtual bool vaild() const = 0;
|
||||
virtual bool valid() const = 0;
|
||||
|
||||
/// Mark a cache record as recently used, it will update the priority
|
||||
/// of the cache record according to different cache algorithms.
|
||||
|
@ -486,7 +486,7 @@ bool FileCache::tryReserve(const Key & key, size_t offset, size_t size, std::loc
|
||||
};
|
||||
|
||||
/// Select the cache from the LRU queue held by query for expulsion.
|
||||
for (auto iter = query_context->getPriority()->getNewIterator(cache_lock); iter->vaild(); iter->next())
|
||||
for (auto iter = query_context->getPriority()->getNewIterator(cache_lock); iter->valid(); iter->next())
|
||||
{
|
||||
if (!is_overflow())
|
||||
break;
|
||||
@ -596,7 +596,7 @@ bool FileCache::tryReserveForMainList(
|
||||
std::vector<FileSegmentCell *> to_evict;
|
||||
std::vector<FileSegmentCell *> trash;
|
||||
|
||||
for (auto it = main_priority->getNewIterator(cache_lock); it->vaild(); it->next())
|
||||
for (auto it = main_priority->getNewIterator(cache_lock); it->valid(); it->next())
|
||||
{
|
||||
auto entry_key = it->key();
|
||||
auto entry_offset = it->offset();
|
||||
@ -752,7 +752,7 @@ void LRUFileCache::removeIfReleasable(bool remove_persistent_files)
|
||||
std::lock_guard cache_lock(mutex);
|
||||
|
||||
std::vector<FileSegmentPtr> to_remove;
|
||||
for (auto it = main_priority->getNewIterator(cache_lock); it->vaild(); it->next())
|
||||
for (auto it = main_priority->getNewIterator(cache_lock); it->valid(); it->next())
|
||||
{
|
||||
auto key = it->key();
|
||||
auto offset = it->offset();
|
||||
@ -1186,7 +1186,7 @@ void FileCache::assertCacheCorrectness(std::lock_guard<std::mutex> & cache_lock)
|
||||
void FileCache::assertPriorityCorrectness(std::lock_guard<std::mutex> & cache_lock)
|
||||
{
|
||||
[[maybe_unused]] size_t total_size = 0;
|
||||
for (auto it = main_priority->getNewIterator(cache_lock); it->vaild(); it->next())
|
||||
for (auto it = main_priority->getNewIterator(cache_lock); it->valid(); it->next())
|
||||
{
|
||||
auto key = it->key();
|
||||
auto offset = it->offset();
|
||||
|
Loading…
Reference in New Issue
Block a user